Currently just a starter done by [DDG]. Please add comments and suggestions. package require snit 0.97 package require oomk package require md5pure # a type for retrieving sequences from fasta files # usage: # FastaFile ff -filename filename # puts [ff getSeq id] snit::type FastaFile { option -filename "" option -indexfile "" constructor {args} { $self configurelist $args if {$options(-indexfile) eq ""} { set options(-indexfile) $options(-filename).mk } if {![file exists $options(-indexfile)]} { $self CreateIndex } }; method CreateIndex {} { if [catch {open $options(-filename) r} infh] { puts stderr "Cannot open $options(-filename): $infh" return 0 } else { set db [mkstorage %AUTO% $options(-indexfile)] $db layout positions {id md5 pos} [$db view positions] as pvPositions while {[gets $infh line] >= 0} { if {[regexp {^>([^\s]+)} $line -> id]} { set pos [tell $infh] incr pos [expr [string length $line] * -1 - 2] set md5 [md5pure::md5 $id] $pvPositions append id $id md5 $md5 pos $pos } } close $infh $db commit $db close } }; method getSeq {id args} { # get the sequence either via id or via the md5 hash of the id set arg(-md5) false array set arg $args set db [mkstorage %AUTO% $options(-indexfile)] [$db view positions] as pvPositions if {$arg(-md5)} { [$pvPositions select -exact md5 $id] as pSel if {[$pSel size] > 0} { set id [$pSel get 0 id] } } [$pvPositions select -exact id $id] as pSel if {[$pSel size] > 0} { set pos [$pSel get 0 pos] } if {[info exists pos]} { incr pos -1 if [catch {open $options(-filename) r} infh] { puts stderr "Cannot open $options(-filename): $infh" return 0 } else { seek $infh $pos set flag false while {[gets $infh line] >= 0} { if {[regexp {^>} $line] && $flag} { break } elseif {[regexp {^>} $line]} { set flag true } append seq "$line\n" } close $infh return $seq } } else { return "" } }; } proc test {} { set file 20050210-134509--ciona2.seq set sf [FastaFile %AUTO% -filename /project/goblet/data/goblet-results/$file] puts [$sf getSeq ci0100130002] # example with a md5hash puts [$sf getSeq 1d206c9b9b91badbd93fb9f78594a9d4 -md5 true] } ---- [Category Science]