package require snit 0.97 package require oomk package require md5pure snit::type BlastFile { # public options # BLAST resultfile option -filename "" # metakit indexfile optional argument # defaults to $options(-filename).mk option -indexfile "" # number of hits stored inside the indexfile option -hits 10 # private variables variable db # the views inside the database variable pvPositions variable pvTop1 variable pvTopX constructor {args} { $self configurelist $args if {$options(-filename) eq ""} { error "`-filename blastfile' argument is required" } if {$options(-indexfile) eq ""} { set options(-indexfile) $options(-filename).mk } set db [mkstorage %AUTO% $options(-indexfile)] if {[file size $options(-indexfile)] < 2 } { $self CreateIndex } else { [$db view positions] as pvPositions [$db view top1] as pvTop1 [$db view topX] as pvTopX } } destructor { $db close } # public methods (are lowercase) method getQueryIds {} { # returns a list with query ids # each item is a list with the # * query id # * a flag indicating if this query id has a hit #[$db view top1] as pvTop1 set res [list] $pvTop1 loop c { lappend res [array get c] } return $res } method getMatchIds {id args} { set arg(-md5) false array set arg $args set res [list] if {$arg(-md5)} { [$pvPositions select -exact md5 $id] as pSel if {[$pSel size] > 0} { set id [$pSel get 0 id] } else { return [list] } } [$pvTopX select -exact id $id] as pSel $pSel loop c { lappend res [array get c] } return $res } method hasHit {id args} { # return true or false # if we have BlastHits for a certain queryid set arg(-md5) false array set arg $args if {$arg(-md5)} { [$pvPositions select -exact md5 $id] as pSel if {[$pSel size] > 0} { set id [$pSel get 0 id] } else { return false } } [$pvTop1 select -exact id $id] as pSel if {[$pSel size] > 0} { set res [$pSel get 0 match] } else { set res 0 } if {$res eq "0"} { return false } else { return true } } method getBlast {id args} { set arg(-md5) false array set arg $args set blast "" if {$arg(-md5)} { [$pvPositions select -exact md5 $id] as pSel } else { [$pvPositions select -exact id $id] as pSel } if {[$pSel size] > 0} { set pos [$pSel get 0 pos] } else { return "" } if [catch {open $options(-filename) r} infh] { error "Cannot open $options(-filename): $infh" } else { seek $infh $pos set flag false while {[gets $infh line] >= 0} { if {[regexp {^T?BLAST[XNP]} $line] && $flag} { break } elseif {[regexp {^T?BLAST[XNP]} $line]} { set flag true } append blast "$line\n" } close $infh } return $blast } # private methods (are uppercase) method CreateIndex {} { if [catch {open $options(-filename) r} infh] { error "Cannot open $options(-filename): $infh" } else { $db layout positions {id md5 pos} $db layout top1 {id match score:I evalue} $db layout topX {id match score:I evalue} [$db view positions] as pvPositions [$db view top1] as pvTop1 [$db view topX] as pvTopX set hit -1 set prog [Progress %AUTO% -file $options(-filename)] while {[gets $infh line] >= 0} { if {[regexp {^T?BLAST[XNP]} $line]} { set pos [tell $infh] $prog progress $pos incr pos [expr [string length $line] * -1 - 1] } elseif {[regexp {^Query= ([^\s]+)} $line -> id]} { set md5 [md5pure::md5 $id] $pvPositions append id $id md5 $md5 pos $pos } elseif {[regexp {No hits found} $line]} { $pvTop1 append id $id match 0 score 0 evalue 1 set hit -1 } elseif {[regexp {^Sequences producing significant alignments} $line]} { set hit 0 } elseif {[regexp {^>([^\s]+)(.+)} $line]} { set hit -1 ; } elseif {$hit >= 0 && [regexp {^([^\s]+).+\s([0-9]+)\s+([-e0-9\.]+)} $line -> matchID score evalue]} { incr hit regsub {^e} $evalue {1e} evalue if {$hit == 1} { $pvTop1 append id $id match $matchID score $score evalue $evalue } if {$hit <= $options(-hits)} { $pvTopX append id $id match $matchID score $score evalue $evalue } } } close $infh $db commit } } } # test proc test {} { source [file join [file dirname [info script]] Progress.tcl] BlastFile bfp -filename E:/links/project/goblet/data/goblet-results/20050209-143332--nrprot-test_blastp_nrprot #puts [bfp getBlast 27ccfed00cacd48a458a2057eec4e3c8 -md5 true] foreach x {0 1 2 3 4} { puts "ci010013000$x [bfp hasHit ci010013000$x]" } puts "27ccfed00cacd48a458a2057eec4e3c8 [bfp hasHit 27ccfed00cacd48a458a2057eec4e3c8 -md5 true]" puts [bfp getQueryIds] puts [bfp getMatchIds ci0100130000] }