DESCRIPTION
A snit for working with the Gene Ontology framework [L1 ]. Candidate for including into the coming biotcl package.
# Author : Dr. Detlef Groth, MPIMG Berlin # Created By : Dr. Detlef Groth # Created : 2005-02-21 # Last Modified : <050222.0600> # # Description package require snit 0.97 package require oomk snit::type GeneOntol { # public options option -obofile -default "" -configuremethod SetObofile option -mkfile "" # private variables variable go variable db # views variable pvNames variable pvParents variable pvAltids variable pvObsoletes variable GETTREE # `GeneOntol gon -obofile obofile ?-mkfile metakitfile?' -- # constructor for the GeneOntol type # Arguments: # `-obofile filename' the Gene Ontology obofile # `?-mkfile?' the metakit databasefile, defaults to obofile.mk # ----------------------------------------------- constructor {args} { $self configurelist $args if {$options(-mkfile) eq ""} { set options(-mkfile) $options(-obofile).mk } $self Init } destructor { if {[info exists db]} { $db close } } # public methods (are lowercase) # `gon' isAlt -- # info if an given GO-id is an alternative id # Arguments: # `id' a GO-id # Returns: # true if id is an alternative GO-id or false if it is not # ------------------------------------------------------------ method isAlt {id} { [$pvAltids select -exact altid $id -count 1] as pSel if {[$pSel size] > 0} { return true } else { return false } } # `gon' alt2id -- # getting an the right GO-id for an alternative one # Arguments: # `id' a GO id # Returns: # returns the right GO-id for an alternative one # ------------------------------------------------------------ method alt2id {id} { [$pvAltids select -exact altid $id -count 1] as pSel if {[$pSel size] > 0} { return [$pSel get 0 id] } else { return "" } } # `gon' isObsolete -- # info if an GO-id is an older obsolete GO-id # Arguments: # `id' a GO id # Returns: # true if id is an obsolete GO-id, false if it its not # ------------------------------------------------------------ method isObsolete {id} { [$pvObsoletes select -exact id $id -count 1] as pSel if {[$pSel size] > 0} { return true } else { return false } } # `gon' parents -- # getting a list of parents for a certain GO-id # Arguments: # `id' a GO-id # Returns: # a list of lists with the keys parent, relation and id foreach listitem # ------------------------------------------------------------ method parents {id} { set parents [list] [$pvParents select -exact id $id] as pSel $pSel loop c { lappend parents [array get c] } return $parents }; # `gon' children -- # getting a list of children for a certain GO-id # Arguments: # `id' a GO-id # Returns: # a list of lists with the keys parent, relation and id foreach listitem # ------------------------------------------------------------ method children {id} { set children [list] if {[$self isAlt $id]} { set id [$self alt id] } [$pvParents select -exact parent $id] as pSel $pSel loop c { lappend children [array get c] } return $children } # `gon' name -- # getting the name for a GO-id # Arguments: # `id' a GO-id # Returns: # the name of the GO-id, or an empty string if no valid GO-id # ------------------------------------------------------------ method name {id} { if {[$self isAlt $id]} { set id [$self alt $id] } [$pvNames select -count 1 -exact id $id] as pSel if {[$pSel size] > 0} { return [$pSel get 0 name] } else { return "" } } # `gon' id -- # getting the id for a GO-term # Arguments: # `name' a GO-term # Returns: # the GO-id for the given term # ------------------------------------------------------------ method id {name} { [$pvNames select -count 1 -exact name $name] as pSel if {[$pSel size] > 0} { return [$pSel get 0 id] } else { return "" } } # `gon' like -- # performs a glob style pattern search against all GO-terms # Arguments: # `pattern' a glob style pattern # Returns: # a list of lists with the keys id,name foreach matching term # ------------------------------------------------------------ method like {pattern} { set res [list] [$pvNames select -glob name $pattern] as pSel $pSel loop c { lappend res [array get c] } return $res } # `gon' getTree -- # method to get all GO-ids forming a full topdown tree for a certain # GO-id # Arguments: # `id' a GO-id # Returns: # a list of GO-ids, which constructs the full tree # ------------------------------------------------------------ method getTree {id {stack 0}} { if {$stack == 0} { array unset GETTREE } incr stack set GETTREE($id) 1 if {![lsearch [list GO:0003674 GO:0005575 GO:0008150] $id] >= 0} { foreach par [$self parents $id] { array set c $par $self getTree $c(parent) $stack } } return [array names GETTREE] } method getDefinition {id} { error "method getDefinition not yet implemented" # use an index for the obofile # (index done inside Init) # jump via seek inside obofile # extract the definition } method getInfo {} { error "method getInfo not yet implemented" # inside the mkfile during Init # collect number of MF,BP,CC terms # ua } # private methods (are uppercase) method SetObofile {option value} { if {$options($option) ne ""} { error "option $option can only be set a object creation time" } set options($option) $value #$self Init } method Init {} { if {[info exists db]} { $db close } if {[file exists "$options(-mkfile)"]} { puts stderr filexists set db [mkstorage %AUTO% $options(-mkfile)] [$db view names] as pvNames [$db view parents] as pvParents [$db view altids] as pvAltids [$db view obsoletes] as pvObsoletes return } puts stderr indexcreation set prog [Progress %AUTO% -file $options(-obofile)] set db [mkstorage %AUTO% $options(-mkfile)] $db layout names {id name} $db layout altids {id altid} $db layout parents {id relation parent} $db layout obsoletes {id} [$db view names] as pvNames [$db view parents] as pvParents [$db view altids] as pvAltids [$db view obsoletes] as pvObsoletes set x 0 set id "" if [catch {open $options(-obofile) r} infh] { error "Cannot open $options(-obofile) : $infh" } while {[gets $infh line] >= 0} { if {[incr x] % 500 == 0} { $prog progress [tell $infh] } if {[regexp {^id: *(GO:[0-9]{7})} $line -> id]} { #puts stderr $id } elseif {[regexp {^alt_id: *(GO:[0-9]{7})} $line -> alt]} { $pvAltids append id $id altid $alt } elseif {[regexp {^name: *(.+)} $line -> go(name,$id)]} { $pvNames append id $id name $go(name,$id) } elseif {[regexp {^(is_a:|relationship: part_of) *(GO:[0-9]{7})} $line -> kind goid]} { regsub {relationship: } $kind "" kind $pvParents append id $id relation $kind parent $goid } elseif {[regexp {^is_obsolete: *true} $line ]} { $pvObsoletes append id $id } } $db commit } } # test proc test {} { source [file join [file dirname [info script]] Progress.tcl] set datadir e:/links/project/goblet/data/ foreach version {2004-05-01 2004-09-01 2005-02-01} { set t [clock seconds] set gon [GeneOntol %AUTO% -obofile $datadir/goblet-databases/gene_ontology.obo.$version] puts stderr "Loading version $version in [expr [clock seconds] - $t] seconds" puts [$gon parents GO:0003720] puts [$gon children GO:0003674] puts [$gon name GO:0003674] foreach go {GO:0003674 GO:0003720} { puts "Children of $go :" foreach child [$gon children $go] { array set c $child set name [$gon name $c(id)] puts " $c(id) $go $name [$gon id $name]" } puts stderr "[expr [clock seconds] - $t] seconds" } foreach go [$gon like membrane*] { puts stderr " pattern $go [$gon name $go]" } puts stderr [$gon name GO:0003674] puts stderr [$gon id [$gon name GO:0003674]] puts stderr [$gon children GO:0003674] puts stderr [$gon name GO:0048201] puts stderr [$gon id [$gon name GO:0048201]] puts stderr [$gon parents GO:0048201] puts stderr "obsolet: GO:0000211 [$gon name GO:0000211]" puts stderr [$gon parents GO:0000211] puts stderr "obsolet-children: GO:0000211" puts stderr [$gon children GO:0000211] puts stderr "is_alt GO:0004752" puts stderr [$gon name GO:0004752] puts stderr [$gon parents GO:0004752] puts stderr "is_alt children GO:0004752" puts stderr [$gon children GO:0004752] puts stderr "[expr [clock seconds] - $t] seconds" puts stderr "A tree for GO:0048201" puts stderr [$gon getTree GO:0048201] } }