[George Peter Staplin] Nov 28, 2008 - A hashing trie is a type of [trie]. They are much more scalable than hash tables, though they use a lot more memory. When I originally created my first trie (in C), I called it a pig structure, as I was unaware of the trie data structure. There are now 2 versions below. I have added a trie-unset and support for empty values. Caveat: the trie-get could be done with a single lindex rather than the foreach loop. Removing unused paths in the trie might be beneficial at some point, beyond just trie-unset. ---- #By George Peter Staplin # #Revision 2 #This is a simple hashing trie in Tcl. #A trie uses the actual values of a key for the path to the data. #This uses 4 bits per path for 16 directions per node. # #A would generate a path of: 1 4 #AB would generate a path of: 1 4 2 4 package require Tcl 8.5 ;#uses {*} proc trie-path str { set r [list] foreach c [split $str ""] { set n [scan $c %c] while {$n > 0} { #We use 4 bits for each path, so we have 16 nodes. set p [expr {$n & 15}] #Append this to the path. lappend r $p #Shift 4 bits to the right now that we got 2^4 - 1 with our & earlier. set n [expr {$n >> 4}] } } return $r } proc trie-create {} { #We create 17 list items. The 17th is for the value of the association. return [lrepeat 17 [list]] } proc trie-set {tvar key value} { upvar 1 $tvar t set pathlist [trie-path $key] #First build up a path of valid sublists. set path [list] foreach p $pathlist { lappend path $p set subt [lindex $t {*}$path] if {![llength $subt]} { lset t {*}$path [trie-create] } } #We use a 2 element list to differentiate from an empty string value. lset t {*}$pathlist 16 [list DATA $value] #Enable this to see the actual serialized structure of the trie. #puts $t } proc trie-get {t key valuevar} { upvar 1 $valuevar value set pathlist [trie-path $key] set path [list] foreach p $pathlist { lappend path $p if {![llength [lindex $t {*}$path]]} { #This key's path is not in the trie. return 0 } } set valuelist [lindex $t {*}$pathlist 16] if {2 != [llength $valuelist]} { #The value is not a valid [list DATA $value]. return 0 } set value [lindex $valuelist 1] #We found the value associated with the key. return 1 } proc trie-unset {tvar key} { upvar 1 $tvar t set pathlist [trie-path $key] #Here we rely on lindex returning an empty list for an invalid offset. set valuelist [lindex $t {*}$pathlist 16] if {2 != [llength $valuelist]} { #The value is not set, or the key is invalid. return } #We have no need for a NULL value, just use an empty list. lset t {*}$pathlist [list] } proc main {} { set t [trie-create] set key1 "Peachy!" set key2 "Harder! Better! Faster! Stronger!" set key3 "words of wisdom" trie-set t $key1 123 if {[trie-get $t $key1 value]} { puts "$value is associated with $key1" } if {[trie-get $t "invalid" value]} { puts stderr "err invalid key generated: $value" } trie-set t $key2 456 if {[trie-get $t $key2 value]} { puts "$value is associated with $key2" } trie-unset t $key2 if {[trie-get $t $key2 value]} { puts stderr "err got $value for an unset key!" } puts $t trie-set t $key3 [list "Don't bite off more than you can chew." \ "The cost of wisdom is pain."] puts $t if {[trie-get $t $key3 value]} { puts "'$key3' is associated with the value: $value" } } main ---- #By George Peter Staplin # #This is a simple hashing trie in Tcl. #A trie uses the actual values of a key for the path to the data. #This uses 4 bits per path for 16 directions per node. # #A would generate a path of: 1 4 #AB would generate a path of: 1 4 2 4 package require Tcl 8.5 ;#uses {*} proc trie-path str { set r [list] foreach c [split $str ""] { set n [scan $c %c] while {$n > 0} { #We use 4 bits for each path, so we have 16 nodes. set p [expr {$n & 15}] #Append this to the path. lappend r $p #Shift 4 bits to the right now that we got 2^4 - 1 with our & earlier. set n [expr {$n >> 4}] } } return $r } proc trie-create {} { #We create 17 list items. The 17th is for the value of the association. return [lrepeat 17 [list]] } proc trie-set {tvar key value} { upvar 1 $tvar t set pathlist [trie-path $key] #First build up a path of valid sublists. set path [list] foreach p $pathlist { lappend path $p set subt [lindex $t {*}$path] if {![llength $subt]} { lset t {*}$path [trie-create] } } lset t {*}$pathlist 16 $value #Enable this to see the actual serialized structure of the trie. #puts $t } proc trie-get {t key valuevar} { upvar 1 $valuevar value set pathlist [trie-path $key] set path [list] foreach p $pathlist { lappend path $p if {![llength [lindex $t {*}$path]]} { #This key's path is not in the trie. return 0 } } set value [lindex $t {*}$pathlist 16] #We found the value associated with the key. return 1 } proc main {} { set t [trie-create] set key1 "Peachy!" set key2 "Harder! Better! Faster! Stronger!" trie-set t $key1 123 if {[trie-get $t $key1 value]} { puts "$value is associated with $key1" } if {[trie-get $t "invalid" value]} { puts "err invalid key generated: $value" } trie-set t $key2 456 if {[trie-get $t $key2 value]} { puts "$value is associated with $key2" } } main ---- [kruzalex] With changes allowing run it under Tcl 8.4 and without forcing 17th as the value of the association see the result after running the code below: proc lrepeat {value number} { set res [list] for {set i 0} {$i<$number} {incr i} { lappend res $value } set res } ;# RS proc trie-create {} { return [lrepeat [list] 17] } proc trie-set {tvar key value} { upvar 1 $tvar t set pathlist [trie-path $key] foreach p $pathlist { lappend path $p set subt [lindex $t $path] if {![llength $subt]} { lset t $path [trie-create] } } lset t $pathlist $value } proc trie-get {t key} { #upvar 1 $valuevar value set pathlist [trie-path $key] set path [list] foreach p $pathlist { lappend path $p if {![llength [lindex $t $path]]} { return 0 } } set value [lindex $t $pathlist] return $value } proc main {} { set t [trie-create] set key "A" trie-set t $key result #if {[trie-get $t $key value]} { # puts "$value is associated with $key" #} puts [trie-get $t $key] } main ---- 28/01/10 [CMcC] has written another trie implementation ... no reflection on the foregoing. This one might be a Patricia Trie. # Trie data structure package provide Trie 1.0 if {[catch {package require Debug}]} { proc Debug.trie {args} { #puts stderr [uplevel subst $args] } } oo::class create ::Trie { variable trie id # search for longest prefix, return matching prefix, element and suffix method matches {t what} { set matches {} set wlen [string length $what] foreach k [lsort -decreasing -dictionary [dict keys $t]] { set klen [string length $k] set match "" for {set i 0} {$i < $klen && $i < $wlen && [string index $k $i] eq [string index $what $i] } {incr i} { append match [string index $k $i] } if {$match ne ""} { lappend matches $match $k } } Debug.trie {matches: $what -> $matches} if {[dict size $matches]} { # find the longest matching prefix set match [lindex [lsort -dictionary [dict keys $matches]] end] set mel [dict get $matches $match] set suffix [string range $what [string length $match] end] return [list $match $mel $suffix] } else { return {} ;# no matches } } # return next unique id if there's no proffered value method id {value} { if {$value} { return $value } else { return [incr id] } } # insert an element with a given optional value into trie # along path given by $args (no need to specify) method insert {what {value 0} args} { if {[llength $args]} { set t [dict get $trie {*}$args] } else { set t $trie } if {[dict exists $t $what]} { Debug.trie {$what is an exact match on path ($args $what)} if {[catch {dict size [dict get $trie {*}$args $what]} size]} { # the match is a leaf - we're done } else { # the match is a dict - we have to add a null dict set trie {*}$args $what "" [my id $value] } return ;# exact match - no change } # search for longest prefix set match [my matches $t $what] if {![llength $match]} { ;# no matching prefix - new element Debug.trie {no matching prefix of '$what' in $t - add it on path ($args $what)} dict set trie {*}$args $what [my id $value] return } lassign $match match mel suffix ;# prefix, element of match, suffix if {$match ne $mel} { # the matching element shares a prefix, but has a variant suffix # it must be split Debug.trie {splitting '$mel' along '$match'} set melC [dict get $t $mel] dict unset trie {*}$args $mel dict set trie {*}$args $match [string range $mel [string length $match] end] $melC } if {[catch {dict size [dict get $trie {*}$args $match]} size]} { # the match is a leaf - must be split if {$match eq $mel} { # the matching element shares a prefix, but has a variant suffix # it must be split Debug.trie {splitting '$mel' along '$match'} set melC [dict get $t $mel] dict unset trie {*}$args $mel dict set trie {*}$args $match "" $melC } Debug.trie {'$mel' is the longest prefix '$match' but was a leaf - insert '$suffix'} set melid [dict get $t $mel] dict set trie {*}$args $match $suffix [my id $value] } else { # it's a dict - keep searching Debug.trie {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} my insert $suffix $value {*}$args $match } } # find a path matching an element $what # if the element's not found, return the nearest path method find_path {what args} { if {[llength $args]} { set t [dict get $trie {*}$args] } else { set t $trie } if {[dict exists $t $what]} { Debug.trie {$what is an exact match on path ($args $what)} return [list {*}$args $what] ;# exact match - no change } # search for longest prefix set match [my matches $t $what] if {![llength $match]} { return $args } lassign $match match mel suffix ;# prefix, element of match, suffix if {$match ne $mel} { # the matching element shares a prefix, but has a variant suffix # no match return $args } if {[catch {dict size [dict get $trie {*}$args $match]} size] || $size == 0} { # got to a non-matching leaf - no match return $args } else { # it's a dict - keep searching Debug.trie {'$mel' is the longest prefix '$match' and is a dict - search for '$suffix' on path ($args $match)} return [my find_path $suffix {*}$args $match] } } # given a trie, which may have been modified by deletion, # optimize it by removing empty nodes and coalescing singleton nodes method optimize {args} { if {[llength $args]} { set t [dict get $trie {*}$args] } else { set t $trie } if {[catch {dict size $t} size]} { Debug.trie {optimize leaf '$t' along '$args'} # leaf - leave it } else { switch -- $size { 0 { Debug.trie {optimize empty dict ($t) along '$args'} if {[llength $args]} { dict unset trie {*}$args } } 1 { Debug.trie {optimize singleton dict ($t) along '$args'} lassign $t k v if {[llength $args]} { dict unset trie {*}$args } append args $k if {[llength $v]} { dict set trie {*}$args $v } my optimize {*}$args } default { Debug.trie {optimize dict ($t) along '$args'} dict for {k v} $t { my optimize {*}$args $k } } } } } # delete element $what from trie method delete {what} { set path [my find_path $what] if {[join $path ""] eq $what} { Debug.trie {del '$what' along ($path) was [dict get $trie {*}$path]} if {[catch {dict size [dict get $trie {*}$path]} size]} { # got to a matching leaf - delete it dict unset trie {*}$path set path [lrange $path 0 end-1] } else { dict unset trie {*}$path "" } my optimize ;# remove empty and singleton elements } else { # nothing to delete, guess we're done } } # find the value of element $what in trie, # error if not found method find {what} { set path [my find_path $what] if {[join $path ""] eq $what} { if {[catch {dict size [dict get $trie {*}$path]} size]} { # got to a matching leaf - done return [dict get $trie {*}$path] } else { return [dict get $trie {*}$path ""] } } else { error "'$what' not found" } } # dump the trie as a string method dump {} { return $trie } # return a string rep of the trie sorted in dict order method order {{t {}}} { if {![llength $t]} { set t $trie } elseif {[llength $t] == 1} { return $t } set acc {} foreach key [lsort -dictionary [dict keys $t]] { lappend acc $key [my order [dict get $t $key]] } return $acc } # return the trie as a dict of names with values method flatten {{t {}} {prefix ""}} { if {![llength $t]} { set t $trie } elseif {[llength $t] == 1} { return [list $prefix $t] } set acc {} foreach key [dict keys $t] { lappend acc {*}[my flatten [dict get $t $key] $prefix$key] } return $acc } # overwrite the trie method set {t} { set trie $t } constructor {args} { set trie {} set id 0 foreach a $args { my insert $a } } } if {[info script] eq $argv0} { set data { rubber romane eunt domus romanus romulus rubens ruber rube rubicon rubicundus roman an antidote anecdotal ant all alloy allotrope allot aloe are ate be cataract catatonic catenary } ::Trie create example {*}$data puts "TRIE: [example dump]" puts "OTRIE: [example order]" example set [example order] puts "FLAT: [example flatten]" foreach d $data { puts "$d -> '[example find_path $d]' -> [example find $d]" } foreach d $data { example delete $d puts "DEL '$d': [example dump]" } puts "TRIE: [example dump]" } ---- !!!!!! %| [Category Data Structure] |% !!!!!!