Version 2 of A Hashing Trie in Tcl

Updated 2008-11-28 20:00:18 by nem

George Peter Staplin Nov 28, 2008 - A hashing trie is a type of trie. They are much more scalable than hash tables, though they use a lot more memory. When I originally created my first trie (in C), I called it a pig structure, as I was unaware of the trie data structure.

There are now 2 versions below. I have added a trie-unset and support for empty values.

Caveat: the trie-get could be done with a single lindex rather than the foreach loop. Removing unused paths in the trie might be beneficial at some point, beyond just trie-unset.


 #By George Peter Staplin
 #
 #Revision 2
 #This is a simple hashing trie in Tcl.
 #A trie uses the actual values of a key for the path to the data.
 #This uses 4 bits per path for 16 directions per node.
 #
 #A would generate a path of: 1 4
 #AB would generate a path of: 1 4 2 4

 package require Tcl 8.5 ;#uses {*}


 proc trie-path str {
     set r [list]

     foreach c [split $str ""] {
         set n [scan $c %c]

         while {$n > 0} {
             #We use 4 bits for each path, so we have 16 nodes.
             set p [expr {$n & 15}]

             #Append this to the path.
             lappend r $p

             #Shift 4 bits to the right now that we got 2^4 - 1 with our & earlier.
             set n [expr {$n >> 4}]
         }
     }

     return $r
 }

 proc trie-create {} {
     #We create 17 list items.  The 17th is for the value of the association.

     return [lrepeat 17 [list]]
 }

 proc trie-set {tvar key value} {
     upvar 1 $tvar t

     set pathlist [trie-path $key]


     #First build up a path of valid sublists.
     set path [list]

     foreach p $pathlist {
         lappend path $p

         set subt [lindex $t {*}$path]
         if {![llength $subt]} {
             lset t {*}$path [trie-create]
         }
     }

     #We use a 2 element list to differentiate from an empty string value.
     lset t {*}$pathlist 16 [list DATA $value]

     #Enable this to see the actual serialized structure of the trie.
     #puts $t
 }

 proc trie-get {t key valuevar} {
     upvar 1 $valuevar value

     set pathlist [trie-path $key]

     set path [list]
     foreach p $pathlist {
         lappend path $p

         if {![llength [lindex $t {*}$path]]} {
             #This key's path is not in the trie.
             return 0
         }
     }

     set valuelist [lindex $t {*}$pathlist 16]

     if {2 != [llength $valuelist]} {
         #The value is not a valid [list DATA $value].
         return 0
     }


     set value [lindex $valuelist 1]
     #We found the value associated with the key.
     return 1
 }

 proc trie-unset {tvar key} {
     upvar 1 $tvar t

     set pathlist [trie-path $key]

     #Here we rely on lindex returning an empty list for an invalid offset.
     set valuelist [lindex $t {*}$pathlist 16]

     if {2 != [llength $valuelist]} {
         #The value is not set, or the key is invalid.
         return
     }

     #We have no need for a NULL value, just use an empty list.
     lset t {*}$pathlist [list]
 }


 proc main {} {
     set t [trie-create]

     set key1 "Peachy!"
     set key2 "Harder!  Better!  Faster!  Stronger!"
     set key3 "words of wisdom"

     trie-set t $key1 123 

     if {[trie-get $t $key1 value]} {
         puts "$value is associated with $key1"
     }

     if {[trie-get $t "invalid" value]} {
         puts stderr "err invalid key generated: $value"
     }

     trie-set t $key2 456

     if {[trie-get $t $key2 value]} {
         puts "$value is associated with $key2"
     }

     trie-unset t $key2

     if {[trie-get $t $key2 value]} {
         puts stderr "err got $value for an unset key!"
     }


     puts $t

     trie-set t $key3 [list "Don't bite off more than you can chew." \
                           "The cost of wisdom is pain."]

     puts $t

     if {[trie-get $t $key3 value]} {
         puts "'$key3' is associated with the value: $value"
     }
 }
 main

 #By George Peter Staplin
 #
 #This is a simple hashing trie in Tcl.
 #A trie uses the actual values of a key for the path to the data.
 #This uses 4 bits per path for 16 directions per node.
 #
 #A would generate a path of: 1 4
 #AB would generate a path of: 1 4 2 4

 package require Tcl 8.5 ;#uses {*}


 proc trie-path str {
     set r [list]

     foreach c [split $str ""] {
         set n [scan $c %c]

         while {$n > 0} {
             #We use 4 bits for each path, so we have 16 nodes.
             set p [expr {$n & 15}]

             #Append this to the path.
             lappend r $p

             #Shift 4 bits to the right now that we got 2^4 - 1 with our & earlier.
             set n [expr {$n >> 4}]
         }
     }

     return $r
 }

 proc trie-create {} {
     #We create 17 list items.  The 17th is for the value of the association.

     return [lrepeat 17 [list]]
 }

 proc trie-set {tvar key value} {
     upvar 1 $tvar t

     set pathlist [trie-path $key]


     #First build up a path of valid sublists.
     set path [list]

     foreach p $pathlist {
         lappend path $p

         set subt [lindex $t {*}$path]
         if {![llength $subt]} {
             lset t {*}$path [trie-create]
         }
     }

     lset t {*}$pathlist 16 $value

     #Enable this to see the actual serialized structure of the trie.
     #puts $t
 }

 proc trie-get {t key valuevar} {
     upvar 1 $valuevar value

     set pathlist [trie-path $key]

     set path [list]
     foreach p $pathlist {
         lappend path $p

         if {![llength [lindex $t {*}$path]]} {
             #This key's path is not in the trie.
             return 0
         }
     }

     set value [lindex $t {*}$pathlist 16]
     #We found the value associated with the key.
     return 1
 }

 proc main {} {
     set t [trie-create]

     set key1 "Peachy!"
     set key2 "Harder!  Better!  Faster!  Stronger!"

     trie-set t $key1 123 

     if {[trie-get $t $key1 value]} {
         puts "$value is associated with $key1"
     }

     if {[trie-get $t "invalid" value]} {
         puts "err invalid key generated: $value"
     }

     trie-set t $key2 456

     if {[trie-get $t $key2 value]} {
         puts "$value is associated with $key2"
     }

 }
 main