This is an improved version of purse, orginally by CMcC. The code below attempts to minimise the amount of file descriptors that are kept open at one time. The original implementation would only close file descriptors on array removals, which can be problematic in virtual machines (I've had problems on Ubuntu in OpenVZ-based machines). Closing of file descriptors uses the following two heuristics:
The API is exactly the same as the one of purse, except that there are more parameters that can be set for a purse (and good defaults, highlighted at the beginning of the code).
I have been also been looking at tie, which is part of tcllib. But the implementation also keeps the file descriptors opened forever, to what I can understand.
# purse makes tcl arrays purse-istant package require Tcl 8.5 package provide Purse 0.3 namespace eval ::purse { namespace eval vars { variable nofile 128; # Default max number of journalling file to keep open variable keep 5000; # Default number of milliseconds to keep journalling files open variable debug ""; # File descritor for debug output (empty for none) variable dtfmt "%Y%m%d %T"; # Format for date output in log (empty for none) } } # param - sets a purse's parameters # dir - directory in which purses are stored (default [pwd]) proc ::purse::param {var val {control purse}} { variable $control upvar 0 $control cont if {![info exists cont]} { #::purse::purse $control $control set cont() [dict create dir [pwd]] } if {$var == "dir"} { file mkdir $val } dict set cont() $var $val Debug "Set parameter $var to $val in $control" } # Restart from current state on disk proc ::purse::restart { {glob ::*} {control purse}} { variable $control upvar 0 $control cont init $control set arrays [glob -directory [dict get $cont() dir] \ -nocomplain \ -tails [string map [dict get $cont() todisk] $glob]] foreach array $arrays { set array [FromDisk $array] # creates namespace ondemand, if necessary set nspace [namespace qualifiers $array] if { $nspace ne "" } { namespace eval $nspace {} } R $control $array $array } return $arrays } # Initialise purse storage in control variable proc ::purse::init { {control purse} } { global tcl_platform variable $control upvar 0 $control cont if {![info exists cont]} { #::purse::purse $control $control set cont() [dict create] } # Make sure we have a "dir" parameter, this will be the directory # where we will be persisting arrays. if {![dict exists $cont() dir]} { param dir [pwd] $control } # Make sure we know how to map unallowed characters onto # characters that can be used for file names. if { ![dict exists $cont() todisk] } { if { $tcl_platform(platform) eq "windows" } { param todisk [list ":" "¨"] $control } else { param todisk {} $control } } # The opposite... if { ![dict exists $cont() fromdisk] } { if { $tcl_platform(platform) eq "windows" } { param fromdisk [list "¨" ":"] $control } else { param fromdisk {} $control } } if {![dict exists $cont() nofile]} { param nofile $vars::nofile $control } if {![dict exists $cont() keep]} { param keep $vars::keep $control } } # purse - purses an array proc ::purse::purse {array {control purse}} { #puts stderr "purse $array $control" #set array [namespace which -variable $array] upvar $array arr variable $control upvar 0 $control cont init $control; # Initialise storage. trace add variable arr read [list ::purse::R $control $array] ;# one shot load file trace add variable arr array [list ::purse::R $control $array] ;# one shot load file trace add variable arr write [list ::purse::W $control $array] trace add variable arr unset [list ::purse::U $control $array] # register a single purse flush at exit set pfx ::exit_purse_[string map [list ":" ""] $control]_ if { [info commands ${pfx}*] eq "" } { set newex ${pfx}[expr rand()] rename ::exit $newex proc ::exit {} [subst { #puts stderr "flush $control" ::purse::flush * $control $newex }] } } # flush arrays matching glob proc ::purse::flush {{glob *} {control purse}} { variable $control upvar 0 $control cont foreach {array d} [array get cont $glob] { if { $array != "" } { Serialize $array $control } } } # Return full path to where to store an array on disk. proc ::purse::ToDisk { name {control purse}} { variable $control upvar 0 $control cont init $control return [file join [dict get $cont() dir] \ [string map [dict get $cont() todisk] $name]] } # Return name of array, given its name on disk. proc ::purse::FromDisk { name {control purse}} { variable $control upvar 0 $control cont init $control return [string map [dict get $cont() fromdisk] $name] } # initializes pursed array - one shot proc ::purse::R {control name array args} { #puts stderr "read $control $name $array $args" upvar $array arr variable $control upvar 0 $control cont trace remove variable arr read [list ::purse::R $control $name] ;# one shot load file trace remove variable arr array [list ::purse::R $control $name] ;# one shot load file trace remove variable arr write [list ::purse::W $control $name] trace remove variable arr unset [list ::purse::U $control $name] set file [ToDisk $name $control] if {[file exists $file]} { # if the purse exists, load its contents to array Debug "Loading content of array $name from $file" set fd [open $file r+] while {![eof $fd]} { set content [gets $fd] set len [llength $content] if { [expr {$len%2}] } { puts "Possibly corrupt data when reading $name" set content [lrange $content 0 end-1] } array set arr $content } close $fd Serialize $name $control } else { # brand new purse - create the file Debug "Shadowing content of array $name to $file" set fd [open $file w] fconfigure $fd -buffering line dict set cont($name) fd $fd Spacer $name $control AutoClose $name $control } # we no longer need a read trace trace add variable arr write [list ::purse::W $control $name] trace add variable arr unset [list ::purse::U $control $name] } # trace unset - writes an element to purse proc ::purse::W {control name array el op} { #puts stderr "write $control $name $array $el $op" upvar $array arr variable $control upvar 0 $control cont if {![info exists cont($name)]} { R $control $name arr } # Append to existing file, reserialize all content if it had been closed set fd [dict get $cont($name) fd] if { $fd eq "" } { Serialize $name $control } set fd [dict get $cont($name) fd] array set junk [list $el $arr($el)] puts $fd [array get junk] Spacer $name $control AutoClose $name $control } # trace unset - unsets an element in a pursed array proc ::purse::U {control name array el op} { #puts stderr "unset $control $name $array $el $op" upvar $array arr variable $control upvar 0 $control cont if {![info exists cont($name)]} { r $control $name arr if {$el != "" && [info exists arr($el)]} { unset arr($el) ;# we have recreated the element - recurse } return } if {$el == ""} { # removing the entire array - destroy the purse set file [ToDisk $name $control] CloseCmd $name $control unset cont($name) if {[file exists $file]} { file delete -force -- $file } # we no longer need traces trace remove variable arr write [list ::purse::W $control $name] trace remove variable arr unset [list ::purse::U $control $name] } else { # removing an element - flush the purse Serialize $name $control } } # Conditional debug output, this cost close to zero. proc ::purse::Debug {msg} { if {$vars::debug ne ""} { if { $vars::dtfmt eq "" } { puts $vars::debug $msg } else { set dt [clock format [clock seconds] -format $vars::dtfmt] puts $vars::debug "\[$dt\] $msg" } } } # Serialize the content of an array to the journalling file, i.e. write a # complete copy of the array, loosing the history. proc ::purse::Serialize {array {control purse}} { variable $control upvar 0 $control cont upvar $array arr # Close current file descriptor to journalling file catch { set fd [dict get $cont($array) fd] close $fd } # Write complete copy of current content of array to the file set file [ToDisk $array $control] Debug "Serialize $array to $file" set fd [open $file w] fconfigure $fd -buffering line puts $fd [array get arr] ::flush $fd dict set cont($array) fd $fd; # Remember the file descriptor # Make space for the array by closing older arrays and arrange to close the # journal after a while. Spacer $array $control AutoClose $array $control } # Record latest write timestamp to an array, this is used to automatically close # journalling files of arrays that we haven't written to for a while. proc ::purse::LatestWrite { name {control purse}} { variable $control upvar 0 $control cont set nofile [dict get $cont() nofile] if { $nofile > 0 } { dict set cont($name) access [clock clicks] } } # Make space to write to an array, this will close the journalling files of the # arrays that we haven't written to for a while, or rather the "oldest" ones in # that list. proc ::purse::Spacer { name { control purse} } { variable $control upvar 0 $control cont set nofile [dict get $cont() nofile] if { $nofile > 0 } { # Make sure we keep the array that we want to make space for as the # latest accessed one. LatestWrite $name $control # Construct a list with the names of the arrays that are under our # control, paired to their last access time. set accesses {} foreach nm [array names cont] { if {$nm ne ""} { if {[dict exists $cont($nm) access]} { lappend accesses [list $nm [dict get $cont($nm) access]] } } } # Sort the list so that latest accessed are first. set accesses [lsort -index 1 -integer -decreasing $accesses] # Remove all the ones that are at the end of the list, which now are the # oldest ones. set removals [lrange $accesses $nofile end] if { [llength $removals] > 0 } { Debug "Making space for array $name among the oldest ones" foreach nfo $removals { foreach {nm access} $nfo break CloseCmd $nm $control } } } } # Arrange to automatically close journalling file after a while, if relevant for # the purse options. proc ::purse::AutoClose { name {control purse}} { variable $control upvar 0 $control cont # Cancel current timer, if any if { [dict exists $cont($name) timer] } { after cancel [dict get $cont($name) timer] dict unset cont($name) timer } # Arrange to close the journalling file to which we write in a little while set period [dict get $cont() keep] if { $period > 0 } { dict set cont($name) \ timer [after $period [list [namespace current]::CloseCmd $name $control]] } } # Forcedly close the journalling file for an array at once. This is to ensure # that we can keep low the number of resources alloted to the program as a # whole. proc ::purse::CloseCmd { name {control purse}} { variable $control upvar 0 $control cont # Return at once in case we don't have any information for the array # (anymore?) if {![info exists cont($name)]} { return } # Cancel current timer, if any if { [dict exists $cont($name) timer] } { after cancel [dict get $cont($name) timer] dict unset cont($name) timer } # Close the file to which we are writing if { [dict exists $cont($name) fd] } { set fd [dict get $cont($name) fd] if { $fd ne "" } { Debug "Closing journalling output file for array $name" catch {close $fd} } } # Remember that we are now not writing to any file anymore. dict set cont($name) fd "" } namespace export -clear purse