tcl size

See also:


CMcC 20050301 - jyl was trying to track down what seemed to be a leak in a tcl program, which reminded me of a component of tclhttpd which tries to (approximately and imprecisely) measure the size of variables and procs in an interpreter.

I've packaged that up here, may it be of use.

# Create the tclsize namespace up front: the procs below are defined with
# qualified names (tclsize::...), which requires the namespace to exist.
namespace eval tclsize {}
 
# DataSize --
#
#        Estimate the amount of data held in a namespace of an interpreter,
#        including all of its child namespaces, by summing the string
#        lengths of variable names and values.  This is an approximation
#        based on string representations only.
#
# Arguments:
#        ns        (optional) namespace to measure; defaults to the global
#                  namespace "::"
#        interp    (optional) path of the interpreter to inspect; the empty
#                  string (the default) means the current interpreter
#
# Results:
#        Returns {size namecount varcount} where
#          size      = total characters in names and values
#          namecount = one per variable plus one per array element
#          varcount  = number of variables reported by [info vars]
proc tclsize::DataSize {{ns "::"} {interp ""}} {
    set ng 0
    set nv 0
    set size 0

    # Every script handed to [interp eval] is built with [list]: interp eval
    # concatenates multiple arguments and re-parses them, which would break
    # (or execute) variable names containing spaces, $, [ or ;.
    foreach g [interp eval $interp [list info vars ${ns}::*]] {
        incr ng
        if {[interp eval $interp [list array exists $g]]} {
            foreach {name value} [interp eval $interp [list array get $g]] {
                incr size [string length $name]
                incr size [string length $value]
                incr nv
            }
        } elseif {[interp eval $interp [list info exists $g]]} {
            # info vars returns declared but undefined namespace vars,
            # so only read the value when the variable really exists
            incr size [string length $g]
            incr size [string length [interp eval $interp [list set $g]]]
        }
        incr nv
    }

    # The child namespaces must be looked up in the *target* interpreter,
    # not in the interpreter running this proc.
    foreach child [interp eval $interp [list namespace children $ns]] {
        lassign [DataSize $child $interp] csize cnv cng
        incr size $csize
        incr nv $cnv
        incr ng $cng
    }

    return [list $size $nv $ng]
}

# CodeSize --
#
#        Estimate the amount of code held in a namespace of an interpreter,
#        including all of its child namespaces, by summing the string
#        lengths of each proc's name, argument list and body.
#
# Arguments:
#        ns        (optional) namespace to measure; defaults to the global
#                  namespace "::"
#        interp    (optional) path of the interpreter to inspect; the empty
#                  string (the default) means the current interpreter
#
# Results:
#        Returns {size proc_count}

proc tclsize::CodeSize {{ns ::} {interp ""}} {
    set np 0
    set size 0

    # Scripts for [interp eval] are built with [list] so that proc names
    # containing spaces or Tcl metacharacters are passed through intact
    # instead of being concatenated and re-parsed.
    foreach g [interp eval $interp [list info procs ${ns}::*]] {
        incr np
        incr size [string length $g]
        incr size [string length [interp eval $interp [list info args $g]]]
        incr size [string length [interp eval $interp [list info body $g]]]
    }

    foreach child [interp eval $interp [list namespace children $ns]] {
        lassign [CodeSize $child $interp] csize cnp
        incr size $csize
        incr np $cnp
    }

    return [list $size $np]
}

# InterpSize --
#
#        return the data and code size for an interpreter, optionally
#        including every interpreter nested below it
#
# Arguments:
#        interp    (optional) path of the interpreter to measure; the empty
#                  string (the default) means the current interpreter
#        recurse   (optional, default 1) if true, also add the sizes of all
#                  interpreters created beneath $interp
#
# Results:
#        Returns {total datasize namecount varcount codesize proccount}
proc tclsize::InterpSize {{interp ""} {recurse 1}} {
    # Measure the global namespace tree of the requested interpreter
    # (not unconditionally the current one).
    lassign [DataSize :: $interp] data names vars
    lassign [CodeSize :: $interp] code procs

    if {$recurse} {
        foreach child [interp slaves $interp] {
            # [interp slaves] reports names relative to $interp; build the
            # full path so the child can be addressed from this interpreter.
            set path [concat $interp [list $child]]

            # Recursing through InterpSize also picks up grandchildren.
            lassign [InterpSize $path 1] ctotal cdata cnames cvars ccode cprocs
            incr data $cdata
            incr names $cnames
            incr vars $cvars
            incr code $ccode
            incr procs $cprocs
        }
    }

    return [list [expr {$data + $code}] $data $names $vars $code $procs]
}

# When this file is executed directly as the main script (rather than being
# sourced by another script), print a size report for the current
# interpreter.  ::argv0 is checked for existence and referenced by its
# global name so that sourcing this file where argv0 is not defined, or
# from inside a proc, does not raise an error.
if {[info exists ::argv0] && [info script] eq $::argv0} {
     lassign [tclsize::InterpSize] total data names vars code procs
     puts [subst {
         Total accounted: $total bytes.
         Data size: $data bytes in $vars variables ($names names.)
         Code size: $code bytes in $procs procs.
     }]
}

Lars H: I think string bytelength would be more appropriate than string length here. (The latter will also lead to a lot of shimmering if you've got a lot of data as lists.) Also, the above has the disadvantage that it only looks at the size of the string representation, which sometimes can be quite different from the size of the internal representation.

CMcC: would string bytelength avoid shimmering? - LH: Yes. It too forces generation of a string representation, but unlike string length it does not also junk every non-string internal representation. - Then it should certainly be that. I think these procs can be useful even though they will only ever be an analog of true memory usage, for the reasons given, and others: hash tables have internal space wasted, malloc has fragmentation, the size of open file descriptors isn't accounted for, ...