Version 19 of du

Updated 2015-01-24 16:07:15 by dkf

Somewhat like the UNIX command du ("Disk Usage") - it returns the number of bytes, kilobytes, or megabytes in a directory hierarchy. Intentionally does NOT count links!!


proc du { args } {
    switch -exact [llength $args] {
        0 {
            set dir .
            set switch -k
        }
        1 {
            set dir $args
            set switch -k
        }
        2 {
            set switch [lindex $args 0]
            set dir [lindex $args 1]
        }
        default {
            set msg "only one switch and one dir "
            append msg "currently supported"
            return -code error $msg
        }
    }

    set switch [string tolower $switch]

    set -b 1
    set -k 1024
    set -m [expr 1024*1024]

    set result [list]

    if {![file isdirectory $dir]} {
        set ary($dir,bytes) [file size $dir]
        set globpats [list]
    } else {
        set globpats $dir/*
    }

    while {[llength $globpats]} {
        foreach globpat $globpats {
            set cwd [string trim $globpat */]
            set ary($cwd,bytes) 0
            set files [glob -nocomplain $globpat]
            set globpats [list]
            foreach file $files {
                if {![catch {
                    file readlink $file
                }]} {
                    continue
                }
                if {[file isdirectory $file]} {
                    lappend globpats $file/*
                } else {
                    incr ary($cwd,bytes) [file size $file]
                }
            }
        }
    }

    set dirs [array names ary]

    # Since the directories are arranged by nesting level,
    # this can be optimised to not iterate in the inner loop
    # over directories already processed by the outer loop.
    # I have no time right now...
    if {[llength $dirs] > 1} {
        foreach dir $dirs {
            set dir [lindex [split $dir ","] 0]
            foreach Dir $dirs {
                set Dir [lindex [split $Dir ","] 0]
                if { [string match $dir/* $Dir]} {
                    incr ary($dir,bytes) $ary($Dir,bytes)
                }
            }
        }
    }

    foreach dir $dirs {
        set name [lindex [split $dir ","] 0]
        set size [expr {$ary($dir) / [set $switch]}]
        lappend retval [list $name $size]
    }
    # copyright 2002 by The LIGO Laboratory
    return $retval
}

# Test:
catch {console show}
catch {wm withdraw .}

puts "disc usage - start at current directory:"
puts "[du]"

puts "Test2 - show result as one long line:"
set  tx [du ..]
puts $tx

puts "Test3 - show result as one dir per line, size in MB:"
set  tx [du -m C://WINNT]
foreach dir $tx {puts "$dir"}
#.

Anyone that cares to may fill in the missing command line switches...


I (cjl) was intrigued by the none-recursive way the directory tree is walked, but it didn't look like it should work. Trying it (as implemented above) reveals that it's not counting everything it should, due to the way 'globpats' is reset and re-established. The little test below illustrates the problem:

    set things {1 2 3}

    while {[llength $things]} {
        puts "Entered 'while' loop ([join $things])"
        foreach thing $things {
            puts "Entered 'foreach' loop ($thing : [join $things])"
            set things [list]
    
            if {$thing == 2} {
                lappend things a
            }
        }
    }

The 'a' added to 'things' never gets seen by the outer loop. Indeed, in the 'du' implementation a sub-directory will only be walked if it is the last item found by the 'glob'.


See also