Version 11 of file size

Updated 2008-07-01 10:37:08 by MB

file size name

Returns a decimal string giving the size of file name in bytes. If the file doesn't exist or its size cannot be queried then an error is generated.


Before Tcl 8.4a4, file size would actually throw an error on a file larger than 2 GB. Since then it works, and returns the correct value for such 'large files'.

file size must return the complete size of a file including any data which the OS might not yet have written to disk. On Unix this is easy, but on Windows, Tcl actually has to force the OS to flush all buffers before returning the information. (Of course all of this happens behind the scenes).


The following is a package that converts a file size in bytes into a human-readable form. For example, for a file of 10 000 bytes:

  hrfilesize::bytestohr 10000

returns "10.0 KB" with the default decimal unit system (with the option -unit binary, the result is "9.8 KiB").

Note that the following package is based on the Tcllib package "bigfloat", so that it can handle file sizes up to the exabyte range.

    #
    # Convert a size in byte into a human readable form.
    # http://en.wikipedia.org/wiki/Byte
    # http://en.wikipedia.org/wiki/File_size
    # http://physics.nist.gov/cuu/Units/binary.html
    # Copyright: Michael Baudin [email protected]
    #

    package provide hrfilesize 1.0

    namespace eval hrfilesize {
        # Largest power of the kilobyte multiple that has a unit name
        variable powermax 6
        # Name of the unit system used to convert from bytes
        variable unitsystem
        # Number of bytes in one kilobyte for the selected unit system
        variable kilobytes
        # Flat list mapping each power to its unit name
        variable powertounitmap
        # Format string used to turn the scaled size into a short string
        variable realformat
        # Name of the component used to process the integers
        variable integerpackage
    }

    #
    # hrfilesize::bytestohr --
    #   Formats a size given in bytes as a short human-readable string made
    #   of a number and a unit name. The unit is chosen among:
    #   - bytes,
    #   - kilobytes,
    #   - megabytes,
    #   - gigabytes,
    #   - terabytes,
    #   - petabytes,
    #   - exabytes.
    # Example:
    #   For a file of 10 000 bytes,
    #     hrfilesize::bytestohr 10000
    #   returns "10.0 KB" with the settings installed when the package loads.
    # Arguments:
    #   size: the size in bytes
    #   args: option/value pairs; each pair is handed to hrfilesize::configure
    #   -realformat value : the format applied to the scaled size.
    #      Set to "%.1f" when the package is loaded.
    #   -unit value : the unit system to convert from bytes.
    #      If "value" is "binary" then one kilobyte is made of 1024 bytes.
    #      If "value" is "decimal" then one kilobyte is made of 1000 bytes.
    #      Set to "decimal" when the package is loaded.
    #   -integerpackage value : the package to process integer values
    #      If "value" is "Tcl" then the integers are processed with Tcl "expr" command
    #      If "value" is "bigfloat" then the integers are processed with Tcl lib package "bigfloat"
    # Note:
    #   The options are stored in the package state by hrfilesize::configure,
    #   so they remain in effect for later calls as well.
    # Errors:
    #   An error is raised when the size needs a power larger than
    #   hrfilesize::powermax, i.e. when no unit name is available for it.
    #
    proc hrfilesize::bytestohr {size args} {
        # Apply the options, one key/value pair at a time
        foreach {option setting} $args {
            hrfilesize::configure $option $setting
        }
        # Split the pair returned by newsize into scaled value and power
        foreach {scaled power} [hrfilesize::newsize $size] break
        # No unit name exists beyond the maximum power
        if {$power > $hrfilesize::powermax} {
            error "File size larger than the maximum available size unit (power : $power)"
        }
        # Look up the unit name and assemble "<number> <unit>"
        array set unitnames $hrfilesize::powertounitmap
        set unitname $unitnames($power)
        return "[format $hrfilesize::realformat $scaled] $unitname"
    }
    #
    # hrfilesize::configure --
    #   Configure the conversion system depending on the couples (key,value)
    #   given in the list args.
    #   All the settings are checked before any of them is stored, so that
    #   a call which raises an error leaves the package state unchanged.
    # Arguments:
    #   -realformat value : the format used to convert from the full real size
    #      to a short real, for example "%.1f"
    #   -unit value : the unit system to convert from bytes.
    #      If "value" is "binary" then one kilobyte is made of 1024 bytes.
    #      If "value" is "decimal" then one kilobyte is made of 1000 bytes.
    #   -integerpackage value : the package to process integer values
    #      If "value" is "Tcl" then the integers are processed with Tcl "expr" command
    #      If "value" is "bigfloat" then the integers are processed with Tcl lib package "bigfloat"
    #      (this value is only checked later, by hrfilesize::newsize)
    # Result:
    #   An empty string.
    # Errors:
    #   "Unknown key <key>" if an option name is not recognized.
    #   "Unknown unit <value>" if the unit system is neither "binary"
    #   nor "decimal".
    #
    proc hrfilesize::configure {args} {
        variable unitsystem
        variable realformat
        variable integerpackage
        variable kilobytes
        variable powertounitmap
        #
        # Collect the requested settings without storing them yet
        #
        set pending [list]
        foreach {key value} $args {
            switch -- $key {
                "-realformat" {
                    lappend pending realformat $value
                }
                "-unit" {
                    lappend pending unitsystem $value
                }
                "-integerpackage" {
                    lappend pending integerpackage $value
                }
                default {
                    error "Unknown key $key"
                }
            }
        }
        #
        # The unit system to check is the last one requested in this call,
        # or the one already configured if this call does not give any.
        #
        set unit ""
        if {[info exists unitsystem]} {
            set unit $unitsystem
        }
        foreach {name value} $pending {
            if {[string equal $name "unitsystem"]} {
                set unit $value
            }
        }
        #
        # Compute the internal settings depending on the unit system
        #
        switch -- $unit {
            "binary" {
                set factor 1024
                set unitnames [list 0 "B" \
                                   1 "KiB" \
                                   2 "MiB" \
                                   3 "GiB" \
                                   4 "TiB" \
                                   5 "PiB" \
                                   6 "EiB" \
                                   ]
            }
            "decimal" {
                set factor 1000
                set unitnames [list 0 "B" \
                                   1 "KB" \
                                   2 "MB" \
                                   3 "GB" \
                                   4 "TB" \
                                   5 "PB" \
                                   6 "EB" \
                                   ]
            }
            default {
                # Report the offending value (the original code read an
                # undefined variable here and failed with an unrelated message)
                error "Unknown unit $unit"
            }
        }
        #
        # Everything is valid : store the settings in the package state
        #
        foreach {name value} $pending {
            set $name $value
        }
        set kilobytes $factor
        set powertounitmap $unitnames
        return ""
    }
    #
    # hrfilesize::newsize --
    #   Scales the given size (in bytes) and returns a list of two items :
    #   - y, the size expressed in the new unit,
    #   - power, the power of the kilobytes multiple,
    #   such that :
    #     size = y x 1000^power if the unit is decimal
    #     size = y x 1024^power if the unit is binary
    #   The computation is delegated to the implementation selected by
    #   hrfilesize::integerpackage ("Tcl" or "bigfloat"); any other value
    #   raises an error.
    #
    proc hrfilesize::newsize {size} {
        switch -- $hrfilesize::integerpackage {
            "Tcl" {
                return [hrfilesize::newsize_tcl $size]
            }
            "bigfloat" {
                return [hrfilesize::newsize_bifloat $size]
            }
        }
        # Neither of the known implementations was selected
        error "Unknown integer package $hrfilesize::integerpackage"
    }
    #
    # hrfilesize::newsize_tcl --
    #   Compute the new size based on Tcl "expr" command.
    #   The size is first converted to a floating point value with the
    #   "double" function of expr, then divided by hrfilesize::kilobytes
    #   until it is smaller than one kilobyte. The number of divisions is
    #   the power.
    # Result:
    #   A list made of the scaled size and the power.
    #
    # Limitations
    #   NOTE(review): depending on the Tcl version, expr may not be able to
    #   process integers beyond the range of the C long type (about 2 GB
    #   when a long is 32 bits wide) -- confirm for the targeted Tcl version.
    #   The "bigfloat" implementation does not depend on expr for the size.
    #   No check is made here that the given size is an integer.
    #
    proc hrfilesize::newsize_tcl {size} {
        set scaled [expr {double($size)}]
        # One division by the kilobyte multiple per unit step
        for {set power 0} {$scaled >= $hrfilesize::kilobytes} {incr power} {
            set scaled [expr {$scaled / double($hrfilesize::kilobytes)}]
        }
        return [list $scaled $power]
    }
    #
    # hrfilesize::newsize_bifloat --
    #   Compute the new size based on the bigfloat package.
    #   The size and the kilobyte multiple are both converted to bigfloat
    #   values; the size is then divided by the multiple until it compares
    #   as smaller than it. The number of divisions is the power.
    # Result:
    #   A list made of the scaled size (converted back to a double)
    #   and the power.
    #
    proc hrfilesize::newsize_bifloat {size} {
        package require math::bigfloat
        # Convert both operands : string -> integer -> bigfloat
        set scaled [math::bigfloat::int2float [math::bigfloat::fromstr $size]]
        set multiple [math::bigfloat::int2float \
                          [math::bigfloat::fromstr $hrfilesize::kilobytes]]
        set power 0
        while {[math::bigfloat::compare $scaled $multiple] >= 0} {
            incr power
            set scaled [math::bigfloat::div $scaled $multiple]
        }
        return [list [math::bigfloat::todouble $scaled] $power]
    }
    #
    # Default settings installed when the package is loaded :
    # decimal units, one digit after the decimal point, bigfloat arithmetic.
    #
    hrfilesize::configure \
        -unit "decimal" \
        -realformat "%.1f" \
        -integerpackage "bigfloat"

And here are the tests.

    package require tcltest
    namespace import tcltest::test
    lappend ::auto_path .
    package require hrfilesize 1.0

    #
    # Tests of the binary unit system (one kilobyte is 1024 bytes).
    #
    # Caution !
    #   The sizes are written as constant strings instead of being
    #   computed with Tcl expr.
    #   According to the author's note, this is because Tcl string to
    #   integer conversion is based on the C integer long type, so that
    #   the maximum integer is approximately 2 GB.
    #   (1048576 is 1024*1024.)
    #
    # Each row is : test number, description, size in bytes, expected result.
    #
    foreach {id description size expected} {
        1  {50 bytes}                 50                   {50.0 B}
        2  {10 000 bytes}             10000                {9.8 KiB}
        3  {10 000 000 bytes}         10000000             {9.5 MiB}
        4  {10 000 000 000 bytes}     10000000000          {9.3 GiB}
        5  {10 000 000 000 000 bytes} 10000000000000       {9.1 TiB}
        6  {Pib bytes}                10000000000000000    {8.9 PiB}
        7  {Eib bytes}                10000000000000000000 {8.7 EiB}
        8  {1024 bytes}               1024                 {1.0 KiB}
        9  {1025 bytes}               1025                 {1.0 KiB}
        10 {0 bytes}                  0                    {0.0 B}
        11 {1 Mbyte bytes}            1048576              {1.0 MiB}
    } {
        tcltest::test hrfilesize-binary-$id $description \
            "set res \[hrfilesize::bytestohr $size -unit binary\]" $expected
    }
    # Do not leave the loop variables behind
    unset id description size expected

    #
    # Tests of the decimal unit system (one kilobyte is 1000 bytes).
    # The expected values use the "%.1f" format, i.e. one digit after
    # the decimal point.
    #
    tcltest::test hrfilesize-decimal-1 {50 bytes} {
        set res [hrfilesize::bytestohr 50 -unit decimal]
    } {50.0 B}
    tcltest::test hrfilesize-decimal-2 {10000 bytes} {
        set res [hrfilesize::bytestohr 10000 -unit decimal]
    } {10.0 KB}
    tcltest::test hrfilesize-decimal-3 {10000000 bytes} {
        set res [hrfilesize::bytestohr 10000000 -unit decimal]
    } {10.0 MB}
    tcltest::test hrfilesize-decimal-4 {10000000000 bytes} {
        set res [hrfilesize::bytestohr 10000000000 -unit decimal]
    } {10.0 GB}
    tcltest::test hrfilesize-decimal-5 {10000000000000 bytes} {
        set res [hrfilesize::bytestohr 10000000000000 -unit decimal]
    } {10.0 TB}
    tcltest::test hrfilesize-decimal-6 {Pib bytes} {
        set res [hrfilesize::bytestohr 10000000000000000 -unit decimal]
    } {10.0 PB}
    tcltest::test hrfilesize-decimal-7 {Eib bytes} {
        set res [hrfilesize::bytestohr 10000000000000000000 -unit decimal]
    } {10.0 EB}
    tcltest::test hrfilesize-decimal-8 {1024 bytes} {
        set res [hrfilesize::bytestohr 1024 -unit decimal]
    } {1.0 KB}
    tcltest::test hrfilesize-decimal-9 {1025 bytes} {
        set res [hrfilesize::bytestohr 1025 -unit decimal]
    } {1.0 KB}
    tcltest::test hrfilesize-decimal-10 {0 bytes} {
        set res [hrfilesize::bytestohr 0 -unit decimal]
    } {0.0 B}
    # 1024*1024 = 1048576 bytes = 1.048576 MB, which "%.1f" formats as 1.0
    tcltest::test hrfilesize-decimal-11 {1 Mbyte bytes} {
        set res [hrfilesize::bytestohr [expr {1024*1024}] -unit decimal]
    } {1.0 MB}

    #
    # Error cases : each body catches the error and returns the list made
    # of the catch flag and the error message.
    #
    tcltest::test hrfilesize-error-1 {what happens if we give a real value ?} -body {
        set errflag [catch {hrfilesize::bytestohr 50.} errmsg]
        set result [list $errflag $errmsg]
    } -result {1 {first argument is not an integer}}
    tcltest::test hrfilesize-error-2 {what happens if we give a size larger than the larger unit ?} -body {
        set errflag [catch {hrfilesize::bytestohr 10000000000000000000000 -unit decimal} errmsg]
        set result [list $errflag $errmsg]
    } -result {1 {File size larger than the maximum available size unit (power : 7)}}

    # Print the summary of the test results
    ::tcltest::cleanupTests

See also: