IEEE floating numbers

#####################################################################
# procedures to show internal IEEE standard "double" representation #
#####################################################################

# big endian code

# floatToBinaryBigEndian --
#   Dissects the native "double" encoding of a number into its bit fields,
#   reading the bytes produced by [binary format d] high-order bit first.
# Arguments:
#   d   the floating point value to dissect
# Results:
#   A list {sign mantissa exponent} of "0"/"1" strings taken from bit 0,
#   bits 12..end and bits 1..11 of the scanned bit string, in that order.
proc floatToBinaryBigEndian {d} {
    binary scan [binary format d $d] B* bits
    return [list \
        [string index $bits 0] \
        [string range $bits 12 end] \
        [string range $bits 1 11]]
}

# binaryToFloatBigEndian --
#   Assembles a double from its bit fields, writing the 64 bits high-order
#   bit first and scanning the resulting bytes back with [binary scan d].
# Arguments:
#   sign      exactly "0" or "1"
#   mantissa  string of exactly 52 "0"/"1" characters
#   exponent  string of exactly 11 "0"/"1" characters
# Results:
#   The floating point value scanned from the assembled bytes.
# Errors:
#   bad sign "...", bad mantissa "..." or bad exponent "..." when a field
#   does not have the required form.
proc binaryToFloatBigEndian {sign mantissa exponent} {
    # Compare the sign as a string: a numeric comparison would accept
    # values such as "00" or "01", which make the bit string 65 characters
    # long so that B64 silently drops the last mantissa bit.
    if {![string equal $sign 0] && ![string equal $sign 1]} {
        error "bad sign \"$sign\""
    }
    # Check the characters as well as the length, so that a malformed field
    # is reported by name instead of by a generic [binary format] error.
    if {![regexp {^[01]{52}$} $mantissa]} {
        error "bad mantissa \"$mantissa\""
    }
    if {![regexp {^[01]{11}$} $exponent]} {
        error "bad exponent \"$exponent\""
    }
    set v [binary format B64 $sign$exponent$mantissa]
    binary scan $v d v
    return $v
}

# little endian code

# __reverse__ --
#   Returns the characters of a string in reverse order.
# Arguments:
#   s   the string to reverse (may be empty)
# Results:
#   The reversed string; an empty string yields an empty string.
proc __reverse__ {s} {
    set reversed {}
    set pos [string length $s]
    # Walk from the last character down to the first.
    while {$pos > 0} {
        incr pos -1
        append reversed [string index $s $pos]
    }
    return $reversed
}

# floatToBinaryLittleEndian --
#   Dissects the native "double" encoding of a number into its bit fields.
#   The bytes produced by [binary format d] are scanned low-order bit first
#   and the whole bit string is then reversed with __reverse__ before the
#   fields are cut out.
# Arguments:
#   d   the floating point value to dissect
# Results:
#   A list {sign mantissa exponent} of "0"/"1" strings taken from bit 0,
#   bits 12..end and bits 1..11 of the reversed bit string, in that order.
proc floatToBinaryLittleEndian {d} {
    binary scan [binary format d $d] b* lowFirst
    set bits [__reverse__ $lowFirst]
    return [list \
        [string index $bits 0] \
        [string range $bits 12 end] \
        [string range $bits 1 11]]
}

# binaryToFloatLittleEndian --
#   Assembles a double from its bit fields. The 64 bits are reversed with
#   __reverse__, written low-order bit first, and the resulting bytes are
#   scanned back with [binary scan d].
# Arguments:
#   sign      exactly "0" or "1"
#   mantissa  string of exactly 52 "0"/"1" characters
#   exponent  string of exactly 11 "0"/"1" characters
# Results:
#   The floating point value scanned from the assembled bytes.
# Errors:
#   bad sign "...", bad mantissa "..." or bad exponent "..." when a field
#   does not have the required form.
proc binaryToFloatLittleEndian {sign mantissa exponent} {
    # Compare the sign as a string: a numeric comparison would accept
    # values such as "00" or "01", which make the bit string 65 characters
    # long so that b64 silently ignores one of the bits.
    if {![string equal $sign 0] && ![string equal $sign 1]} {
        error "bad sign \"$sign\""
    }
    # Check the characters as well as the length, so that a malformed field
    # is reported by name instead of by a generic [binary format] error.
    if {![regexp {^[01]{52}$} $mantissa]} {
        error "bad mantissa \"$mantissa\""
    }
    if {![regexp {^[01]{11}$} $exponent]} {
        error "bad exponent \"$exponent\""
    }
    set v [binary format b64 [__reverse__ $sign$exponent$mantissa]]
    binary scan $v d v
    return $v
}

# platform independent procedures #

# floatToBinary --
#   Dissects a double into its bit fields by dispatching on
#   tcl_platform(byteOrder) to the matching byte-order specific procedure.
# Arguments:
#   d   the floating point value to dissect
# Results:
#   Whatever floatToBinaryBigEndian or floatToBinaryLittleEndian returns.
# Errors:
#   unknown byteOrder "..." when the byte order is neither bigEndian nor
#   littleEndian.
proc floatToBinary {d} {
    set order $::tcl_platform(byteOrder)
    if {[string equal $order bigEndian]} {
        return [floatToBinaryBigEndian $d]
    }
    if {[string equal $order littleEndian]} {
        return [floatToBinaryLittleEndian $d]
    }
    return -code error "unknown byteOrder \"$order\""
}

# binaryToFloat --
#   Assembles a double from its bit fields by dispatching on
#   tcl_platform(byteOrder) to the matching byte-order specific procedure.
# Arguments:
#   sign, mantissa, exponent   passed through unchanged
# Results:
#   Whatever binaryToFloatBigEndian or binaryToFloatLittleEndian returns.
# Errors:
#   unknown byteOrder "..." when the byte order is neither bigEndian nor
#   littleEndian.
proc binaryToFloat {sign mantissa exponent} {
    set order $::tcl_platform(byteOrder)
    if {[string equal $order bigEndian]} {
        return [binaryToFloatBigEndian $sign $mantissa $exponent]
    }
    if {[string equal $order littleEndian]} {
        return [binaryToFloatLittleEndian $sign $mantissa $exponent]
    }
    return -code error "unknown byteOrder \"$order\""
}

# floatToBinaryTest --
#   Checks that floatToBinary dissects a value into the expected bit fields.
# Arguments:
#   value     the floating point value to dissect
#   sign      expected sign bit string
#   mantissa  expected mantissa bit string
#   exponent  expected exponent bit string
# Results:
#   An empty string when all three fields match.
# Errors:
#   "this machine is not IEEE floating point compliant" when any field
#   differs from the expectation.
proc floatToBinaryTest {value sign mantissa exponent} {
    set r [floatToBinary $value]
    # The fields are bit strings, not numbers, so compare them as strings.
    # A numeric comparison would treat "0" and a run of 52 zeros as equal
    # and could treat long digit strings as inexact doubles.
    if {
        ![string equal [lindex $r 0] $sign] ||
        ![string equal [lindex $r 1] $mantissa] ||
        ![string equal [lindex $r 2] $exponent]
    } {
        return -code error "this machine is not IEEE floating point compliant"
    }
}

# Some tests
# 1.0 and -1.0 are expected to dissect into an all-zero mantissa and the
# exponent 01111111111, differing only in the sign bit.

floatToBinaryTest  1.0      0 0000000000000000000000000000000000000000000000000000 01111111111
floatToBinaryTest -1.0      1 0000000000000000000000000000000000000000000000000000 01111111111

# An example of why you should put braces around the argument of "expr"

set tcl_precision 12
set pi [expr {acos(-1.0)}]
# Passed directly, or through a braced expression, the same mantissa is
# expected.
floatToBinaryTest $pi           0 1001001000011111101101010100010001000010110100011000 10000000000
floatToBinaryTest [expr {$pi}]  0 1001001000011111101101010100010001000010110100011000 10000000000
# Unbraced, $pi is substituted as its 12-digit string before expr sees it,
# so a mantissa with different low-order bits is expected.
floatToBinaryTest [expr $pi]    0 1001001000011111101101010100010001000010111011101010 10000000000

# The 17-digit string representation is exact: with tcl_precision 17 all
# three forms, including the unbraced one, are expected to give the same
# mantissa.

set tcl_precision 17
set pi [expr {acos(-1.0)}]
floatToBinaryTest $pi           0 1001001000011111101101010100010001000010110100011000 10000000000
floatToBinaryTest [expr {$pi}]  0 1001001000011111101101010100010001000010110100011000 10000000000
floatToBinaryTest [expr $pi]    0 1001001000011111101101010100010001000010110100011000 10000000000 

# Print the doubles whose mantissa is one less than, equal to, and one more
# than the mantissa expected for pi above; the trailing comments give the
# expected output.
puts [binaryToFloat 0 1001001000011111101101010100010001000010110100010111 10000000000] ;# 3.1415926535897927
puts [binaryToFloat 0 1001001000011111101101010100010001000010110100011000 10000000000] ;# 3.1415926535897931
puts [binaryToFloat 0 1001001000011111101101010100010001000010110100011001 10000000000] ;# 3.1415926535897936 

# The same three values again.
puts [binaryToFloat 0 1001001000011111101101010100010001000010110100010111 10000000000] ;# 3.1415926535897927
puts [binaryToFloat 0 1001001000011111101101010100010001000010110100011000 10000000000] ;# 3.1415926535897931
puts [binaryToFloat 0 1001001000011111101101010100010001000010110100011001 10000000000] ;# 3.1415926535897936 

# Special representations
# Each call builds one boundary or special bit pattern; the trailing comment
# gives the expected value. The results are not printed here, so run the
# lines interactively or wrap them in puts to see them.

binaryToFloat 0 0000000000000000000000000000000000000000000000000000 00000000000        ;# 0.0
binaryToFloat 0 0000000000000000000000000000000000000000000000000001 00000000000        ;# 4.9406564584124654e-324
binaryToFloat 0 1111111111111111111111111111111111111111111111111111 00000000000        ;# 2.2250738585072009e-308
binaryToFloat 0 0000000000000000000000000000000000000000000000000000 00000000001        ;# 2.2250738585072014e-308
binaryToFloat 0 0000000000000000000000000000000000000000000000000000 11111111110        ;# 8.9884656743115795e+307
binaryToFloat 0 1111111111111111111111111111111111111111111111111111 11111111110        ;# 1.7976931348623157e+308
binaryToFloat 0 0000000000000000000000000000000000000000000000000000 11111111111        ;# inf
binaryToFloat 1 0000000000000000000000000000000000000000000000000000 11111111111        ;# -inf
binaryToFloat 0 1111111111111111111111111111111111111111111111111111 11111111111        ;# nan
binaryToFloat 1 1111111111111111111111111111111111111111111111111111 11111111111        ;# nan

As the self-tests in the code above will report, the code only works if your platform uses the IEEE format [1] as its native representation of floating-point numbers. Tcl depends on the C language in this matter, and ISO C does not require floating-point numbers to adhere to any specific format.


PYK 2014-05-14: The code above purports to show an example of why you should put braces around the argument of "expr", but I don't see any difference in behavior between the braced and unbraced versions of the commands. Was there one at some point? Am I missing something?

AMG: Look closer:

set tcl_precision 12
set pi [expr {acos(-1.0)}]
floatToBinaryTest $pi           0 1001001000011111101101010100010001000010110100011000 10000000000
floatToBinaryTest [expr {$pi}]  0 1001001000011111101101010100010001000010110100011000 10000000000
floatToBinaryTest [expr $pi]    0 1001001000011111101101010100010001000010111011101010 10000000000
                                                                            ^^^^^^  ^

As for what's happening, it's rather deep and confusing. In this instance, the only functional difference between the last two lines is whether it's the Tcl interpreter or the [expr] engine that performs the substitution. $pi's string representation is 3.14159265359, though in the first two lines it remains a pure double.

The third line causes $pi to shimmer from double to string to expression before [expr] reconstitutes the numeric value from the string value, which (thanks to the limited tcl_precision) lost some precision when it was generated.

Compare with the first line which passes the (pure double) value to [floatToBinaryTest] which does [binary format] on it (as a double) without needing its string value. Then look at the second line which asks [expr] to simply return its argument without interpreting it, before passing it along to [floatToBinaryTest]. But the third line goes the long route from double to string to expression to double.

Brace your expr-essions!!

PYK: A most excellent explanation. Thank you!

PYK: I wouldn't have expected the third line to cause $pi to shimmer to a string, though, because my understanding was that such strings remained "pure" across being passed into a command, as no substitution is required. I'm not quite sure what to make of my understanding now.

AMG: [expr] doesn't want a double argument, it wants an expression argument. Hence shimmering. [expr] could be optimized to directly recognize int and double arguments, but what would be the gain?

See Also

Computers and real numbers
tcl_precision
IEEE binary float to string conversion
IEEE float dissection