Binary strings for storing matrices of numerical data

Arjen Markus (12 june 2016) Following a discussion on comp.lang.tcl regarding wavelets transforms, I decided to put a simple idea to the test: can you use binary strings to store numerical data? Such binary strings are much more compact than Tcl's lists and they can be passed to C-based commands without any conversion (they act as ordinary arrays of integers or floating-point numbers there). The main problems are the manipulation of the data on the Tcl side - that could be clumsy - and the performance. Well, here is a small experiment:

  • The manipulation of numerical matrices can be conveniently dealt with via list-like or string-like commands
  • The performance is something that may require more attention, by implementing typical manipulations directly in C.

Discussion: should we try and define a convenient data type for this sort of things? There are several extensions that do similar or somewhat things - Vectcl, TArray, Tensor.

Anyway, here is the code.

# matrix.tcl --
#     Compact matrix data type for storing large amounts of numerical data
#
#     The implementation uses a binary string to store the data. It is significantly
#     slower than an implementation with nested lists, but it takes only 8 bytes
#     to store a double-precision value instead of the 24 bytes of a Tcl_Obj.
#
#     One possible use: avoid the overhead of converting from a Tcl list to a C array
#     when passing the data to and from commands implemented in C.
#
#     TODO:
#     - getsubmatrix
#     - fromlist
#     - unify the allowed types - check the dictionary
#

# math::matrix --
#     Create the namespace
#
namespace eval ::math::matrix {
    variable bytes [dict create a 1 s 2 i 4 w 8 f 4 d 8]
    variable types [dict create a char s short i integer w wide f float d double]
}

# MatrixCreate --
#     Create a compact matrix of given dimensions
#
# Arguments:
#     nrows      Number of rows
#     ncols      Number of columns
#     type       Type of data (char, short, int, wide, float, double)
#                Defaults to double
#
proc ::math::matrix::MatrixCreate {nrows ncols {type double}} {
    if { $type in {c char s short i int integer w wide f float d double} } {
        set code [string range $type 0 0]
    } else {
        return -code error "Unknown type - should one of char, short, int, wide, float or double"
    }

    if { $code == "c" } {
        set code  "a"
        set value " "
    } else {
        set value 0
    }
    set nullValue    [binary format $code $value]
    set binaryString [string repeat $nullValue [expr {$nrows * $ncols}]]

    return [binary format a8wwa* $code $nrows $ncols $binaryString]
}

# MatrixSet --
#     Set an element in the matrix to a given value
#
# Arguments:
#     matrix     Compact matrix
#     row        Row of the element
#     column     Column of the element
#     value      New value for the element
#
proc ::math::matrix::MatrixSet {matrix row column value} {
    variable bytes

    binary scan $matrix aa7ww code dummy nrows ncols

    if { $row < 0 || $row >= $nrows } {
        return -code error "Row out of range - $row - should be 0 ... [expr {$nrows-1}]"
    }
    if { $column < 0 || $column >= $ncols } {
        return -code error "Column out of range - $cloumn - should be 0 ... [expr {$ncols-1}]"
    }

    #
    # Calculate the position in the binary string
    #
    set valueLength  [dict get $bytes $code]

    set position [expr {24 + ($row * $ncols + $column) * $valueLength}]

    return [binary format a*@${position}$code $matrix $value]
}

# MatrixSetRow --
#     Set a row in the matrix using a given list
#
# Arguments:
#     matrix     Compact matrix
#     row        Row of the element
#     list       New values for the row
#
proc ::math::matrix::MatrixSetRow {matrix row list} {
    variable bytes

    binary scan $matrix aa7ww code dummy nrows ncols

    if { $row < 0 || $row >= $nrows } {
        return -code error "Row out of range - $row - should be 0 ... [expr {$nrows-1}]"
    }
    if { [llength $list] != $ncols } {
        return -code error "Number of values ([llength $list]) should match the number of columns ($ncols)"
    }

    #
    # Calculate the position in the binary string
    #
    set valueLength  [dict get $bytes $code]

    set position [expr {24 + $row * $ncols * $valueLength}]

    return [binary format a*@${position}$code* $matrix $list]
}

# MatrixSetColumn --
#     Set a column in the matrix using a given list
#
# Arguments:
#     matrix     Compact matrix
#     column     Column of the element
#     list       New values for the column
#
proc ::math::matrix::MatrixSetColumn {matrix column list} {
    variable bytes

    binary scan $matrix aa7ww code dummy nrows ncols

    if { $column < 0 || $column >= $ncols } {
        return -code error "Column out of range - $column - should be 0 ... [expr {$ncols-1}]"
    }
    if { [llength $list] != $nrows } {
        return -code error "Number of values ([llength $list]) should match the number of rows ($nrows)"
    }

    #
    # Calculate the positions in the binary string
    #
    set valueLength  [dict get $bytes $code]
    set format       "a*"

    for {set row 0} {$row < $nrows} {incr row} {
        set position [expr {24 + ($row * $ncols + $column) * $valueLength}]
        set format   "${format}@${position}$code"
    }

    return [binary format $format $matrix {*}$list]
}

# MatrixGet --
#     Get an element in the matrix
#
# Arguments:
#     matrix     Compact matrix
#     row        Row of the element
#     column     Column of the element
#
proc ::math::matrix::MatrixGet {matrix row column} {
    variable bytes

    binary scan $matrix aa7ww code dummy nrows ncols

    if { $row < 0 || $row >= $nrows } {
        return -code error "Row out of range - $row - should be 0 ... [expr {$nrows-1}]"
    }
    if { $column < 0 || $column >= $ncols } {
        return -code error "Column out of range - $cloumn - should be 0 ... [expr {$ncols-1}]"
    }

    #
    # Calculate the position in the binary string
    #
    set valueLength  [dict get $bytes $code]

    set position [expr {24 + ($row * $ncols + $column) * $valueLength}]

    binary scan $matrix @${position}$code value

    return $value
}

# MatrixGetRow --
#     Get a row from the matrix
#
# Arguments:
#     matrix     Compact matrix
#     row        Row to retrieve
#
proc ::math::matrix::MatrixGetRow {matrix row} {
    variable bytes

    binary scan $matrix aa7ww code dummy nrows ncols

    if { $row < 0 || $row >= $nrows } {
        return -code error "Row out of range - $row - should be 0 ... [expr {$nrows-1}]"
    }

    #
    # Calculate the position in the binary string
    #
    set valueLength  [dict get $bytes $code]

    set position [expr {24 + $row * $ncols * $valueLength}]

    binary scan $matrix @${position}$code$ncols values

    return $values
}

# MatrixGetColumn --
#     Get a column from the matrix
#
# Arguments:
#     matrix     Compact matrix
#     column     Column to retrieve
#
proc ::math::matrix::MatrixGetColumn {matrix column} {
    variable bytes

    binary scan $matrix aa7ww code dummy nrows ncols

    if { $column < 0 || $column >= $ncols } {
        return -code error "Column out of range - $column - should be 0 ... [expr {$ncols-1}]"
    }

    #
    # Calculate the positions in the binary string and retrieve the data
    #
    set valueLength  [dict get $bytes $code]
    set recordLength [dict get $bytes $code]

    set values {}
    for {set row 0} {$row <$nrows} {incr row} {
        set position [expr {24 + ($row * $ncols + $column) * $valueLength}]

        binary scan $matrix @${position}$code value

        lappend values $value
    }

    return $values
}

# MatrixToList --
#     Converts a compact matrix to a nested list
#
# Arguments:
#     matrix     Compact matrix
#
proc ::math::matrix::MatrixToList {matrix} {
    variable bytes

    binary scan $matrix aa7ww code dummy nrows ncols

    set format  "d$ncols"
    set valueLength  [dict get $bytes $code]
    set recordLength [expr {$valueLength * $ncols}]

    set start 24
    set end   [expr {$start + $recordLength - 1}]

    set list  {}

    for {set i 0} {$i < $nrows } {incr i} {

        binary scan [string range $matrix $start $end] $code* values
        lappend list $values

        incr start $recordLength
        incr end   $recordLength
    }

    return $list
}


# matrix --
#     Ensemble like command, dispatching to the specific procs
#
proc ::math::matrix::matrix {subcommand args} {
    variable types

    switch -- $subcommand {
        "create" {
            return [MatrixCreate {*}$args]
        }
        "set" {
            return [MatrixSet {*}$args]
        }
        "setrow" {
            return [MatrixSetRow {*}$args]
        }
        "setcolumn" {
            return [MatrixSetColumn {*}$args]
        }
        "get" {
            return [MatrixGet {*}$args]
        }
        "getrow" {
            return [MatrixGetRow {*}$args]
        }
        "getcolumn" {
            return [MatrixGetColumn {*}$args]
        }
        "type" {
            if { [llength $args] < 1 } {
                return -code error "Error - no matrix given"
            }
            binary scan [lindex $args 0] a type
            return [dict get $types $type]
        }
        "sizes" {
            if { [llength $args] < 1 } {
                return -code error "Error - no matrix given"
            }
            binary scan [lindex $args 0] a8ww dummy nrows ncols
            return [list $nrows $ncols]
        }
        "tolist" {
            return [MatrixToList {*}$args]
        }
        default {
            return -code error "Unknown subcommand - $subcommand"
        }
    }
}

# test --
#     Test the various subcommands
#
set mat  [::math::matrix::matrix create 5 5]

set list [::math::matrix::matrix tolist $mat]
foreach row $list {
    puts $row
}

# Make it an identity matrix
for {set i 0} {$i < 5} {incr i} {
    set mat [::math::matrix::matrix set $mat $i $i 1.0]
}


set list [::math::matrix::matrix tolist $mat]
foreach row $list {
    puts $row
}

#
# Timing? At a size 50x50 the matrix routine to create
# an identity matrix is 15 times as slow: ~300 us versus ~18 us.
#
# It would probably help to be able to set new values in an existing
# binary string, analogous to [lset].
#
proc mkIdent {mat} {
    for {set i 0} {$i < 50} {incr i} {
        set mat [::math::matrix::matrix set $mat $i $i 1.0]
    }
}

proc listIdent {list} {
    for {set i 0} {$i < 50} {incr i} {
        lset list $i $i 1.0
    }
}

set mat  [::math::matrix::matrix create 50 50]
set list [::math::matrix::matrix tolist $mat]

puts [time [list mkIdent $mat] 10000]
puts [time [list listIdent $list] 10000]

#
# Get values ...
#
for {set i 0} {$i < 50} {incr i} {
    set mat [::math::matrix::matrix set $mat $i $i 1.0]
    set mat [::math::matrix::matrix set $mat $i 0  $i]
    set mat [::math::matrix::matrix set $mat 0  $i $i]
}

for {set i 0} {$i < 10} {incr i} {
    puts "$i: [::math::matrix::matrix get $mat 3 $i]"
}

puts "Row 10: [::math::matrix::matrix getrow $mat 10]"
puts "Column 10: [::math::matrix::matrix getcolumn $mat 10]"


#
# Information about the matrix
#
puts "Type:  [::math::matrix::matrix type $mat]"
puts "Sizes: [::math::matrix::matrix sizes $mat]"

#
# Setting a row/column
#
set mat    [::math::matrix::matrix create 10 10]
set values [list 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0]

set mat    [::math::matrix::matrix setrow    $mat 3 $values]
set mat    [::math::matrix::matrix setcolumn $mat 3 $values]

set list [::math::matrix::matrix tolist $mat]
foreach row $list {
    puts $row
}

#
# Timing getting/setting a column
#
# These operations are only 3 to 4 times slower
#
proc listSetColumn {list} {
    for {set i 0} {$i < 50} {incr i} {
        lset list $i 10 1.0
    }
}
proc listGetColumn {list} {
    set values {}
    for {set i 0} {$i < 50} {incr i} {
        lappend values [lindex $list $i 10]
    }
    return $values
}

set mat  [::math::matrix::matrix create 50 50]
set list [::math::matrix::matrix tolist $mat]

set values [lrepeat 50 1.0]

puts "Get a column:"
puts [time [list ::math::matrix::matrix getcolumn $mat 10] 10000]
puts [time [list listGetColumn $list] 10000]

puts "Set a column:"
puts [time [list ::math::matrix::matrix setcolumn $mat 10 $values] 10000]
puts [time [list listSetColumn $list] 10000]