[Arjen Markus] (12 june 2016) Following a discussion on comp.lang.tcl regarding wavelets transforms, I decided to put a simple idea to the test: can you use binary strings to store numerical data? Such binary strings are much more compact than Tcl's lists and they can be passed to C-based commands without any conversion (they act as ordinary arrays of integers or floating-point numbers there). The main problems are the manipulation of the data on the Tcl side - that could be clumsy - and the performance. Well, here is a small experiment: * The manipulation of numerical matrices can be conveniently dealt with via list-like or string-like commands * The performance is something that may require more attention, by implementing typical manipulations directly in C. Discussion: should we try and define a convenient data type for this sort of things? There are several extensions that do similar or somewhat things - [Vectcl], [TArray], [Tensor]. Anyway, here is the code. ====== # matrix.tcl -- # Compact matrix data type for storing large amounts of numerical data # # The implementation uses a binary string to store the data. It is significantly # slower than an implementation with nested lists, but it takes only 8 bytes # to store a double-precision value instead of the 24 bytes of a Tcl_Obj. # # One possible use: avoid the overhead of converting from a Tcl list to a C array # when passing the data to and from commands implemented in C. # # TODO: # - getsubmatrix # - fromlist # - unify the allowed types - check the dictionary # # math::matrix -- # Create the namespace # namespace eval ::math::matrix { variable bytes [dict create a 1 s 2 i 4 w 8 f 4 d 8] variable types [dict create a char s short i integer w wide f float d double] } # MatrixCreate -- # Create a compact matrix of given dimensions # # Arguments: # nrows Number of rows # ncols Number of columns # type Type of data (char, short, int, wide, float, double) # Defaults to double # proc ::math::matrix::MatrixCreate {nrows ncols {type double}} { if { $type in {c char s short i int integer w wide f float d double} } { set code [string range $type 0 0] } else { return -code error "Unknown type - should one of char, short, int, wide, float or double" } if { $code == "c" } { set code "a" set value " " } else { set value 0 } set nullValue [binary format $code $value] set binaryString [string repeat $nullValue [expr {$nrows * $ncols}]] return [binary format a8wwa* $code $nrows $ncols $binaryString] } # MatrixSet -- # Set an element in the matrix to a given value # # Arguments: # matrix Compact matrix # row Row of the element # column Column of the element # value New value for the element # proc ::math::matrix::MatrixSet {matrix row column value} { variable bytes binary scan $matrix aa7ww code dummy nrows ncols if { $row < 0 || $row >= $nrows } { return -code error "Row out of range - $row - should be 0 ... [expr {$nrows-1}]" } if { $column < 0 || $column >= $ncols } { return -code error "Column out of range - $cloumn - should be 0 ... [expr {$ncols-1}]" } # # Calculate the position in the binary string # set valueLength [dict get $bytes $code] set position [expr {24 + ($row * $ncols + $column) * $valueLength}] return [binary format a*@${position}$code $matrix $value] } # MatrixSetRow -- # Set a row in the matrix using a given list # # Arguments: # matrix Compact matrix # row Row of the element # list New values for the row # proc ::math::matrix::MatrixSetRow {matrix row list} { variable bytes binary scan $matrix aa7ww code dummy nrows ncols if { $row < 0 || $row >= $nrows } { return -code error "Row out of range - $row - should be 0 ... [expr {$nrows-1}]" } if { [llength $list] != $ncols } { return -code error "Number of values ([llength $list]) should match the number of columns ($ncols)" } # # Calculate the position in the binary string # set valueLength [dict get $bytes $code] set position [expr {24 + $row * $ncols * $valueLength}] return [binary format a*@${position}$code* $matrix $list] } # MatrixSetColumn -- # Set a column in the matrix using a given list # # Arguments: # matrix Compact matrix # column Column of the element # list New values for the column # proc ::math::matrix::MatrixSetColumn {matrix column list} { variable bytes binary scan $matrix aa7ww code dummy nrows ncols if { $column < 0 || $column >= $ncols } { return -code error "Column out of range - $column - should be 0 ... [expr {$ncols-1}]" } if { [llength $list] != $nrows } { return -code error "Number of values ([llength $list]) should match the number of rows ($nrows)" } # # Calculate the positions in the binary string # set valueLength [dict get $bytes $code] set format "a*" for {set row 0} {$row < $nrows} {incr row} { set position [expr {24 + ($row * $ncols + $column) * $valueLength}] set format "${format}@${position}$code" } return [binary format $format $matrix {*}$list] } # MatrixGet -- # Get an element in the matrix # # Arguments: # matrix Compact matrix # row Row of the element # column Column of the element # proc ::math::matrix::MatrixGet {matrix row column} { variable bytes binary scan $matrix aa7ww code dummy nrows ncols if { $row < 0 || $row >= $nrows } { return -code error "Row out of range - $row - should be 0 ... [expr {$nrows-1}]" } if { $column < 0 || $column >= $ncols } { return -code error "Column out of range - $cloumn - should be 0 ... [expr {$ncols-1}]" } # # Calculate the position in the binary string # set valueLength [dict get $bytes $code] set position [expr {24 + ($row * $ncols + $column) * $valueLength}] binary scan $matrix @${position}$code value return $value } # MatrixGetRow -- # Get a row from the matrix # # Arguments: # matrix Compact matrix # row Row to retrieve # proc ::math::matrix::MatrixGetRow {matrix row} { variable bytes binary scan $matrix aa7ww code dummy nrows ncols if { $row < 0 || $row >= $nrows } { return -code error "Row out of range - $row - should be 0 ... [expr {$nrows-1}]" } # # Calculate the position in the binary string # set valueLength [dict get $bytes $code] set position [expr {24 + $row * $ncols * $valueLength}] binary scan $matrix @${position}$code$ncols values return $values } # MatrixGetColumn -- # Get a column from the matrix # # Arguments: # matrix Compact matrix # column Column to retrieve # proc ::math::matrix::MatrixGetColumn {matrix column} { variable bytes binary scan $matrix aa7ww code dummy nrows ncols if { $column < 0 || $column >= $ncols } { return -code error "Column out of range - $column - should be 0 ... [expr {$ncols-1}]" } # # Calculate the positions in the binary string and retrieve the data # set valueLength [dict get $bytes $code] set recordLength [dict get $bytes $code] set values {} for {set row 0} {$row <$nrows} {incr row} { set position [expr {24 + ($row * $ncols + $column) * $valueLength}] binary scan $matrix @${position}$code value lappend values $value } return $values } # MatrixToList -- # Converts a compact matrix to a nested list # # Arguments: # matrix Compact matrix # proc ::math::matrix::MatrixToList {matrix} { variable bytes binary scan $matrix aa7ww code dummy nrows ncols set format "d$ncols" set valueLength [dict get $bytes $code] set recordLength [expr {$valueLength * $ncols}] set start 24 set end [expr {$start + $recordLength - 1}] set list {} for {set i 0} {$i < $nrows } {incr i} { binary scan [string range $matrix $start $end] $code* values lappend list $values incr start $recordLength incr end $recordLength } return $list } # matrix -- # Ensemble like command, dispatching to the specific procs # proc ::math::matrix::matrix {subcommand args} { variable types switch -- $subcommand { "create" { return [MatrixCreate {*}$args] } "set" { return [MatrixSet {*}$args] } "setrow" { return [MatrixSetRow {*}$args] } "setcolumn" { return [MatrixSetColumn {*}$args] } "get" { return [MatrixGet {*}$args] } "getrow" { return [MatrixGetRow {*}$args] } "getcolumn" { return [MatrixGetColumn {*}$args] } "type" { if { [llength $args] < 1 } { return -code error "Error - no matrix given" } binary scan [lindex $args 0] a type return [dict get $types $type] } "sizes" { if { [llength $args] < 1 } { return -code error "Error - no matrix given" } binary scan [lindex $args 0] a8ww dummy nrows ncols return [list $nrows $ncols] } "tolist" { return [MatrixToList {*}$args] } default { return -code error "Unknown subcommand - $subcommand" } } } # test -- # Test the various subcommands # set mat [::math::matrix::matrix create 5 5] set list [::math::matrix::matrix tolist $mat] foreach row $list { puts $row } # Make it an identity matrix for {set i 0} {$i < 5} {incr i} { set mat [::math::matrix::matrix set $mat $i $i 1.0] } set list [::math::matrix::matrix tolist $mat] foreach row $list { puts $row } # # Timing? At a size 50x50 the matrix routine to create # an identity matrix is 15 times as slow: ~300 us versus ~18 us. # # It would probably help to be able to set new values in an existing # binary string, analogous to [lset]. # proc mkIdent {mat} { for {set i 0} {$i < 50} {incr i} { set mat [::math::matrix::matrix set $mat $i $i 1.0] } } proc listIdent {list} { for {set i 0} {$i < 50} {incr i} { lset list $i $i 1.0 } } set mat [::math::matrix::matrix create 50 50] set list [::math::matrix::matrix tolist $mat] puts [time [list mkIdent $mat] 10000] puts [time [list listIdent $list] 10000] # # Get values ... # for {set i 0} {$i < 50} {incr i} { set mat [::math::matrix::matrix set $mat $i $i 1.0] set mat [::math::matrix::matrix set $mat $i 0 $i] set mat [::math::matrix::matrix set $mat 0 $i $i] } for {set i 0} {$i < 10} {incr i} { puts "$i: [::math::matrix::matrix get $mat 3 $i]" } puts "Row 10: [::math::matrix::matrix getrow $mat 10]" puts "Column 10: [::math::matrix::matrix getcolumn $mat 10]" # # Information about the matrix # puts "Type: [::math::matrix::matrix type $mat]" puts "Sizes: [::math::matrix::matrix sizes $mat]" # # Setting a row/column # set mat [::math::matrix::matrix create 10 10] set values [list 1.0 2.0 3.0 4.0 5.0 6.0 7.0 8.0 9.0 10.0] set mat [::math::matrix::matrix setrow $mat 3 $values] set mat [::math::matrix::matrix setcolumn $mat 3 $values] set list [::math::matrix::matrix tolist $mat] foreach row $list { puts $row } # # Timing getting/setting a column # # These operations are only 3 to 4 times slower # proc listSetColumn {list} { for {set i 0} {$i < 50} {incr i} { lset list $i 10 1.0 } } proc listGetColumn {list} { set values {} for {set i 0} {$i < 50} {incr i} { lappend values [lindex $list $i 10] } return $values } set mat [::math::matrix::matrix create 50 50] set list [::math::matrix::matrix tolist $mat] set values [lrepeat 50 1.0] puts "Get a column:" puts [time [list ::math::matrix::matrix getcolumn $mat 10] 10000] puts [time [list listGetColumn $list] 10000] puts "Set a column:" puts [time [list ::math::matrix::matrix setcolumn $mat 10 $values] 10000] puts [time [list listSetColumn $list] 10000] ====== <>Mathematics | Binary Data