[Richard Suchenwirth] 2007-02-05 - The cross-tabulation code at crosstab works like it should, but I very soon didn't like it - the ''main'' proc is too big and has [puts] sprinkled over the place. So I rewrote it in more [functional programming] terms by adding some clean functions operating on matrixes (implemented as lists of row lists), lists, resp. strings. The code hasn't become shorter from that, but better organized, and the functions could be re-used in other situations set usage { usage: crosstab.tcl infile ?-c x,y? ?-sep '\t'? -w 12 Reads the infile (or stdin if infile is "-") for cases Prints on stdout a cross-tabulation for the specified columns. -c x,y : columns to use (1...), default is 1,2 -sep x : column separator, default is ";" -w n : output column width, default is 12 } if {[llength $argv] == 0} {puts stderr $usage; exit 1} proc main argv { getopt argv -sep sep ";" getopt argv -c cols 1,2 getopt argv -w w 12 set infile [lindex $argv 0] if {$infile eq "-"} { set f stdin } elseif {[file exists $infile]} { set f [open $infile] } else {puts stderr "no such file $infile\n$::usage"; exit 1} foreach {x y z} [split $cols ,] break incr x -1; incr y -1 if {$z ne ""} {incr z -1} while {[gets $f line] >= 0} { set fields [split $line $sep] set key [lindex $fields $x],[lindex $fields $y], if {$z ne ""} {append key [lindex $fields $z]} inc N($key) } if {$f ne "stdout"} {close $f} set xs [get_values N 0] set ys [get_values N 1] if {$z ne ""} { set zs [get_values N 2] } else {set zs {{}}} set totals [lrepeat [llength $ys] [lrepeat [llength $xs] 0]] set ylabels [map [list format %-${w}s] [concat $ys Total]] foreach zv $zs { array unset a foreach i [array names N *,$zv] { foreach {p1 p2} [split $i ,] break set a($p1,$p2) $N($i) } set m [marray a $xs $ys] set totals [mexpr $totals + $m] set m [mlabel [msums $m] [concat $xs Total] $ylabels] puts $zv\n[mformat $m $w 1]\n } if {$zs ne "{}"} { set m [mlabel [msums $totals] [concat $xs Total] $ylabels] puts "Grand Total:\n[mformat $m $w 1]\n" } } #------------------------ additional list functions proc lrepeat {n args} { #-- built-in from 8.5 set res {} for {set i 0} {$i<$n} {incr i} {eval lappend res $args} set res } #-- [Map] a function to a list, returning the results proc map {script list} { set res {} foreach i $list {lappend res [eval [linsert $script end $i]]} set res } #-- Sum of a list proc lsum list { set res 0 foreach i $list {set res [expr {$res+$i}]} set res } #-- Apply a binary operator element-wise to two matrixes, giving a third proc mexpr {mat1 op mat2} { set res {} foreach row1 $mat1 row2 $mat2 row "" { foreach col1 $row1 col2 $row2 { lappend row [expr {$col1} $op {$col2}] } lappend res $row } set res } #-- Create a matrix from an array with (x,y) keys proc marray {_arr cols rows} { upvar 1 $_arr arr set res {} foreach row $rows { set outrow {} foreach col $cols {lappend outrow [get arr($col,$row)]} lappend res $outrow } set res } #-- Compute row and columns sums, and add them to a matrix (at right resp. bottom proc msums matrix { set ncol -1 set ncols {} foreach i [lindex $matrix 0] { set [incr ncol] 0 lappend ncols $ncol } set res {} foreach row $matrix { foreach cell $row ncol $ncols { set $ncol [expr {[set $ncol]+$cell}] } lappend res [lappend row [lsum $row]] } set colsums {} foreach i [lindex $matrix 0] ncol $ncols { lappend colsums [set $ncol] } lappend res [lappend colsums [lsum $colsums]] } #-- turn a matrix into a formatted multiline string proc mformat {matrix {w 12} {underline 0}} { set res "" foreach row $matrix line "" { foreach cell $row {append line [format %${w}s $cell]} lappend res $line } if $underline { set length [string length [lindex $res 0]] set res [linsert $res 1 [string repeat - $length]] } join $res \n } #-- Add column and row labels to a matrix proc mlabel {matrix collabels {rowlabels {}}} { #-- Add column and row labels to a matrix set res [list [linsert $collabels 0 {}]] foreach row $matrix label $rowlabels { lappend res [linsert $row 0 $label] } set res } #-- Retrieve a variable's value, if present, else 0 proc get _var { upvar 1 $_var var if {[info exists var]} {set var} else {return 0} } #-- Get the values of (x,y,...) array keys by position proc get_values {_arr pos} { upvar 1 $_arr arr set values {} foreach i [array names arr] { lappend values [lindex [split $i ,] $pos] } lsort -unique $values } #-- See [getopt] for discussion proc getopt {_argv name {_var ""} {default ""}} { upvar 1 $_argv argv $_var var set pos [lsearch -regexp $argv ^$name] if {$pos>=0} { set to $pos if {$_var ne ""} { set var [lindex $argv [incr to]] } set argv [lreplace $argv $pos $to] return 1 } else { if {[llength [info level 0]] == 5} {set var $default} return 0 } } #-- auto-initializing increment (standard in 8.5) proc inc {_var {amount 1}} { upvar 1 $_var var if ![info exists var] {set var 0} incr var $amount } main $argv