Version 0 of crosstab again

Updated 2007-02-05 09:09:22 by suchenwi

Richard Suchenwirth 2007-02-05 - The cross-tabulation code at crosstab works like it should, but I very soon didn't like it - the main proc is too big and has puts sprinkled over the place. So I rewrote it in more functional programming terms by adding some clean functions operating on matrixes (implemented as lists of row lists), lists, resp. strings. The code hasn't become shorter from that, but better organized, and the functions could be re-used in other situations

 set usage {
    usage: crosstab.tcl infile ?-c x,y? ?-sep '\t'? -w 12
    Reads the infile (or stdin if infile is "-") for cases
    Prints on stdout a cross-tabulation for the specified columns.
    -c x,y : columns to use (1...), default is 1,2
    -sep x : column separator, default is ";"
    -w   n : output column width, default is 12
 }
 if {[llength $argv] == 0} {puts stderr $usage; exit 1}

 proc main argv {
     getopt argv -sep sep  ";" 
     getopt argv -c   cols 1,2
     getopt argv -w   w    12

     set infile [lindex $argv 0]
     if {$infile eq "-"} {
         set f stdin
     } elseif {[file exists $infile]} {
         set f [open $infile]
     } else {puts stderr "no such file $infile\n$::usage"; exit 1}

     foreach {x y z} [split $cols ,] break
     incr x -1; incr y -1
     if {$z ne ""} {incr z -1}
     while {[gets $f line] >= 0} {
         set fields [split $line $sep]
         set key [lindex $fields $x],[lindex $fields $y],
         if {$z ne ""} {append key [lindex $fields $z]}
         inc N($key)
     }
     if {$f ne "stdout"} {close $f}

     set xs [get_values N 0]
     set ys [get_values N 1]
     if {$z ne ""} {
         set zs [get_values N 2]
     } else {set zs {{}}}
     set totals [lrepeat [llength $ys] [lrepeat [llength $xs] 0]]
     set ylabels [map [list format %-${w}s] [concat $ys Total]]
     foreach zv $zs {
         array unset a
         foreach i [array names N *,$zv] {
             foreach {p1 p2} [split $i ,] break
             set a($p1,$p2) $N($i)
         }
         set m [marray a $xs $ys]
         set totals [mexpr $totals + $m]
         set m [mlabel [msums $m] [concat $xs Total] $ylabels]
         puts $zv\n[mformat $m $w 1]\n
     }
     if {$zs ne "{}"} {
         set m [mlabel [msums $totals] [concat $xs Total] $ylabels]
         puts "Grand Total:\n[mformat $m $w 1]\n"
     }
 }

#------------------------ additional list functions

 proc lrepeat {n args} { #-- built-in from 8.5
    set res {}
    for {set i 0} {$i<$n} {incr i} {eval lappend res $args}
    set res
 }

#-- Map a function to a list, returning the results

 proc map {script list} {
    set res {}
    foreach i $list {lappend res [eval [linsert $script end $i]]}
    set res
 }

#-- Sum of a list

 proc lsum list {
    set res 0
    foreach i $list {set res [expr {$res+$i}]}
    set res
 }

#-- Apply a binary operator element-wise to two matrixes, giving a third

 proc mexpr {mat1 op mat2} {
    set res {}
    foreach row1 $mat1 row2 $mat2 row "" {
        foreach col1 $row1 col2 $row2 {
            lappend row [expr {$col1} $op {$col2}]
        }
        lappend res $row
    }
    set res
 }

#-- Create a matrix from an array with (x,y) keys

 proc marray {_arr cols rows} {
    upvar 1 $_arr arr
    set res {}
    foreach row $rows {
        set outrow {}
        foreach col $cols {lappend outrow [get arr($col,$row)]}
        lappend res $outrow
    }
    set res
 }

#-- Compute row and columns sums, and add them to a matrix (at right resp. bottom

 proc msums matrix {
    set ncol -1
    set ncols {}
    foreach i [lindex $matrix 0] {
        set [incr ncol] 0
        lappend ncols $ncol
    }
    set res {}
    foreach row $matrix {
        foreach cell $row ncol $ncols {
            set $ncol  [expr {[set $ncol]+$cell}]
        }
        lappend res [lappend row [lsum $row]]
    }
    set colsums {}
    foreach i [lindex $matrix 0] ncol $ncols {
        lappend colsums [set $ncol]
    }
    lappend res [lappend colsums [lsum $colsums]]
 }

#-- turn a matrix into a formatted multiline string

 proc mformat {matrix {w 12} {underline 0}} {
    set res ""
    foreach row $matrix line "" {
        foreach cell $row {append line [format %${w}s $cell]}
        lappend res $line
    }
    if $underline {
        set length [string length [lindex $res 0]]
        set res [linsert $res 1 [string repeat - $length]]
    }
    join $res \n
 }

#-- Add column and row labels to a matrix

 proc mlabel {matrix collabels {rowlabels {}}} {
    #-- Add column and row labels to a matrix
    set res [list [linsert $collabels 0 {}]]
    foreach row $matrix label $rowlabels {
        lappend res [linsert $row 0 $label]
    }
    set res
 }
 proc get _var {
     upvar 1 $_var var
     if {[info exists var]} {set var} else {return 0}
 }
 proc get_values {_arr pos} {
     upvar 1 $_arr arr
     set values {}
     foreach i [array names arr] {
         lappend values [lindex [split $i ,] $pos]
     }
     lsort -unique $values
 }
 proc getopt {_argv name {_var ""} {default ""}} {
     upvar 1 $_argv argv $_var var
     set pos [lsearch -regexp $argv ^$name]
     if {$pos>=0} {
         set to $pos
         if {$_var ne ""} {
             set var [lindex $argv [incr to]]
         }
         set argv [lreplace $argv $pos $to]
         return 1
     } else {
         if {[llength [info level 0]] == 5} {set var $default}
         return 0
     }
 }
 proc inc {_var {amount 1}} {
     upvar 1 $_var var
     if ![info exists var] {set var 0}
     incr var $amount
 }

 main $argv