Humble beginnings of a data analysis tool

Arjen Markus (15 September 2009) The program below displays a table of data like this one:

     X      Y      Z
   1.0    1.0    2.0
   2.0    1.0   10.0
   2.1    1.0    3.0
   4.1    3.0    3.0
  10.0   -1.0    3.0

It will also display a simple XY-plot of these data and some statistics.

I have been contemplating a program such as this for some time now. The requirements are:

  • Import simple data files, but with a wide variety of formats (now: space, comma, semicolon and tab are allowed with or without a line with the column names)
  • Select which columns to show (not yet)
  • Basic stuff is automatically done (yes)

Here is the current program:

# dataview.tcl --
#     Program to examine data with minimal effort
#
#     The program reads simple columnwise text files
#     and displays the contents as both graphs and
#     tables. It also allows for new columns to be
#     added as expressions of other columns.
#
package require Plotchart
package require Tablelist
package require math::statistics

# openFile --
#     Let the user pick a data file and show its contents
#
# Arguments:
#     None
#
# Returns:
#     Nothing
#
# Side effects:
#     If a file name was given: the data are read, the plot, the
#     table and the statistics are refreshed and the title of the
#     main window is set to the name of the file
#
proc openFile {} {

    set chosen [tk_getOpenFile -title "Select a data file"]

    #
    # No file name, nothing to do
    #
    if { $chosen == "" } {
        return
    }

    readData $chosen
    plotData
    fillTable
    fillStatistics

    wm title . "Data: [file tail $chosen]"
}

# readData --
#     Import the data from a given file
#
# Arguments:
#     filename       Name of the file to read
#
# Returns:
#     Nothing
#
# Side effects:
#     The global variables are filled afresh:
#     data           List of rows, each row a list of fields
#     title          List of column names
#     number         Number of columns (taken from the first line)
#     selected       One flag per column, all set to 1
#
# Notes:
#     - The first non-blank line determines the separator: a comma,
#       semicolon or tab if one occurs in it, otherwise whitespace.
#     - If that line contains a letter, it is taken to be the header
#       with the column names. Otherwise the columns are named A, B, ...
#     - Blank lines are skipped. An empty file gives empty data.
#
proc readData {filename} {
    global data
    global title
    global selected
    global number

    #
    # Read the file in one go and close it before parsing, so that
    # the channel is released even if reading fails
    #
    set infile [open $filename r]
    set failed [catch {read $infile} content]
    close $infile
    if { $failed } {
        return -code error $content
    }

    #
    # Start from scratch - in particular "selected" must not keep
    # the flags from a previously opened file
    #
    set data     {}
    set title    {}
    set selected {}
    set number   0

    set lines {}
    foreach line [split $content \n] {
        if { [string trim $line] ne "" } {
            lappend lines $line
        }
    }
    if { [llength $lines] == 0 } {
        return
    }

    #
    # The first line determines the separator
    #
    set first [lindex $lines 0]
    if { [regexp {[,;\t]} $first separator] == 0 } {
        set separator " "
    }

    #
    # Split all lines into fields. For whitespace-separated files
    # any run of whitespace counts as a single separator
    #
    set rows {}
    foreach line $lines {
        if { $separator ne " " } {
            lappend rows [split $line $separator]
        } else {
            lappend rows [regexp -all -inline {\S+} $line]
        }
    }

    set number [llength [lindex $rows 0]]

    #
    # The first line might be a header
    #
    if { [regexp {[A-Za-z]} $first] } {
        set title [lindex $rows 0]
        set data  [lrange $rows 1 end]
    } else {
        set data    $rows
        set letters {A B C D E F G H I J K L M N O P Q R S T U V W X Y Z}
        for {set i 0} {$i < $number} {incr i} {
            if { $i < [llength $letters] } {
                lappend title [lindex $letters $i]
            } else {
                # Ran out of letters - fall back to a numbered name
                lappend title "Col[expr {$i + 1}]"
            }
        }
    }

    for {set i 0} {$i < $number} {incr i} {
        lappend selected 1
    }
}

# plotData --
#     Plot all selected data
#
# Arguments:
#     None
#
# Returns:
#     Nothing
#
# Side effects:
#     The canvas .main.c is cleared and, if there is anything to
#     plot, filled with an XY-plot: the first column is used as the
#     x-coordinate, every other selected column is a separate series
#
# Notes:
#     Cells that are missing or not numerical are ignored, both for
#     the scaling and for the plot itself.
#
proc plotData {} {
    global data
    global selected

    #
    # Determine the scaling
    #
    set xcol 0
    set xmin {}
    set xmax {}
    set ymin {}
    set ymax {}

    foreach row $data {
        set colno -1
        foreach col $row include $selected {
            incr colno

            # "include" is empty if the row has more fields than there are flags
            if { $include eq "" || !$include } {
                continue
            }
            # Comparing a non-numerical cell would silently turn into a string comparison
            if { ![string is double -strict $col] } {
                continue
            }

            if { $colno == $xcol } {
                if { $xmin eq "" || $col < $xmin } {
                    set xmin $col
                }
                if { $xmax eq "" || $col > $xmax } {
                    set xmax $col
                }
            } else {
                if { $ymin eq "" || $col < $ymin } {
                    set ymin $col
                }
                if { $ymax eq "" || $col > $ymax } {
                    set ymax $col
                }
            }
        }
    }

    #
    # Remove the previous plot, otherwise the new one is drawn on top of it
    #
    .main.c delete all

    if { $xmin eq "" || $ymin eq "" } {
        # No x-values or no y-values at all: nothing to plot
        return
    }

    set xscale [::Plotchart::determineScale $xmin $xmax]
    set yscale [::Plotchart::determineScale $ymin $ymax]

    #
    # Build the plot
    #
    set p [::Plotchart::createXYPlot .main.c $xscale $yscale]

    set series 0
    foreach colour {black blue green red magenta cyan} {
        $p dataconfig data$series -colour $colour
        incr series
    }

    foreach row $data {
        set xvalue [lindex $row $xcol]
        set colno  -1
        set series 0
        foreach col $row include $selected {
            incr colno
            if { $include eq "" || !$include || $colno == $xcol } {
                continue
            }
            if { [string is double -strict $xvalue] && [string is double -strict $col] } {
                $p plot data$series $xvalue $col
            }
            # Always count the series, so that a column keeps its colour
            incr series
        }
    }

}

# createTable --
#     Set up the (still empty) table for the data, with scrollbars
#
# Arguments:
#     None
#
# Returns:
#     Nothing
#
# Side effects:
#     The frame .main.table is created, holding the tablelist
#     widget .main.table.t and the two scrollbars
#
proc createTable {} {

    set frm [frame .main.table]
    set tbl $frm.t

    scrollbar $frm.h -orient horizontal -command [list $tbl xview]
    scrollbar $frm.v -orient vertical   -command [list $tbl yview]

    tablelist::tablelist $tbl \
        -xscroll [list $frm.h set] \
        -yscroll [list $frm.v set] \
        -width 80 -height 18 \
        -stripebackground #DDDDFF

    #
    # Table and vertical scrollbar side by side, the horizontal
    # scrollbar underneath
    #
    grid $tbl $frm.v -sticky news
    grid $frm.h      -sticky news
}

# fillTable --
#     Show the data that were read in the table window
#
# Arguments:
#     None
#
# Returns:
#     Nothing
#
# Side effects:
#     The contents of .main.table.t are replaced: the columns are
#     taken from the global variable title, the rows from data
#
proc fillTable {} {
    global data
    global title

    set tbl .main.table.t

    $tbl delete 0 end

    #
    # One pair per column: the width (always 0 here) and the heading
    #
    set layout {}
    foreach heading $title {
        lappend layout 0 $heading
    }
    $tbl configure -columns $layout

    foreach record $data {
        $tbl insert end $record
    }
}

# createStatistics --
#     Set up the (still empty) table for the basic statistics,
#     with scrollbars
#
# Arguments:
#     None
#
# Returns:
#     Nothing
#
# Side effects:
#     The frame .main.stat is created, holding the tablelist
#     widget .main.stat.t and the two scrollbars
#
proc createStatistics {} {

    set frm [frame .main.stat]
    set tbl $frm.t

    scrollbar $frm.h -orient horizontal -command [list $tbl xview]
    scrollbar $frm.v -orient vertical   -command [list $tbl yview]

    tablelist::tablelist $tbl \
        -xscroll [list $frm.h set] \
        -yscroll [list $frm.v set] \
        -width 80 -height 18 \
        -stripebackground #DDDDFF

    #
    # Table and vertical scrollbar side by side, the horizontal
    # scrollbar underneath
    #
    grid $tbl $frm.v -sticky news
    grid $frm.h      -sticky news
}

# fillStatistics --
#     Fill the statistics window
#
# Arguments:
#     None
#
# Returns:
#     Nothing
#
# Side effects:
#     The contents of .main.stat.t are replaced: one row per
#     statistic (mean, minimum, maximum, number of values, standard
#     deviation, variance) and one column per data column
#
# Notes:
#     Cells that are missing or not numerical are left out of the
#     computation. A column without any usable value gets empty
#     cells and a count of 0.
#
proc fillStatistics {} {
    global data
    global title
    global number

    .main.stat.t delete 0 end

    set columns {0 Statistic}
    foreach col $title {
        lappend columns 0 $col
    }

    .main.stat.t configure -columns $columns

    #
    # Collect the values per column. Every column gets a list, even
    # if it turns out to be empty (short rows, header-only file)
    #
    for {set i 0} {$i < $number} {incr i} {
        set values($i) {}
    }
    foreach row $data {
        set colno 0
        foreach col $row {
            set col [string trim $col]
            if { [string is double -strict $col] } {
                lappend values($colno) $col
            }
            incr colno
        }
    }

    #
    # Each row of the table starts with the name of the statistic.
    # The names are in the order in which basic-stats returns the values
    #
    set rowno 0
    foreach label {Mean Minimum Maximum {Number of values} {Standard deviation} Variance} {
        set stat($rowno) [list $label]
        incr rowno
    }

    for {set i 0} {$i < $number} {incr i} {
        if { [llength $values($i)] > 0 } {
            set basics [::math::statistics::basic-stats $values($i)]
        } else {
            set basics [list {} {} {} 0 {} {}]
        }

        set rowno 0
        foreach value $basics {
            lappend stat($rowno) $value
            incr rowno
        }
    }

    for {set i 0} {$i < 6} {incr i} {
        .main.stat.t insert end $stat($i)
    }
}

# createMenu --
#     Build the menu bar of the main window
#
# Arguments:
#     None
#
# Returns:
#     Nothing
#
# Side effects:
#     The menu .menu with the submenus "File" and "View" is created
#     and attached to the main window
#
proc createMenu {} {

    set bar      [menu .menu      -tearoff 0]
    set fileMenu [menu .menu.file -tearoff 0]
    set viewMenu [menu .menu.view -tearoff 0]

    #
    # File: open a data file or leave the program
    #
    $bar      add cascade -label "File" -menu $fileMenu
    $fileMenu add command -label "Open ..." -command {openFile}
    $fileMenu add separator
    $fileMenu add command -label "Exit" -command {exit}

    #
    # View: what to show of the data
    #
    $bar      add cascade -label "View" -menu $viewMenu
    $viewMenu add command -label "Select"    -command {selectColumns}
    $viewMenu add command -label "Relations" -command {newRelation}

    . configure -menu $bar
}

# main --
#     Build the user-interface: the menu bar and a notebook with
#     three pages (plot, table of data, table of statistics)
#

createMenu

#
# The notebook must exist before its pages are created, as they
# are all children of .main
#
ttk::notebook .main
createTable
createStatistics
canvas .main.c -width 500 -height 300

.main add .main.c     -text "Plot of the data"
.main add .main.table -text "Table"
.main add .main.stat  -text "Statistics"

grid .main -sticky news

AK - 2009-09-28 16:26:09

Hm. The 'Exit' menu entry is apparently a sub-menu, not a menu command (NEM fixed). That makes it unusable. What exactly is the data format? CSV?

LV From the example provided above, I would assume that the data is provided in columns.

AEC The code checks for a comma, semicolon or tab delimiter. If none is found, a space is assumed.