MS Excel XML to tablelist

The question was raised @ [L1 ] how to create tables in the UI from an XML file created by MS Excel.

The original MS Excel XML file is to be found here MS Excel XML to tablelist XML file.

Here is one solution to read the XML file using tdom, parsing its nodes to extract wanted/needed information and to build a BWidget NoteBook and tablelist based visualisation. Note, that this solution targets the custom XML format used by Excel in Microsoft Office XP and Office 2003. Newer versions of Excel use the Office Open XML format.

!! The solution needs tcl 8.5 !!


 package require Tcl 8.5
 package require Tk;
 package require tdom;
 package require BWidget;
 package require tablelist;

 namespace import ::tablelist::*;

 #-----------------------------------------------------------------------------
 # helper procedures
 #
 proc ExcelDateToClock {excelDate} {
     return [clock scan $excelDate -format {%Y-%m-%dT%H:%M:%SZ}];
 }

 # syntax: processNodes nodeToken ?commandPrefix? ?arg arg ...?
 # desc:   this procedure uses the given node name as command name
 #         probably prepending the commandPrefix to process the
 #         children of the given node
 #
 proc processNodes {node args} {
     # separate the command prefix and the optional arguments
     #
     if {[llength $args] > 0} {
         set args    [lassign $args commandPrefix];
     } else {
         set commandPrefix   "";
     }

     # loop over all child nodes, working currently only on ELEMENT_NODE's
     #
     set result      [dict create];
     set child       [$node firstChild];
     set lastChild   [$node lastChild];

     while {1} {
         if {[$child nodeType] eq "ELEMENT_NODE"} {
             # build the procedure/command name
             #
             set procedureName   ${commandPrefix}[$child nodeName];

             if {[llength [info procs $procedureName]] == 1} {
                 # execute the procedure/command to process the child node
                 #
                 set childData   [$procedureName $child {*}$args];

                 # use the results
                 #
                 if {[llength $childData] == 1} {
                     set childData   [list [$child nodeName] $childData];
                 }

                 foreach {token data} $childData {
                     if {[dict exists $result $token] == 1} {
                         dict lappend result $token $data;
                     } else {
                         dict set result $token $data;
                     }
                 }
             }
         }

         if {$child eq $lastChild} {
             break;
         }

         set child   [$child nextSibling];
     }

     return $result;
 }

 proc tableSort {args} {
     lassign $args first second;

     if {$first eq $second} {
         return 0;
     }

     set args    [lsort -dictionary $args];

     if {[lindex $args 0] eq $second} {
         return 1;
     }

     return -1;
 }

 proc createTable {w tableName tableLabel columns} {
     set w               [$w insert end $tableName -text $tableLabel];
     set columnsCount    [expr {[llength $columns] / 2}];

     labelframe $w.lf1   -text $tableLabel;
         set table   $w.lf1.tlSheet

         tablelist   $w.lf1.tlSheet  \
             -listvariable       ::data($tableName) \
             -height             20 \
             -width              100 \
             -stretch            all \
             -selectmode         browse \
             -selecttype         cell \
             -movablerows        1 \
             -movablecolumns     1 \
             -resizablecolumns   1 \
             -showseparators     1 \
             -showarrow          1 \
             -labelcommand       ::tablelist::sortByColumn \
             -labelcommand2      ::tablelist::addToSortColumns \
             -sortcommand        tableSort \
             -columns            $columns \
             -xscrollcommand     [list $w.lf1.sbx set] \
             -yscrollcommand     [list $w.lf1.sby set];
         scrollbar $w.lf1.sby \
             -orient     vertical \
             -command    [list $w.lf1.tlSheet yview];
         scrollbar $w.lf1.sbx \
             -orient     horizontal \
             -command    [list $w.lf1.tlSheet xview];

         $w.lf1.tlSheet columnconfigure 0 \
             -showlinenumbers    1 \
             -resizable          0 \
             -stretchable        0;

         for {set index 1} {$index < $columnsCount} {incr index} {
             $w.lf1.tlSheet columnconfigure $index -maxwidth 0;
         }

         if {[$w.lf1.tlSheet columncount] >= 2} {
             $w.lf1.tlSheet sortbycolumn 1 -increasing;
         }
     grid $w.lf1.tlSheet  -column 0   -row 0  -sticky nesw    -padx {3 0} -pady {3 0};
     grid $w.lf1.sby      -column 1   -row 0  -sticky nes     -padx {0 3} -pady {3 0};
     grid $w.lf1.sbx      -column 0   -row 1  -sticky new     -padx {3 0} -pady {0 3};
     grid columnconfigure    $w.lf1 0    -weight 1;
     grid rowconfigure       $w.lf1 0    -weight 1;

     pack $w.lf1  -side top   -fill both  -expand 1   -padx 3 -pady 3;

     return;
 }

 #-----------------------------------------------------------------------------
 # node processing procedures
 #
 proc DocumentProperties {node} {
     return [list documentProperties [processNodes $node DocumentProperties.]];
 }

 proc DocumentProperties.Author {node} {
     return [list author [$node text]];
 }

 proc DocumentProperties.Created {node} {
     return [list ctime [ExcelDateToClock [$node text]]];
 }

 proc DocumentProperties.Company {node} {
     return [list company [$node text]];
 }

 proc DocumentProperties.Version {node} {
     return [list version [$node text]];
 }

 proc Worksheet {node} {
     # accessing the name attribute to get the worksheet name
     #
     set worksheetName   [$node getAttribute ss:Name];
     set tableData       [processNodes $node Worksheet. $worksheetName];

     if {[llength $tableData] == 0} {
         # there was a worksheet without data, so a dummy data structure
         # must be created
         #
         set tableData   [list \
             $worksheetName \
             [dict create \
                 attributes      [dict create] \
                 data            [list] \
                 rowsCount       0 \
                 columnsCount    0 \
             ] \
         ];
     }

     return [dict create \
         worksheets $worksheetName \
         {*}$tableData \
     ];
 }

 proc Worksheet.Table {node worksheetName} {
     # handling the table attributes
     #
     set attributes  [dict create];

     foreach attribute [$node attributes] {
         lassign $attribute attributeName "" attributeNameSpaceUri;

         set attributeValue  [$node getAttributeNS $attributeNameSpaceUri $attributeName];

         dict set attributes $attributeName $attributeValue;

         switch -exact -- $attributeName {
             ExpandedColumnCount {
                 set columnsCount    $attributeValue;
             }
             ExpandedRowCount    {
                 set rowsCount   $attributeValue;
             }
         }
     }

     # collecting the table data
     #
     set tableData   [list];

     foreach rowNode [$node getElementsByTagName Row] {
         set rowData [list];

         foreach cellNode [$rowNode getElementsByTagName Cell] {
             set dataNode    [$cellNode getElementsByTagName Data];

             lappend rowData [$dataNode text];
         }

         lappend tableData $rowData;
     }

     return [list \
         $worksheetName \
         [dict create \
             attributes      $attributes \
             rowsCount       $rowsCount \
             columnsCount    $columnsCount \
             data            $tableData \
         ] \
     ];
 }

 #-----------------------------------------------------------------------------
 # main procedure
 #
 proc main {argc args} {
     wm withdraw .;

     if {$argc != 1} {
         tk_messageBox \
             -parent    . \
             -icon      error \
             -title     "wrong # args" \
             -message   "wrong # args: should be \"[file rootname [file tail [info nameofexecutable]]] xmlFileName\"";
         exit 1;
     }

     if {[catch {
         # read the xml file
         #
         set xmlFile     [lindex $args 0];
         set xml         [tDOM::xmlReadFile $xmlFile]

         # parse the xml data
         #
         set document    [dom parse $xml];
     } reason] == 1} {
         tk_messageBox \
             -parent    . \
             -icon      error \
             -title     "invalid XML file or data" \
             -message   "couldn't load and/or parse the xml file ...\n\n... \"[file nativename $xmlFile]\":\n\n$reason";
         exit 2;
     }

     # process the parsed XML data
     #
     set root               [$document documentElement];
     set processedContents  [processNodes $root];

     if {[dict exists $processedContents worksheets] == 0} {
         error "no such defined worksheets";
     }

     # cleanup the xml document tree
     #
     $document delete;

     # create tables using the tablelist package,
     # inside the BWidget NoteBook widget
     #
     wm title . "MS Excel to tablelist";

     NoteBook .worksheets -homogeneous 1;

     # create the NoteBook page with the read document properties
     #
     set columns [list \
         0 "" \
         0 "Property Name" \
         0 "Property Value" \
     ];

     set ::data(properties)  [list];

     foreach {token value} [dict get $processedContents documentProperties] {
         if {$token eq "ctime"} {
             set value   [clock format $value -format {%Y-%m-%d, %T}];
         }

         lappend ::data(properties)  [list "" $token $value];
     }

     createTable .worksheets properties "Document Properties" $columns;

     # create each worksheet table as NoteBook page
     #
     foreach worksheet [dict get $processedContents worksheets] {
         set table               [dict get $processedContents $worksheet];
         set ::data($worksheet)  [dict get $table data];

         # create the columns description list
         #
         set columnsCount    [dict get $table columnsCount];
         set A               [scan "A" {%c}];
         set columns         [list 0 ""];

         for {set index 0} {$index < $columnsCount} {incr index} {
             lappend columns 0 [format {%c} [expr {$index + $A}]];
         }

         # prepare the data to be able to contain the line numbers column
         #
         set rowIndex    0;

         foreach row $::data($worksheet) {
             lset ::data($worksheet) $rowIndex [linsert $row -1 ""];

             incr rowIndex;
         }

         # add a new page to the NoteBook containing the current worksheet
         #
         createTable .worksheets $worksheet $worksheet $columns;
     }

     pack .worksheets -side top   -fill both  -expand 1   -padx 3 -pady 3;

     .worksheets raise properties;

     update;

     wm minsize . 640 480;
     wm deiconify .;

     grab  .worksheets;
     focus .worksheets;
     raise .;

     return;
 }

 main $argc $argv;

LV Note that Tcl does not require statements to be terminated by semi-colon. Having them just adds a number of additional characters to be transferred and interpreted.

Shrug


male I'm just used to them, so I forgot to remove them. But ... please don't start a discussion if its good or not to use semi-colons. For me it's just a habit, that helps a lot while developing in C, C++ and tcl.