ALX 2018-02-05
# Read one worksheet of an .xlsx workbook into the global array "cells"
# (keys are zero-based "row,column") and print every cell that was found.
#
# An .xlsx file is a zip archive; it is mounted with vfs::zip and the XML
# parts are parsed with tDOM.

package require Tcl 8.6
package require vfs::zip
package require tdom 0.9.0-

array set cells {}

# Cell --
#   Store a value in the global "cells" array.
# Arguments:
#   name  A1-style cell reference such as "B7" or "aa12". Characters other
#         than letters and digits (e.g. "$") are ignored.
#   data  Value to store.
# Result:
#   The stored value. The array key is "row,column", both zero-based.
proc Cell { name data } {
    global cells
    set row {}
    set column 0
    foreach char [split [string toupper $name] {}] {
        if {[string match {[A-Z]} $char]} {
            # Column letters are bijective base-26 digits with the most
            # significant letter first: "AB" = 1*26 + 2 = 28.
            set column [expr {$column * 26 + [scan $char %c] - 64}]
        } elseif {[string match {[0-9]} $char]} {
            append row $char
        }
    }
    # Convert the one-based spreadsheet coordinates to zero-based indices.
    incr row -1
    incr column -1
    set cells($row,$column) $data
}

# ReadXlsxPart --
#   Open and parse one XML part of the mounted workbook.
# Arguments:
#   path  Path of the part inside the mounted archive.
# Result:
#   A tDOM document command (the caller must delete it), or an empty string
#   when the part is missing or cannot be parsed.
proc ReadXlsxPart { path } {
    if {[catch {open $path r} fd]} {
        return {}
    }
    try {
        # The parts are read as UTF-8; without this the system encoding is
        # used and non-ASCII text is mangled.
        fconfigure $fd -encoding utf-8
        if {[catch {dom parse [read $fd]} doc]} {
            return {}
        }
        return $doc
    } finally {
        close $fd
    }
}

# RichText --
#   Concatenate the text of a string item (<si> or <is>): either a single
#   <t> child or the <t> children of its rich-text runs <r>. Phonetic runs
#   (<rPh>) are not selected.
# Arguments:
#   node  tDOM node of the string item.
#   ns    Namespace mapping list for selectNodes (prefix X).
# Result:
#   The plain text of the item.
proc RichText { node ns } {
    set text {}
    foreach t [$node selectNodes -namespaces $ns {X:t | X:r/X:t}] {
        append text [$t text]
    }
    return $text
}

set sheet 1
set xlsxFile {MyExcelFile.xlsx}

set mnt [vfs::zip::Mount $xlsxFile xlsx]
try {
    # Shared string table. The index of a string is the position of its
    # <si> element, so every <si> must be counted exactly once, whether it
    # holds plain text, rich-text runs or nothing at all.
    set doc [ReadXlsxPart xlsx/xl/sharedStrings.xml]
    if {$doc ne {}} {
        try {
            set root [$doc documentElement]
            set ns [list X [$root namespaceURI]]
            set idx -1
            foreach si [$root selectNodes -namespaces $ns {/X:sst/X:si}] {
                set sharedStrings([incr idx]) [RichText $si $ns]
            }
        } finally {
            $doc delete
        }
    }

    # Worksheet cells.
    set doc [ReadXlsxPart xlsx/xl/worksheets/sheet${sheet}.xml]
    if {$doc ne {}} {
        try {
            set root [$doc documentElement]
            set ns [list X [$root namespaceURI]]
            foreach cell [$root selectNodes -namespaces $ns {/X:worksheet/X:sheetData/X:row/X:c}] {
                # A missing "t" attribute means a numeric cell.
                set type [$cell getAttribute t n]
                # string(X:v) is empty both when <v> is absent and when it
                # has no text.
                set raw [$cell selectNodes -namespaces $ns {string(X:v)}]
                set value {}
                switch -- $type {
                    n - b - d {
                        if {$raw eq {}} {
                            continue
                        }
                        set value $raw
                    }
                    s {
                        # <v> holds an index into the shared string table.
                        if {$raw eq {}} {
                            continue
                        }
                        if {[info exists sharedStrings($raw)]} {
                            set value $sharedStrings($raw)
                        }
                    }
                    str - e {
                        # Cached formula string result / error code.
                        set value $raw
                    }
                    inlineStr {
                        set isNode [$cell selectNodes -namespaces $ns {X:is}]
                        if {$isNode eq {}} {
                            continue
                        }
                        set value [RichText [lindex $isNode 0] $ns]
                    }
                }
                Cell [$cell getAttribute r] $value
            }
        } finally {
            $doc delete
        }
    }
} finally {
    # Always release the mounted archive, even after an error.
    vfs::zip::Unmount $mnt xlsx
}
array unset -nocomplain sharedStrings

foreach cell [lsort -dictionary [array names cells]] {
    puts "cell $cell = $cells($cell)"
}
WEB: Standard ECMA-376 Office Open XML File Formats [L1 ]
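beware 23/09/2019: It's necessary to set the channel to a Unicode encoding (e.g. fconfigure $fd -encoding utf-8) before reading the sharedStrings.xml file; otherwise non-ASCII strings are read incorrectly.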
If one of the shared strings contains formatting, then there might not be a t node, but rather a set of r nodes. The above code then gets every shared string after the offending string wrong, as the index is off.