wdb On base of Another minimalistic XML parser Iʼve built a tiny XML package, intended to be productive, providing these procedures:
% set tree [xml parse $src] type element name rss attribute {} content { ... } % _
parses $src to dictionary $tree like .
% set path [xml findElementById $tree important] 4 0 2 % _
returns path af child element with attribute id="important." $path is used to gain access:
% set element [xml getElement $tree {*}$path] type element name p attribute {id important} content {{type pcdata content hi!}} % _
Simplified for text data:
% set text [xml getText $tree {*}$path] hi! % _
To find all children with name item:
% set itemPaths [xml findElementsByName $tree item] {1 3} {1 4} {1 5} % _
Helpful when debuging:
% xml unparse $element -pp <p id="important"> hi </p> % _
shows $element with proper indentation.
As mentioned above, itʼs intended for production. Iʼve built a RSS processor dealing with feeds from outer world. Badass.
# # file: XML-0.1.tm # # XMl -- minimalistic but working XML parser # usage: package require XML # # xml parse $src ?-space yes|no? # xml getElement $tree ?n1 ?n2 ...?? # xml getText $tree ?n1 ?n2 ...?? # xml findElementById $tree $id # xml findElementsByName $tree $name # xml unparse $tree ?-pp? # package require Tcl 8.6.1 package provide xml 0.1 namespace eval xml namespace export\ parse\ unparse\ getElement\ getText\ findElementById\ findElementsByName namespace eval xml namespace import ::tcl::mathop::+ proc ::xml::parse {src args} { set option [dict merge { -space no } {*}[lmap {a b} $args {list $a $b}]] # remove comments set src [regsub -all <!--{1,1}?.*--> $src ""] # encode <![CDATA[http://www.taz.de//!p4608/]]> set cdataMap [concat {*}[lsort -unique [lmap {a b}\ [regexp -inline -all {<!\[CDATA\[(.*?)\]\]>} $src] { list $a [string map {< < & & > >} $b] }]]] set src [string map $cdataMap $src] # list of tag index pairs set pairs\ [regexp -inline -indices -all {</?[[:alnum:]:-]+[^>]*/?>} $src] # list of tag strings set tagList [lmap pair $pairs {string range $src {*}$pair}] # indices of pcdata set strIdx [lrange [concat {*}$pairs] 1 end-1] # list of pcdata strings set strList {} foreach {i j} $strIdx { lappend strList [string range $src $i+1 $j-1] } # tokens alternating: tag, pcdata, tag, ... set tokens {} foreach tag $tagList str $strList { lappend tokens $tag if {[dict get $option -space]} then { if {$str ne ""} then { lappend tokens $str } } else { if {![string is space $str]} then { lappend tokens [string trim $str] } } } # process list tokensVarToTree tokens } proc ::xml::tokensVarToTree _tokens { lassign [info level 0] recurse upvar $_tokens tokens set openPattern {<([[:alnum:]:-]+)[^>]*>} set emptyPattern {<([[:alnum:]:-]+)[^>]*/>} set closePattern {</([[:alnum:]:-]+)>} set dataPattern {^[^<]} # destructive set tokens [lassign $tokens token] if {[regexp $dataPattern $token]} then { # PCDATA dict set result type pcdata dict set result content $token return $result } elseif {[regexp $openPattern $token - name]} then { # TAG opening or empty dict set result type element dict set result name $name dict set result attribute {} dict set result content "" # attributes foreach {- key val}\ [regexp -inline -all {([[:alnum:]]+)="([^"]*?)"} $token] { dict set result attribute $key $val } foreach {- key val}\ [regexp -inline -all {([[:alnum:]]+)='([^']*?)'} $token] { dict set result attribute $key $val } } if {[regexp $emptyPattern $token]} then { # TAG empty: done return $result } while {![regexp $closePattern [lindex $tokens 0]] && [llength $tokens] > 0} { # TAG non-empty: fill contents dict lappend result content [$recurse tokens] } # remove closing TAG if {[regexp $closePattern [lindex $tokens 0]]} then { set tokens [lrange $tokens 1 end] } set result } proc ::xml::getElement {tree args} { lassign [info level 0] recurse if {$args eq "" || [dict get $tree type] eq "pcdata"} then { set tree } else { set args [lassign $args index] if {$index < [llength [dict get $tree content]]} then { $recurse [lindex [dict get $tree content] $index] {*}$args } } } proc ::xml::findElementById {tree id args} { lassign [info level 0] recurse if {[dict get $tree type] eq "element"} then { set i 0 foreach child [dict get $tree content] { if {[dict get $child type] eq "element" && [dict exists $child attribute id] && [dict get $child attribute id] eq $id} then { return [concat $args $i] } set path [$recurse $child $id {*}$args $i] if {$path ne ""} then { return $path } incr i } } } proc ::xml::findElementsByNameRoutine {tree name args} { lassign [info level 0] recurse if {[dict get $tree type] eq "element"} then { set i 0 foreach child [dict get $tree content] { if {[dict get $child type] eq "element" && [dict get $child name] eq $name} then { yield [concat $args $i] } $recurse $child $name {*}$args $i incr i } } } proc ::xml::findElementsByName {tree name} { lassign [info level 0] recurse set i 1 while {[info commands c$i] ne ""} { incr i } set coroutine c$i coroutine $coroutine apply [list {tree name} { yield [info coroutine] findElementsByNameRoutine $tree $name } [namespace current]] $tree $name set result {} while true { set path [$coroutine] if {$path ne ""} then { lappend result $path } else { return $result } } } proc ::xml::unparse {tree {indent 0} args} { lassign [info level 0] recurse if {![string is digit $indent]} then { lappend args $indent set indent 0 } set result "" if {"-pp" in $args} then { if {$indent > 0} then { append result \n } append result [string repeat " " $indent] } if {[dict get $tree type] eq "pcdata"} then { if {"-pp" in $args} then { append result [string trim [dict get $tree content]] } else { append result [dict get $tree content] } } else { append result <[dict get $tree name] foreach {key val} [dict get $tree attribute] { append result " $key=" if {[string first \u0022 $val] < 0} then { append result \" $val \" } else { append result ' $val ' } } if {[llength [dict get $tree content]] == 0} then { append result " />" } else { append result > foreach child [dict get $tree content] { append result [$recurse $child [+ $indent 1] {*}$args] } if {"-pp" in $args} then { append result \n[string repeat " " $indent] } append result </[dict get $tree name]> } } set result } proc ::xml::decode txt { lappend map "<" < ">" > "&" & """ \" set matches [lsort -unique [regexp -inline -all {&#[0-9]+;} $txt]] foreach match $matches { regexp {([0-9]+)} $match - i lappend map $match [format %c [scan $i %d]] } string map $map $txt } proc ::xml::getText {tree args} { set child [getElement $tree {*}$args] if {[dict get $child type] eq "pcdata"} then { decode [dict get $child content] } elseif {[llength [dict get $child content]] > 0} then { getText $child 0 } } namespace eval xml namespace ensemble create