George Peter Staplin Oct 10, 2007 - I created an XML parser that is re-entrant, and is designed to not use regexp or string map. It may be faster, or it might be slower than TAX: A Tiny API for XML. I'll eventually time it and post results here.
Here's revision 2:
#By George Peter Staplin set yaxmlp_count 0 proc yaxmlp {} { global yaxmlp_count while 1 { incr yaxmlp_count set token yaxmlp$yaxmlp_count if {[info commands $token] eq ""} { break } } proc $token args "[list yaxmlp-instance $token] \$args" return $token } proc yaxmlp-instance {token arglist} { global $token switch -- [lindex $arglist 0] { handler { if {3 != [llength $arglist]} { return -code error "invalid # args: should be: $token handler tag handler-callback" } set [set token](handler,[lindex $arglist 1]) [lindex $arglist 2] } parse { yaxmlp-parse $token [lindex $arglist 1] } } } proc yaxmlp-dispatch {token tagname props body} { global $token set cmd [set [set token](handler,$tagname)] set cmd [linsert $cmd end $token $tagname $props $body] uplevel #0 $cmd } proc yaxmlp-parse-prop-area {token script ivar endvar} { upvar $ivar i upvar $endvar end set GATHERPROP 1 set GATHERPROPNAME 2 set GATHERPROPVALUE 3 set GATHERPROPQUOTE 4 set state $GATHERPROP set props [list] for {} {$i < [string length $script]} {incr i} { set c [string index $script $i] #puts "PROPAREA:$c STATE:$state" if {$GATHERPROPVALUE == $state} { if {"\"" eq $c} { lappend props $propname $propvalue set state $GATHERPROP } else { append propvalue $c } } elseif {$GATHERPROPQUOTE == $state} { if {[string is space $c]} continue if {"\"" eq $c} { set state $GATHERPROPVALUE } } elseif {$GATHERPROPNAME == $state} { if {[string is space $c]} { continue } elseif {">" eq $c} { return $props } elseif {"=" eq $c} { set state $GATHERPROPQUOTE } else { append propname $c } } elseif {$GATHERPROP ==$state} { if {[string is space $c]} { set state $GATHERPROPNAME set propname "" set propvalue "" } elseif {"/" eq $c && ">" eq [string index $script [expr {$i + 1}]]} { set end 1 return $props } elseif {">" eq $c} { return $props } } } return -code error "property area without completing > or />" } #Return [list tagname props] proc yaxmlp-parse-tag-area {token script ivar} { upvar $ivar i set GATHERTAG 1 set state $GATHERTAG set tagname "" set props "" set end 0 for {} {$i < [string length $script]} {incr i} { set c [string index $script $i] #puts C:$c if {$GATHERTAG == $state} { if {">" eq $c} { return [list $tagname $props $end] } elseif {[string is space $c]} { set props [yaxmlp-parse-prop-area $token $script i end] return [list $tagname $props $end] } elseif {"/" eq $c && ">" eq [string index $script [expr {$i + 1}]]} { set end 1 incr i 2 if {[string length $tagname]} { return [list $tagname $props $end] } } else { append tagname $c } } } return -code error "tag without closing: > or />" } proc yaxmlp-future-match {script i string} { set subscript [string range $script $i [expr {$i + [string length $string] - 1}]] return [expr {$subscript eq $string}] } proc yaxmlp-parse {token script} { global $token #puts "PARSE:$token" set GATHERTAG 1 set GATHERBODY 2 set state $GATHERTAG set tagname "" set line 1 set scriptlen [string length $script] for {set i 0} {$i < $scriptlen} {incr i} { set c [string index $script $i] #puts PARSEC:$c if {"\n" eq $c} { incr line } if {$GATHERBODY == $state} { if {"<" eq $c} { if {[yaxmlp-future-match $script [expr {$i + 1}] /$tagname>]} { yaxmlp-dispatch $token $tagname $props $body set tagname "" set props "" incr i [string length /$tagname] set state $GATHERTAG } } if {[string is space -strict [string index $body end]] && [string is space $c]} { continue } else { append body $c } } elseif {$GATHERTAG == $state} { if {"<" eq $c} { incr i lassign [yaxmlp-parse-tag-area $token $script i] tagname props end if {$end} { #The tag was something like <foo bar="something"/> yaxmlp-dispatch $token $tagname $props "" set tagname "" set props "" set state $GATHERTAG } else { set body "" set state $GATHERBODY } } } } } #---- #Test code (from the TAX page) set input { <meta author="Anne Onymous"/> <meta> Composed in haste for purposes of demonstration. </meta> <para indent="3"> This is an indented paragraph. Only the first line is indented, which you can tell if the paragraph goes on long enough. <![CDATA[<exampletag "Hi!">]]> <![CDATA[\example\path]]> </para> <para> This is an ordinary paragraph. No line is indented. Not one. None at all, which you can tell if the paragraph goes on long enough. </para> } proc meta-handler {token tagname props body} { #puts "$tagname $props $body" puts "META:$tagname PROPS:$props BODY:$body ENDBODY" } proc para-handler {token tagname props body} { array set par $props puts PARA if {[info exists par(indent)]} { foreach line [split [string trim $body] \n] { puts [string repeat " " $par(indent)]$line } } else { puts BODY:$body } } set h [yaxmlp] $h handler meta meta-handler $h handler para para-handler $h parse $input