[George Peter Staplin] Oct 10, 2007 - I created an XML parser that is re-entrant, and is designed to not use [regexp] or [string map]. It may be faster, or it might be slower than [TAX: A Tiny API for XML]. I'll eventually time it and post results here.
Here's revision 2:
======
#By George Peter Staplin
set yaxmlp_count 0
proc yaxmlp {} {
global yaxmlp_count
while 1 {
incr yaxmlp_count
set token yaxmlp$yaxmlp_count
if {[info commands $token] eq ""} {
break
}
}
proc $token args "[list yaxmlp-instance $token] \$args"
return $token
}
proc yaxmlp-instance {token arglist} {
global $token
switch -- [lindex $arglist 0] {
handler {
if {3 != [llength $arglist]} {
return -code error "invalid # args: should be: $token handler tag handler-callback"
}
set [set token](handler,[lindex $arglist 1]) [lindex $arglist 2]
}
parse {
yaxmlp-parse $token [lindex $arglist 1]
}
}
}
proc yaxmlp-dispatch {token tagname props body} {
global $token
set cmd [set [set token](handler,$tagname)]
set cmd [linsert $cmd end $token $tagname $props $body]
uplevel #0 $cmd
}
proc yaxmlp-parse-prop-area {token script ivar endvar} {
upvar $ivar i
upvar $endvar end
set GATHERPROP 1
set GATHERPROPNAME 2
set GATHERPROPVALUE 3
set GATHERPROPQUOTE 4
set state $GATHERPROP
set props [list]
for {} {$i < [string length $script]} {incr i} {
set c [string index $script $i]
#puts "PROPAREA:$c STATE:$state"
if {$GATHERPROPVALUE == $state} {
if {"\"" eq $c} {
lappend props $propname $propvalue
set state $GATHERPROP
} else {
append propvalue $c
}
} elseif {$GATHERPROPQUOTE == $state} {
if {[string is space $c]} continue
if {"\"" eq $c} {
set state $GATHERPROPVALUE
}
} elseif {$GATHERPROPNAME == $state} {
if {[string is space $c]} {
continue
} elseif {">" eq $c} {
return $props
} elseif {"=" eq $c} {
set state $GATHERPROPQUOTE
} else {
append propname $c
}
} elseif {$GATHERPROP ==$state} {
if {[string is space $c]} {
set state $GATHERPROPNAME
set propname ""
set propvalue ""
} elseif {"/" eq $c && ">" eq [string index $script [expr {$i + 1}]]} {
set end 1
return $props
} elseif {">" eq $c} {
return $props
}
}
}
return -code error "property area without completing > or />"
}
#Return [list tagname props]
proc yaxmlp-parse-tag-area {token script ivar} {
upvar $ivar i
set GATHERTAG 1
set state $GATHERTAG
set tagname ""
set props ""
set end 0
for {} {$i < [string length $script]} {incr i} {
set c [string index $script $i]
#puts C:$c
if {$GATHERTAG == $state} {
if {">" eq $c} {
return [list $tagname $props $end]
} elseif {[string is space $c]} {
set props [yaxmlp-parse-prop-area $token $script i end]
return [list $tagname $props $end]
} elseif {"/" eq $c && ">" eq [string index $script [expr {$i + 1}]]} {
set end 1
incr i 2
if {[string length $tagname]} {
return [list $tagname $props $end]
}
} else {
append tagname $c
}
}
}
return -code error "tag without closing: > or />"
}
proc yaxmlp-future-match {script i string} {
set subscript [string range $script $i [expr {$i + [string length $string] - 1}]]
return [expr {$subscript eq $string}]
}
proc yaxmlp-parse {token script} {
global $token
#puts "PARSE:$token"
set GATHERTAG 1
set GATHERBODY 2
set state $GATHERTAG
set tagname ""
set line 1
set scriptlen [string length $script]
for {set i 0} {$i < $scriptlen} {incr i} {
set c [string index $script $i]
#puts PARSEC:$c
if {"\n" eq $c} {
incr line
}
if {$GATHERBODY == $state} {
if {"<" eq $c} {
if {[yaxmlp-future-match $script [expr {$i + 1}] /$tagname>]} {
yaxmlp-dispatch $token $tagname $props $body
set tagname ""
set props ""
incr i [string length /$tagname]
set state $GATHERTAG
}
}
if {[string is space -strict [string index $body end]] && [string is space $c]} {
continue
} else {
append body $c
}
} elseif {$GATHERTAG == $state} {
if {"<" eq $c} {
incr i
lassign [yaxmlp-parse-tag-area $token $script i] tagname props end
if {$end} {
#The tag was something like
yaxmlp-dispatch $token $tagname $props ""
set tagname ""
set props ""
set state $GATHERTAG
} else {
set body ""
set state $GATHERBODY
}
}
}
}
}
#----
#Test code (from the TAX page)
set input {
Composed in haste for purposes of demonstration.
This is an indented paragraph. Only the first line
is indented, which you can tell if the paragraph goes
on long enough. ]]>
This is an ordinary paragraph. No line is indented. Not
one. None at all, which you can tell if the paragraph
goes on long enough.
}
proc meta-handler {token tagname props body} {
#puts "$tagname $props $body"
puts "META:$tagname PROPS:$props BODY:$body ENDBODY"
}
proc para-handler {token tagname props body} {
array set par $props
puts PARA
if {[info exists par(indent)]} {
foreach line [split [string trim $body] \n] {
puts [string repeat " " $par(indent)]$line
}
} else {
puts BODY:$body
}
}
set h [yaxmlp]
$h handler meta meta-handler
$h handler para para-handler
$h parse $input
======
<> XML | Parsing