wdb Yet another XML parser. Very minimalistic, just 2 (two!) procedures. Recognizes also singleton like <br />.
Procedure xmlToTree builds list of tokens, then calls procedure tokensVarToTree which destructively processes this list, resulting a dictionary.
Exempli gratia: xmlToTree <p>Hi!</p> ⇒ type element name p attribute {} content {{type pcdata content Hi!}}
proc xmlToTree src1 { # remove comments set src [regsub -all <!--{1,1}?.*--> $src1 ""] # list of tag index pairs set pairs\ [regexp -inline -indices -all {</?[[:alnum:]]+[^>]*/?>} $src] # list of tag strings set tagList [lmap pair $pairs {string range $src {*}$pair}] # indices of pcdata set strIdx [lrange [concat {*}$pairs] 1 end-1] # list of pcdata strings set strList {} foreach {i j} $strIdx { lappend strList [string range $src $i+1 $j-1] } # tokens alternating: tag, pcdata, tag, ... set tokens {} foreach tag $tagList str $strList { lappend tokens $tag if {![string is space $str]} then { lappend tokens $str } } # process list tokensVarToTree tokens } proc tokensVarToTree _tokens { upvar $_tokens tokens set openPattern {<([[:alnum:]:]+)[^>]*>} set emptyPattern {<([[:alnum:]:]+)[^>]*/>} set closePattern {</([[:alnum:]:]+)>} set dataPattern {^[^<]} # destructive set tokens [lassign $tokens token] if {[regexp $dataPattern $token]} then { # PCDATA dict set result type pcdata dict set result content $token return $result } elseif {[regexp $openPattern $token - name]} then { # TAG opening or empty dict set result type element dict set result name $name dict set result attribute {} dict set result content "" # attributes foreach {- key val}\ [regexp -inline -all {([[:alnum:]]+)="([^"]*)"} $token] { dict set result attribute $key $val } foreach {- key val}\ [regexp -inline -all {([[:alnum:]]+)='([^']*)'} $token] { dict set result attribute $key $val } } if {[regexp $emptyPattern $token]} then { # TAG empty: done return $result } while {![regexp $closePattern [lindex $tokens 0]] && [llength $tokens] > 0} { # TAG non-empty: fill contents dict lappend result content [tokensVarToTree tokens] } # remove closing TAG set tokens [lrange $tokens 1 end] set result }