[wdb] This tool translates XML elements to Tcl [dict]s where the keys are element, attribute, and content. Key element carries name such as rss; key attribute carries attributes such as href or id; key contents list of contained elements or dicts with key cdata and value text. * procedure xmlToDict (''src'') transforms ''src'' to [dict] structure. * procedure getElementByName (''dict name'') returns first contained element named ''name''. * procedure getElementById (''dict id'') returns element of ''dict'' with attribute ''id''. * procedure getAllElementsByName (''dict name'') returns all contained elements named ''name''. Currently no write procedures. License [OLL] like always. ====== # # file: XmlDict-0.1.tm # package provide XmlDict 0.1 namespace eval XmlDict { namespace import ::tcl::mathop::+ ::tcl::mathop::- namespace export get* xml* } proc ::XmlDict::splitByPattern {src {pat {).*?\]\]>}}} { set ranges [regexp -inline -indices -all $pat $src] if {$ranges eq ""} then { set src } else { lappend result0 [string range $src 0 [- [lindex $ranges 0 0] 1]] foreach\ range0 [lrange $ranges 0 end-1]\ range1 [lrange $ranges 1 end] { lappend result0 [string range $src {*}$range0] lassign $range0 - a lassign $range1 b c lappend result0 [string range $src [+ $a 1] [- $b 1]] } lappend result0 [string range $src {*}[lindex $ranges end]]\ [string range $src [+ [lindex $ranges end end] 1] end] foreach str $result0 { if {$str ne ""} then { lappend result $str } } lappend result } } proc ::XmlDict::splitByMarkup src { set src [regsub -all {<{1,1}?!--.*?-->} $src ""] set parts [splitByPattern $src] foreach part $parts { if {[string match {} $part]} then { lappend result $part } else { lappend result {*}[splitByPattern $part {<[^>]+>}] } } lappend result } proc ::XmlDict::attributes tag { set atts0 [regexp -inline -all {\S+="[^"]*"} $tag] set atts1 [regexp -inline -all {\S+='[^']*'} $tag] lappend result foreach attStr $atts0 { regexp {^(\S+)="([^"]+)"$} $attStr - tag val lappend result $tag $val } foreach attStr $atts1 { regexp {^(\S+)='([^']+)'$} $attStr - tag val lappend result $tag $val } set result } proc ::XmlDict::tokensDump src { lappend result foreach part [splitByMarkup $src] { if {![string match <* $part]} then { lappend result [list cdata $part] } elseif {[regexp {^$} $part - cdata]} then { lappend result [list cdata [string map {& & < < > >} $cdata]] } elseif {[string match ]*?>} $part - name]} then { lappend result [list element $name atts [attributes $part]] if {[string match */> $part]} then { lappend result close } } } set result } proc ::XmlDict::dumpToList dump { lappend result while {[llength $dump] > 0} { set dump [lassign $dump first] switch -exact -- [lindex $first 0] { element { set name [dict get $first element] set atts [dict get $first atts] lassign [dumpToList $dump] dump cont lappend result [list element $name attribute $atts content $cont] } cdata { lappend result $first } close { return [list $dump $result] } } } list $result $dump } proc ::XmlDict::xmlToDict xmlSrc { lassign [dumpToList [tokensDump $xmlSrc]] dump while {[llength $dump] > 0 && [lindex $dump 0 0] ne "element"} { set dump [lrange $dump 1 end] } lindex $dump 0 } proc ::XmlDict::childElements tree { if {[dict exists $tree content]} then { dict get $tree content } } proc ::XmlDict::getElementByName {tree name} { if {[dict exists $tree element] && [string match $name [dict get $tree element]]} then { set tree } else { foreach child [childElements $tree] { set result [getElementByName $child $name] if {$result ne ""} then { return $result } } } } proc ::XmlDict::getElementById {tree id} { if {[dict exists $tree attribute id] && [string match $id [dict get $tree attribute id]]} then { return $tree } else { foreach child [childElements $tree] { set result [getElementById $child $id] if {$result ne ""} then { return $result } } } } proc ::XmlDict::getAllElementsByName {tree name} { lappend result if {[dict exists $tree element] && [string match $name [dict get $tree element]]} then { lappend result $tree } foreach el [childElements $tree] { lappend result {*}[getAllElementsByName $el $name] } set result } proc ::XmlDict::decodeEntities cdata { set entities\ [lsort -unique [regexp -inline -all {&[^;]*;} $cdata]] lappend map """ \u0022 {*}{& & ' ' < <; > >} foreach entity $entities { if {[regexp {&#([[:digit:]]+);} $entity - dec]} then { lappend map $entity [format %c $dec] } elseif {[regexp {&#x([[:xdigit:]]+);} $entity - hex]} then { set dec [scan $hex %x] lappend map $entity [format %c $dec] } } string map $map $cdata } proc ::XmlDict::getCdataFromElement tree { set result "" foreach el [dict get $tree content] { if {[dict exists $el cdata]} then { append result [decodeEntities [dict get $el cdata]] } else { append result [getCdataFromElement $el] } } set result } namespace import ::XmlDict::* ====== <>XML