Version 0 of XML as dict structure

Updated 2018-08-24 16:09:36 by wdb

wdb This tool translates XML elements to Tcl dicts where the keys are element, attribute, and content. Key element carries name such as rss; key attribute carries attributes such as href or id; key contents list of contained elements or dicts with key cdata and value text.

  • procedure xmlToDict (src) transforms src to dict structure.
  • procedure getElementByName (dict name) returns first contained element named name.
  • procedure getElementById (dict id) returns element of dict with attribute id.
  • procedure getAllElementsByName (dict name) returns all contained elements named name.

Currently no write procedures. License OLL like always.

#
# file: XmlDict-0.1.tm
#

package provide XmlDict 0.1

namespace eval XmlDict {
  namespace import ::tcl::mathop::+ ::tcl::mathop::-
  namespace export get* xml*
}

proc ::XmlDict::splitByPattern {src {pat {<![CDATA[(?!\]\]>).*?\]\]>}}} {
  set ranges regexp -inline -indices -all $pat $src
  lappend result0 string range $src 0 [- [lindex $ranges 0 0 1]
  foreach\
    range0 lrange $ranges 0 end-1\
    range1 lrange $ranges 1 end {
    lappend result0 string range $src {*}$range0
    lassign $range0 - a
    lassign $range1 b c 
    lappend result0 string range $src [+ $a 1 [- $b 1]
  }
  lappend result0 [string range $src {*}[lindex $ranges end]\
    string range $src [+ [lindex $ranges end end 1] end]
  foreach str $result0 {
    if {$str ne ""} then {
      lappend result $str
    }
  }
  lappend result
}

proc ::XmlDict::splitByMarkup src {
  set src regsub -all {<{1,1}?!--.*?-->} $src ""
  set parts splitByPattern $src
  foreach part $parts {
    if {string match {<![CDATA[*\\]>} $part]} then {
      lappend result $part
    } else {
      lappend result {*}splitByPattern $part {<[^>+>}]
    }
  }
  lappend result
}

proc ::XmlDict::attributes tag {
  set atts0 regexp -inline -all {\S+="[^"*"} $tag]
  set atts1 regexp -inline -all {\S+='[^'*'} $tag]
  lappend result
  foreach attStr $atts0 {
    regexp {^(\S+)="(^"+)"$} $attStr - tag val
    lappend result $tag $val
  }
  foreach attStr $atts1 {
    regexp {^(\S+)='(^'+)'$} $attStr - tag val
    lappend result $tag $val
  }
  set result
}

proc ::XmlDict::tokensDump src { # -- tut nicht!
  lappend result
  foreach part splitByMarkup $src {
    if {!string match <* $part} then {
      lappend result list cdata $part
    } elseif {regexp {^<![CDATA[(.*)\\]>$} $part - cdata]} then {
      lappend result [list cdata [string map {& &amp; < &lt; > &gt;} $cdata]
    } elseif {string match </* $part} then {
      lappend result close
    } elseif {regexp {<([:alnum:]+)[^>*?>} $part - name]} then {
      lappend result [list element $name atts [attributes $part]
      if {string match */> $part} then {
        lappend result close
      }
    }
  }
  set result
}

proc ::XmlDict::dumpToList dump {
  lappend result
  while {llength $dump > 0} {
    set dump lassign $dump first
    switch -exact -- lindex $first 0 {
      element {
        set name dict get $first element
        set atts dict get $first atts
        lassign dumpToList $dump dump cont
        lappend result list element $name attribute $atts content $cont
      }
      cdata {
        lappend result $first
      }
      close {
        return list $dump $result
      }
    }
  }
  list $result $dump
}

proc ::XmlDict::xmlToDict xmlSrc {
  lassign [dumpToList [tokensDump $xmlSrc] dump
  while {llength $dump > 0 &&
         lindex $dump 0 0 ne "element"} {
    set dump lrange $dump 1 end
  }
  lindex $dump 0
}

proc ::XmlDict::childElements tree {
  if {dict exists $tree content} then {
    dict get $tree content
  }
}

proc ::XmlDict::getElementByName {tree name} {
  if {dict exists $tree element &&
      [string match $name [dict get $tree element]} then {
    set tree
  } else {
    foreach child childElements $tree {
      set result getElementByName $child $name
      if {$result ne ""} then {
        return $result
      }
    }
  }
}

proc ::XmlDict::getElementById {tree id} {
  if {dict exists $tree attribute id &&
      [string match $id [dict get $tree attribute id]} then {
    return $tree
  } else {
    foreach child childElements $tree {
      set result getElementById $child $id
      if {$result ne ""} then {
        return $result
      }
    }
  }
}

proc ::XmlDict::getAllElementsByName {tree name} {
  lappend result
  if {dict exists $tree element &&
      [string match $name [dict get $tree element]} then {
    lappend result $tree
  }
  foreach el childElements $tree {
    lappend result {*}getAllElementsByName $el $name
  }
  set result
}

proc ::XmlDict::decodeEntities cdata {
  set entities\
    lsort -unique [regexp -inline -all {&[^;*;} $cdata]
  lappend map "&quot;" \" {*}{&amp; & &apos; ' &lt; <; &gt; >}
  foreach entity $entities {
    if {regexp {&#([:digit:]+);} $entity - dec} then {
      lappend map $entity format %c $dec
    } elseif {regexp {&#x([:xdigit:]+);} $entity - hex} then {
      set dec scan $hex %x
      lappend map $entity format %c $dec
    }
  }
  string map $map $cdata
}

proc ::XmlDict::getCdataFromElement tree {
  set result ""
  foreach el dict get $tree content {
    if {dict exists $el cdata} then {
      append result [decodeEntities [dict get $el cdata]
    } else {
      append result getCdataFromElement $el
    }
  }
  set result
}

namespace import ::XmlDict::*