Version 1 of XML Tree Walking

Updated 2005-03-21 22:45:22 by NEM

Keith Vetter 2005-03-21 : Recently I had to optimize some code that was using pure-tcl to extract out each element of an XML file in the order it appears in the file. My first step in optimization was simply to use tdom to build a dom tree. Then, the following code does a restartable pre-order traversal of that tree

The first call to :::WalkXML::NextNode with XML data initializes the dom tree. Each subsequent call to ::WalkXML::NextNode returns the next dom node.

Just this optimization alone, without redoing the code to utilize the dom tree, greatly speeded up the processing.

NEM 21Mar05: If you want to walk the elements in the order they appear in the file, might a SAX interface be just what you are after?


 ##+##########################################################################
 #
 # WalkXML -- walks an tdom XML tree with each call to NextNode returning
 # the next node in a pre-order traversal of the dom tree.
 # by Keith Vetter, March 2005
 #

 package require tdom
 namespace eval ::WalkXML { variable stack}

 proc ::WalkXML::NextNode {{XML ""}} {
    variable stack                              ;# List of VISITED ancestors

    if {$XML ne ""} {                           ;# Parse given xml
        set doc [dom parse $XML]
        set root [$doc documentElement]
        set stack [list $root]
        return $root
    }

    while {1} {
        set last [lindex $stack end]            ;# Last already visited node

        set next [$last firstChild]             ;# Try going down the tree
        if {$next ne ""} break

        set stack [lrange $stack 0 end-1]
        set next [$last nextSibling]            ;# Try going across the tree
        if {$next ne ""} break

        while {1} {                             ;# Must go up the tree
            set next [lindex $stack end]        ;# Parent
            set stack [lrange $stack 0 end-1]
            if {$next eq ""} break

            set next [$next nextSibling]        ;# Try parent's sibling
            if {$next ne ""} break
        }
        break
    }
    if {$next ne ""} {
        lappend stack $next
    }
    return $next
 }



 ################################################################
 #
 # Demo code
 #
 set XML {<?xml version="1.0" encoding="ISO-8859-1"?>
    <loc version="1.0" src="Groundspeak">
    <waypoint>
    <name id="GCGPXK"><![CDATA[Playing Poker with the Squirrels by Rino 'n Rinette]]></name>
    <coord lat="40.1548166" lon="-82.5202833"/>
    <type>Geocache</type>
    <link text="Cache Details">http://www.geocaching.com/seek/cache_details.aspx?wp=GCGPXK</link>
    </waypoint><waypoint>
    <name id="GC19DF"><![CDATA[Great Playground Caper by Treasure Hunters Inc.]]></name>
    <coord lat="40.0667166666667" lon="-82.5358"/>
    <type>Geocache</type>
    <link text="Cache Details">http://www.geocaching.com/seek/cache_details.aspx?wp=GC19DF</link>
    </waypoint>
    </loc>
 }


 set node [::WalkXML::NextNode $XML]
 while {$node ne ""} {
    if {[$node nodeType] eq "TEXT_NODE"} {
        puts "\"[$node nodeValue]\""
    } else {
        puts "[$node nodeName]"
    }
    set node [::WalkXML::NextNode]
 }