if 0 {[phk] 2003-08-18 Let's assume your application is generating html pages. [tdom] can help in a nice way to test the output. Let's get all options from a html select tag:} package require tdom package require http # get the html page set token [http::geturl http://aspn.activestate.com/ASPN/Cookbook/Tcl/] set data [http::data $token] # parse the html set doc [dom parse -html $data] set root [$doc documentElement] # get all option nodes set optionList [$root selectNodes {//select/option}] set result {} # loop through all the options foreach option $optionList { set text [[$option nextSibling] nodeValue] set value [$option getAttribute value] lappend result [list $text $value] } puts $result if 0 {which shows all the options {{this section} Subsection} {{all ASPN} ASPN} {Products Products} {Recipes Recipes} {News NewsFeeds} {Modules Modules} {{Mailing Lists} Archive} {{The Perl Journal} TPJ} {Reference Reference} from this html code fragment ... ... The result can be used in a [tcltest] proc or however. of course can code can be shorter, but I think it explains more this way. This is my first wiki contribution, any feedback is appreciated DMG 20-Aug-2003 asks: Offhand (and this is a general tdom/XML query) why use: set text [[$option nextSibling] nodeValue] versus set text [$option text] ?? } ---- Here's something [DG] did trying to inline fix bad HTML from [RSS] newsfeeds, which tends to be the norm from the big news sites these days. itcl::body newsFeedDecoder::validateHTML {body {norecurse 0}} { if {[catch {dom parse -html $body} htmlDoc]} { # un-parsable! return "$body" } set htmlRoot [$htmlDoc documentElement] if {$htmlRoot == ""} { # have arbitrary text, not html.. return [encTxt $body] } # Check for partial HTML content where a true root node is missing, # but was mis-interpreted (slashdot's rss feed). # if {!$norecurse && "[string index $body 1]" != "[string index [$htmlRoot nodeName] 0]"} { $htmlDoc delete return [validateHTML "$body" 1] } # If the root node is a

, replace it with a as I don't like # how it affects the formatting. # if {"[$htmlRoot nodeName]" == "p"} { set newDoc [dom createDocument span] set newRoot [$newDoc documentElement] deepCopy $newRoot $htmlRoot $htmlDoc delete set htmlDoc $newDoc set htmlRoot $newRoot } set imgNodes [$htmlRoot selectNodes //img] # make sure all tags have a require alt attribute foreach imgNode $imgNodes { if {![$imgNode hasAttribute alt]} { $imgNode setAttribute alt {} } } # make sure all tags use the title attribute for textual info foreach imgNode $imgNodes { if {![$imgNode hasAttribute title] && "[$imgNode @alt]" != ""} { $imgNode setAttribute title [$imgNode @alt] } } # replace all container elements with standards complient # # set nobrNodes [$htmlRoot selectNodes //nobr] foreach nobrNode $nobrNodes { set parent [$nobrNode parentNode] set newSpan [$htmlDoc createElement span] $newSpan setAttribute style "white-space: nowrap" deepCopy $newSpan $nobrNode $parent replaceChild $newSpan $nobrNode } set html [$htmlDoc asHTML -htmlEntities] $htmlDoc delete return $html } itcl::body newsFeedDecoder::encTxt {txt} { return [string map { & & < < > > \" " } $txt] } itcl::body newsFeedDecoder::deepCopy {to from} { foreach child [$from childNodes] { $to appendChild [$child cloneNode -deep] } } ---- [Category XML]