if 0 {[phk] 2003-08-18

Let's assume your application is generating HTML pages. [tdom] can help
in a nice way to test the output. Let's get all options from an HTML select tag:}

package require tdom
package require http

# get the html page
set token [http::geturl http://aspn.activestate.com/ASPN/Cookbook/Tcl/]
set data [http::data $token]
# the body has been copied into $data, so release the http state array
http::cleanup $token

# parse the html
set doc [dom parse -html $data]
set root [$doc documentElement]

# get all option nodes
set optionList [$root selectNodes {//select/option}]
set result {}

# loop through all the options, collecting {label value} pairs;
# the label is read from the node following each <option>
foreach option $optionList {
    set text [[$option nextSibling] nodeValue]
    set value [$option getAttribute value]
    lappend result [list $text $value]
}
puts $result

# free the DOM tree now that the result has been extracted
$doc delete

if 0 {which shows all the options

 {{this section} Subsection} {{all ASPN} ASPN} {Products Products} {Recipes Recipes} {News NewsFeeds} {Modules Modules} {{Mailing Lists} Archive} {{The Perl Journal} TPJ} {Reference Reference}

from this html code fragment

 ...
 ...

The result can be used in a [tcltest] proc or however.
Of course the code can be shorter, but I think it explains more this way.
This is my first wiki contribution, any feedback is appreciated.

DMG 20-Aug-2003 asks: Offhand (and this is a general tdom/XML query) why use:

 set text [[$option nextSibling] nodeValue]

versus

 set text [$option text]

??
}

----
Here's something [DG] did trying to inline fix bad HTML from [RSS] newsfeeds, which tends to be the norm from the big news sites these days.

itcl::body newsFeedDecoder::validateHTML {body {norecurse 0}} {
    if {[catch {dom parse -html $body} htmlDoc]} {
        # un-parsable!
        return "$body"
    }
    set htmlRoot [$htmlDoc documentElement]
    if {$htmlRoot == ""} {
        # have arbitrary text, not html..
        return [encTxt $body]
    }
    # Check for partial HTML content where a true root node is missing,
    # but was mis-interpreted (slashdot's rss feed).
    #
    if {!$norecurse && "[string index $body 1]" != "[string index [$htmlRoot nodeName] 0]"} {
        $htmlDoc delete
        return [validateHTML "$body" 1]
    }
    # If the root node is a
# <p>, replace it with a <span> as I don't like
# how it affects the formatting.
#
if {"[$htmlRoot nodeName]" == "p"} {
set newDoc [dom createDocument span]
set newRoot [$newDoc documentElement]
deepCopy $newRoot $htmlRoot
$htmlDoc delete
set htmlDoc $newDoc
set htmlRoot $newRoot
}
set imgNodes [$htmlRoot selectNodes //img]
# make sure all <img> tags have a required alt attribute
foreach imgNode $imgNodes {
if {![$imgNode hasAttribute alt]} {
$imgNode setAttribute alt {}
}
}
# make sure all <img> tags use the title attribute for textual info
foreach imgNode $imgNodes {
if {![$imgNode hasAttribute title] && "[$imgNode @alt]" != ""} {
$imgNode setAttribute title [$imgNode @alt]
}
}
# replace all