if 0 {[phk] 2003-08-18 Let's assume your application generates HTML pages.
[tdom] offers a nice way to test that output.
As an example, let's collect all the options of an HTML select tag:}
package require tdom
package require http
# Fetch an HTML page and print a list of {label value} pairs, one pair for
# every <option> element that sits directly inside a <select> element.

# get the html page
set token [http::geturl http://aspn.activestate.com/ASPN/Cookbook/Tcl/]
set data [http::data $token]
# The body is now held in $data; free the state kept for this request.
http::cleanup $token

# parse the html
set doc [dom parse -html $data]
set root [$doc documentElement]

# get all option nodes
set optionList [$root selectNodes {//select/option}]
set result {}

# loop through all the options
foreach option $optionList {
    # Prefer the text contained in the option itself. When the parser left
    # the option empty (label not enclosed by the element), fall back to the
    # text node that immediately follows it, if there is one.
    set text [$option text]
    if {$text eq ""} {
        set sibling [$option nextSibling]
        if {$sibling ne "" && [$sibling nodeType] eq "TEXT_NODE"} {
            set text [$sibling nodeValue]
        }
    }
    # An option without a value attribute submits its label as its value,
    # so use the label as the default instead of raising an error.
    set value [$option getAttribute value $text]
    lappend result [list $text $value]
}

# The DOM tree is no longer needed; release its memory.
$doc delete

puts $result
if 0 {which shows all the options
{{this section} Subsection}
{{all ASPN} ASPN}
{Products Products}
{Recipes Recipes}
{News NewsFeeds}
{Modules Modules}
{{Mailing Lists} Archive}
{{The Perl Journal} TPJ}
{Reference Reference}
from this html code fragment
...
...
The result can be used in a [tcltest] proc, or in any other way you like.
Of course the code can be shorter, but I think it explains more this way.
This is my first wiki contribution; any feedback is appreciated.
DMG 20-Aug-2003 asks: Offhand (and this is a general tdom/XML query) why use:
set text [[$option nextSibling] nodeValue]
versus
set text [$option text]
??
}
----
Here's something [DG] did trying to inline fix bad HTML from [RSS] newsfeeds, which tends to be the norm from the big news sites these days.
# newsFeedDecoder::validateHTML --
#
#   Parses an HTML fragment with tDOM, repairs a few common markup problems
#   and returns the re-serialized HTML.
#
#   Repairs applied:
#     * every <img> gets an alt attribute (empty if it had none);
#     * an <img> with a non-empty alt but no title gets the alt text moved
#       into title, and alt is emptied;
#     * every <nobr> element is replaced by <span style="white-space: nowrap">
#       holding the same children.
#
# Arguments:
#   body    the HTML (or plain text) to clean up
#
# Results:
#   The repaired HTML. If the input cannot be parsed it is returned
#   unchanged; if parsing yields no document element the input is treated as
#   plain text and returned entity-encoded via encTxt.
itcl::body newsFeedDecoder::validateHTML {body} {
    if {[catch {dom parse -html $body} htmlDoc]} {
        # un-parsable!
        return "$body"
    }
    set htmlRoot [$htmlDoc documentElement]
    if {$htmlRoot eq ""} {
        # have arbitrary text, not html..
        # Free the (empty) document before leaving; otherwise it leaks.
        $htmlDoc delete
        return [encTxt $body]
    }

    # Make sure every <img> has the required alt attribute, and that textual
    # info lives in the title attribute: a non-empty alt on an <img> without
    # a title is moved into title. An <img> that had no alt at all just gets
    # an empty one, so there is nothing to move in that case.
    foreach imgNode [$htmlRoot selectNodes //img] {
        if {![$imgNode hasAttribute alt]} {
            $imgNode setAttribute alt {}
        } elseif {![$imgNode hasAttribute title] && [$imgNode @alt] ne ""} {
            $imgNode setAttribute title [$imgNode @alt]
            $imgNode setAttribute alt {}
        }
    }

    # Replace all <nobr> container elements with the standards compliant
    # <span style="white-space: nowrap">.
    foreach nobrNode [$htmlRoot selectNodes //nobr] {
        set parent [$nobrNode parentNode]
        if {$parent eq ""} {
            # NOTE(review): presumably only possible when <nobr> is the
            # document element itself; there is no parent to swap it in.
            continue
        }
        set newSpan [$htmlDoc createElement span]
        $newSpan setAttribute style "white-space: nowrap"
        # appendChild moves each child out of the <nobr> into the new span.
        foreach child [$nobrNode childNodes] {
            $newSpan appendChild $child
        }
        $parent replaceChild $newSpan $nobrNode
    }

    set html [$htmlDoc asHTML -htmlEntities]
    $htmlDoc delete
    return $html
}
# newsFeedDecoder::encTxt --
#
#   Encodes plain text for inclusion in HTML by replacing the characters
#   & < > " with their character entities.
#
# Arguments:
#   txt     the plain text to encode
#
# Results:
#   The text with & < > " replaced by &amp; &lt; &gt; &quot;. string map
#   works in a single pass, so the & introduced by one replacement is never
#   encoded a second time.
itcl::body newsFeedDecoder::encTxt {txt} {
    return [string map {& &amp; < &lt; > &gt; \" &quot;} $txt]
}
----
[Category XML]