[WJR] - There was a discussion on the [TclHttpd] mailing list about processing [RSS]. Here's a variant I came up with using [tDOM] and [The Tcler's Wiki] RSS feed (it should work with any RSS feed):
[Doc_Dynamic]
[
package require http
package require tdom
html::set token [http::geturl http://mini.net/tcl/rss.xml]
html::set rss_xml [http::data $token]
html::set doc [dom parse $rss_xml]
html::set root [$doc documentElement]
html::set stories [$root selectNodes /rss/channel/item]
]
[html::head {RSS Example}]
[html::bodyTag]
[html::h1 {RSS Example}]
[html::foreach story $stories {
-
[[$story selectNodes title/text()] nodeValue]
([[$story selectNodes pubDate/text()] nodeValue])
- [[$story selectNodes description/text()] nodeValue]
}]
[html::end]
[tDOM] and [TclHttpd] makes this pretty simple!
----
[DG] -- That little starter script got me going. I was messing with [TclXML] at first, but [tDOM] is a whole lot easier to use. See it in action [http://www.pobox.com/~davygrvy/news.tml]
# tmlrss.tcl --
#
# Process RSS (0.91, 0.92, 0.93, 1.0, 2.0) newsfeeds into
# 4.01 HTML. For use in .tml templates with tclhttpd.
#
# http://www.xml.com/pub/a/2002/12/18/dive-into-xml.html
# http://blogs.law.harvard.edu/tech/rss
# http://www.tdom.org/
#
# Instructions:
# 1) place this file in your tml library directory so
# the server sources it automatically at startup.
# 2) Call TmlRss_GenHTML with the url of the RSS feed
# within your .tml file (set to dynamic generation).
# This function returns the formatted HTML of the feed.
#
# By David Gravereaux
#
package require httpd
package require http
package require html
package require uri
if {[catch {package require tdom} err]} {
return -code error $err
}
namespace eval tmlrss {
variable rss_xml_cache
array set rss_xml_cache {}
}
proc TmlRss_GenHTML {uri {attr {}}} {
set doc [tmlrss::GetDOM $uri]
append html "\n"
append html [tmlrss::genTitleBlock $doc]
append html [tmlrss::genContent $doc]
append html "
\n"
$doc delete
return $html
}
proc tmlrss::GetDOM {uri} {
# Is it cached locally?
if {[isCachedXMLExpired $uri]} {
set doc [fetchXML $uri]
} else {
set doc [dom parse -baseurl [uriBase $uri] $rss_xml_cache($uri)]
}
return $doc
}
# returns the DOM object of the RSS feed.
proc tmlrss::fetchXML {uri} {
variable rss_xml_cache
set token [http::geturl $uri]
if {[catch {http::data $token} xml]} {
set xml {}
}
upvar #0 $token state
array set meta $state(meta)
http::cleanup $token
set doc [dom parse -baseurl [uriBase $uri] $xml]
# Append a download time to the DOM as a comment.
set comment [$doc createComment \
"downloaded: [clock format [clock seconds] -format {%a, %e %b %Y %T GMT} -gmt 1]"]
[$doc documentElement] appendChild $comment
# Append when the server considers it expired to the DOM.
catch {
set comment [$doc createComment "expires: $meta(Expires)"]
[$doc documentElement] appendChild $comment
}
# Save it in the cache.
set rss_xml_cache($uri) [$doc asXML -indent 4]
return $doc
}
proc tmlrss::isCachedXMLExpired {uri} {
variable rss_xml_cache
#TODO: make this work
if {[info exist rss_xml_cache($uri)]} {
set xml $rss_xml_cache($uri)
set doc [dom parse -baseurl [uriBase $uri] $xml]
# TODO: Is it past the TTL (if supported)?
# just return yes, for now.
if {1} {
$doc delete
unset rss_xml_cache($uri)
return true
}
return false
} else {
return true
}
}
proc tmlrss::uriBase {uri} {
array set info [uri::split $uri]
set info(path) [file dirname $info(path)]
return [eval uri::join [array get info]]
}
proc tmlrss::getRSSVersion {doc} {
set root [$doc documentElement]
switch [$root nodeName] {
rss {
if {[$root hasAttribute version]} {
return [$root getAttribute version]
}
# Best guess as most stuff is optional...
return 0.92
}
rdf:RDF {
return 1.0
}
default {
return 0
}
}
}
proc tmlrss::genTitleBlock {doc} {
set root [$doc documentElement]
append html "\n\n"
switch [getRSSVersion $doc] {
0.91 - 0.92 - 0.93 - 2.0 {
append html "[nodeTxt $root /rss/channel/title/text()] | \n"
if {[$root selectNode {/rss/channel/image}] != ""} {
append html " | \n"
}
}
1.0 {
append html "[nodeTxt $root /rdf:RDF/channel/title/text()] | "
}
}
append html "
\n\n"
return $html
}
proc tmlrss::genContent {doc} {
set root [$doc documentElement]
set stories [$root selectNodes /rss/channel/item]
append html "\n"
append html [html::foreach story $stories {
[expr {[$story selectNodes description/text()] != "" ? [fixTxt [[$story selectNodes description/text()] nodeValue]] : ""}]
|
}]
append html "\n"
return $html
}
proc tmlrss::fixUri {uri} {
return [string map { & & < < > > } $uri]
}
proc tmlrss::fixTxt {txt} {
return [string map { & & } $txt]
}
proc tmlrss::nodeTxt {node xpath} {
if {[$node selectNode $xpath] != ""} {
return [[$node selectNode $xpath] nodeValue]
} else {
return ""
}
}
proc tmlrss::ShutDown {} {
}
Httpd_RegisterShutdown tmlrss::ShutDown
package provide tmlrss 0.1
A .tml file to call it would look something like this:
[Doc_Dynamic]
SNews, you lose!
[html::foreach rss_source {
http://www.npr.org/rss/rss.php?topicId=2
http://rss.news.yahoo.com/rss/highestrated
http://mini.net/tcl/rss.xml
http://sourceforge.net/export/rss2_projsummary.php?group_id=10894
http://sourceforge.net/export/rss2_projnews.php?group_id=10894
http://sourceforge.net/export/rss2_projdocs.php?group_id=10894
} {[TmlRss_GenHTML $rss_source "border=1"]}]
[WJR] - Nice, works great on my system! You should consider making this one of TclHttpd's sample apps (a number of apps come with the distribution in the sampleapp subdir).
----
[Category TclHttpd]