Version 12 of TclHttpd RSS Processing

Updated 2004-08-26 10:55:28

WJR - There was a discussion on the TclHttpd mailing list about processing RSS. Here's a variant I came up with using tDOM and The Tcler's Wiki RSS feed (it should work with any RSS feed):

 [Doc_Dynamic]

 [
 package require http
 package require tdom

 html::set token [http::geturl http://mini.net/tcl/rss.xml]
 html::set rss_xml [http::data $token]

 html::set doc [dom parse $rss_xml]
 html::set root [$doc documentElement]
 html::set stories [$root selectNodes /rss/channel/item]
 ]

 [html::head {RSS Example}]

 <style type="text/css">
     body {
         font-family: Arial, Helvetica, sans-serif;
         font-size: 80%;
     }
     h1 {
         background: #f7f7f7;
         padding: 4px;
     }
     dt {
         font-size: 120%;
         font-weight: bold;
     }
     dd {
         margin: 10px;
     }
     #date {
         color: #999999;
     }
 </style>

 [html::bodyTag]

 [html::h1 {RSS Example}]

 <dl>
     [html::foreach story $stories {
         <dt>
             <a href="[[$story selectNodes link/text()] nodeValue]">
                 [[$story selectNodes title/text()] nodeValue]
             </a>
             <span class="date">
                 ([[$story selectNodes pubDate/text()] nodeValue])
             </span>
         </dt>
         <dd>[[$story selectNodes description/text()] nodeValue]</dd>
     }]
 </dl>

 [html::end]

tDOM and TclHttpd makes this pretty simple!


DG -- That little starter script got me going. I was messing with TclXML at first, but tDOM is a whole lot easier to use. See it in action [L1 ]

 # tmlrss.tcl --
 #
 #    Process RSS (0.91, 0.92, 0.93, 1.0, 2.0) newsfeeds into
 #    4.01 HTML.  For use in .tml templates with tclhttpd.
 #
 #    http://www.xml.com/pub/a/2002/12/18/dive-into-xml.html
 #    http://blogs.law.harvard.edu/tech/rss
 #    http://www.tdom.org/
 #    
 #    Instructions:
 #        1) place this file in your tml library directory so
 #        the server sources it automatically at startup.
 #        2) Call TmlRss_GenHTML with the url of the RSS feed
 #        within your .tml file (set to dynamic generation).
 #        This function returns the formatted HTML of the feed.
 #
 #    By David Gravereaux <[email protected]>
 #

 package require httpd
 package require http
 package require html
 package require uri
 if {[catch {package require tdom} err]} {
    return -code error $err
 }

 namespace eval tmlrss {
    variable rss_xml_cache
    array set rss_xml_cache {}
 }

 proc TmlRss_GenHTML {uri {attr {}}} {
    set doc [tmlrss::GetDOM $uri]
    append html "<table class=\"newsfeed_table\" $attr>\n"
    append html [tmlrss::genTitleBlock $doc]
    append html [tmlrss::genContent $doc]
    append html "</table>\n"
    $doc delete
    return $html
 }

 proc tmlrss::GetDOM {uri} {

    # Is it cached locally?
    if {[isCachedXMLExpired $uri]} {
        set doc [fetchXML $uri]
    } else {
        set doc [dom parse -baseurl [uriBase $uri] $rss_xml_cache($uri)]
    }

    return $doc
 }

 # returns the DOM object of the RSS feed.
 proc tmlrss::fetchXML {uri} {
    variable rss_xml_cache

    set token [http::geturl $uri]
    if {[catch {http::data $token} xml]} {
        set xml {}
    }
    upvar #0 $token state
    array set meta $state(meta)
    http::cleanup $token
    set doc [dom parse -baseurl [uriBase $uri] $xml]

    # Append a download time to the DOM as a comment.
    set comment [$doc createComment \
        "downloaded: [clock format [clock seconds] -format {%a, %e %b %Y %T GMT} -gmt 1]"]
    [$doc documentElement] appendChild $comment

    # Append when the server considers it expired to the DOM.
    catch {
        set comment [$doc createComment "expires: $meta(Expires)"]
        [$doc documentElement] appendChild $comment
    }

    # Save it in the cache.
    set rss_xml_cache($uri) [$doc asXML -indent 4]

    return $doc
 }

 proc tmlrss::isCachedXMLExpired {uri} {
    variable rss_xml_cache

    #TODO: make this work

    if {[info exist rss_xml_cache($uri)]} {
        set xml $rss_xml_cache($uri)
        set doc [dom parse -baseurl [uriBase $uri] $xml]

        # TODO: Is it past the TTL (if supported)?
        #       just return yes, for now.
        if {1} {
            $doc delete
            unset rss_xml_cache($uri)
            return true
        }
        return false
    } else {
        return true
    }
 }

 proc tmlrss::uriBase {uri} {
    array set info [uri::split $uri]
    set info(path) [file dirname $info(path)]
    return [eval uri::join [array get info]]
 }

 proc tmlrss::getRSSVersion {doc} {
    set root [$doc documentElement]
    switch [$root nodeName] {
        rss {
            if {[$root hasAttribute version]} {
                return [$root getAttribute version]
            }
            # Best guess as most stuff is optional...
            return 0.92
        }
        rdf:RDF {
            return 1.0
        }
        default {
            return 0
        }
    }
 }

 proc tmlrss::genTitleBlock {doc} {
    set root [$doc documentElement]
    append html "<thead>\n<tr>\n"
    switch [getRSSVersion $doc] {
        0.91 - 0.92 - 0.93 - 2.0 {
                append html "<th><a href=\"[fixUri [nodeTxt $root /rss/channel/link/text()]]\">[nodeTxt $root /rss/channel/title/text()]</a></th>\n"
            if {[$root selectNode {/rss/channel/image}] != ""} {
                append html "<th><a href=\"[nodeTxt $root /rss/channel/image/link/text()]\"><img\
                        src=\"[fixUri [nodeTxt $root /rss/channel/image/url/text()]]\"\
                        alt=\"[nodeTxt $root /rss/channel/image/title/text()]\"\
                        [expr {[nodeTxt $root /rss/channel/image/width/text()] != "" ? "width=[nodeTxt $root /rss/channel/image/width/text()]" : ""}]\
                        [expr {[nodeTxt $root /rss/channel/image/height/text()] != "" ? "height=[nodeTxt $root /rss/channel/image/height/text()]" : ""}]></a></th>\n"
            }
        }
        1.0 {
                append html "<th><a href=\"[nodeTxt $root {/rdf:RDF/channel/link/text()}]\">[nodeTxt $root /rdf:RDF/channel/title/text()]</a></th>"
        }
    }
    append html "</tr>\n</thead>\n"
    return $html
 }

 proc tmlrss::genContent {doc} {
    set root [$doc documentElement]
    set stories [$root selectNodes /rss/channel/item]
    append html "<tbody>\n"
    append html [html::foreach story $stories {
        <tr><td colspan=2>
            <div class="headline"><a href="[fixUri [nodeTxt $story link/text()]]">[nodeTxt $story title/text()]</a>[expr {[$story selectNodes pubDate/text()] != "" ? " <span class=\"headline_date\">([[$story selectNodes pubDate/text()] nodeValue])</span>" : ""}]</div>
            <div class="headline_details">[expr {[$story selectNodes description/text()] != "" ? [fixTxt [[$story selectNodes description/text()] nodeValue]] : ""}]</div>
        </td></tr>
        }]
    append html "</tbody>\n"
    return $html
 }

 proc tmlrss::fixUri {uri} {
    return [string map { & &amp; < &lt; > &gt; } $uri]
 }

 proc tmlrss::fixTxt {txt} {
    return [string map { & &amp; } $txt]
 }

 proc tmlrss::nodeTxt {node xpath} {
    if {[$node selectNode $xpath] != ""} {
        return [[$node selectNode $xpath] nodeValue]
    } else {
        return ""
    }
 }

 proc tmlrss::ShutDown {} {
 }

 Httpd_RegisterShutdown tmlrss::ShutDown
 package provide tmlrss 0.1

A .tml file to call it would look something like this:

 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/strict.dtd">
 [Doc_Dynamic]
 <html>
 <head>
 <title>SNews, you lose!</title>
 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1"> 
 <style type="text/css">
    .newsfeed_table {
        width: 80%;
    }
    .headline {
    }
    .headline_details {
        padding-left: 2em;
    }
    .headline_date {
        font-size: x-small; font-style: italic; color: #7F0000;
    }
 </style>
 <body>

 [html::foreach rss_source {
        http://www.npr.org/rss/rss.php?topicId=2
        http://rss.news.yahoo.com/rss/highestrated
        http://mini.net/tcl/rss.xml
        http://sourceforge.net/export/rss2_projsummary.php?group_id=10894
        http://sourceforge.net/export/rss2_projnews.php?group_id=10894
        http://sourceforge.net/export/rss2_projdocs.php?group_id=10894
 } {[TmlRss_GenHTML $rss_source "border=1"]<p/>}]

 <a href="http://validator.w3.org/check/referer"><img border="0"
          src="http://www.w3.org/Icons/valid-html401"
          alt="Valid HTML 4.01!" height="31" width="88"></a>
 <a href="http://jigsaw.w3.org/css-validator/">
  <img style="border:0;width:88px;height:31px"
       src="http://jigsaw.w3.org/css-validator/images/vcss" 
       alt="Valid CSS!">
 </a>

 </body>
 </html>

Category TclHttpd