RSS Reader

This is the code for a simple RSS Reader. It is based on David Gravereaux's code (at TclHttpd RSS Processing) and splitted in a library to do the real reading and a RSSReader GUI.


  package provide czrss 0.11

  package require snit
  package require tdom
  package require http

  if {![catch {package require autoproxy}]} {
      autoproxy::init
  }


  # This is the class representing an RSS document
  snit::type ::czrss::doc {
    variable xpath
    variable channel
    variable items
    variable url

    # Konstruktor for a given URI
    constructor { uri } {
        set url $uri
        $self load
    }

    method load { } {
        # load xml to temporary file
        set file "[ clock seconds].xml"
        set out [ open $file w ]
        http::geturl $url -channel $out
        close $out

        # load xml into dom from temporary file
        set doc [ dom parse -channel [tDOM::xmlOpenFile $file] ]
        set _root [ $doc documentElement ]
        file delete $file
        
        set root [$doc documentElement]        
        switch [getRSSVersion $doc] {
            0.91 - 0.92 - 0.93 - 2.0 {
                set xpath(titleXpath)        {/rss/channel/title/text()}
                set xpath(linkXpath)        {/rss/channel/link/text()}
                set xpath(imgNodeXpath)        {/rss/channel/image/title}
                set xpath(imgTitleXpath) {/rss/channel/image/title/text()}
                set xpath(imgLinkXpath)        {/rss/channel/image/url/text()}
                  set xpath(imgWidthXpath)        {/rss/channel/image/width/text()}
                set xpath(imgHeightXpath) {/rss/channel/image/height/text()}
                set xpath(storiesXpath)        {/rss/channel/item}
                set xpath(itemTitleXpath)        {title/text()}
                set xpath(itemLinkXpath)        {link/text()}
                set xpath(itemPubDateXpath)        {pubDate/text()}
                set xpath(itemDescXpath)        {description/text()}
            }
            1.0 {
                set xpath(titleXpath)        {/rdf:RDF/*[local-name()='channel']/*[local-name()='title']/text()}
                set xpath(linkXpath)        {/rdf:RDF/*[local-name()='channel']/*[local-name()='link']/text()}
                set xpath(imgNodeXpath)        {/rdf:RDF/*[local-name()='image']}
                set xpath(imgTitleXpath) {/rdf:RDF/*[local-name()='image']/*[local-name()='title']/text()}
                set xpath(imgLinkXpath)        {/rdf:RDF/*[local-name()='image']/*[local-name()='url']/text()}
                set xpath(imgWidthXpath) {/rdf:RDF/*[local-name()='image']/*[local-name()='width']/text()}
                set xpath(imgHeightXpath) {/rdf:RDF/*[local-name()='image']/*[local-name()='height']/text()}
                set xpath(storiesXpath)        {/rdf:RDF/*[local-name()='item']}
                set xpath(itemTitleXpath)        {*[local-name()='title']/text()}
                set xpath(itemLinkXpath)        {*[local-name()='link']/text()}
                set xpath(itemPubDateXpath)        {*[local-name()='pubDate']/text()}
                set xpath(itemDescXpath)        {*[local-name()='description']/text()}

            }
            default {
                error "Unssupported schema [getRSSVersion $doc]"
            }
        }

        # Channel
        set cN [ $_root child 1 channel ]
        set channel [::czrss::channel create %AUTO% $self $cN]
        puts $channel

        # Items
        set items {}
        set stories [$_root selectNodes $xpath(storiesXpath) ]
        foreach iN $stories {
            lappend items [ ::czrss::item  create %AUTO% $self $iN ]
        }
    }

    # returns the XPath Query for a given type
    method xpath { key } {
        return $xpath($key)
    }        

    # returns the channel object
    method channel {} {
        return $channel
    }
 
    # returns a list of items
    method items {} {
        return $items
    }        

    # detects the RSS version of the document
    proc getRSSVersion {doc} {
        set root [$doc documentElement]
        switch [$root nodeName] {
            rss {
                if {[$root hasAttribute version]} {
                    return [$root getAttribute version]
                }
                # Best guess as most stuff is optional...
                return 0.92
            }
            rdf:RDF {
                return 1.0
            }
            default {
                return 0
            }
        }
    }
  }

  # this class is used to contain rss items
  snit::type ::czrss::item {
    variable _node
    variable _doc

    constructor {doc node } {
        variable history
        set _doc $doc
        set _node $node
    }
    # get the title 
    method title { } {
        set xpath [$_doc xpath itemTitleXpath]
        return [ ::czrss::nodeTxt $_node $xpath]
    }
    # get the link 
    method link {} {
        set xpath [$_doc xpath itemLinkXpath]
        return [ ::czrss::nodeUri $_node $xpath]
    }
    # get the description
    method description {} {
        set xpath [$_doc xpath itemDescXpath]
        return [ ::czrss::nodeTxt $_node $xpath]
    }
    # return the publication date as string
    method pubDate {} {
        set xpath [$_doc xpath itemPubDateXpath]
        return [ ::czrss::nodeTxt $_node $xpath]
    }
  }

  # this class contains information on the channel
  snit::type ::czrss::channel {
    variable _doc
    variable _root
    
    constructor { doc root} {
        set _doc $doc
        set _root $root
    }
    # get the title 
    method title { } {
        set xpath [$_doc xpath titleXpath]
        return [ ::czrss::nodeTxt $_root $xpath]
    }
    # get the image link 
    method imgLink {} {
        set xpath [$_doc xpath imgLinkXpath]
        return [ ::czrss::nodeUri $_root $xpath]
    }
    # get the image title        
    method imgTitle {} {
        set xpath [$_doc xpath imgTitleXpath]
        return [ ::czrss::nodeUri $_root $xpath]
    }
     
    # get the image width
    method imgWidth {} {
        set xpath [$_doc xpath imgWidthXpath]
        return [ ::czrss::nodeTxt $_root $xpath]
    }
    # get the image height
    method imgHeight {} {
        set xpath [$_doc xpath imgHeightXpath]
        return [ ::czrss::nodeTxt $_root $xpath]
    }
  }

  # this namespace contains some utility methods
  namespace eval ::czrss {

    proc encUri {uri} {
        return [string map { & %26 } $uri]
    }

    proc encTxt {txt} {
        return [string map { & &amp; < &lt; > &gt; } $txt]
    }

    proc nodeUri {node xpath} {
        if {[$node selectNode $xpath] != ""} {
            # Only if there is a lonely &, quote it back to an entity.
            return [encUri [[$node selectNode $xpath] nodeValue]]
        } else {
            return ""
        }
    }

    proc nodeTxt {node xpath} {
        if {[$node selectNode $xpath] != ""} {
            return [[$node selectNode $xpath] nodeValue]
        } else {
            return ""
        }
    }

  }

2014-04-28: The encUri command is insufficient. At the very least it needs to convert the % character too, and the space character should be converted to either + or %20. The following implementation is a little bit better (even though it possibly converts more characters than it needs to):

    proc encUri {uri} {
        set res {}
        foreach c [split $uri {}] {
            append res [if {[string match {[A-Za-z.0-9!()'*_~-]} $c]} {
                set c
            } else {
                format %%%02X [scan $c %c]
            }]
        }
        set res
    }