This is a small program I wrote to monitor changes in the RSS feeds of various websites and email those changes to me at work. I currently am running it as a cron job on debian linux with Tcl 8.4.6. BDK
The first file, rss.tcl, is a very minimal package to read rss feeds. Yes, I know such a package already exists, but it helped me get into the xml package.
I don't think any of the RSS packages have made it into tcllib or the other major distributed extensions, have they? Perhaps one of these could be considered for tcllib?
schlenk The basic problem with XML stuff for the tcllib is, that there isn't any pure Tcl xml package inside tcllib ( a great lacking IMHO).
#!/usr/local/bin/tclsh package require Tcl 8.4 package require struct 2.0 package require xml 2.6 package require snit 0.9 package provide rss 1.0 namespace eval ::rss { variable parser variable parserStack variable channelObject variable currentCmds array set currentCmds \ [list \ elementStart [list [namespace current]::XML.StartRSS] \ elementEnd [list [namespace current]::XML.EndRSS] \ characterData {} \ ] } proc ::rss::Parser.NewState {elementStart elementEnd characterData} { variable parser variable parserStack variable currentCmds variable channel $parserStack push \ [list \ $currentCmds(elementStart) \ $currentCmds(elementEnd) \ $currentCmds(characterData) \ ] set currentCmds(elementStart) $elementStart set currentCmds(elementEnd) $elementEnd set currentCmds(characterData) $characterData return } proc ::rss::Parser.PreviousState {} { variable parser variable parserStack variable currentCmds foreach {currentCmds(elementStart) currentCmds(elementEnd) currentCmds(characterData)} [$parserStack pop] {break} return } proc ::rss::Wrapper.ElementStart {name attlist args} { variable currentCmds if {$currentCmds(elementStart)!={}} { set code [catch {uplevel \#0 $currentCmds(elementStart) [list $name $attlist] $args} result] return -code $code -errorinfo $::errorInfo -errorcode $::errorCode $result } } proc ::rss::Wrapper.ElementEnd {name args} { variable currentCmds if {$currentCmds(elementEnd)!={}} { set code [catch {uplevel \#0 $currentCmds(elementEnd) [list $name] $args} result] return -code $code -errorinfo $::errorInfo -errorcode $::errorCode $result } } proc ::rss::Wrapper.CharacterData {data} { variable currentCmds if {$currentCmds(characterData)!={}} { set code [catch {uplevel \#0 $currentCmds(characterData) [list $data]} result] return -code $code -errorinfo $::errorInfo -errorcode $::errorCode $result } } proc ::rss::parse {data} { variable parser variable parserStack variable channel variable currentCmds set parser [::xml::parser ] set parserStack [::struct::stack] set channel [Channel %AUTO%] $parser configure \ -elementstartcommand [namespace current]::Wrapper.ElementStart \ -elementendcommand [namespace current]::Wrapper.ElementEnd \ -characterdatacommand [namespace current]::Wrapper.CharacterData if {[catch {$parser parse $data} errorMsg]} { # Caught an error, destroy the channel $channel destroy set channel {} # Reset the stack while {[$parserStack size] != 0} { Parser.PreviousState } $parser free $parserStack destroy if {$channel == {}} { return -code error $errorMsg } else { return $channel } } proc ::rss::XML.StartRSS {name attlist args} { variable channel switch -- $name { channel { Parser.NewState \ [list [namespace current]::XML.Channel $channel] \ [list [namespace current]::XML.ElementEnd] \ {} } item { set obj [Item %AUTO%] $channel AddItem $obj Parser.NewState \ [list [namespace current]::XML.Item $obj] \ [list [namespace current]::XML.ElementEnd] \ {} } } return } proc ::rss::XML.EndRSS {name args} { return } proc ::rss::XML.Channel {obj name attlist args} { switch -- $name { title { Parser.NewState \ [list [namespace current]::XML.ElementStart] \ [list [namespace current]::XML.ElementEnd] \ [list [namespace current]::XML.CharacterData [$obj GetVariable title]] } link { Parser.NewState \ [list [namespace current]::XML.ElementStart] \ [list [namespace current]::XML.ElementEnd] \ [list [namespace current]::XML.CharacterData [$obj GetVariable link]] } description { Parser.NewState \ [list [namespace current]::XML.ElementStart] \ [list [namespace current]::XML.ElementEnd] \ [list [namespace current]::XML.CharacterData [$obj GetVariable description]] } item { set item [Item %AUTO%] $obj AddItem $item Parser.NewState \ [list [namespace current]::XML.Item $item] \ [list [namespace current]::XML.ElementEnd] \ {} } default { Parser.NewState \ [list [namespace current]::XML.ElementStart] \ [list [namespace current]::XML.ElementEnd] \ {} } } return } proc ::rss::XML.Item {obj name attlist args} { switch -- $name { title { Parser.NewState \ [list [namespace current]::XML.ElementStart] \ [list [namespace current]::XML.ElementEnd] \ [list [namespace current]::XML.CharacterData [$obj GetVariable title]] } link { Parser.NewState \ [list [namespace current]::XML.ElementStart] \ [list [namespace current]::XML.ElementEnd] \ [list [namespace current]::XML.CharacterData [$obj GetVariable link]] } description { Parser.NewState \ [list [namespace current]::XML.ElementStart] \ [list [namespace current]::XML.ElementEnd] \ [list [namespace current]::XML.CharacterData [$obj GetVariable description]] } default { Parser.NewState \ [list [namespace current]::XML.ElementStart] \ [list [namespace current]::XML.ElementEnd] \ {} } } return } proc ::rss::XML.ElementStart {name attlist args} { Parser.NewState \ [list [namespace current]::XML.ElementStart] \ [list [namespace current]::XML.ElementEnd] \ {} return } proc ::rss::XML.ElementEnd {name args} { Parser.PreviousState return } proc ::rss::XML.CharacterData {var data} { upvar \#0 $var myVar append myVar $data return } ::snit::type ::rss::Channel { variable title {} variable link {} variable description {} variable items {} destructor { foreach item $items { $item destroy } return } method title {} { return $title } method link {} { return $link } method description {} { return $description } method items {} { return $items } method GetVariable {var} { return [varname $var] } method AddItem {item} { lappend items $item } } ::snit::type ::rss::Item { variable title {} variable link {} variable pubDate {} variable description {} method title {} { return $title } method link {} { return $link } method pubDate {} { return $pubDate } method description {} { return $description } method GetVariable {var} { return [varname $var] } }
This is the main body of the program, rss_monitor.tcl
#!/usr/local/bin/tclsh lappend auto_path . package require Tcl 8.4 package require http 2.4 package require mime 1.3 package require smtp 1.3 package require md5 1.4 package require rss 1.0 #Reads the options file. proc loadOptions {file} { #Initialize the interpreter which executes the contents #of the options file. set interp [interp create -safe] $interp eval [list namespace delete ::] $interp alias email loadOptions.email #Read the options file. set inFile [open $file r] $interp eval [read $inFile] close $inFile interp delete $interp return } proc loadOptions.email {email data} { #Initialize the interpeter which executes the contents of the #data variable. set interp [interp create -safe] $interp eval [list namespace delete ::] $interp alias rss loadOptions.rss $email $interp eval $data interp delete $interp return } proc loadOptions.rss {email url} { #Store the url in the global options array. if {![info exists ::options(email,$url)]} { lappend ::options(url) $url } lappend ::options(email,$url) $email return } #Loads the MD5 hash records. proc loadHashes {file} { #Create the interpeter. set interp [interp create -safe] $interp eval [list namespace delete ::] $interp alias hash loadHashes.hash #Open the file and read in the data. set inFile [open $file r] $interp eval [read $inFile] close $inFile interp delete $interp return } proc loadHashes.hash {url hash} { #Store the hash in the global options array. set ::options(hash,$url) $hash return } #Saves the hashes for all of the url's. proc saveHashes {file} { set outFile [open $file w] foreach url $::options(url) { if {[info exists ::options(hash,$url)]} { puts $outFile [list hash $url $::options(hash,$url)] } } close $outFile } #Do the work of parsing RSS feeds, generating and comparing hashes, sending messages. proc generateMessages {} { foreach url $::options(url) { #Retrieve the current hash for the URL. if {[info exists ::options(hash,$url)]} { set hash $::options(hash,$url) } else { set hash {} } #Retrieve the URL. set token [::http::geturl $url -timeout 2000] if {[::http::status $token] != "ok"} { puts "Could not retrieve data for $url" ::http::cleanup $token continue } set data [::http::data $token] ::http::cleanup $token #Parse the RSS feed. set channel [::rss::parse $data] set msgBody {} #Generate the msg body. append msgBody "[$channel title] ([$channel link])\n" append msgBody "[$channel description]\n\n" foreach item [$channel items] { append msgBody "----------------------------------------------------------------\n" append msgBody "[$item title] ([$item link])\n" append msgBody "\n" append msgBody "[$item description]\n" append msgBody "----------------------------------------------------------------\n" append msgBody "\n" } #Generate the new hash. #I thought it would be more efficient to generate the hash prior to parsing and #creating the message, but some sites change the comments within their XML data #to reflect when the feed was generated, like sourforge.net for example, #so this is a quick and simple fix. set newHash [::md5::md5 $msgBody] #If the hashes do not match, then the site has changed, #so send out the messages. if {$newHash != $hash} { #Create the MIME message. set mime [::mime::initialize -canonical text/plan -string $msgBody] ::mime::setheader $mime Subject "[$channel title] has been updated" foreach email $::options(email,$url) { ::smtp::sendmessage $mime \ -recipients $email \ -originator "[email protected]" } #Destroy (deallocate) the MIME message. ::mime::finalize $mime #Destroy (deallocate) parsed data. $channel destroy } #Store the new hash in the options array. set ::options(hash,$url) $newHash } } #The main program. #Locations of the configuration files. set configFile "rss_config" set hashFile "rss_hashes" loadOptions $configFile if {[file exists $hashFile]} { loadHashes $hashFile } generateMessages saveHashes $hashFile
And finally, and example configuration file, rss_config
email [email protected] { #Tcl'ers Wiki rss https://wiki.tcl-lang.org/rss.xml #Slashdot rss http://slashdot.org/index.rss #Sourceforge rss http://sourceforge.net/export/rss2_sfnews.php?feed } email [email protected] { #Sourceforge rss http://sourceforge.net/export/rss2_sfnews.php?feed }