Version 7 of RS's RSS

Updated 2005-02-09 02:34:39 by jsi

if 0 {Richard Suchenwirth 2005-02-06 - This weekend fun project deals with a little RSS browser. You can type or paste the RSS URL (often with .xml extension) in the entry on top, and on <Return> the XML document is read and rendered in the upper text widget. Clicking on a link (blue, underlined) retrieves that page, and renders it in a very simple way (trying to omit all frames, images, ads, and whatever noise the page contained) in the lower text.

http://mini.net/files/rssrss.jpg

This is highly experimental, so even less warranties than usual, but it sure is great fun! Note: the umlaut entities in the html2txt proc are resolved when this page is rendered (on IE 5 at least), so best copy the source in Edit mode, if you care for umlauts :)

JSI 09feb05 Better idea perhaps: Use this link, which shows the sourcefile ;-) [L1 ]

Known issues:

}

 package require http
 package require uri
 package require Tk

 proc main argv {
    pack [entry .e -textvar e] -fill x
    bind .e <Return> {showRSS .x.t $e}
    pack [panedwindow .p -ori vert] -fill both -expand 1

    .p add [frame .x]
    pack [scrollbar .x.y -command ".x.t yview"] -fill y -side right
    pack [text .x.t -wrap word -yscrollc ".x.y set" -spacing1 3 \
        -padx 5 -pady 3 -height 10] \
        -fill both -expand 1 -side right
    .x.t tag config link -foreground blue -underline 1
    .x.t tag bind link <Enter> {.x.t config -cursor hand2}
    .x.t tag bind link <Leave> {.x.t config -cursor {}}
    .x.t tag bind link <ButtonRelease-1> {click .x.t .f.t} 

    .p add [frame .f]
    pack [scrollbar .f.y -command ".f.t yview"] -fill y -side right
    pack [text .f.t -wrap word -yscrollc ".f.y set" -spacing1 3\
        -padx 5 -pady 3 -height 12] \
        -fill both -expand 1 -side right
    foreach i {red blue green3} {.f.t tag config $i -foreground $i}
    .f.t tag config title -font {Helvetica 11 bold}
    .f.t tag config bold -font "[.f.t cget -font] bold"
    focus .e
    raise .
    #set ::e {http://www.spiegel.de/schlagzeilen/rss/0,5291,,00.xml}
    set ::e http://www.tagesspiegel.de/feed/index.xml
    showRSS .x.t $::e
 }

if 0 {This proc is called when a RSS link is clicked. It uses the fact that the second tag is the URL itself:}

 proc click {rsswin textwin} {
    set url [lindex [$rsswin tag names insert] 1]
    $rsswin tag configure $url -foreground purple4
    showHTML $textwin $url
 }

if 0 {This renders a RSS URL with titles and links into the given text widget:}

 proc showRSS {w url} {
    $w delete 1.0 end
    upvar #0 [geturl_followRedirects $url] arr
    if ![info exists arr(body)] {set arr(body) "<html>not found :(</html>"}
    foreach {tag content} [html2txt $arr(body)] {
        switch -- $tag {
            <description> {set descr $content}
            </description> {$w insert end "$title - $descr\n"}
            <title> {set title $content}
            <link>  {set link $content}
            </item> {$w insert end " - " "" $title\n [list link $link]}
        }
    }
 }

if 0 {This is a crude HTML renderer, which uses a regexp to split the document into a tag nontag... sequence, and depending on the tag, renders the non-tag content in the given text widget:}

 proc showHTML {w url} {
    $w delete 1.0 end
    upvar #0 [geturl_followRedirects $url] arr
    foreach {tag content} [html2txt $arr(body)] {
        if {[string length $content]<20} continue
        if [regexp userAgent $content] continue
        switch -glob -- [string tolower $tag] {
            <title> {$w insert end $content title \n\n}
            <b>     {$w insert end $content bold \n}
            <div* - </div> - <p> - <!--* - </i> -
            <br* {$w insert end $content\n}
        }
        update
    }
 }
 proc html2txt {html} {
   set res {}
   set re {(<[^>]+>) *([^<>]*)}
   foreach {all tag content} [regexp -all -inline $re $html] {
      if {![regexp src= $content]} {
              lappend res [deblank $tag] [deblank $content]
      }
   }
   string map {
      &#220; Ü &#223; ß &#228; ä &#246; ö &#252; ü &#132; ' &#147; '
      &auml; ä &ouml; ö &uuml; ü &szlig; ß
   } $res
 }
 proc deblank string {regsub -all {\s+} $string " "}

#-- This redirecting geturl is courtesy of KPV's http://wiki.tcl.tk/11831

 proc geturl_followRedirects {url args} {
    array set URI [::uri::split $url] ;# Need host info from here
    while {1} {
        set token [eval [list http::geturl $url] $args]
        if {![string match {30[1237]} [::http::ncode $token]]} {return $token}
        array set meta [set ${token}(meta)]
        if {![info exist meta(Location)]} {
            return $token
        }
        array set uri [::uri::split $meta(Location)]
        unset meta
        if {$uri(host) == ""} { set uri(host) $URI(host) }
        # problem w/ relative versus absolute paths
        set url [eval ::uri::join [array get uri]]
    }
 }

 main $argv

#-- little debugging helpers (the F1 part works on Windows and Mac only)

 bind . <Escape> {exec wish $argv0 &; exit}
 bind . <F1> {console show}

if 0 {


Category Toys | Category Internet | Arts and crafts of Tcl-Tk programming }