if 0 {[Richard Suchenwirth] 2005-02-06 - This weekend fun project deals with a little [RSS] browser. You can type or paste the RSS URL (often with .xml extension) in the entry on top, and on the XML document is read and rendered in the upper [text] widget. Clicking on a link (blue, underlined) retrieves that page, and renders it in a very simple way (trying to omit all frames, images, ads, and whatever noise the page contained) in the lower text. [WikiDbImage rssrss.jpg] This is highly experimental, so even less warranties than usual, but it sure is great fun! Note: the umlaut entities in the ''html2txt'' proc are resolved when this page is rendered (on IE 5 at least), so best copy the source in Edit mode, if you care for umlauts :) JSI 09feb05 This link shows the sourcefile readonly ;-) http://wiki.tcl.tk/_edit/13490.txt Known issues: * links from http://rss.news.yahoo.com/rss/topstories always throw "Page Not Found" * on older Tcl versions (8.4.1 or so) the [panedwindow] comes up very thin, must be pulled open (fixed in 8.4.8) See also [A little RSS reaper], which makes an HTML page from an RSS feed and the pages it links to. } package require http package require uri package require Tk proc main argv { pack [entry .e -textvar e] -fill x bind .e {showRSS .x.t $e} pack [panedwindow .p -ori vert] -fill both -expand 1 .p add [frame .x] pack [scrollbar .x.y -command ".x.t yview"] -fill y -side right pack [text .x.t -wrap word -yscrollc ".x.y set" -spacing1 3 \ -padx 5 -pady 3 -height 10] \ -fill both -expand 1 -side right .x.t tag config link -foreground blue -underline 1 .x.t tag bind link {.x.t config -cursor hand2} .x.t tag bind link {.x.t config -cursor {}} .x.t tag bind link {click .x.t .f.t} .p add [frame .f] pack [scrollbar .f.y -command ".f.t yview"] -fill y -side right pack [text .f.t -wrap word -yscrollc ".f.y set" -spacing1 3\ -padx 5 -pady 3 -height 12] \ -fill both -expand 1 -side right foreach i {red blue green3} {.f.t tag config $i -foreground $i} .f.t tag config title -font {Helvetica 11 bold} .f.t tag config bold -font "[.f.t cget -font] bold" focus .e raise . #set ::e {http://www.spiegel.de/schlagzeilen/rss/0,5291,,00.xml} set ::e http://www.tagesspiegel.de/feed/index.xml showRSS .x.t $::e } if 0 {This proc is called when a RSS link is clicked. It uses the fact that the second tag is the URL itself:} proc click {rsswin textwin} { set url [lindex [$rsswin tag names insert] 1] $rsswin tag configure $url -foreground purple4 showHTML $textwin $url } if 0 {This renders a RSS URL with titles and links into the given [text] widget:} proc showRSS {w url} { $w delete 1.0 end upvar #0 [geturl_followRedirects $url] arr if ![info exists arr(body)] {set arr(body) "not found :("} foreach {tag content} [html2txt $arr(body)] { switch -- $tag { {set descr $content} {$w insert end "$title - $descr\n"} {set title $content} <link> {set link $content} </item> {$w insert end " - " "" $title\n [list link $link]} } } } if 0 {This is a crude HTML renderer, which uses a [regexp] to split the document into a tag nontag... sequence, and depending on the tag, renders the non-tag content in the given [text] widget:} proc showHTML {w url} { $w delete 1.0 end upvar #0 [geturl_followRedirects $url] arr foreach {tag content} [html2txt $arr(body)] { if {[string length $content]<20} continue if [regexp userAgent $content] continue switch -glob -- [string tolower $tag] { <title> {$w insert end $content title \n\n} <b> {$w insert end $content bold \n} <div* - </div> - <p> - <!--* - </i> - <br* {$w insert end $content\n} } update } } proc html2txt {html} { set res {} set re {(<[^>]+>) *([^<>]*)} foreach {all tag content} [regexp -all -inline $re $html] { if {![regexp src= $content]} { lappend res [deblank $tag] [deblank $content] } } string map { Ü Ü ß ß ä ä ö ö ü ü „ ' “ ' ä ä ö ö ü ü ß ß } $res } proc deblank string {regsub -all {\s+} $string " "} #-- This redirecting geturl is courtesy of KPV's http://wiki.tcl.tk/11831 proc geturl_followRedirects {url args} { array set URI [::uri::split $url] ;# Need host info from here while {1} { set token [eval [list http::geturl $url] $args] if {![string match {30[1237]} [::http::ncode $token]]} {return $token} array set meta [set ${token}(meta)] if {![info exist meta(Location)]} { return $token } array set uri [::uri::split $meta(Location)] unset meta if {$uri(host) == ""} { set uri(host) $URI(host) } # problem w/ relative versus absolute paths set url [eval ::uri::join [array get uri]] } } main $argv #-- little debugging helpers (the F1 part works on Windows and Mac only) bind . <Escape> {exec wish $argv0 &; exit} bind . <F1> {console show} if 0 { ---- [Category Toys] | [Category Internet] | [Arts and crafts of Tcl-Tk programming] }