[Ro] started this page on July 8th, 2003 This parses [Slashdot] [http://www.slashdot.org] comments. It reads in the source of a comment page that is of type '''nested'''.

#!/usr/bin/tclsh

# @ij -- return the substring selected by an index pair.
#   c_name  name of a variable in the caller's scope that holds the string
#   i_j     list whose first two elements are the first and last index
# Returns the characters of that string from the first index through the
# last index, as given by string range.
proc @ij {c_name i_j} {foreach {i j} $i_j break ; upvar $c_name c ; return [string range $c $i $j]}

# =@ -- scan the text c for comments and record them in an array.
#   c         the text to scan
#   arr_name  name of an array in the caller's scope that receives the results
# For every match the comment id is appended to the comments element of the
# array and the elements id,title id,body id,mod_num id,mod_type id,parent
# id,u_slash_home id,u_name and id,u_id are filled in, with the literal value
# no stored where a field could not be parsed. id,time has no such fallback
# and stays unset when its pattern does not match. Each parsed comment is
# also printed to stdout as soon as it is found.
# NOTE(review): the main pattern below has five capture groups but six i_*
# variables follow it, and the by pattern has two groups for three variables.
# It looks like HTML markup was stripped out of the patterns in this copy of
# the page -- confirm against the original before relying on them.
proc =@ {c arr_name} {
    upvar $arr_name v ; set n 0
    # -indices makes MATCH and each i_* variable hold a first/last index pair
    # into c instead of the matched text; -start $n resumes the search just
    # past the previous match.
    # The line break inside the pattern is part of the pattern text, so the
    # continuation line must stay at column 0.
    while {[regexp -indices -start $n {\s+?\s+(.+?) \(Score\:(.+?)\)\s+\s+
(.+?)\s+\s+\s+(.+?)\s+\s+\s+.+?\[(.+?)\]} $c MATCH i_cid i_title i_rating i_header i_body i_trailer]} {
        #puts -nonewline . ; flush stdout
        # For every local variable whose name matches i_(.+), create a variable
        # named by the captured suffix that holds the corresponding substring
        # of c (i_cid gives cid, i_title gives title, and so on).
        foreach var [info vars] {if {[regexp {i_(.+)} $var -> rvar]} {set $rvar [@ij c [set $var]]}}
        lappend v(comments) $cid ; set v($cid,title) $title ; set v($cid,body) $body
        # A rating of the form number-comma-space-word is split into mod_num
        # and mod_type; otherwise the whole rating is the number and the type
        # is no.
        if {![regexp {(.+)\, (.+)} $rating -> v($cid,mod_num) v($cid,mod_type)]} {set v($cid,mod_num) $rating ; set v($cid,mod_type) no}
        # The parent id is the run of digits right before ">Parent in the
        # trailer; no when there is none.
        if {![regexp {(\d+)">Parent} $trailer -> v($cid,parent)]} {set v($cid,parent) no}
        # Author details are taken from the first line of the header.
        if {![regexp {by (.+?) \((\d+)\) } [lindex [split $header \n] 0] -> v($cid,u_slash_home) v($cid,u_name) v($cid,u_id)]} \
            {foreach el {u_slash_home u_name u_id} {set v($cid,$el) no}}
        # The timestamp is taken from the second line of the header.
        regexp {on (.+?) \(} [lindex [split $header \n] 1] -> v($cid,time)
        # Continue scanning one character past the end of this match.
        set n [expr {[lindex $MATCH 1] + 1}]
        # Dump every element stored for this comment, sorted by element name,
        # followed by a separator line.
        foreach el [lsort [array names v $cid,*]] {puts [format {%20s %s} $el $v($el)]} ; puts --------------------- ; flush stdout
    }
}

# Read the saved page from nested_comments.html (relative path), parse it
# into the array v, then report completion.
set f [open nested_comments.html] ; set c [read $f] ; close $f ; unset f
=@ $c v
puts done!
---- Output looks like this, ''it's all in an array, so you can do anything you want with the comments...'' ... 6372291,body Actually: Mass Destruction + Stupidity = Globalization... or something ;o) 6372291,mod_num 1 6372291,mod_type no 6372291,parent 6372196 6372291,time Saturday July 05, @10:58AM 6372291,title Re:Encryption... 6372291,u_id 681945 6372291,u_name darth_silliarse 6372291,u_slash_home //slashdot.org/~darth_silliarse --------------------- 6373621,body You call that a troll?

You moderators need to get out of the house a bit too...watch that sunlight, it'll do your head in if you're not expecting it.

6373621,mod_num 2 6373621,mod_type no 6373621,parent 6372196 6373621,time Saturday July 05, @05:02PM 6373621,title Re:Encryption... 6373621,u_id 601553 6373621,u_name ShieldW0lf 6373621,u_slash_home //slashdot.org/~ShieldW0lf .... Enjoy! ---- [PT] 8-Jul-2003: This wins my vote for the Obfuscated Tcl contest. '=@' eeeewwww! :) ---- %|[Category Internet] | [Category Parsing]|%