05 Sept 2003 [Mike Tuxford]: I was chatting on irc this morning and a friend said 'I need something to clean out all the dead links in my bookmark file' and so I wrote the following script. It works well for me as you can see from the command line output example below but I advise that you back-up your real bookmark file first. burp:/tcl/s/bookmarks# ./deadlinks.tcl bookmarks.html Checking 1150 sites... Testing site [1150] 1024 Good sites 126 Dead sites ---- #!/usr/bin/tclsh # ################################# # Mike Tuxford 2003 # # Script: deadlinks.tcl # Syntax: deadlinks # Checks a netscape (mozilla) style bookmark file for dead links. # # Requires the http packages. # Does not overwrite the bookmark file but creates 2 # new files (name.good name.dead) # # Tests sites by just retrieving the html headers and will # also leave local file/dir links untouched # # adjust the "timeout,seconds" values as desired. # The timeout value is how long we should try retreiving a # page header from a site before listing it as dead. # array set opt { "timeout,seconds" 10 } array set data { "c,sites" 0 "c,good" 0 "c,dead" 0 } proc main {} { global opt fd data set i 0 puts "Checking $data(c,sites) sites..." puts -nonewline stdout "Checking site "; flush stdout foreach line $data(bm,in) { switch -glob -- $line { *HREF=* { incr i puts -nonewline stdout \ " \[$i\][string repeat \b [expr [string length $i] +3]]" flush stdout if {![extractSite $line]} { # it's a local dir/file bookmark puts $fd(good) $line incr data(c,good) continue } if {![verifySite]} { puts $fd(dead) $line incr data(c,dead) } else { puts $fd(good) $line incr data(c,good) } } default { puts $fd(good) $line } } } puts -nonewline "\n" flush stdout return } proc verifySite {} { global opt data if {[catch {http::geturl $data(site) -validate 1 \ -timeout [expr 1000 * $opt(timeout,seconds)]} tok]} { #puts $tok return 0 } upvar #0 $tok state if {$state(status) == "timeout"} { return 0 } http::cleanup $tok return 1 } proc extractSite {line} { global data set sidx [expr [string first "HREF" $line]+13] if {[string match -nocase *file:* [lrange $line 0 $sidx]]} { return 0 } set edix [expr [string first \" $line $sidx]-1] set data(site) [string range $line $sidx $edix] return 1 } ########################### # get the show started here # if {[catch {package require http} loadErr]} { puts $loadErr puts "Need the http package" exit } if {[llength $argv] == 0 || [string match -h* [lindex $argv 0]]} { puts "Syntax: deadlinks.tcl " exit } else { set opt(file,in) [lindex $argv 0] } if {[catch {open $opt(file,in) r} fd(in)]} { puts $fd(in) exit } else { set data(bm,in) [split [read $fd(in)] \n] close $fd(in) } set opt(file,good) "$opt(file,in).good" set opt(file,dead) "$opt(file,in).dead" foreach type {good dead} { if {[catch {open $opt(file,$type) w+} fd($type)]} { puts $fd($type) exit } } foreach line $data(bm,in) { if {[string match -nocase *href=* $line]} { incr data(c,sites) } } main close $fd(good) close $fd(dead) puts [format "%5d %s" $data(c,good) "Good sites"] puts [format "%5d %s" $data(c,dead) "Dead sites"] ----