gopher-get.tcl is a tool to mirror gopher sites. It depends on TclCurl, Tclx, and tcllib. I have only tested this on Slackware64 15.0. ---- ====== #!/usr/bin/tclsh # gopher-get.tcl version 1 by Ben Collver # Short script to mirror a gopher site. # It works similar to `wget --mirror`. package require TclCurl package require Tclx package require uri proc curl_exit {ch code} { $ch cleanup exit $code } proc download_delay {} { # be a good bot and sleep between downloads sleep 1 } # fetch a gopher document by URL # if a directory index has been fetched, # then return a file name for the contents proc fetch {ch url} { set parts [gopher_uri_split $url] set parsed [dict create {*}$parts] set host [dict get $parsed fqdn] set type [dict get $parsed type] set path [dict get $parsed path] if {[string length $host] == 0 || [string length $type] == 0 || [string length $path] == 0 } { puts "Could not parse URL $url" return "" } if {$type eq "0"} { if {[string index $path end] eq "/"} { set filename "index.gph" set file [format "%s/%s%s%s" $host $type $path $filename] } else { set file [format "%s/%s%s" $host $type $path] } } elseif {$type eq "1"} { if {[string index $path end] ne "/"} { set path "$path/" } set filename "index.gph" set file [format "%s/%s%s%s" $host $type $path $filename] } elseif {$type eq "I" || $type eq "g"} { if {[string index $path end] eq "/"} { puts "Bad image filename $path" return "" } set file [format "%s/%s%s" $host $type $path] } else { puts "Unknown item type $type" return "" } set dirname [file dirname $file] if {![file exists $dirname]} { puts "mkdir $dirname" file mkdir $dirname } if {![file exists $file]} { $ch configure -file $file -url $url puts "Downloading $url ..." set result [$ch perform] if {$result != 0} { puts "Curl error $result" } download_delay } if {$type eq "1"} { set retval $file } else { set retval "" } return $retval } # gopher_uri_pattern_orig returns a regular expression to parse gopher URL's # the same pattern exists in $::uri::gopher::url # this unused procedure is here to compare with gopher_uri_pattern proc gopher_uri_pattern_orig {} { set escape $::uri::basic::escape set hostOrPort $::uri::basic::hostOrPort set search $::uri::http::search # make $xCharN the same as $::uri::basic::xCharN set xCharN {[a-zA-Z0-9$_.+!*'(,);/?:@&=-]} # make $xChar the same as $::uri::basic::xChar set xChar "(${xCharN}|${escape})" # make $url the same as $::uri::gopher::url set type $xChar set selector "$xChar*" set string $selector set schemepart "//${hostOrPort}(/(${type}(${selector}(%09${search}(%09${string})?)?)?)?)?" set url "gopher:${schemepart}" return $::uri::gopher::url } # gopher_uri_pattern returns a regular expression to parse gopher URL's # a similar regular expression can be found in $::uri::gopher::url # this version is the same, except it adds a tilde as an acceptable # character. # # Why didn't tcllib uri allow tilde in the first place? # See link below for pedantic reasons against allowing tilde. # https://jkorpela.fi/tilde.html proc gopher_uri_pattern {} { set escape $::uri::basic::escape set hostOrPort $::uri::basic::hostOrPort set search $::uri::http::search # add tilde "~" to $xCharN set xCharN {[a-zA-Z0-9$_.+~!*'(,);/?:@&=-]} # add tilde "~" to $xChar set xChar "(${xCharN}|${escape})" # add tilde "~" to $url set type $xChar set selector "$xChar*" set string $selector set schemepart "//${hostOrPort}(/(${type}(${selector}(%09${search}(%09${string})?)?)?)?)?" set url "gopher:${schemepart}" return $url } proc gopher_uri_split {url} { set pattern [gopher_uri_pattern] set parts [regexp -inline $pattern $url] set retval [list \ url [lindex $parts 0] \ fqdn [lindex $parts 1] \ fqdn2 [lindex $parts 2] \ host_dot [lindex $parts 3] \ host [lindex $parts 4] \ domain [lindex $parts 5] \ unused1 [lindex $parts 6] \ unused2 [lindex $parts 7] \ req_abs [lindex $parts 8] \ req_rel [lindex $parts 9] \ type [lindex $parts 10] \ path [lindex $parts 11] \ last_char [lindex $parts 12] \ unused3 [lindex $parts 13] \ unused4 [lindex $parts 14] \ unused5 [lindex $parts 15] \ unused6 [lindex $parts 16] \ ] } # parse a gopher directory index for links proc parse {filename} { set retval [list] set fh [open $filename] set text [read $fh] close $fh set text [string map [list "\r\n" "\n"] $text] set lines [split $text "\n"] foreach {line} $lines { set type [string index $line 0] set data [string range $line 1 end] set fields [split $data "\t"] lassign $fields label path server port if {$type eq "1"} { if {[string index $path end] ne "/"} { set path "$path/" } } if {$type eq "0" || $type eq "1" || $type eq "I" || $type eq "g"} { if {$port == 70} { set url [format "gopher://%s/%s%s" $server $type $path] } else { set url [format "gopher://%s:%d/%s%s" $server $port $type $path] } lappend retval $url } elseif {$type eq "h" || $type eq "i"} { # ignore "h" and "i" types continue } elseif {$type eq "."} { break } else { puts "Unknown gopher type $type in $filename" continue } } return $retval } proc main {argv} { set ch [curl::init] if {[llength $argv] == 0} { puts "Usage: gopher-get.tcl gopher-URI" exit } set url [lindex $argv 0] set parts [gopher_uri_split $url] set parsed [dict create {*}$parts] set host [dict get $parsed fqdn] set path [dict get $parsed path] set path_len [string length $path] if {$path_len > 0} { incr path_len -1 } set files_done [dict create] set urls [list $url] set urls_done [dict create] while {[llength $urls] > 0} { set files [list] foreach {url} $urls { dict set urls_done $url 1 set filename [fetch $ch $url] if {[llength $filename] > 0 && ![dict exists $files_done $filename] } { lappend files $filename } } set urls [list] foreach {filename} $files { dict set files_done $filename 1 set links [parse $filename] foreach {link} $links { set parts [gopher_uri_split $link] set parsed [dict create {*}$parts] set link_host [dict get $parsed fqdn] set link_path [dict get $parsed path] set start [string range $link_path 0 $path_len] if {$link_host eq $host && $start eq $path && ![dict exists $urls_done $link] } { lappend urls $link } } } } curl_exit $ch 0 } main $::argv ====== <> Internet