gopher-get.tcl is a tool to mirror gopher sites. It depends on TclCurl, Tclx, and tcllib. I have only tested this on Slackware64 15.0.
#!/usr/bin/tclsh # gopher-get.tcl version 3 by Ben Collver # Short script to mirror a gopher site. # It works similar to `wget --mirror`. package require TclCurl package require Tclx package require uri proc curl_exit {ch code} { $ch cleanup exit $code } proc download_delay {} { # be a good bot and sleep between downloads (uses Tclx [sleep]) sleep 1 } # fetch a gopher document by URL # if a directory index has been fetched, # then return a file name for the contents proc fetch {ch url} { set parts [gopher_uri_split $url] set parsed [dict create {*}$parts] set host [dict get $parsed fqdn] set type [dict get $parsed type] set path [dict get $parsed path] if {[string length $host] == 0 || [string length $type] == 0 || [string length $path] == 0 } { puts "Could not parse URL $url" return "" } if {$type eq "0"} { if {[string index $path end] eq "/"} { set filename "index.gph" set file [format "%s/%s%s%s" $host $type $path $filename] } else { set file [format "%s/%s%s" $host $type $path] } } elseif {$type eq "1"} { if {[string index $path end] ne "/"} { set path "$path/" } set filename "index.gph" set file [format "%s/%s%s%s" $host $type $path $filename] } elseif {$type eq "9"} { if {[string index $path end] eq "/"} { puts "Bad binary filename $path" return "" } else { set file [format "%s/%s%s" $host $type $path] } } elseif {$type eq "I" || $type eq "g"} { if {[string index $path end] eq "/"} { puts "Bad image filename $path" return "" } set file [format "%s/%s%s" $host $type $path] } else { puts "Unknown item type $type" return "" } set dirname [file dirname $file] if {![file exists $dirname]} { puts "mkdir $dirname" file mkdir $dirname } if {![file exists $file]} { $ch configure -file $file -url $url puts "Downloading $url ..." set result [$ch perform] if {$result != 0} { puts "Curl error $result" } download_delay } if {$type eq "1"} { set retval $file } else { set retval "" } return $retval } # gopher_uri_pattern_orig returns a regular expression to parse gopher URL's # the same pattern exists in $::uri::gopher::url # this unused procedure is here to compare with gopher_uri_pattern proc gopher_uri_pattern_orig {} { set escape $::uri::basic::escape set hostOrPort $::uri::basic::hostOrPort set search $::uri::http::search # make $xCharN the same as $::uri::basic::xCharN set xCharN {[a-zA-Z0-9$_.+!*'(,);/?:@&=-]} # make $xChar the same as $::uri::basic::xChar set xChar "(${xCharN}|${escape})" # make $url the same as $::uri::gopher::url set type $xChar set selector "$xChar*" set string $selector set schemepart "//${hostOrPort}(/(${type}(${selector}(%09${search}(%09${string})?)?)?)?)?" set url "gopher:${schemepart}" return $::uri::gopher::url } # gopher_uri_pattern returns a regular expression to parse gopher URL's # a similar regular expression can be found in $::uri::gopher::url # this version is the same, except it adds a tilde as an acceptable # character. # # Why didn't tcllib uri allow tilde in the first place? # See link below for pedantic reasons against allowing tilde. # https://jkorpela.fi/tilde.html proc gopher_uri_pattern {} { set escape $::uri::basic::escape set hostOrPort $::uri::basic::hostOrPort set search $::uri::http::search # add tilde "~" to $xCharN set xCharN {[a-zA-Z0-9$_.+~!*'(,);/?:@&=-]} # add tilde "~" to $xChar set xChar "(${xCharN}|${escape})" # add tilde "~" to $url set type $xChar set selector "$xChar*" set string $selector set schemepart "//${hostOrPort}(/(${type}(${selector}(%09${search}(%09${string})?)?)?)?)?" set url "gopher:${schemepart}" return $url } proc gopher_uri_split {url} { set pattern [gopher_uri_pattern] set parts [regexp -inline $pattern $url] # protect against relative path abuse set path [lindex $parts 11] set path [string map [list "/../" "/dotdot/"] $path] set retval [list \ url [lindex $parts 0] \ fqdn [lindex $parts 1] \ fqdn2 [lindex $parts 2] \ host_dot [lindex $parts 3] \ host [lindex $parts 4] \ domain [lindex $parts 5] \ unused1 [lindex $parts 6] \ unused2 [lindex $parts 7] \ req_abs [lindex $parts 8] \ req_rel [lindex $parts 9] \ type [lindex $parts 10] \ path $path \ last_char [lindex $parts 12] \ unused3 [lindex $parts 13] \ unused4 [lindex $parts 14] \ unused5 [lindex $parts 15] \ unused6 [lindex $parts 16] \ ] } # parse a gopher directory index for links proc parse {filename} { set retval [list] set fh [open $filename] set text [read $fh] close $fh set text [string map [list "\r\n" "\n"] $text] set lines [split $text "\n"] foreach {line} $lines { set type [string index $line 0] set data [string range $line 1 end] set fields [split $data "\t"] lassign $fields label path server port if {$type eq "1"} { if {[string index $path end] ne "/"} { set path "$path/" } } if {$type eq "0" || $type eq "1" || $type eq "9" || $type eq "I" || $type eq "g" } { if {$port == 70} { set url [format "gopher://%s/%s%s" $server $type $path] } else { set url [format "gopher://%s:%d/%s%s" $server $port $type $path] } lappend retval $url } elseif {$type eq "h" || $type eq "i"} { # skip "h" and "i" types continue } elseif {$type eq "."} { break } else { puts "Unknown gopher type $type in $filename" continue } } return $retval } proc main {argv} { set ch [curl::init] set binary "error" set show_skipped false set url "" while {[llength $argv] > 0} { set remainder [lassign $argv arg] set argv $remainder if {$arg eq "--binary-skip"} { set binary "skip" } elseif {$arg eq "--binary-save"} { set binary "save" } elseif {$arg eq "--show-skipped"} { set show_skipped true } elseif {[regexp {^--} $arg]} { puts "\nUnknown option $arg\n" break } else { set url $arg break } } if {[string length $url] == 0} { puts {Usage: gopher-get.tcl [options] gopher-URI} puts "" puts "Options:" puts "--binary-skip (Skip item type 9 links)" puts "--binary-save (Save item type 9 links)" puts "--show-skipped (Report skipped links)" puts "" exit } set parts [gopher_uri_split $url] set parsed [dict create {*}$parts] set host [dict get $parsed fqdn] set path [dict get $parsed path] set path_len [string length $path] if {$path_len > 0} { incr path_len -1 } set files_done [dict create] set uris [list $parsed] set urls_done [dict create] while {[llength $uris] > 0} { # for each url in the list, fetch a file # keep track of type 1 index files set files [list] foreach {uri} $uris { set type [dict get $uri type] set url [dict get $uri url] dict set urls_done $url 1 set filename [fetch $ch $url] if {$type eq "1" && [llength $filename] > 0 && ![dict exists $files_done $filename] } { lappend files $filename } } # parse each type 1 index file for new urls set uris [list] foreach {filename} $files { dict set files_done $filename 1 set links [parse $filename] foreach {link} $links { set parts [gopher_uri_split $link] set parsed [dict create {*}$parts] set link_host [dict get $parsed fqdn] set link_path [dict get $parsed path] set link_type [dict get $parsed type] set start [string range $link_path 0 $path_len] if {$link_host ne $host} { if {$show_skipped} { puts "Skipped link $link, $link_host != $host" } continue } if {$start ne $path} { if {$show_skipped} { puts "Skipped link $link, $start != $path" } continue } if {[dict exists $urls_done $link]} { if {$show_skipped} { puts "Skipped link $link, link already flagged done" } continue } if {$link_type eq "9"} { if {$binary eq "error"} { puts "Error: item type 9 (binary file)" exit } elseif {$binary eq "skip"} { if {$show_skipped} { puts "Skipped link $link, item type = 9" } continue } else { # save it } } lappend uris $parsed } } } curl_exit $ch 0 } main $::argv