gopher-get.tcl is a tool to mirror gopher sites. It depends on TclCurl, Tclx, and tcllib. I have only tested this on Slackware64 15.0.
----
======
#!/usr/bin/tclsh
# gopher-get.tcl version 23 by Ben Collver
# Short script to mirror a gopher site.
# It works similar to `wget --mirror`.
package require TclCurl
package require Tclx
package require uri
proc curl_exit {ch code} {
$ch cleanup
exit $code
}
proc download_delay {} {
# be a good bot and sleep between downloads (uses Tclx [sleep])
sleep 1
}
# fetch a gopher document by URL
# if a directory index has been fetched,
# then return a file name for the contents
proc fetch {ch url} {
set parts [gopher_uri_split $url]
set parsed [dict create {*}$parts]
set host [dict get $parsed fqdn]
set type [dict get $parsed type]
set path [dict get $parsed path]
if {[string length $host] == 0 ||
[string length $type] == 0 ||
[string length $path] == 0
} {
puts "Could not parse URL $url"
return ""
}
if {$type eq "0"} {
if {[string index $path end] eq "/"} {
set filename "index.gph"
set file [format "%s/%s%s%s" $host $type $path $filename]
} else {
set file [format "%s/%s%s" $host $type $path]
}
} elseif {$type eq "1"} {
if {[string index $path end] ne "/"} {
set path "$path/"
}
set filename "index.gph"
set file [format "%s/%s%s%s" $host $type $path $filename]
} elseif {$type eq "9"} {
if {[string index $path end] eq "/"} {
puts "Bad binary filename $path"
return ""
} else {
set file [format "%s/%s%s" $host $type $path]
}
} elseif {$type eq "I" || $type eq "g"} {
if {[string index $path end] eq "/"} {
puts "Bad image filename $path"
return ""
}
set file [format "%s/%s%s" $host $type $path]
} else {
puts "Unknown item type $type"
return ""
}
set dirname [file dirname $file]
if {![file exists $dirname]} {
puts "mkdir $dirname"
file mkdir $dirname
}
if {![file exists $file]} {
$ch configure -file $file -url $url
puts "Downloading $url ..."
set result [$ch perform]
if {$result != 0} {
puts "Curl error $result"
}
download_delay
}
if {$type eq "1"} {
set retval $file
} else {
set retval ""
}
return $retval
}
# gopher_uri_pattern_orig returns a regular expression to parse gopher URL's
# the same pattern exists in $::uri::gopher::url
# this unused procedure is here to compare with gopher_uri_pattern
proc gopher_uri_pattern_orig {} {
set escape $::uri::basic::escape
set hostOrPort $::uri::basic::hostOrPort
set search $::uri::http::search
# make $xCharN the same as $::uri::basic::xCharN
set xCharN {[a-zA-Z0-9$_.+!*'(,);/?:@&=-]}
# make $xChar the same as $::uri::basic::xChar
set xChar "(${xCharN}|${escape})"
# make $url the same as $::uri::gopher::url
set type $xChar
set selector "$xChar*"
set string $selector
set schemepart "//${hostOrPort}(/(${type}(${selector}(%09${search}(%09${string})?)?)?)?)?"
set url "gopher:${schemepart}"
return $::uri::gopher::url
}
# gopher_uri_pattern returns a regular expression to parse gopher URL's
# a similar regular expression can be found in $::uri::gopher::url
# this version is the same, except it adds a tilde as an acceptable
# character.
#
# Why didn't tcllib uri allow tilde in the first place?
# See link below for pedantic reasons against allowing tilde.
# https://jkorpela.fi/tilde.html
proc gopher_uri_pattern {} {
set escape $::uri::basic::escape
set hostOrPort $::uri::basic::hostOrPort
set search $::uri::http::search
# add tilde "~" to $xCharN
set xCharN {[a-zA-Z0-9$_.+~!*'(,);/?:@&=-]}
# add tilde "~" to $xChar
set xChar "(${xCharN}|${escape})"
# add tilde "~" to $url
set type $xChar
set selector "$xChar*"
set string $selector
set schemepart "//${hostOrPort}(/(${type}(${selector}(%09${search}(%09${string})?)?)?)?)?"
set url "gopher:${schemepart}"
return $url
}
proc gopher_uri_split {url} {
set pattern [gopher_uri_pattern]
set parts [regexp -inline $pattern $url]
# protect against relative path abuse
set path [lindex $parts 11]
set path [string map [list "/../" "/dotdot/"] $path]
set retval [list \
url [lindex $parts 0] \
fqdn [lindex $parts 1] \
fqdn2 [lindex $parts 2] \
host_dot [lindex $parts 3] \
host [lindex $parts 4] \
domain [lindex $parts 5] \
unused1 [lindex $parts 6] \
unused2 [lindex $parts 7] \
req_abs [lindex $parts 8] \
req_rel [lindex $parts 9] \
type [lindex $parts 10] \ path [lindex $partsh 11] \
last_char [lindex $parts 12] \
unused3 [lindex $parts 13] \
unused4 [lindex $parts 14] \
unused5 [lindex $parts 15] \
unused6 [lindex $parts 16] \
]
}
# parse a gopher directory index for links
proc parse {filename} {
set retval [list]
set fh [open $filename]
set text [read $fh]
close $fh
set text [string map [list "\r\n" "\n"] $text]
set lines [split $text "\n"]
foreach {line} $lines {
set type [string index $line 0]
set data [string range $line 1 end]
set fields [split $data "\t"]
lassign $fields label path server port
if {$type eq "1"} {
if {[string index $path end] ne "/"} {
set path "$path/"
}
}
if {$type eq "0" ||
$type eq "1" ||
$type eq "9" ||
$type eq "I" ||
$type eq "g"
} {
if {$port == 70} {
set url [format "gopher://%s/%s%s" $server $type $path]
} else {
set url [format "gopher://%s:%d/%s%s" $server $port $type $path]
}
lappend retval $url
} elseif {$type eq "h" || $type eq "i"} {
# skip "h" and "i" types
continue
} elseif {$type eq "."} {
break
} else {
puts "Unknown gopher type $type in $filename"
continue
}
}
return $retval
}
proc main {argv} {
set ch [curl::init]
set binary "error"
set show_skipped false
set url ""
while {[llength $argv] > 0} {
set remainder [lassign $argv arg]
set argv $remainder
if {$arg eq "--binary-skip"} {
set binary "skip"
} elseif {$arg eq "--binary-save"} {
set binary "save"
} elseif {$arg eq "--show-skipped"} {
set show_skipped true
} elseif {[regexp {^--} $arg]} {
puts "\nUnknown option $arg\n"
break
} else {
set url $arg
break
}
}
if {[string length $url] == 0} {
puts {Usage: gopher-get.tcl [options] gopher-URI}
puts ""
puts "Options:"
puts "--binary-skip (Skip item type 9 links)"
puts "--binary-save (Save item type 9 links)"
puts "--show-skipped (Report skipped links)"
puts ""
exit
}
set parts [gopher_uri_split $url]
set parsed [dict create {*}$parts]
set host [dict get $parsed fqdn]
set path [dict get $parsed path]
set path_len [string length $path]
if {$path_len > 0} {
incr path_len -1
}
set files_done [dict create]
set uris [list $parsed]
set urls_done [dict create]
while {[llength $uris] > 0} {
# for each url in the list, fetch a file
# keep track of type 1 index files
set files [list]
foreach {uri} $uris {
set type [dict get $uri type]
set url [dict get $uri url]
dict set urls_done $url 1
set filename [fetch $ch $url]
if {$type eq "1" &&
[llength $filename] > 0 &&
![dict exists $files_done $filename]
} {
lappend files $filename
}
}
# parse each type 1 index file for new urls
set uris [list]
foreach {filename} $files {
dict set files_done $filename 1
set links [parse $filename]
foreach {link} $links {
set parts [gopher_uri_split $link]
set parsed [dict create {*}$parts]
set link_host [dict get $parsed fqdn]
set link_path [dict get $parsed path]
set link_type [dict get $parsed type]
set start [string range $link_path 0 $path_len]
if {$link_host ne $host} {
if {$show_skipped} {
puts "Skipped link $link, $link_host != $host"
}
continue
}
if {$start ne $path} {
if {$show_skipped} {
puts "Skipped link $link, $start != $path"
}
continue
}
if {[dict exists $urls_done $link]} {
if {$show_skipped} {
puts "Skipped link $link, link already flagged done"
}
continue
}
if {$link_type eq "9"} {
if {$binary eq "error"} {
puts "Error: item type 9 (binary file)"
exit
} elseif {$binary eq "skip"} {
if {$show_skipped} {
puts "Skipped link $link, item type = 9"
}
continue
} else {
# save it
}
}
lappend uris $parsed
}
}
}
curl_exit $ch 0
}
main $::argv
======
<<categories>> Internet