Chi Hung Chan 16 Nov 2004
Motivation:
Tested on
Some benchmark results:
#! /usr/local/bin/tclsh if { $argc < 1 || $argc > 2 } { puts stderr "Usage: $argv0 <url> \[#threads\]" puts stderr " default #threads is 4" exit 1 } proc comma {num {sep ,}} { while {[regsub {^([-+]?\d+)(\d\d\d)} $num "\\1$sep\\2" num]} {} return $num } proc now {} { return [clock format [clock seconds] -format {%H:%M:%S}] } proc lremove { l v } { foreach i $v { set ind [lsearch $l $i] if { $ind == -1 } { continue } set indm1 [expr {$ind-1}] set indp1 [expr {$ind+1}] set l [concat [lrange $l 0 $indm1] [lrange $l $indp1 end]] } return $l } proc urlSize { url } { global validate if { [info exists validate] == 0 } { set validate [http::geturl $url -validate 1] } set code [http::ncode $validate] if { $code != 200 } { puts stderr "Error. http return code=$code" exit 2 } set size [set ${validate}(totalsize)] return $size } proc urlType { url } { global validate if { [info exists validate] == 0 } { set validate [http::geturl $url -validate 1] } return [set ${validate}(type)] } proc isAcceptRanges { url } { global validate if { [info exists validate] == 0 } { set validate [http::geturl $url -validate 1] } array set www [set ${validate}(meta)] if { [info exists www(Accept-Ranges)] == 1 } { return 1 } else { return 0 } } # # get basename of url # proc urlBasename { url } { array set www [uri::split $url] set fname [lindex [split $www(path) /] end] if { [string length $fname] == 0 } { set fname {index.html} } return $fname } # # work out the byte range # proc byteRanges { size nthreads } { set step [expr $size/$nthreads] set p0 -1 set p1 -1 set br {} for { set i 0 } { $i < $nthreads } { incr i } { set p0 [expr $p1 + 1] if { $i == [expr {$nthreads-1}] } { set p1 $size } else { set p1 [expr $p0 + $step] } lappend br $p0 lappend br $p1 set p0 $p1 } return $br } # # fix up nthreads # if server does not support accept-range, nthreads=1 # if '#nthreads' file exists, get from there # proc fixNthreads { url nthreads } { set rc $nthreads # if server cannot support byte range, nthreads=1 if { [isAcceptRanges $url] == 0 } { set rc 1 return $rc } # in resume mode, nthreads now and previous has to tally set fname [urlBasename $url] set ntFilename ".${fname}#nthreads" if { [file exists $ntFilename] } { set fp [open $ntFilename r] set rc [read $fp] close $fp } else { set fp [open $ntFilename w] puts $fp $nthreads close $fp set rc $nthreads } return $rc } # MAIN PROGRAM STARTS HERE package require Thread package require http package require uri set url [lindex $argv 0] set nthreads 4 if { $argc == 2 } { set nthreads [lindex $argv 1] } tsv::set dap url $url tsv::set dap t0 [clock seconds] puts "--[now]-- $url" puts "\t=> [urlBasename $url]" # # if resume is needed, set resumeSize to sum of file size # set resume [glob -nocomplain [format {.%s-*} [urlBasename $url]]] if { [llength $resume] > 0 } { set rs 0 foreach i $resume { incr rs [file size $i] } tsv::set dap resumeSize $rs } else { tsv::set dap resumeSize 0 } set nthreads [fixNthreads $url $nthreads] # # create and initialise thread pool # puts -nonewline "Setting up thread pool of $nthreads threads ... " set tpool [tpool::create -minworkers $nthreads -maxworkers $nthreads \ -idletime 20 -initcmd { package require http package require uri proc dl { seq p0 p1 } { set url [tsv::get dap url] array set www [uri::split $url] set fname [lindex [split $www(path) /] end] set fname [format {.%s-%d} $fname $seq] # resume if { [file exists $fname] == 1 } { set size [file size $fname] if { $size >= [expr $p1-$p0+1] } { return } set p0 [expr $p0+$size] } set fpi [open $fname a] fconfigure $fpi -translation binary set s [http::geturl $url -channel $fpi -binary 1 \ -progress httpProgress \ -headers [list Range bytes=$p0-$p1]] close $fpi } proc httpProgress { token total current } { upvar #0 $token state tsv::set dap thread[thread::id] $current # calculate set max [tsv::get dap size] set sum [tsv::get dap resumeSize] foreach t [thread::names] { if { $t == 1 } { continue } incr sum [tsv::get dap thread$t] } # progress status set t0 [tsv::get dap t0] set size [tsv::get dap size] set percent [expr {100*$sum/$max}] set elapse [expr [clock seconds] - $t0] set kbps [expr {$sum*8.0/(1024.0*$elapse)}] set eta [expr [clock seconds]-$t0] set etam [expr $eta/60] set etas [expr $eta-$etam*60] set status [format {%3d%%[%-51s] %6.2fKbps ETA %02d:%02d} \ $percent \ "[string repeat = [expr $percent/2]]>" \ $kbps \ $etam \ $etas] puts -nonewline "$status\r" flush stdout } }] puts "Done" # # submit job to thread pool, work out the byte range for each thread # puts -nonewline "Submitting jobs to all threads ... " set joblist {} set seq 1 set size [urlSize $url] tsv::set dap size $size foreach { p0 p1 } [byteRanges $size $nthreads] { lappend joblist [tpool::post $tpool [list dl $seq $p0 $p1]] incr seq } puts "Done" puts "Length: [comma $size] \[[urlType $url]\]" # # monitor thread pool til completion # while 1 { set f [tpool::wait $tpool $joblist] set joblist [lremove $joblist $f] if { [llength $joblist] == 0 } { break } after 100 } # # consolidation # puts "\n" puts -nonewline "Download completed. Consolidating ... " set fnameo [urlBasename $url] set fpo [open $fnameo w] fconfigure $fpo -translation binary for { set seq 1 } { $seq <= $nthreads } { incr seq } { set fnamei [format {.%s-%d} $fnameo $seq] set fpi [open $fnamei r] fconfigure $fpi -translation binary fcopy $fpi $fpo -size [file size $fnamei] close $fpi } close $fpo puts "Done" # # cleanup # foreach i [glob -nocomplain ".${fnameo}*"] { file delete -force $i } puts "" puts "--[now]-- $fnameo ([file size $fnameo]/$size)"