Version 2 of Download Accelerator

Updated 2004-11-16 01:22:08

Chi Hung Chan 16 Nov 2004

Motivation:

  • inspired by Download Accelerator Plus (www.speedbit.com)
  • get my hands dirty on Tcl Thread
  • frustrated with wget on UNIX for big file download

Tested on

  • Tcl 8.3.5, Tcl-thread 2.5.2 on Solaris 8 SPARC
  • Tcl 8.4.7, Tcl-thread 2.5.2 on Solaris 10 x86

---

Some benchmark results:

---

 #! /usr/local/bin/tclsh

 if { $argc < 1 || $argc > 2 } {
     puts stderr "Usage: $argv0 <url> \[#threads\]"
     puts stderr "       default #threads is 4"
     exit 1
 }



 proc comma {num {sep ,}} {
     while {[regsub {^([-+]?\d+)(\d\d\d)} $num "\\1$sep\\2" num]} {}
     return $num
 }


 proc now {} {
     return [clock format [clock seconds] -format {%H:%M:%S}]
 }


 proc lremove { l v } {
     foreach i $v {
         set ind [lsearch $l $i]
         if { $ind == -1 } { continue }
         set indm1 [expr {$ind-1}]
         set indp1 [expr {$ind+1}]
         set l [concat [lrange $l 0 $indm1] [lrange $l $indp1 end]]
     }
     return $l
 }


 proc urlSize { url } {
     global validate

     if { [info exists validate] == 0 } {
         set validate [http::geturl $url -validate 1]
     }
     set code [http::ncode $validate]
     if { $code != 200 } {
         puts stderr "Error. http return code=$code"
         exit 2
     }
     set size [set ${validate}(totalsize)]

     return $size
 }



 proc urlType { url } {
     global validate

     if { [info exists validate] == 0 } {
         set validate [http::geturl $url -validate 1]
     }
     return [set ${validate}(type)]
 }


 proc isAcceptRanges { url } {
     global validate

     if { [info exists validate] == 0 } {
         set validate [http::geturl $url -validate 1]
     }
     array set www [set ${validate}(meta)]
     if { [info exists www(Accept-Ranges)] == 1 } {
         return 1
     } else {
         return 0
     }
 }


 #
 # get basename of url
 #
 proc urlBasename { url } {
     array set www [uri::split $url]
     set fname [lindex [split $www(path) /] end]
     if { [string length $fname] == 0 } {
         set fname {index.html}
     }
     return $fname
 }


 #
 # work out the byte range
 #
 proc byteRanges { size nthreads } {
     set step [expr $size/$nthreads]
     set p0 -1
     set p1 -1
     set br {}
     for { set i 0 } { $i < $nthreads } { incr i } {
         set p0 [expr $p1 + 1]
         if { $i == [expr {$nthreads-1}] } {
             set p1 $size
         } else {
             set p1 [expr $p0 + $step]
             }
         lappend br $p0
         lappend br $p1
         set p0 $p1
     }
     return $br
 }


 #
 # fix up nthreads
 # if server does not support accept-range, nthreads=1
 # if '#nthreads' file exists, get from there
 #
 proc fixNthreads { url nthreads } {
     set rc $nthreads

     # if server cannot support byte range, nthreads=1
     if { [isAcceptRanges $url] == 0 } {
         set rc 1
         return $rc
     }

     # in resume mode, nthreads now and previous has to tally
     set fname [urlBasename $url]
     set ntFilename ".${fname}#nthreads"
     if { [file exists $ntFilename] } {
         set fp [open $ntFilename r]
         set rc [read $fp]
         close $fp
     } else {
         set fp [open $ntFilename w]
         puts $fp $nthreads
         close $fp
         set rc $nthreads
     }
     return $rc
 }




 # MAIN PROGRAM STARTS HERE


 package require Thread
 package require http
 package require uri


 set url [lindex $argv 0]
 set nthreads 4
 if { $argc == 2 } {
     set nthreads [lindex $argv 1]
 }
 tsv::set dap url $url
 tsv::set dap t0 [clock seconds]


 puts "--[now]-- $url"
 puts "\t=> [urlBasename $url]"


 #
 # if resume is needed, set resumeSize to sum of file size
 #
 set resume [glob -nocomplain [format {.%s-*} [urlBasename $url]]]
 if { [llength $resume] > 0 } {
     set rs 0
     foreach i $resume {
         incr rs [file size $i]
     }
     tsv::set dap resumeSize $rs
 } else {
     tsv::set dap resumeSize 0
 }


 set nthreads [fixNthreads $url $nthreads]


 #
 # create and initialise thread pool
 #
 puts -nonewline "Setting up thread pool of $nthreads threads ... "
 set tpool [tpool::create -minworkers $nthreads -maxworkers $nthreads \
         -idletime 20 -initcmd {
     package require http
     package require uri

     proc dl { seq p0 p1 } {
         set url [tsv::get dap url]
         array set www [uri::split $url]
         set fname [lindex [split $www(path) /] end]
         set fname [format {.%s-%d} $fname $seq]

         # resume
         if { [file exists $fname] == 1 } {
             set size [file size $fname]
             if { $size >= [expr $p1-$p0+1] } {
                 return
             }
             set p0 [expr $p0+$size]
         }

         set fpi [open $fname a]
         fconfigure $fpi -translation binary
         set s [http::geturl $url -channel $fpi -binary 1 \
             -progress httpProgress \
             -headers [list Range bytes=$p0-$p1]]
         close $fpi
     }
     proc httpProgress { token total current } {
         upvar #0 $token state

         tsv::set dap thread[thread::id] $current

         # calculate
         set max [tsv::get dap size]
         set sum [tsv::get dap resumeSize]
         foreach t [thread::names] {
             if { $t == 1 } { continue }
             incr sum [tsv::get dap thread$t]
         }

         # progress status 
         set t0 [tsv::get dap t0]
         set size [tsv::get dap size]
         set percent [expr {100*$sum/$max}]
         set elapse [expr [clock seconds] - $t0]
         set kbps [expr {$sum*8.0/(1024.0*$elapse)}]
         set eta [expr [clock seconds]-$t0]
         set etam [expr $eta/60]
         set etas [expr $eta-$etam*60]
         set status [format {%3d%%[%-51s] %6.2fKbps  ETA %02d:%02d} \
             $percent \
             "[string repeat = [expr $percent/2]]>" \
             $kbps \
             $etam \
             $etas]
         puts -nonewline "$status\r"
         flush stdout
     }
 }]
 puts "Done"


 #
 # submit job to thread pool, work out the byte range for each thread
 #
 puts -nonewline "Submitting jobs to all threads ... "
 set joblist {}
 set seq 1
 set size [urlSize $url]
 tsv::set dap size $size
 foreach { p0 p1 } [byteRanges $size $nthreads] {
     lappend joblist [tpool::post $tpool [list dl $seq $p0 $p1]] 
     incr seq 
 }
 puts "Done"


 puts "Length: [comma $size] \[[urlType $url]\]"



 #
 # monitor thread pool til completion
 #
 while 1 {
     set f [tpool::wait $tpool $joblist]
     set joblist [lremove $joblist $f]
     if { [llength $joblist] == 0 } { break }
     after 100
 }


 #
 # consolidation
 #
 puts "\n"
 puts -nonewline "Download completed. Consolidating ... "
 set fnameo [urlBasename $url]
 set fpo [open $fnameo w]
 fconfigure $fpo -translation binary
 for { set seq 1 } { $seq <= $nthreads } { incr seq } {
     set fnamei [format {.%s-%d} $fnameo $seq]
     set fpi [open $fnamei r]
     fconfigure $fpi -translation binary
     fcopy $fpi $fpo -size [file size $fnamei]
     close $fpi
 }
 close $fpo
 puts "Done"


 #
 # cleanup
 #
 foreach i [glob -nocomplain ".${fnameo}*"] {
     file delete -force $i
 }


 puts ""
 puts "--[now]-- $fnameo ([file size $fnameo]/$size)"