Keith Vetter 2007-06-06 : I came across an html utility written by Eric Raymond called imgsizer. It edits web pages inserting missing WIDTH and HEIGHT attributes to image tags [L1 ]. By having these tags, browsers can load pages faster exploiting multithreading image loading; without them, browsers must do sequential image loading.
I wanted to use Eric's tool but it's written in Python. So I just decided to rewrite it in tcl.
I first tried using TDOM to parse web pages, but it was too brittle and couldn't handle malformed html. This version uses regular expressions to find the image tags to update.
If you have TclMagick, then this utility will run in pure tcl. Otherwise, it uses Img for figuring out image size which means that Tk also gets loaded. (Using tcllib's jpeg, gif and png library is left as an exercise for the reader.)
##+########################################################################## # # imgsizer.tsh -- splices in WIDTH and HEIGHT parameters for HTML IMG tags # This allows browsers to load pages faster by multi-threading the image # loading rather than strict sequential loading. # # by Keith Vetter, May 2007 # inspired by Eric Raymond's version: http://www.catb.org/~esr/software.html package require http package require cmdline set OPT(haveMagick) [expr {! [catch {package require TclMagick}]}] if {! $OPT(haveMagick)} {package require Img} catch {wm withdraw .} array set OPT {version 0.1 root . keepOld 0 noOverwrite 0 verbose 1} ##+########################################################################## # # ImageSizer -- fixes up WIDTH and HEIGHT parameters for IMG tag in one file # uses regexp to find img tags (TDOM was too fragile) # proc ImageSizer {fname} { INFO 1 "$fname\n" set fin [open $fname r] set data [read $fin]; list close $fin set result {} set last 0 set ::CHANGED 0 foreach idx [regexp -inline -all -indices {<img.*?>} $data] { foreach {start end} $idx break append result [string range $data $last [expr {$start-1}]] set imgTag [string range $data $start $end] append result [HandleImgTag $imgTag] set last [expr {$end+1}] } append result [string range $data $last end] SaveResult $result $fname } ##+########################################################################## # # HandleImgTag -- fixes up one image tag # proc HandleImgTag {imgTag} { GetAllAttributes $imgTag set src [GetAttribute src "<none>"] if {$src eq "<none>"} { WARN "Image tag without src" return $imgTag } INFO 2 " $src" if {[file pathtype $src] ne "relative"} { set src [file join $::OPT(root) ".$src"] } set w [GetAttribute width "?"] set h [GetAttribute height "?"] if {[string first "%" $w] != -1 || [string first "%" $h] != -1} { INFO 2 " -- skipping: %\n" return $imgTag } if {$::OPT(noOverwrite) && [string is integer -strict $w] && [string is integer -strict $h]} { INFO 2 " -- skipping: noOverwrite\n" return $imgTag } if {[catch {foreach {w2 h2} [GetImageSize $src] break}]} { INFO 2 "\n" WARN "ERROR: cannot read image dimensions for '$src'" return $imgTag } if {$w ne $w2 || $h ne $h2} {incr ::CHANGED} set ::ATTR(width) $w2 set ::ATTR(height) $h2 INFO 2 " ($w,$h) => ($w2,$h2)\n" return [RebuildImgTag] } ##+########################################################################## # # RebuildImgTag -- returns new image tag with all its attributes # proc RebuildImgTag {} { array set attr [array get ::ATTR] set all [concat src width height [array names attr]] set html "<img" foreach arr $all { if {! [info exists attr($arr)]} continue set value $attr($arr) set delim "\"" if {[string is integer -strict $value]} { set delim "" } elseif {[string first "\"" $value] != -1} { set delim "'" } append html " $arr=$delim$value$delim" unset attr($arr) } append html "/>" return $html } ##+########################################################################## # # GetAttribute -- returns attribute value, using default if not found # proc GetAttribute {which default} { set which [string tolower $which] if {[info exists ::ATTR($which)]} { return $::ATTR($which) } return $default } ##+########################################################################## # # GetAllAttributes -- extracts all attributes for a given tag into global ATTR # proc GetAllAttributes {thisTag} { set last 2 unset -nocomplain ::ATTR while {1} { set next [regexp -inline -nocase -indices -start $last {\s.*?=.} $thisTag] if {$next eq {}} break foreach {start end} [lindex $next 0] break set name [string range $thisTag [expr {$start+1}] [expr {$end-2}]] set name [string tolower $name] set delim [string index $thisTag $end] if {$delim eq "'" || $delim eq "\""} { set start2 [expr {$end+1}] set end2 [string first $delim $thisTag $start2] } else { set start2 $end set idx [regexp -indices -inline -start $start2 {\s|>} $thisTag] set end2 [lindex $idx 0 1] } set value [string range $thisTag $start2 [expr {$end2 - 1}]] set last $end2 set ::ATTR($name) $value } } ##+########################################################################## # # GetImageSize -- returns the size of an image # proc GetImageSize {iname} { if {[info exists ::CACHE($iname)]} { ;# Cache to avoid downloads return $::CACHE($iname) } if {[regexp {(?i)^http:} $iname]} { ;# Is iname really a URL??? set ::CACHE($iname) [GetWebImageSize $iname] return $::CACHE($iname) } if {$::OPT(haveMagick)} { set wand [magick create wand] $wand ReadImage $iname set w [$wand width] set h [$wand height] } else { set img [image create photo -file $iname] set w [image width $img] set h [image height $img] image delete $img } set ::CACHE($iname) [list $w $h] return [list $w $h] } ##+########################################################################## # # GetWebImageSize -- returns the size of an image after first downloading # from the web # proc GetWebImageSize {url} { set token [::http::geturl $url] ::http::wait $token set idata [::http::data $token] ; list ::http::cleanup $token if {$::OPT(haveMagick)} { set wand [magick create wand] $wand ReadImageBlob $idata set w [$wand width] set h [$wand height] } else { set img [image create photo -data $idata] set w [image width $img] set h [image height $img] image delete $img } return [list $w $h] } ##+########################################################################## # # SaveResult -- safely saves our result while safely moving files around. # proc SaveResult {html fname} { if {$::CHANGED == 0} { INFO 2 " no change\n" return } set tempname [GetTempName $fname ".tmp"] set backname [GetTempName $fname ".bak"] set fout [open $tempname w] puts -nonewline $fout $html close $fout file rename $fname $backname file rename $tempname $fname if {! $::OPT(keepOld)} { file delete $backname } } ##+########################################################################## # # GetTempName -- returns an unused filename based on a given basename and # extension. Not bullet-proof, race condition exists but good enough for now. # proc GetTempName {base extension} { set fname "$base$extension" if {! [file exists $fname]} { return $fname } for {set i 1} {$i < 1000} {incr i} { set fname [format "%s%s%03d" $base $extension $i] if {! [file exists $fname]} { return $fname } } for {set i 1} {$i < 1000} {incr i} { set rand [expr {int(rand()*0x7FFFffff)}] set fname "$base$extension$rand" if {! [file exists $fname]} { return $fname } } error "Could not create tempfile '$fname' '$extension'" } proc ParseArgs {} { global argc argv OPT for {set i 0} {$i < $argc} {incr i} { set arg [lindex $argv $i] switch -regexp -- $arg { ^--document-root$ - ^-d$ { set OPT(root) [lindex $argv [incr i]]} ^-d { set OPT(root) [string range $arg 2 end]} ^--no-overwrite$ - ^-n$ { set OPT(noOverwrite) 1 } ^--keep-original$ - ^-k$ { set OPT(keepOld) 1 } ^-q$ { set OPT(verbose) 0 } ^-v$ { incr OPT(verbose) } ^-V$ { DoHelp version } ^-h$ - ^-?$ - ^--help DoHelp ^--$ { incr i; break } ^- { WARN "unknown option: \"$arg\""; DoHelp usage } default { break } } } set argc [expr {$argc - $i}] if {$argc <= 0 && ! $::tcl_interactive} { DoHelp usage } set argv [lrange $argv $i end] } proc WARN {msg} { puts stderr $msg} proc INFO {lvl msg} {if {$lvl <= $::OPT(verbose)} {puts -nonewline $msg }} proc DoHelp {{what help}} { if {$what eq "version"} { puts "imgsizer version $::OPT(version)" exit } set txt "imgsizer ?-d documentRoot? ?-n? ?-k? html-files" if {$what eq "usage"} { puts "usage: $txt" exit } append txt "\n\n" append txt "The imgsizer script automatically inserts WIDTH and HEIGHT parameters\n" append txt "for IMG tags for HTML files. These parameters enable browsers to show\n" append txt "pages faster by multi-threading the image loading rather than strict\n" append txt "sequential loading.\n\n" append txt "This script will insert missing WIDTH and HEIGHT parameters and\n" append txt "correct existing parameters unless they contain a percent sign (%),\n" append txt "or if you gave the '-n' or '--no-overwrite' switch.\n\n" append txt "Options:\n" append txt " -d, --document-root Directory for absolute image filenames\n" append txt " (i.e, ones which contain a leading '/')\n" append txt " -n, --no-overwrite Do not overwrite existing image size info\n" append txt " -k, --keep-original Keep original source files with .bak extension\n" append txt " -V Show version information\n" append txt " -v Verbose output\n" append txt " -q Quiet output\n" puts $txt exit } ################################################################ ParseArgs if {$tcl_interactive} return ;# For debugging foreach arg $argv { if {$tcl_platform(platform) ne "windows"} { ImageSizer $arg continue } foreach fname [glob -nocomplain -- $arg] { ;# Manual glob'ing ImageSizer $fname } } exit ;# Img package loads Tk return