Using zipper to create zip files

Kevin Walzer I've been looking for a pure-Tcl solution to create zip archives, and stumbled across JCW's "zipper" package, documented at http://www.equi4.com/critlib/zipper.README . It's part of his critlib package at http://www.equi4.com/critlib/ . I'm a bit surprised that this extension isn't more widely discussed or used, because it appears to offer a foundation for writing a tcllib extension similar to tar.

pd Since equi4 no longer exists, here is source obtained from there.

# ZIP file constructor

package provide zipper 0.11

namespace eval zipper {
  namespace export initialize addentry finalize

  namespace eval v {
    variable fd
    variable base
    variable toc
  }

  proc initialize {fd} {
    set v::fd $fd
    set v::base [tell $fd]
    set v::toc {}
    fconfigure $fd -translation binary -encoding binary
  }

  proc emit {s} {
    puts -nonewline $v::fd $s
  }

  proc dostime {sec} {
    set f [clock format $sec -format {%Y %m %d %H %M %S} -gmt 1]
    regsub -all { 0(\d)} $f { \1} f
    foreach {Y M D h m s} $f break
    set date [expr {(($Y-1980)<<9) | ($M<<5) | $D}]
    set time [expr {($h<<11) | ($m<<5) | ($s>>1)}]
    return [list $date $time]
  }

  proc addentry {name contents {date ""} {force 0}} {
    if {$date == ""} { set date [clock seconds] }
    foreach {date time} [dostime $date] break
    set flag 0
    set type 0 ;# stored
    set fsize [string length $contents]
    set csize $fsize
    set fnlen [string length $name]

    if {$force > 0 && $force != [string length $contents]} {
      set csize $fsize
      set fsize $force
      set type 8 ;# if we're passing in compressed data, it's deflated
    }

    if {[catch { zlib crc32 $contents } crc]} {
      set crc 0
    } elseif {$type == 0} {
      set cdata [zlib deflate $contents]
      if {[string length $cdata] < [string length $contents]} {
        set contents $cdata
        set csize [string length $cdata]
        set type 8 ;# deflate
      }
    }

    lappend v::toc "[binary format a2c6ssssiiiss4ii PK {1 2 20 0 20 0} \
                            $flag $type $time $date $crc $csize $fsize $fnlen \
                        {0 0 0 0} 128 [tell $v::fd]]$name"

    emit [binary format a2c4ssssiiiss PK {3 4 20 0} \
                    $flag $type $time $date $crc $csize $fsize $fnlen 0]
    emit $name
    emit $contents
  }

  proc finalize {} {
    set pos [tell $v::fd]

    set ntoc [llength $v::toc]
    foreach x $v::toc { emit $x }
    set v::toc {}

    set len [expr {[tell $v::fd] - $pos}]
    incr pos -$v::base

    emit [binary format a2c2ssssiis PK {5 6} 0 0 $ntoc $ntoc $len $pos 0]

    return $v::fd
  }
}

if {[info exists pkgtest] && $pkgtest} {
  puts "no test code"
}

# test code below runs when this is launched as the main script
if {[info exists argv0] && [string match zipper-* [file tail $argv0]]} {

  catch { package require zlib }

  zipper::initialize [open try.zip w]

  set dirs [list .]
  while {[llength $dirs] > 0} {
    set d [lindex $dirs 0]
    set dirs [lrange $dirs 1 end]
    foreach f [lsort [glob -nocomplain [file join $d *]]] {
      if {[file isfile $f]} {
        regsub {^\./} $f {} f
        set fd [open $f]
        fconfigure $fd -translation binary -encoding binary
        zipper::addentry $f [read $fd] [file mtime $f]
        close $fd
      } elseif {[file isdir $f]} {
        lappend dirs $f
      }
    }
  }

  close [zipper::finalize]

  puts "size = [file size try.zip]"
  puts [exec unzip -v try.zip]

  file delete try.zip
}

See also Zip A Directory.

Here's a sample:

    package require zipper
    zipper::initialize [open try.zip w]
    zipper::addentry dir/file.txt "some data to store"
    close [zipper::finalize]

This will create a zip archive called "try.zip" with "file.txt," whose contents are "some data to store." Mounting the archive with a tool like WinZip or, more appropriately, the tclvfs package (vfs::zip) confirms the contents of the file.

I was curious if one could copy an existing file to a zip file with zipper, and it seems you can. I tried a jpeg, and it worked fine. Use code similar to this:

   package require zipper
   set f  [open somefile.jpg r]
   fconfigure $f -translation binary
   zipper::initialize [open try.zip w]
   zipper::addentry somefile.jpg [read $f]
   close $f
   close [zipper::finalize]

This works by reading the jpeg file as a binary stream, then writing the data to a file of the same name in the zip archive. By contrast, with the tclvfs extension, you can copy a file from the virtual file system (in this case a zip archive) to your local directory using the file copy command.

One thing zipper does not do is append files to an existing zip archive--I have not found a way to do that. ((Using the command "open try.zip a" simply overwrote the contents of the existing zip archive.) If you want to append files you will probably need to do a workaround by writing the contents of an existing zip archive to a temporary directory, adding whatever files you want to that directory via file copy, then writing each file back to a new zip archive with the same name via zipper. This might be slow if you have a lot of files or they are large.

I'm thinking about writing a little GUI utility that will simplify the creation and viewing of zip archives, and the zipper command set will likely be an integral part of it.


MHo, 2007-Oct-09: I noticed a time difference of -2 hours after adding some files with addentry... [file mtime sourcefile] (WinXP, daylight saving time active, germany)... Can someone confirm this behaviour? I can't remember details, but a 1 hour-delta-problem with NTFS sounds somewhat familiar... Meanwhile I found a possible workaround for the diffs in zipper.tcl:

 proc dostime {sec} {
     set f [clock format $sec -format {%Y %m %d %H %M %S} -gmt 1]; ########################### -gmt 1
     regsub -all { 0(\d)} $f { \1} f
     foreach {Y M D h m s} $f break
     set date [expr {(($Y-1980)<<9) | ($M<<5) | $D}]
     set time [expr {($h<<11) | ($m<<5) | ($s>>1)}]
     return [list $date $time]
 }

After removing -gmt 1, the local timezone is respected and most of the time stamps are accurat. But still some files remain which are stored with a 1-hour-difference which is due to daylight saving time manipulations, I guess... hm ... How is the time stamp in the zip archiv interpreted?


RS 2007-10-09: Of course, it would be best if tclvfs supported adding to ZIP files... MHo: Yes, indeed. But as I noticed that tclkit*.exe have the zlib compiled in, zipper.tcl was the easiest way to create zip archives without execing to an external zip.exe. I don't know the details of zlib, though.

erich 2010-05-24: without the zlib package, for those using pre-8.6 tcl, it might be useful to change the addentry proc and use tcllib's crc32.tcl for the crc check so it doesn't spit out a zip archive that gives you an error of a bad checksum when trying to open the archive

EF 2017-12-18: Since the tar on the page linked above did not contain the latest version with directory support, I thought that I would paste some slightly modified code below. This implementation is able to handle several ZIP files at the same time, as it associates a context for each file descriptor passed to initialize. My modifications provides for a more modern, Tk-style, interface as well. To self-test, you will need to have an unzip binary available.

# ZIP file constructor

package require zlib

namespace eval zipper {
    namespace export initialize
    namespace eval v {}
    catch {namespace ensemble create}
}
    
proc ::zipper::initialize {fd} {
    # Store file specific information in a separate namespace
    namespace eval v::$fd {}
    set v::${fd}::fd $fd
    set v::${fd}::base [tell $fd]
    set v::${fd}::toc [list]
    fconfigure $fd -translation binary -encoding binary
    # Arrange for access to callers, Tk-style
    interp alias {} [namespace current]::v::$fd {} [namespace current]::Dispatch $fd
    return [namespace current]::v::$fd
}

proc ::zipper::Dispatch { fd cmd args } {
    if { [string match {[a-z]*} $cmd] && [llength [info procs [namespace current]::$cmd]] } {
        if { [namespace exists [namespace current]::v::$fd] } {
            return [uplevel 1 [linsert $args 0 [namespace current]::$cmd $fd]]
        } else {
            return -code error "$fd doesn't refer to a zipper context"
        }
    } else {
        return -code error "$cmd is not a known zipper command"
    }
}

proc ::zipper::Emit { fd s} {
    puts -nonewline [set v::${fd}::fd] $s
}

proc ::zipper::DosTime {sec} {
    set f [clock format $sec -format {%Y %m %d %H %M %S} -gmt 1]
    regsub -all { 0(\d)} $f { \1} f
    foreach {Y M D h m s} $f break
    set date [expr {(($Y-1980)<<9) | ($M<<5) | $D}]
    set time [expr {($h<<11) | ($m<<5) | ($s>>1)}]
    return [list $date $time]
}

proc ::zipper::addentry {fd name contents {date ""} {force 0}} {
    if {$date == ""} { set date [clock seconds] }
    foreach {date time} [DosTime $date] break
    set flag 0
    set type 0 ;# stored
    set fsize [string length $contents]
    set csize $fsize
    set fnlen [string length $name]
    
    if {$force > 0 && $force != [string length $contents]} {
        set csize $fsize
        set fsize $force
        set type 8 ;# if we're passing in compressed data, it's deflated
    }
    
    if {[catch { zlib crc32 $contents } crc]} {
        set crc 0
    } elseif {$type == 0} {
        set cdata [zlib deflate $contents]
        if {[string length $cdata] < [string length $contents]} {
            set contents $cdata
            set csize [string length $cdata]
            set type 8 ;# deflate
        }
    }
    
    lappend v::${fd}::toc "[binary format a2c6ssssiiiss4ii PK {1 2 20 0 20 0} \
            $flag $type $time $date $crc $csize $fsize $fnlen \
            {0 0 0 0} 128 [tell [set v::${fd}::fd]]]$name"
    
    Emit $fd [binary format a2c4ssssiiiss PK {3 4 20 0} \
            $flag $type $time $date $crc $csize $fsize $fnlen 0]
    Emit $fd $name
    Emit $fd $contents
}

proc ::zipper::adddirentry {fd name {date ""} {force 0}} {
    if {$date == ""} { set date [clock seconds] }
    # remove trailing slashes and add new one
    set name "[string trimright $name /]/"
    foreach {date time} [DosTime $date] break
    set flag 2
    set type 0
    set crc 0
    set csize 0
    set fsize 0
    set fnlen [string length $name]
    
    lappend v::${fd}::toc "[binary format a2c6ssssiiiss4ii PK {1 2 20 0 20 0} \
            $flag $type $time $date $crc $csize $fsize $fnlen \
            {0 0 0 0} 128 [tell [set v::${fd}::fd]]]$name"
    Emit $fd [binary format a2c4ssssiiiss PK {3 4 20 0} \
            $flag $type $time $date $crc $csize $fsize $fnlen 0]
    Emit $fd $name
}

proc ::zipper::finalize { fd } {
    set pos [tell [set v::${fd}::fd]]
    
    set ntoc [llength [set v::${fd}::toc]]
    foreach x [set v::${fd}::toc] {
        Emit $fd $x
    }
    set v::${fd}::toc {}
    
    set len [expr {[tell [set v::${fd}::fd]] - $pos}]
    incr pos -[set v::${fd}::base]
    
    Emit $fd [binary format a2c2ssssiis PK {5 6} 0 0 $ntoc $ntoc $len $pos 0]
    namespace delete v::$fd
    
    return $fd
}

# test code below runs when this is launched as the main script
if {[info exists argv0] && [string match zipper* [file tail $argv0]]} {
    
    set zip [zipper initialize [open try.zip w]]
    
    set dirs [list .]
    while {[llength $dirs] > 0} {
        set d [lindex $dirs 0]
        set dirs [lrange $dirs 1 end]
        foreach f [lsort [glob -nocomplain [file join $d *]]] {
            if {[file isfile $f]} {
                regsub {^\./} $f {} f
                set fd [open $f]
                fconfigure $fd -translation binary -encoding binary
                $zip addentry $f [read $fd] [file mtime $f]
                close $fd
            } elseif {[file isdir $f]} {
                lappend dirs $f
            }
        }
    }
    
    close [$zip finalize]
    
    puts "size = [file size try.zip]"
    puts [exec unzip -v try.zip]
    
    file delete try.zip
}

package provide zipper 0.12