Version 16 of creating zip file archives from tcl with zlib

Updated 2009-05-30 23:31:15 by PT

By Artur Trzewik. vfs's vfs::zip currently only supports the ability to read zip archives. I have not found any Tcl library that can create zip archives, so I implemented it by myself. Adding write support to a vfs extension is quite a bit more complicated so I have written it separately in XOTcl. The code can be also simply ported to pure Tcl. The zip archive should be the same as generated with WinZip.

    package require XOTcl
    namespace import xotcl::*

    Class ZipArchive
    @ ::ZipArchive idemeta component ZipArchive
    ZipArchive instproc addFile {inputFile fileName} {
        # inputFile - source file to archive
        # fileName - name of file in the archive
        my lappend files $inputFile $fileName
    }
    ZipArchive instproc addToStream {stream fin fout} {
        my instvar written

        set offset $written

        set fdata [open $fin r]
        fconfigure $fdata -encoding binary -translation binary
        set data [read $fdata]
        set datacompresed [string range [::vfs::zip -mode compress $data] 2 end-4]
        close $fdata

        binary scan \x04\x03\x4B\x50 I LFH_SIG
        my writeLong $stream $LFH_SIG

        incr written 4

        my writeShort $stream 20
        # java implementation make 8
        # but tools (WinZip) leave it 0
        my writeShort $stream 0
        incr written 4

        my writeShort $stream 8
        incr written 2

        # last mod. time and date
        set dosTime [my toDosTime $fin]

        my writeLong $stream $dosTime
        incr written 4

        set crc [::vfs::crc $data]
        set csize [string length $datacompresed]
        set size [string length $data]

        my writeLong $stream $crc
        my writeLong $stream $csize
        my writeLong $stream $size
        incr written 12

        # file name length
        my writeShort $stream [string length $fout]
        incr written 2

        # extra field length
        set extra ""
        my writeShort $stream [string length $extra]
        incr written 2

        # file name
        puts -nonewline $stream $fout
        incr written [string length $fout]

        puts -nonewline $stream $extra
        incr written [string length $extra]

        set dataStart written;
        puts -nonewline $stream $datacompresed
        incr written $csize

        list $offset $dosTime $crc $csize $size
    }
    ZipArchive instproc createFile file {
        set fout [open $file w]
        fconfigure $fout -encoding binary -translation binary
        my createToStream $fout
        close $fout
    }
    ZipArchive instproc createToStream ostream {
        my instvar files cdOffset cdLength written
        set descriptionList [list]
        foreach {fin fout} $files {
            lappend descriptionList [my addToStream $ostream $fin $fout]
        }
        set cdOffset $written

        foreach {fin fout} $files desc $descriptionList {
            foreach {offset dosTime crc csize size} $desc {}
            my writeCentralFileHeader $ostream $fin $fout $offset $dosTime $size $csize $crc
        }

        set cdLength [expr {$written - $cdOffset}]
        # wirte header

        # EOCD 0X06054B50L scan 0X06054B50L %x s set s
        binary scan \x06\x05\x4B\x50 I EOCD
        my writeLong $ostream $EOCD

        # disk numbers
        my writeShort $ostream 0
        my writeShort $ostream 0

        # number of entries
        set filenum [expr {[llength $files]>>1}]
        my writeShort $ostream $filenum
        my writeShort $ostream $filenum

        # length and location of CD
        my writeLong $ostream $cdLength
        my writeLong $ostream $cdOffset

        # zip file comment
        set comment ""
        # comment lenght
        my writeShort $ostream [string bytelength $comment]

        puts -nonewline $ostream $comment
    }
    ZipArchive instproc init {} {
        my set files [list]
        package require vfslib
        my instvar cdLength cdOffset written
        set cdLength 0
        set cdOffset 0
        set written 0
    }
    ZipArchive instproc toDosTime file {
        set sec [file mtime $file]

        foreach {year month day hour minute secound} [clock format $sec -format "%Y %m %e %k %M %S"] {}

        set month [string trimleft $month 0]
        set year [string trimleft $year 0]
        if {$minute eq ""} {
            set minute 0
        }
       set secound [string trimleft $secound 0]
       if {$secound eq ""} {
           set secound 0
       }

        set value [expr (($year - 1980) << 25) | ($month << 21) | ($day << 16) | ($hour << 11) | ($minute << 5) | ($secound >> 1)]
        return $value
    }
    ZipArchive instproc writeCentralFileHeader {ostream fin fout offset dosTime size csize crc} {
        my instvar written

        # CFH 0X02014B50L
        binary scan \x02\x01\x4B\x50 I CFH_SIG
        my writeLong $ostream $CFH_SIG
        incr written 4

        if {$::tcl_platform(platform) eq "windows"} {
            # unix
            set pid 5
        } else {
            # windows
            set pid 11
        }
        my writeShort $ostream [expr { (($pid << 8) | 20)}]
        incr written 2

        # version needed to extract
        # general purpose bit flag

        my writeShort $ostream 20
        my writeShort $ostream 0
        incr written 4

        # compression method
        my writeShort $ostream 8
        incr written 2

        # last mod. time and date
        my writeLong $ostream $dosTime
        incr written 4

        # CRC
        # compressed length
        # uncompressed length
        my writeLong $ostream $crc
        my writeLong $ostream $csize
        my writeLong $ostream $size
        incr written 12;

        set comment ""
        set extra ""

        # file name length

        my writeShort $ostream [string bytelength $fout]
        incr written 2;

        # extra field length
        my writeShort $ostream [string bytelength $extra]
        incr written 2;

        # file comment length
        my writeShort $ostream [string bytelength $comment]
        incr written 2;

        # disk number start
        my writeShort $ostream 0
        incr written 2

        # internal file attributes
        my writeShort $ostream 0
        incr written 2

        # external file attributes
        my writeLong $ostream 0
        incr written 4

        # relative offset of LFH
        my writeLong $ostream $offset
        incr written 4

        # file name
        puts -nonewline $ostream $fout
        incr written [string bytelength $fout]

        # extra field
        puts -nonewline $ostream $extra
        incr written [string bytelength $extra]

        # file comment
        puts -nonewline $ostream $comment
        incr written [string bytelength $comment]
    }
    ZipArchive instproc writeLong {stream short} {
        puts -nonewline $stream [binary format i $short]
    }
    ZipArchive instproc writeShort {stream short} {
        puts -nonewline $stream [binary format s $short]
    }
    ZipArchive proc createZip {zipFile files} {
        set zipArch [my new]
        foreach f $files {
            $zipArch addFile $f [file tail $f]
        }
        $zipArch createFile $zipFile
        $zipArch destroy
    }
    ZipArchive proc testZip {} {
        my createZip {C:/tmp/my2.zip} {C:/tmp/test.txt C:/tmp/test2.txt C:/tmp/tmp.zip}
        # ZipArchive dumpFile {C:/tmp/my2.zip}
        # ZipArchive dumpFile {C:/tmp/tmp.zip}
    }

requires vfslib Usage as in class method createZip

   set zipArch [ZipArchive new]
   foreach f $files {
      $zipArch addFile $f [file tail $f]
   }
   $zipArch createFile $zipFile
   $zipArch destroy

PT 23-Jan-2009: Now that zlib functions are part of the core I'm working to make it simple to create jar-style Tcl archives currently known as zipkits. The following code is a tcl program to construct a zip archive from a directory tree using nothing but Tcl 8.6 core features. The resulting zip file should be compatible with other zip programs - with the possible exception of unicode support. The Tcl generated files use utf-8 encoding for all filenames and comments but I notice particularly on Windows info-zip has rather poor support for this part of the ZIP file specification.

If you use mkzip mystuff.tm -zipkit -directory mystuff.vfs it will pack your mystuff.vfs/ virtual filesystem tree into a zip archive with a suitable header such that on unix you may mark it executable and it should run with tclkit. Or you can run it with tclsh or wish 8.6 if you like. To change the executable header, specify -runtime preface where preface is a file containing code you want prefixed. For instance, on windows you can create a self-extracting zip archive using mkzip mystuff.exe -directory mystuff.vfs -runtime unzipsfx.exe (unzipsfx is the Info-Zip self-extracting stub).

31-May-2009: a minor update is to use the streaming part of the core zlib api to deflate the data in chunks to avoid using large amounts of memory when compressing big files.

 # mkzip.tcl -- Copyright (C) 2009 Pat Thoyts <[email protected]>
 #
 #        Create ZIP archives in Tcl.
 #
 # version 1.1

 package require Tcl 8.6

 namespace eval zip {}

 # zip::timet_to_dos
 #
 #        Convert a unix timestamp into a DOS timestamp for ZIP times.
 #
 #   DOS timestamps are 32 bits split into bit regions as follows:
 #                  24                16                 8                 0
 #   +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
 #   |Y|Y|Y|Y|Y|Y|Y|m| |m|m|m|d|d|d|d|d| |h|h|h|h|h|m|m|m| |m|m|m|s|s|s|s|s|
 #   +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
 #
 proc zip::timet_to_dos {time_t} {
     set s [clock format $time_t -format {%Y %m %e %k %M %S}]
     scan $s {%d %d %d %d %d %d} year month day hour min sec
     expr {(($year-1980) << 25) | ($month << 21) | ($day << 16) 
           | ($hour << 11) | ($min << 5) | ($sec >> 1)}
 }

 # zip::pop --
 #
 #        Pop an element from a list
 #
 proc zip::pop {varname {nth 0}} {
     upvar $varname args
     set r [lindex $args $nth]
     set args [lreplace $args $nth $nth]
     return $r
 }

 # zip::walk --
 #
 #        Walk a directory tree rooted at 'path'. The excludes list can be
 #        a set of glob expressions to match against files and to avoid.
 #        The match arg is internal.
 #        eg: walk library {CVS/* *~ .#*} to exclude CVS and emacs cruft.
 #
 proc zip::walk {path {excludes ""} {match *}} {
     set result {}
     set files [glob -nocomplain -types f -directory $path $match]
     foreach file $files {
         set excluded 0
         foreach glob $excludes {
             if {[string match $glob $file]} {
                 set excluded 1
                 break
             }
         }
         if {!$excluded} {lappend result $file}
     }
     foreach dir [glob -nocomplain -types d -directory $path $match] {
         set subdir [walk $dir $excludes $match]
         if {[llength $subdir]>0} {
             set result [concat $result $dir $subdir]
         }
     }
     return $result
 }

 # zip::mkzipfile --
 #
 #        Add a single file to a zip archive. The zipchan channel should
 #        already be open and binary. You may provide a comment for the file
 #        The return value is the central directory record that will need 
 #        to be used when finalizing the zip archive.
 #
 proc zip::mkzipfile {zipchan path {comment ""}} {
     set mtime [timet_to_dos [file mtime $path]]
     set utfpath [encoding convertto utf-8 $path]
     set utfcomment [encoding convertto utf-8 $comment]
     set flags [expr {(1<<10)}] ;# use utf-8
     set method 0               ;# store 0, deflate 8
     set attr 0                 ;# text or binary (default binary)
     set version 20             ;# minumum version req'd to extract
     set extra ""
     set crc 0
     set size 0
     set csize 0
     set data ""
     set seekable 1
     if {[file isdirectory $path]} {
         set attrex 0x41ff0010  ;# 0o040777 (drwxrwxrwx)
     } elseif {[file executable $path]} {
         set attrex 0x81ff0080  ;# 0o100777 (-rwxrwxrwx)
     } else {
         set attrex 0x81b60020  ;# 0o100666 (-rw-rw-rw-)
         if {[file extension $path] eq ".tcl"} {
             set attr 1         ;# text
         }
     }

     if {[file isfile $path]} {
         set size [file size $path]
     }

     set offset [tell $zipchan]
     set local [binary format a4sssiiiiss PK\03\04 \
                    $version $flags $method $mtime $crc $csize $size \
                    [string length $utfpath] [string length $extra]]
     append local $utfpath $extra
     puts -nonewline $zipchan $local

     if {[file isfile $path]} {
         set pos [tell $zipchan]
         set fin [open $path rb]
         set method 8
         set zlib [zlib stream deflate]
         while {![eof $fin]} {
             set data [read $fin 4096]
             set crc [zlib crc32 $data $crc]
             $zlib put $data
             if {[string length [set zdata [$zlib get]]]} {
                 incr csize [string length $zdata]
                 puts -nonewline $zipchan $zdata
             }
         }
         close $fin
         $zlib finalize
         set zdata [$zlib get]
         incr csize [string length $zdata]
         puts -nonewline $zipchan $zdata
         $zlib close
     }

     if {$seekable} {
         # update the header if the output is seekable
         set local [binary format a4sssiiii PK\03\04 \
                        $version $flags $method $mtime $crc $csize $size]
         set current [tell $zipchan]
         seek $zipchan $offset
         puts -nonewline $zipchan $local
         seek $zipchan $current
     }

     set hdr [binary format a4ssssiiiisssssii PK\01\02 0x0317 \
                  $version $flags $method $mtime $crc $csize $size \
                  [string length $utfpath] [string length $extra]\
                  [string length $utfcomment] 0 $attr $attrex $offset]
     append hdr $utfpath $extra $utfcomment
     return $hdr
 }

 # zip::mkzip --
 #
 #        Create a zip archive in 'filename'. If a file already exists it will be
 #        overwritten by a new file. If '-directory' is used, the new zip archive
 #        will be rooted in the provided directory.
 #        -runtime can be used to specify a prefix file. For instance, 
 #        zip myzip -runtime unzipsfx.exe -directory subdir
 #        will create a self-extracting zip archive from the subdir/ folder.
 #        The -comment parameter specifies an optional comment for the archive.
 #
 #        eg: zip my.zip -directory Subdir -runtime unzipsfx.exe *.txt
 # 
 proc zip::mkzip {filename args} {
     array set opts {-zipkit 0 -runtime "" -comment "" -directory ""}

     while {[string match -* [set option [lindex $args 0]]]} {
         switch -exact -- $option {
             -zipkit  { set opts(-zipkit) 1 }
             -comment { set opts(-comment) [encoding convertto utf-8 [pop args 1]] }
             -runtime { set opts(-runtime) [pop args 1] }
             -directory {set opts(-directory) [pop args 1] }
             -- { pop args ; break }
             default {
                 break
             }
         }
         pop args
     }

     set zf [open $filename wb]
     if {$opts(-runtime) ne ""} {
         set rt [open $opts(-runtime) rb]
         fcopy $rt $zf
         close $rt
     } elseif {$opts(-zipkit)} {
         set zkd "#!/usr/bin/env tclkit\n\# This is a zip-based Tcl Module\n"
         append zkd "package require vfs::zip\n"
         append zkd "vfs::zip::Mount \[info script\] \[info script\]\n"
         append zkd "if {\[file exists \[file join \[info script\] main.tcl\]\]} {\n"
         append zkd "    source \[file join \[info script\] main.tcl\]\n"
         append zkd "}\n"
         append zkd \x1A
         puts -nonewline $zf $zkd
     }

     set count 0
     set cd ""
     set opwd [pwd]
     if {[catch {
         if {$opts(-directory) ne ""} {
             cd $opts(-directory)
             set paths [walk {} {CVS/* */CVS/* *~ ".#*" "*/.#*"}]
         } else {
             set paths [glob -nocomplain {*}$args]
         }
         foreach path $paths {
             puts $path
             append cd [mkzipfile $zf $path]
             incr count
         }
         set cdoffset [tell $zf]
         set endrec [binary format a4ssssiis PK\05\06 0 0 \
                         $count $count [string length $cd] $cdoffset\
                         [string length $opts(-comment)]]
         append endrec $opts(-comment)
         puts -nonewline $zf $cd
         puts -nonewline $zf $endrec
         close $zf
     } err erropt]} {
         cd $opwd
         return -options erropt $err
     }
     cd $opwd
     return
  }


 if {!$tcl_interactive} {
     set r [catch [linsert $argv 0 zip::mkzip] err]
     if {$r} {puts stderr $errorInfo} elseif {[string length $err]} {puts $err}
     exit $r
 }

See also: Using zipper to create zip files