Version 13 of creating zip file archives from tcl with zlib

Updated 2009-01-23 23:04:29 by PT

By Artur Trzewik. vfs's vfs::zip currently only supports the ability to read zip archives. I have not found any Tcl library that can create zip archives, so I implemented it by myself. Adding write support to a vfs extension is quite a bit more complicated so I have written it separately in XOTcl. The code can be also simply ported to pure Tcl. The zip archive should be the same as generated with WinZip.

    package require XOTcl
    namespace import xotcl::*

    Class ZipArchive
    @ ::ZipArchive idemeta component ZipArchive
    ZipArchive instproc addFile {inputFile fileName} {
        # inputFile - source file to archive
        # fileName - name of file in the archive
        my lappend files $inputFile $fileName
    }
    ZipArchive instproc addToStream {stream fin fout} {
        my instvar written

        set offset $written

        set fdata [open $fin r]
        fconfigure $fdata -encoding binary -translation binary
        set data [read $fdata]
        set datacompresed [string range [::vfs::zip -mode compress $data] 2 end-4]
        close $fdata

        binary scan \x04\x03\x4B\x50 I LFH_SIG
        my writeLong $stream $LFH_SIG

        incr written 4

        my writeShort $stream 20
        # java implementation make 8
        # but tools (WinZip) leave it 0
        my writeShort $stream 0
        incr written 4

        my writeShort $stream 8
        incr written 2

        # last mod. time and date
        set dosTime [my toDosTime $fin]

        my writeLong $stream $dosTime
        incr written 4

        set crc [::vfs::crc $data]
        set csize [string length $datacompresed]
        set size [string length $data]

        my writeLong $stream $crc
        my writeLong $stream $csize
        my writeLong $stream $size
        incr written 12

        # file name length
        my writeShort $stream [string length $fout]
        incr written 2

        # extra field length
        set extra ""
        my writeShort $stream [string length $extra]
        incr written 2

        # file name
        puts -nonewline $stream $fout
        incr written [string length $fout]

        puts -nonewline $stream $extra
        incr written [string length $extra]

        set dataStart written;
        puts -nonewline $stream $datacompresed
        incr written $csize

        list $offset $dosTime $crc $csize $size
    }
    ZipArchive instproc createFile file {
        set fout [open $file w]
        fconfigure $fout -encoding binary -translation binary
        my createToStream $fout
        close $fout
    }
    ZipArchive instproc createToStream ostream {
        my instvar files cdOffset cdLength written
        set descriptionList [list]
        foreach {fin fout} $files {
            lappend descriptionList [my addToStream $ostream $fin $fout]
        }
        set cdOffset $written

        foreach {fin fout} $files desc $descriptionList {
            foreach {offset dosTime crc csize size} $desc {}
            my writeCentralFileHeader $ostream $fin $fout $offset $dosTime $size $csize $crc
        }

        set cdLength [expr {$written - $cdOffset}]
        # wirte header

        # EOCD 0X06054B50L scan 0X06054B50L %x s set s
        binary scan \x06\x05\x4B\x50 I EOCD
        my writeLong $ostream $EOCD

        # disk numbers
        my writeShort $ostream 0
        my writeShort $ostream 0

        # number of entries
        set filenum [expr {[llength $files]>>1}]
        my writeShort $ostream $filenum
        my writeShort $ostream $filenum

        # length and location of CD
        my writeLong $ostream $cdLength
        my writeLong $ostream $cdOffset

        # zip file comment
        set comment ""
        # comment lenght
        my writeShort $ostream [string bytelength $comment]

        puts -nonewline $ostream $comment
    }
    ZipArchive instproc init {} {
        my set files [list]
        package require vfslib
        my instvar cdLength cdOffset written
        set cdLength 0
        set cdOffset 0
        set written 0
    }
    ZipArchive instproc toDosTime file {
        set sec [file mtime $file]

        foreach {year month day hour minute secound} [clock format $sec -format "%Y %m %e %k %M %S"] {}

        set month [string trimleft $month 0]
        set year [string trimleft $year 0]
        if {$minute eq ""} {
            set minute 0
        }
       set secound [string trimleft $secound 0]
       if {$secound eq ""} {
           set secound 0
       }

        set value [expr (($year - 1980) << 25) | ($month << 21) | ($day << 16) | ($hour << 11) | ($minute << 5) | ($secound >> 1)]
        return $value
    }
    ZipArchive instproc writeCentralFileHeader {ostream fin fout offset dosTime size csize crc} {
        my instvar written

        # CFH 0X02014B50L
        binary scan \x02\x01\x4B\x50 I CFH_SIG
        my writeLong $ostream $CFH_SIG
        incr written 4

        if {$::tcl_platform(platform) eq "windows"} {
            # unix
            set pid 5
        } else {
            # windows
            set pid 11
        }
        my writeShort $ostream [expr { (($pid << 8) | 20)}]
        incr written 2

        # version needed to extract
        # general purpose bit flag

        my writeShort $ostream 20
        my writeShort $ostream 0
        incr written 4

        # compression method
        my writeShort $ostream 8
        incr written 2

        # last mod. time and date
        my writeLong $ostream $dosTime
        incr written 4

        # CRC
        # compressed length
        # uncompressed length
        my writeLong $ostream $crc
        my writeLong $ostream $csize
        my writeLong $ostream $size
        incr written 12;

        set comment ""
        set extra ""

        # file name length

        my writeShort $ostream [string bytelength $fout]
        incr written 2;

        # extra field length
        my writeShort $ostream [string bytelength $extra]
        incr written 2;

        # file comment length
        my writeShort $ostream [string bytelength $comment]
        incr written 2;

        # disk number start
        my writeShort $ostream 0
        incr written 2

        # internal file attributes
        my writeShort $ostream 0
        incr written 2

        # external file attributes
        my writeLong $ostream 0
        incr written 4

        # relative offset of LFH
        my writeLong $ostream $offset
        incr written 4

        # file name
        puts -nonewline $ostream $fout
        incr written [string bytelength $fout]

        # extra field
        puts -nonewline $ostream $extra
        incr written [string bytelength $extra]

        # file comment
        puts -nonewline $ostream $comment
        incr written [string bytelength $comment]
    }
    ZipArchive instproc writeLong {stream short} {
        puts -nonewline $stream [binary format i $short]
    }
    ZipArchive instproc writeShort {stream short} {
        puts -nonewline $stream [binary format s $short]
    }
    ZipArchive proc createZip {zipFile files} {
        set zipArch [my new]
        foreach f $files {
            $zipArch addFile $f [file tail $f]
        }
        $zipArch createFile $zipFile
        $zipArch destroy
    }
    ZipArchive proc testZip {} {
        my createZip {C:/tmp/my2.zip} {C:/tmp/test.txt C:/tmp/test2.txt C:/tmp/tmp.zip}
        # ZipArchive dumpFile {C:/tmp/my2.zip}
        # ZipArchive dumpFile {C:/tmp/tmp.zip}
    }

requires vfslib Usage as in class method createZip

   set zipArch [ZipArchive new]
   foreach f $files {
      $zipArch addFile $f [file tail $f]
   }
   $zipArch createFile $zipFile
   $zipArch destroy

PT 23-Jan-2009: Now that zlib functions are part of the core I'm working to make it simple to create jar-style Tcl archives currently known as zipkits. The following code is a tcl program to construct a zip archive from a directory tree using nothing but Tcl 8.6 core features. The resulting zip file should be compatible with other zip programs - with the possible exception of unicode support. The Tcl generated files use utf-8 encoding for all filenames and comments but I notice particularly on Windows info-zip has rather poor support for this part of the ZIP file specification. If you use mkzip mystuff.tm -zipkit -directory mystuff.vfs it will pack your mystuff.vfs/ virtual filesystem tree into a zip archive with a suitable header such that on unix you may mark it executable and it should run with tclkit. Or you can run it with tclsh or wish 8.6 if you like. To change the executable header, specify -runtime preface where preface is a file containing code you want prefixed. For instance, on windows you can create a self-extracting zip archive using mkzip mystuff.exe -directory mystuff.vfs -runtime unzipsfx.exe (unzipsfx is the Info-Zip self-extracting stub).

 # mkzip.tcl -- Copyright (C) 2009 Pat Thoyts <[email protected]>
 #
 #        Create ZIP archives in Tcl.
 #
 #
 
 package require Tcl 8.6
 
 namespace eval zip {}
 
 # zip::timet_to_dos
 #
 #        Convert a unix timestamp into a DOS timestamp for ZIP times.
 #
 #   DOS timestamps are 32 bits split into bit regions as follows:
 #                  24                16                 8                 0
 #   +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
 #   |Y|Y|Y|Y|Y|Y|Y|m| |m|m|m|d|d|d|d|d| |h|h|h|h|h|m|m|m| |m|m|m|s|s|s|s|s|
 #   +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+ +-+-+-+-+-+-+-+-+
 #
 proc zip::timet_to_dos {time_t} {
     set s clock format $time_t -format {%Y %m %e %k %M %S}
     scan $s {%d %d %d %d %d %d} year month day hour min sec
     expr {(($year-1980) << 25) | ($month << 21) | ($day << 16) 
           | ($hour << 11) | ($min << 5) | ($sec >> 1)}
 }
 
 # zip::pop --
 #
 #        Pop an element from a list
 #
 proc zip::pop {varname {nth 0}} {
     upvar $varname args
     set r lindex $args $nth
     set args lreplace $args $nth $nth
     return $r
 }
 
 # zip::walk --
 #
 #        Walk a directory tree rooted at 'path'. The excludes list can be
 #        a set of glob expressions to match against files and to avoid.
 #        The match arg is internal.
 #        eg: walk library {CVS/* *~ .#*} to exclude CVS and emacs cruft.
 #
 proc zip::walk {path {excludes ""} {match *}} {
     set result {}
     set files glob -nocomplain -types f -directory $path $match
     foreach file $files {
         set excluded 0
         foreach glob $excludes {
             if {string match $glob $file} {
                 set excluded 1
                 break
             }
         }
         if {!$excluded} {lappend result $file}
     }
     foreach dir glob -nocomplain -types d -directory $path $match {
         set subdir walk $dir $excludes $match
         if {llength $subdir>0} {
             set result concat $result $dir $subdir
         }
     }
     return $result
 }
 
 # zip::mkzipfile --
 #
 #        Add a single file to a zip archive. The zipchan channel should
 #        already be open and binary. You may provide a comment for the file
 #        The return value is the central directory record that will need 
 #        to be used when finalizing the zip archive.
 #
 proc zip::mkzipfile {zipchan path {comment ""}} {
     set size file size $path
     set mtime [timet_to_dos [file mtime $path]
     set utfpath encoding convertto utf-8 $path
     set utfcomment encoding convertto utf-8 $comment
     set flags expr {(1<<10)} ;# use utf-8
     set method 0               ;# store 0, deflate 8
     set attr 0                 ;# text or binary (default binary)
     set extra ""
     set crc 0
     set csize 0
     set version 20
     set data ""
     if {file isdirectory $path} {
         set size 0
         set attrex 0x41ff0010  ;# 0o040777 (drwxrwxrwx)
     } elseif {file executable $path} {
         set attrex 0x81ff0000  ;# 0o100777 (-rwxrwxrwx)
     } else {
         set attrex 0x81b60000  ;# 0o100666 (-rw-rw-rw-)
         if {file extension $path eq ".tcl"} {
             set attr 1         ;# text
         }
     }
 
     if {file isfile $path} {
         set fin open $path rb
         set data read $fin
         set crc zlib crc32 $data
         set cdata zlib deflate $data
         if {string length $cdata < $size} {
             set method 8
             set data $cdata
         }
         close $fin
         set csize string length $data
     }
     
     set local [binary format a4sssiiiiss PK\03\04 \
                    $version $flags $method $mtime $crc $csize $size \
                    string length $utfpath [string length $extra]
     append local $utfpath $extra
 
     set offset tell $zipchan
     puts -nonewline $zipchan $local
     puts -nonewline $zipchan $data
 
     set hdr [binary format a4ssssiiiisssssii PK\01\02 0x0317 \
                  $version $flags $method $mtime $crc $csize $size \
                  string length $utfpath string length $extra\
                  string length $utfcomment 0 $attr $attrex $offset]
     append hdr $utfpath $extra $utfcomment
     return $hdr
 }
 
 # zip::mkzip --
 #
 #        Create a zip archive in 'filename'. If a file already exists it will be
 #        overwritten by a new file. If '-directory' is used, the new zip archive
 #        will be rooted in the provided directory.
 #        -runtime can be used to specify a prefix file. For instance, 
 #        zip myzip -runtime unzipsfx.exe -directory subdir
 #        will create a self-extracting zip archive from the subdir/ folder.
 #        The -comment parameter specifies an optional comment for the archive.
 #
 #        eg: zip my.zip -directory Subdir -runtime unzipsfx.exe *.txt
 # 
 proc zip::mkzip {filename args} {
     array set opts {-zipkit 0 -runtime "" -comment "" -directory ""}
 
     while {string match -* [set option [lindex $args 0]} {
         switch -exact -- $option {
             -zipkit  { set opts(-zipkit) 1 }
             -comment { set opts(-comment) [encoding convertto utf-8 [pop args 1] }
             -runtime { set opts(-runtime) pop args 1 }
             -directory {set opts(-directory) pop args 1 }
             -- { pop args ; break }
             default {
                 break
             }
         }
         pop args
     }
 
     set zf open $filename wb
     if {$opts(-runtime) ne ""} {
         set rt open $opts(-runtime) rb
         fcopy $rt $zf
         close $rt
     } elseif {$opts(-zipkit)} {
         set zkd "#!/usr/bin/env tclkit\n\# This is a zip-based Tcl Module\n"
         append zkd "package require vfs::zip\n"
         append zkd "vfs::zip::Mount [info script\] [info script\]\n"
         append zkd "if {[file exists [file join [info script\] main.tcl\]\]} {\n"
         append zkd "    source [file join [info script\] main.tcl\]\n"
         append zkd "}\n"
         append zkd \x1A
         puts -nonewline $zf $zkd
     }
     
     set count 0
     set cd ""
     set opwd pwd
     if {[catch {
         if {$opts(-directory) ne ""} {
             cd $opts(-directory)
             set paths walk . {CVS/* *~ ".#*"}
         } else {
             set paths glob -nocomplain {*}$args
         }
         foreach path $paths {
             append cd mkzipfile $zf $path
             incr count
         }
         set cdoffset tell $zf
         set endrec [binary format a4ssssiis PK\05\06 0 0 \
                         $count $count string length $cd $cdoffset\
                         [string length $opts(-comment)]
         append endrec $opts(-comment)
         puts -nonewline $zf $cd
         puts -nonewline $zf $endrec
         close $zf
     } err erropt]} {
         cd $opwd
         return -options erropt $err
     }
     cd $opwd
     return
  }
     
 
 if {!$tcl_interactive} {
     set r catch [linsert $argv 0 zip::mkzip err]
     if {$r} {puts stderr $errorInfo} elseif {string length $err} {puts $err}
     exit $r
 }
 

See also: Using zipper to create zip files