Version 3 of An LZW-compressing virtual filesystem

Updated 2004-07-30 23:27:51 by SEH

SEH An LZW-compressing virtual filesystem.

Usage: Mount <existing directory> <virtual directory>

All file contents are stored in an LZW-compressed format. Uncompressed file size is tracked and reported via file stat command, so compression is invisible to user.

LZW compression procedures adapted from JR's contributions at LZW

Daisy-chain this with mkvfs, and you've got something akin to a pure-Tcl tar.gz archive construct, with the advantage that individual files can be accessed, edited and replaced without having to decompress and re-compress the whole archive. Not to mention perfectly cross-platform compatible.

 package require vfs 1

 namespace eval ::vfs::template::compress {}

 proc ::vfs::template::compress::Mount {args} {
        set pathto [eval MountProcedure $args]
        set path [lindex $pathto 0]
        set to [lindex $pathto 1]
        ::vfs::filesystem mount $to [list ::vfs::template::compress::handler $path]
        ::vfs::RegisterMount $to [list ::vfs::template::compress::Unmount]
        return $to
 }

 proc ::vfs::template::compress::Unmount {to} {
        set path [lindex [::vfs::filesystem info $to] end]
        UnmountProcedure $path $to
        ::vfs::filesystem unmount $to
 }

 proc ::vfs::template::compress::handler {path cmd root relative actualpath args} {
        switch -- $cmd {
                access {
                        set mode [lindex $args 0]
                        set error [catch {Access $path $root $relative $actualpath $mode}]
                        if $error {vfs::filesystem posixerror $::vfs::posix(EACCES) ; return -code error $::vfs::posix(EACCES)}
                }
                createdirectory {
                        CreateDirectory $path $root $relative $actualpath
                }
                deletefile {
                        DeleteFile $path $root $relative $actualpath
                }
                fileattributes {
                        set index [lindex $args 0]
                        set value [lindex $args 1]
                        array set attributes [FileAttributes $path $root $relative $actualpath]
                        if {$index == {}} {
                                return [lsort [array names attributes]]
                        }
                        set attribute [lindex [lsort [array names attributes]] $index]
                        if {$value == {}} {
                                return $attributes($attribute)
                        }
                        FileAttributesSet $path $root $relative $actualpath $attribute $value
                }
                matchindirectory {
                        set pattern [lindex $args 0]
                        set types [lindex $args 1]
                        return [MatchInDirectory $path $root $relative $actualpath $pattern $types]
                } open {
                        set mode [lindex $args 0]
                        if {$mode == {}} {set mode r}
                        set permissions [lindex $args 1]
                        set channelID [Open $path $root $relative $actualpath $mode $permissions]
                        switch -glob -- $mode {
                                "" -
                                "r*" -
                                "w*" {
                                        seek $channelID 0
                                }
                                "a*" {
                                            seek $channelID 0 end
                                }
                                default {
                                        ::vfs::filesystem posixerror $::vfs::posix(EINVAL)
                                        return -code error $::vfs::posix(EINVAL)
                                }
                        }
                        set result $channelID
                        lappend result [list ::vfs::template::compress::Close $channelID $path $root $relative $actualpath $mode]
                        return $result
                }
                removedirectory {
                        set recursive [lindex $args 0]
                        if !$recursive {
                                if {[MatchInDirectory $path $root $relative $actualpath * 0] != {}} {
                                        ::vfs::filesystem posixerror $::vfs::posix(EEXIST)
                                        return -code error $::vfs::posix(EEXIST)
                                }
                        }
                        RemoveDirectory $path $root $relative $actualpath
                }
                stat {
                        return [Stat $path $root $relative $actualpath]
                }
                utime {
                        set atime [lindex $args 0]
                        set mtime [lindex $args 1]
                        Utime $path $root $relative $actualpath $atime $mtime
                }
        }
 }

 namespace eval ::vfs::template::compress {

 proc MountProcedure {args} {
        set path [lindex $args 0]
        set path [file normalize $path]
        set to [lindex $args 1]
        set to [file normalize $to
        if {![catch {vfs::filesystem info $to}]} {
                # unmount old mount
                Unmount $to
        }


        file mkdir $path

        lappend pathto $path
        lappend pathto $to
        return $pathto
 }

 proc UnmountProcedure {path to} {
        return
 }

 proc Access {path root relative actualpath mode} {
        set modeString [::vfs::accessMode $mode]
        if {$modeString == "F"} {set modeString RWX}
        set modeString [split $modeString {}]
        set fileName [AcquireFile $path $relative]
        if [file readable $fileName] {lappend fileString R}
        if [file writable $fileName] {lappend fileString W}
        if [file executable $fileName] {lappend fileString X}
        foreach mS $modeString {
                set errorMessage "not [string map {R readable W writable X executable} $mS]"
                if {[lsearch $fileString $mS] == -1} {error $errorMessage}
        }
 }

 proc CreateDirectory {path root relative actualpath} {
        file mkdir [file join $path $relative]
 }

 proc DeleteFile {path root relative actualpath} {
        set fileName [AcquireFile $path $relative]
        file delete $fileName
 }

 proc FileAttributes {path root relative actualpath} {
        set fileName [AcquireFile $path $relative]
        file attributes $fileName
 }

 proc FileAttributesSet {path root relative actualpath attribute value} {
        set fileName [AcquireFile $path $relative]
        file attributes $fileName $attribute $value
 }

 proc MatchInDirectory {path root relative actualpath pattern types} {
        if [::vfs::matchDirectories $types] {lappend typeString d}
        if [::vfs::matchFiles $types] {lappend typeString f}

        set globList [glob -directory [file join $path $relative] -nocomplain -types $typeString *]
        set pathLength [expr [string length $path] - 1]
        set newGlobList {}
        foreach gL $globList {
                set gL [join [lrange [split $gL .] 0 end-1] .]
                set gL [string replace $gL 0 $pathLength $root]
                if [string match $pattern [file tail $gL]] {lappend newGlobList $gL}
        }
        return [lsort -unique $newGlobList]
 }

 proc Open {path root relative actualpath mode permissions} {
        set fileName [AcquireFile $path $relative]
        if ![file exists $fileName] {
                set channelID [open $fileName $mode]
                catch {file attributes $fileName -permissions $permissions}
                return $channelID
        }
        set f [open $fileName r]
        fconfigure $f -translation binary
        set cdataEncode [read $f]
        close $f

        set data [Decompress $cdataEncode]
        unset cdataEncode
        set filed [vfs::memchan]
        fconfigure $filed -translation binary
        puts -nonewline $filed $data
        unset data
        fconfigure $filed -translation auto
        seek $filed 0
        return $filed
 }

 proc Close {channelID path root relative actualpath mode} {
 # Do not close the channel in the close callback!
 # It will crash Tcl!  Honest!
 # The core will close the channel after you've taken what info you need from it.

 #        close $channelID
        if {$mode == "r"} {return}
        fconfigure $channelID -translation binary
        seek $channelID 0
        set data [read $channelID]
        set fileSize [string length $data]
        set cdataEncode [Compress $data]
        unset data
        set fileName [AcquireFile $path $relative]
        set newFileName [file join $path $relative].$fileSize
        file rename $fileName $newFileName
        set f [open $newFileName w]
        fconfigure $f -translation binary
        puts -nonewline $f $cdataEncode
        close $f
        return
 }

 proc RemoveDirectory {path root relative actualpath} {
        file delete -force [file join $path $relative]
 }

 proc Stat {path root relative actualpath} {
        set fileName [AcquireFile $path $relative]
        set fileSize [lindex [split $fileName .] end]
        file stat [file join $path $relative] fs
        set fs(size) $fileSize
        return [array get fs]
 }

 proc Utime {path root relative actualpath atime mtime} {
        set fileName [AcquireFile $path $relative]
        file atime $fileName $atime
        file mtime $fileName $mtime
 }

 proc AcquireFile {path relative} {
        set fileNames [glob -nocomplain -path [file join $path $relative] .*]
        foreach fn $fileNames {
                if [regexp "[file join $path $relative]\.\[0-9\]+" $fn] {return $fn}
         }
        return [file join $path $relative].0
 }

 proc Compress {data} {
      set cpre {}
      for {set x 0} {$x < 256} {incr x} {set dict([binary format c $x]) $x}
      set pos 0
      set rval {}
        set string_length_data [string length $data]
      while {$pos < $string_length_data} {
        set ch [string index $data $pos]
        incr pos
        set ci [array names dict -exact $cpre$ch]


        if {$ci != {}} {
          # string in dictionary
          append cpre $ch
        } else {
          set dict($cpre$ch) [array size dict]
          lappend rval $dict($cpre)
          set cpre $ch
        }
      }
      lappend rval $dict($cpre)
        foreach rv $rval {
                if {$rv == 38} {
                        append rvalEncode "&0;"
                } elseif {$rv == 59} {
                        append rvalEncode "&1;"
                } elseif {$rv > 255} {
                        set rv [expr $rv - 254]
                        append rvalEncode "&$rv;"
                } else {
                        append rvalEncode [binary format c $rv]
                }
        }
        set rvalEncode [string map {;& { }} $rvalEncode]
      return $rvalEncode
 }

 proc Decompress {cdataEncode} {
        set string_length_cdataEncode [string length $cdataEncode]
        set pos 0
        while {$pos < $string_length_cdataEncode} {
                set strIndex [string index $cdataEncode $pos]
                if {$strIndex == "&"} {
                        while {[set strIndex [string index $cdataEncode [incr pos]]] != "\;"} {
                                if {$strIndex == { }} {
                                        if {$cDatum == 0} {
                                                set cDatum 38
                                        } elseif {$cDatum == 1} {
                                                set cDatum 59
                                        } else {
                                                set cDatum [expr $cDatum + 254]
                                        }
                                        lappend cdata $cDatum
                                        unset cDatum
                                }
                                append cDatum $strIndex
                        }
                        if {$cDatum == 0} {
                                set cDatum 38
                        } elseif {$cDatum == 1} {
                                set cDatum 59
                        } else {
                                set cDatum [expr $cDatum + 254]
                        }
                        lappend cdata $cDatum
                        unset cDatum
                } else {
                        binary scan $strIndex c strIndex
                        lappend cdata $strIndex
                }
                incr pos
        }

      set cpre {}
      set dict {}
      for {set x 0} {$x < 256} {incr x} {lappend dict [binary format c $x]}
      set pos 0
      set rval {}
        set llength_cdata [llength $cdata]
      while {$pos < $llength_cdata} {
        set co [lindex $cdata $pos]
        incr pos
        if {$co >= [llength $dict]} {
          lappend dict $cpre[string index $cpre 0]
          set cpre [lindex $dict $co]
        } else {
          append cpre [string index [lindex $dict $co] 0]
          # this only won't apply for the very first character
          if {[string length $cpre] > 1} {
            lappend dict $cpre
          }
          set cpre [lindex $dict $co]
        }
        append rval [lindex $dict $co]
      }
      return $rval
 }

 }
 # end namespace eval vfs::template::compress

[Category VFS]