tdelta.tcl: Produce an rdiff-style delta signature of one file with respect to another, and re-create one file by applying the delta to the other. Strong md5 checksums of file segments are stored in the delta signature, which reduces efficiency but enhances security. # Usage: # tdelta # Returns a delta of the target file with respect to the reference file. # i.e., using patch to apply the delta to the target file will re-create the reference file. # patch # Reconstitute original reference file by applying delta to target file. # global variable: blockSize # Size of file segments to compare. # Smaller blockSize tends to create smaller delta. # Larger blockSize tends to take more time to compute delta. if ![info exists blockSize] {variable blockSize 100} if ![info exists Mod] {variable Mod [expr pow(2,16)]} variable temp if ![info exists temp] { catch {set temp $env(TMP)} catch {set temp $env(TEMP)} catch {set temp $env(TRSYNC_TEMP)} if [catch {file mkdir $temp}] {set temp [pwd]} } if ![file writable $temp] {error "temp location not writable"} proc Backup {args} { return } proc ConstructFile {copyinstructions {eolNative 0} {backup {}}} { package require md5 1 set fileToConstruct [lindex $copyinstructions 0] set existingFile [lindex $copyinstructions 1] set blockSize [lindex $copyinstructions 2] array set fileStats [lindex $copyinstructions 3] array set digestInstructionArray [lindex $copyinstructions 4] array set dataInstructionArray [lindex $copyinstructions 5] unset copyinstructions set existingFile [FileNameNormalize $existingFile] if {$fileToConstruct == {}} {file delete -force $existingFile ; return} set fileToConstruct [FileNameNormalize $fileToConstruct] if $eolNative {set eolNative [string is ascii -strict [array get dataInstructionArray]]} set temp $::temp set constructTag "trsync.[md5::md5 "[clock seconds] [clock clicks]"]" catch { set existingID [open $existingFile r] fconfigure $existingID -translation binary } set constructID [open $temp/$constructTag w] fconfigure $constructID -translation binary set filePointer 1 while {$filePointer < $fileStats(size)} { if {[array names dataInstructionArray $filePointer] != {}} { puts -nonewline $constructID $dataInstructionArray($filePointer) set segmentLength [string length $dataInstructionArray($filePointer)] array unset dataInstructionArray $filePointer set filePointer [expr $filePointer + $segmentLength] } elseif {[array names digestInstructionArray $filePointer] != {}} { if ![info exists existingID] {error "Corrupt copy instructions."} set blockNumber [lindex $digestInstructionArray($filePointer) 0] set blockMd5Sum [lindex $digestInstructionArray($filePointer) 1] seek $existingID [expr $blockNumber * $blockSize] set existingBlock [read $existingID $blockSize] set existingBlockMd5Sum [md5::md5 $existingBlock] if ![string equal -nocase $blockMd5Sum $existingBlockMd5Sum] {error "digest file contents mismatch"} puts -nonewline $constructID $existingBlock if $eolNative {set eolNative [string is ascii -strict $existingBlock]} unset existingBlock set filePointer [expr $filePointer + $blockSize] } else { error "Corrupt copy instructions." } } close $constructID catch {close $existingID} if $eolNative { fcopy [set fin [open $temp/$constructTag r]] [set fout [open $temp/${constructTag}fcopy w]] close $fin close $fout file delete -force $temp/$constructTag set constructTag "${constructTag}fcopy" } catch {file attributes $temp/$constructTag -readonly 0} result catch {file attributes $temp/$constructTag -permissions rw-rw-rw-} result catch {file attributes $temp/$constructTag -owner $fileStats(uid)} result catch {file attributes $temp/$constructTag -group $fileStats(gid)} result catch {file mtime $temp/$constructTag $fileStats(mtime)} result catch {file atime $temp/$constructTag $fileStats(atime)} result catch {file attributes $existingFile -readonly 0} result catch {file attributes $existingFile -permissions rw-rw-rw-} result Backup $backup $fileToConstruct file mkdir [file dirname $fileToConstruct] file rename -force $temp/$constructTag $fileToConstruct array set attributes $fileStats(attributes) foreach attr [array names attributes] { catch {file attributes $fileToConstruct $attr $attributes($attr)} result } catch {file attributes $fileToConstruct -permissions $fileStats(mode)} result return } proc CopyInstructions {filename digest} { package require md5 1 set filename [FileNameNormalize $filename] file stat $filename fileStats array set fileAttributes [file attributes $filename] set arrayadd attributes ; lappend arrayadd [array get fileAttributes] ; array set fileStats $arrayadd set fileSize $fileStats(size) set digestFileName [lindex $digest 0] set blockSize [lindex $digest 1] set digest [lrange $digest 2 end] if {[lsearch -exact $digest fingerprints] > -1} { set fingerPrints [lindex $digest end] set digest [lrange $digest 0 end-2] set f [open $filename r] fconfigure $f -translation binary set fileContents [read $f] set matchCount 0 foreach fP $fingerPrints { if {[string first $fP $fileContents] > -1} {incr matchCount} if {$matchCount > 3} {break} } unset fileContents close $f if {$matchCount < 3} {set digest {}} } set digestLength [llength $digest] for {set i 0} {$i < $digestLength} {incr i} { set arrayadd [lindex [lindex $digest $i] 1] lappend arrayadd $i array set Checksums $arrayadd } set f [open $filename r] fconfigure $f -translation binary set digestInstructions {} set dataInstructions {} set weakChecksum {} set startBlockPointer 0 set endBlockPointer 0 if ![array exists Checksums] { set dataInstructions 1 lappend dataInstructions [read $f] set endBlockPointer $fileSize } while {$endBlockPointer < $fileSize} { set endBlockPointer [expr $startBlockPointer + $blockSize] incr startBlockPointer if {$weakChecksum == {}} { set blockContents [read $f $blockSize] set blockNumberSequence [SequenceBlock $blockContents] set weakChecksumInfo [WeakChecksum $blockNumberSequence] set weakChecksum [format %.0f [lindex $weakChecksumInfo 0]] set startDataPointer $startBlockPointer set endDataPointer $startDataPointer set dataBuffer {} } if {[array names Checksums $weakChecksum] != {}} { set md5Sum [md5::md5 $blockContents] set blockIndex $Checksums($weakChecksum) set digestmd5Sum [lindex [lindex $digest $blockIndex] 0] if [string equal -nocase $digestmd5Sum $md5Sum] { if {$endDataPointer > $startDataPointer} { lappend dataInstructions $startDataPointer lappend dataInstructions $dataBuffer } lappend digestInstructions $startBlockPointer lappend digestInstructions "$blockIndex $md5Sum" set weakChecksum {} set startBlockPointer $endBlockPointer continue } } if {$endBlockPointer >= $fileSize} { lappend dataInstructions $startDataPointer lappend dataInstructions $dataBuffer$blockContents break } set rollChar [read $f 1] binary scan $rollChar c* rollNumber set rollNumber [expr ($rollNumber + 0x100)%0x100] lappend blockNumberSequence $rollNumber set blockNumberSequence [lrange $blockNumberSequence 1 end] binary scan $blockContents a1a* rollOffChar blockContents set blockContents $blockContents$rollChar set dataBuffer $dataBuffer$rollOffChar incr endDataPointer set weakChecksumInfo "[eval RollChecksum [lrange $weakChecksumInfo 1 5] $rollNumber] [lindex $blockNumberSequence 0]" set weakChecksum [format %.0f [lindex $weakChecksumInfo 0]] } close $f lappend copyInstructions $filename lappend copyInstructions $digestFileName lappend copyInstructions $blockSize lappend copyInstructions [array get fileStats] lappend copyInstructions $digestInstructions lappend copyInstructions $dataInstructions return $copyInstructions } proc Digest {filename blockSize {sizecheck 0} {fingerprint 0}} { package require md5 1 set filename [FileNameNormalize $filename] set digest "[list $filename] $blockSize" if {!([file isfile $filename] && [file readable $filename])} {return $digest} set fileSize [file size $filename] if {$sizecheck && ($fileSize < [expr $blockSize * 5])} {return $digest} set charsRead 0 set f [open $filename r] fconfigure $f -translation binary while {$charsRead < $fileSize} { set blockContents [read $f $blockSize] set charsRead [expr $charsRead + $blockSize] set md5Sum [md5::md5 $blockContents] set blockNumberSequence [SequenceBlock $blockContents] set weakChecksum [lindex [WeakChecksum $blockNumberSequence] 0] lappend digest "$md5Sum [format %.0f $weakChecksum]" } if $fingerprint { set fileIncrement [expr $fileSize/10] set fpLocation [expr $fileSize - 21] set i 0 while {$i < 10} { if {$fpLocation < 0} {set fpLocation 0} seek $f $fpLocation lappend fingerPrints [read $f 20] set fpLocation [expr $fpLocation - $fileIncrement] incr i } lappend digest fingerprints lappend digest [lsort -unique $fingerPrints] } close $f return $digest } proc FileNameNormalize {filename} { file normalize $filename } proc RollChecksum {a(k,l)_ b(k,l)_ k l Xsub_k Xsub_l+1 } { set Mod $trsync::Mod set a(k+1,l+1)_ [expr ${a(k,l)_} - $Xsub_k + ${Xsub_l+1}] set b(k+1,l+1)_ [expr ${b(k,l)_} - (($l - $k + 1) * $Xsub_k) + ${a(k+1,l+1)_}] set a(k+1,l+1)_ [expr fmod(${a(k+1,l+1)_},$Mod)] set b(k+1,l+1)_ [expr fmod(${b(k+1,l+1)_},$Mod)] set s(k+1,l+1)_ [expr ${a(k+1,l+1)_} + ($Mod * ${b(k+1,l+1)_})] return "${s(k+1,l+1)_} ${a(k+1,l+1)_} ${b(k+1,l+1)_} [incr k] [incr l]" } proc SequenceBlock {blockcontents} { binary scan $blockcontents c* blockNumberSequence set blockNumberSequenceLength [llength $blockNumberSequence] for {set i 0} {$i < $blockNumberSequenceLength} {incr i} { set blockNumberSequence [lreplace $blockNumberSequence $i $i [expr ([lindex $blockNumberSequence $i] + 0x100)%0x100]] } return $blockNumberSequence } proc WeakChecksum {Xsub_k...Xsub_l} { set a(k,i)_ 0 set b(k,i)_ 0 set Mod $trsync::Mod set k 1 set l [llength ${Xsub_k...Xsub_l}] for {set i $k} {$i <= $l} {incr i} { set Xsub_i [lindex ${Xsub_k...Xsub_l} [expr $i - 1]] set a(k,i)_ [expr ${a(k,i)_} + $Xsub_i] set b(k,i)_ [expr ${b(k,i)_} + (($l - $i + 1) * $Xsub_i)] } set a(k,l)_ [expr fmod(${a(k,i)_},$Mod)] set b(k,l)_ [expr fmod(${b(k,i)_},$Mod)] set s(k,l)_ [expr ${a(k,l)_} + ($Mod * ${b(k,l)_})] return "${s(k,l)_} ${a(k,l)_} ${b(k,l)_} $k $l [lindex ${Xsub_k...Xsub_l} 0]" } proc tdelta {referenceFile targetFile blockSize} { set signature [Digest $targetFile $blockSize] return [CopyInstructions $referenceFile $signature] } proc patch {targetFile copyInstructions fileToConstruct} { set copyInstructions [lreplace $copyInstructions 0 1 $fileToConstruct $targetFile] return [ConstructFile $copyInstructions] } set trsync::Mod $Mod