Another diff in tcl

Another diff in Tcl, by HE, 2012-01-09, saves resources by holding neither the file contents nor the result in the memory.

Description

This implementation is efficient in both computation and memory usage, but instead of using the longest-common-subsequence algorithm, it progresses through the subsequences matching the earliest possible lines, and may therefore present a more fragemented and less concise view of the differences in a file.

Implementation

# Syntax:
#        filediff <file1> <file2> <cmdEqual> <cmdAdd> <cmdDel>
# Description:
#        Compare two files line by line directly from the local filesystem and provide the result with the stated procs.
# with:
#         <file1>    = One of the files to compare.
#        <file2>    = The other file to compare.
#        <cmdEqual> = Procedure to call if two lines are equal. Default is 'cmdEqual'.
#        <cmdAdd>   = Procedure to call if an addtional row is found in <file2>. Default is 'cmdAdd'.
#        <cmdDel>   = Procedure to call if an row is found which not exist in <file2>. Default is 'cmdDel'.
#
# Create your own procedures to output the result if you need anything different than simply write to stdout with puts

proc filediff {file1 file2 {cmdEqual {cmdEqual}} {cmdAdd {cmdAdd}} {cmdDel {cmdDel}}} {
        set sourcefid1 [open $file1 r]
        set sourcefid2 [open $file2 r]

        set found 1
        while {![eof $sourcefid1] && ![eof $sourcefid2]} {
                set lastmark [tell $sourcefid2] ;# Position in <file2> before reading the next line
                gets $sourcefid1 line1
                gets $sourcefid2 line2

                if {$line1 eq $line2} {
                        $cmdEqual $line1
                        continue
                }

                # Lines with only whitespace are also equal
                if {[regexp -- {^\s*$} $line1] && [regexp -- {^\s*$} $line2]} {
                        $cmdEqual {}
                        continue
                }

                # From here both lines are unequal

                set state 0
                while {[regexp -- {^\s*$} $line1]} {
                        # If unequal then directly state empty lines in <file1> as deleted.
                        $cmdDel $line1
                        if {![eof $sourcefid1]} {
                                gets $sourcefid1 line1
                                if {$line1 eq $line2} {
                                        $cmdEqual $line1
                                        set state 1
                                        break
                                }
                        } else {
                                break
                        }
                }
                if {$state} {
                        continue
                }
                
                # Remember position in <file2> and look forward
                set mark2  [tell $sourcefid2]
                set mark2a $lastmark
                set found 0
                while {![eof $sourcefid2]} {
                        gets $sourcefid2 line2
                        if {$line1 ne $line2} {
                                set mark2a $mark2
                                set mark2 [tell $sourcefid2]
                        } else {
                                # Found a matching line. Everything up to the line before are new lines
                                seek $sourcefid2 $lastmark
                                while {[tell $sourcefid2] <= $mark2a} {
                                        gets $sourcefid2 line2
                                        $cmdAdd $line2
                                }
                                gets $sourcefid2 line2
                                $cmdEqual $line2
                                set found 1
                                break
                        }
                }
                if {!$found} {
                        # No matching line found in <file2>. Line must be deleted
                        $cmdDel $line1
                        seek $sourcefid2 $lastmark
                }
        }
        # Output the rest of <file1> as deleted
        while {![eof $sourcefid1]} {
                gets $sourcefid1 line1
                $cmdDel $line1
        }

        # Output the rest of <file2> as added
        while {![eof $sourcefid2]} {
                gets $sourcefid2 line2
                $cmdAdd $line2
        }
        close $sourcefid2
        close $sourcefid1
        return
}

proc cmdEqual {txt} {
        global n
        puts "=$txt";update
}

proc cmdAdd {txt} {
        global n
        puts "+$txt";update
}

proc cmdDel {txt} {
        global n
        puts "-$txt";update
}

Page Authors

HE
main author
PYK
comments on the algorithm