Version 0 of diffing very similar files

Updated 2003-04-01 12:58:55

Arjen Markus (1 April 2003) I often need to compare two directories with files that are very likely to be the same, but they may contain small changes, as I keep directories with source code for all of the platforms we build our programs for. This used to be somewhat cumbersome: the output from diff is not always that nice, and manually comparing files takes a lot of patience. So I sat down and wrote this small script.


 #!/bin/sh 
 #\
 exec tclsh "$0" ${1+"$@"}
 #
 # compdir --
 #    Simple script to compare two directories with files that are
 #    probably almost identical
 #
 # Usage:
 #    compdir other_directory
 #
 # Note:
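 #    This script works as follows:
 #    - For each file (matching one of the masks) that exists in both
 #      directories:
 #      - report at most 10 different lines
 #      - (reporting their sizes and dates if they differ is not
 #        implemented in this version)
 #    - Files that exist in only one of the two directories are listed
 #    - The report is written to the file "compdir.rep"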
 #

 # checkFiles --
 #    Scan the (text) files and compare them line by line
 #
 # Arguments:
 #    file1       File in the current directory
 #    file2       File in the other directory
 # Result:
 #    {} if no differences, otherwise a report of up to 10 different lines
 #    (both versions of a line are chopped off at 35 characters, put side
 #    by side and the report lines are concatenated)
 # Note:
 #    - Lines are compared as strings, so that "1.0" and "1" count as
 #      different lines
 #    - If one file is shorter than the other, the first missing line is
 #      reported as "=== end-of-file ===" and the scan stops
 #
 proc checkFiles { file1 file2 } {
    global errorInfo errorCode

    set maxDiffs 10
    set eofMark  "=== end-of-file ==="

    set infile1 [open $file1 "r"]

    # Do not leak the first channel if the second file cannot be opened
    if { [catch { open $file2 "r" } infile2] } {
       set savedInfo $errorInfo
       set savedCode $errorCode
       close $infile1
       error $infile2 $savedInfo $savedCode
    }

    set count   0
    set result  {}
    set eof2    0

    while { [gets $infile1 line1] >= 0 } {

       # Second file is shorter? Report this - use a flag, not the text
       # of the marker, so a line that happens to equal the marker in
       # the first file can not hide the difference
       if { [gets $infile2 line2] < 0 } {
          set eof2  1
          set line2 $eofMark
       }

       # Plain string comparison: the != operator would compare lines
       # that look like numbers by value
       if { $eof2 || ![string equal $line1 $line2] } {
          incr count
          append result [format %-35s [string range $line1 0 34]] " | " \
                        [format %-35s [string range $line2 0 34]] "\n"
       }

       if { $eof2 || $count >= $maxDiffs } {
          break
       }
    }

    # First file exhausted: check that the second file is also at the end.
    # An empty extra line has length 0, so test for >= 0.
    if { !$eof2 && $count < $maxDiffs } {
       if { [gets $infile2 line2] >= 0 } {
          append result [format %-35s [string range $eofMark 0 34]] " | " \
                        [format %-35s [string range $line2 0 34]] "\n"
       }
    }

    close $infile1
    close $infile2

    return $result
 }

 # reportDifferences --
 #    Scan the directory and report possible differences
 #
 # Arguments:
 #    report      Report file (channel opened for writing)
 #    mask        Mask (glob pattern) for the files to be scanned
 #    dir         Name of the directory to scan against
 # Result:
 #    None, the report file is used instead
 # Side effects:
 #    - Files found in only one of the two directories are listed in
 #      the report
 #    - For each file found in both directories the name is printed to
 #      stdout and the name plus the result of checkFiles is written
 #      to the report
 #
 proc reportDifferences { report mask dir } {

    set files1 [glob -nocomplain $mask]

    # Run glob from inside the other directory, so that both lists hold
    # names relative to their own directory. Always return to the
    # original directory, even if glob fails.
    set orgdir [pwd]
    cd  $dir
    set failed [catch { glob -nocomplain $mask } files2]
    cd  $orgdir
    if { $failed } {
       error $files2
    }

    set common {}
    set unique {}

    foreach f1 $files1 {
       # -exact: lsearch defaults to glob matching, which would treat
       # characters like \[ \] * ? in a file name as a pattern
       set idx [lsearch -exact $files2 $f1]
       if { $idx > -1 } {
          lappend common $f1
          # What remains in files2 at the end is unique to the other directory
          set files2 [lreplace $files2 $idx $idx]
       } else {
          lappend unique $f1
       }
    }

    if { [llength $unique] > 0 } {
       puts $report "Files unique to [pwd]: $unique"
    }
    if { [llength $files2] > 0 } {
       puts $report "Files unique to $dir: $files2"
    }

    if { [llength $files1] > 0 } {
       puts $report " "
    }

    foreach f $common {
       puts "File: $f"
       puts $report "File: $f"
       puts $report [checkFiles $f [file join $dir $f]]
    }
 }

 # main --
 #   Check the command-line argument, open the report file and run
 #   the comparison for each of the file masks in turn
 #

 set dir [lindex $argv 0]
 if { [string length $dir] == 0 } {
    puts "Usage: [file root $argv0] second-directory"
    exit 1
 }

 set report [open "compdir.rep" "w"]

 # One pass per file type, in this order
 foreach mask { *.c *.h *.f *.f90 *.inc *.y *.l *.tcl } {
    reportDifferences $report $mask $dir
 }

 close $report