Version 5 of Tracking Filesystem Changes

Updated 2006-05-31 23:32:31

George Peter Staplin May 30, 2006 - This is some code that can be used to create database snapshots of your filesystem structure and statistics. The first tool genfsdb is used to create a database from a tree. The second tool cmpfsdb is used to compare 2 databases. The database comparison output indicates timestamp changes, deletions, and new files or directories.

An example usage would be tracking what has changed after installing some software in Windows or Unix.

My license for the code: Use it (commercial or non-commercial), learn from it, modify it, give me credit in source form, and don't blame me at all for any damage.


genfsdb-4.tcl:

 #Copyright 2006 George Peter Staplin

 proc generate.file.system.database {db root} {
  proc out data "[list puts [set fd [open $db w]]] \$data"

  recurse $root

  close $fd
 }

 proc recurse {dir} {
  foreach f [lsort -dictionary [glob -nocomplain [file join $dir *]]] {
   #puts FILE:$f
   if {![file exists $f]} {
    #
    # The file is a symbolic link that doesn't point to anything.
    #
    continue
   }

   file stat $f stats
   #
   # It's critical that we use list here, because the filename
   # may have spaces.
   #
   out [list $stats(ctime) $stats(mtime) $f]
   if {[file isdirectory $f]} {
    #
    # XXX we could use a trampoline here to eliminate the recursion
    # The wiki has an example for such a trampoline by RS.
    # XXX in unix we also have the issue of symbolic links.
    # We need a circular link test to make this complete.
    #
    recurse $f
   }
  }
 }

 proc main {argc argv} {

  if {2 != $argc} {
   puts stderr "syntax is: [info script] database filesystem-root"
   return 1
  }

  generate.file.system.database [lindex $argv 0] [lindex $argv 1]
  return 0
 }
 exit [main $::argc $::argv]

cmpfsdb-5.tcl:

 # Copyright 2006 George Peter Staplin
 # Revision 5
 # May 31, 2006 fixed a DELETED NEW pattern with proc filter.invalid.  


 array set ::records {}
 array set ::changes {}

 proc read.records id {
  global records

  #
  # Read 500 chars, unless that would exceed the amount remaining.
  #
  set amount 500

  if {$amount > $records($id,remaining)} {
   set amount $records($id,remaining)
  }

  #
  # Concatenate the partial record (if there was one) with the new data.
  #
  set data [split $records($id,partial)[read $id $amount] \n]
 #puts DATA:$data

  #
  #XXX check for [eof $id] just in case the db is changed by another program?
  #

  #
  # Recalculate the remaining data.
  #
  set records($id,remaining) [expr {$records($id,remaining) - $amount}]

  #
  # Set the valid records (terminated by \n) in the records array.
  #
  set records($id,records) [lrange $data 0 [expr {[llength $data] - 2}]]

 #puts RECORDS:$records($id,records)

  #
  # There may be a partial record at the very end, so save that for use later.
  #
  set records($id,partial) [lindex $data end]

 #puts PARTIAL:$records($id,partial)

  set records($id,offset) [tell $id]
 }

 proc init.record {id f} {
  global records

  set records($id,file) $f
  set records($id,fd) $id
  set records($id,offset) 0
  set records($id,size) [file size $f]
  set records($id,remaining) $records($id,size)
  set records($id,partial) ""
  set records($id,records) [list]

  read.records $id
 }

 proc compare.records {a b} {
  foreach {a_ctime a_mtime a_f} $a break
  foreach {b_ctime b_mtime b_f} $b break

  global changes

  if {$a_f eq $b_f} {
   if {$a_ctime != $b_ctime} {
    lappend changes($a_f) CTIME
   }

   if {$a_mtime != $b_mtime} {
    lappend changes($a_f) MTIME
   }
   return 0
  } else {
   #puts "a_f $a_f"
   #puts "b_f $b_f"
   return [string compare $a_f $b_f]
  }
 }

 proc next.record id {
  global records

  if {![llength $records($id,records)]} {
   #
   # We need to attempt to read more records, because the list is empty.
   #
   if {$records($id,remaining) <= 0} {
    #
    # This record database has reached the end.
    #
    return [list]
   }
   read.records $id
  }

  set r [lindex $records($id,records) 0]
  set records($id,records) [lrange $records($id,records) 1 end]

  #puts REC:$r

  return $r
 }

 proc compare.databases {a b} {
  global records changes

  set ar [next.record $a]
  set br [next.record $b]

  while {[llength $ar] && [llength $br]} {
   set a_f [lindex $ar 2]
   set b_f [lindex $br 2]

   #puts "CMP $a_f $b_f"

   switch -- [compare.records $ar $br] {
    -1 {
     #
     # $a_f < $b_f in character value
     # $a_f was deleted
     #
     lappend changes($a_f) DELETED
     set ar [next.record $a]
    }

    0 {
     set ar [next.record $a]
     set br [next.record $b]
    }

    1 {
     #
     # $a_f > $b_f in character value
     # Therefore the file $b_f is a new file.
     # XXX is this always right?  It seems like it should be, because
     # the other operations go a record at a time, and the values are pre-sorted.
     #
     #puts NEW
     lappend changes($b_f) NEW
     set br [next.record $b]
    }
   }
  }

  #puts AR:$ar
  #puts BR:$br

  #
  # One or both of the lists are exhausted now. 
  # We must see which it is, and then list the files
  # remaining as NEW or DELETED.
  #
  if {![llength $ar]} {
   #
   # We have a remaining file unhandled by the loop above.
   #
   if {[llength $br]} {
    lappend changes([lindex $br 2]) NEW
   }

   #
   # The files remaining are new in the 2nd database/b.
   #
   while {[llength [set br [next.record $b]]]} {
    lappend changes([lindex $br 2]) NEW
   }  
  }

  if {![llength $br]} {
   #
   # This record wasn't handled by the loop above.
   #
   if {[llength $ar]} {
    lappend changes([lindex $ar 2]) DELETED
   }

   #
   # The files remaining were deleted from the 2nd database/b.
   #
   while {[llength [set ar [next.record $a]]]} {
    lappend changes([lindex $ar 2]) DELETED
   }
  }
 }


 proc filter.invalid ar_var {
  upvar $ar_var ar

  foreach {key value} [array get ar] {
   if {[set a [lsearch -exact $value DELETED]] >= 0 \
     && [lsearch -exact $value NEW] >= 0} {

    set value [lreplace $value $a $a]
    set b [lsearch -exact $value NEW]
    set value [lreplace $value $b $b]

    if {![llength $value]} {
     unset ar($key)
     continue
    }
    set ar($key) $value
   }
  }
 }

 proc main {argc argv} {
  if {2 != $argc} {
   puts stderr "syntax is: [info script] database-1 database-2"
   return 1
  }

  foreach {f1 f2} $argv break

  set id1 [open $f1 r]
  set id2 [open $f2 r]

  init.record $id1 $f1
  init.record $id2 $f2

  compare.databases $id1 $id2

  filter.invalid ::changes

  parray ::changes

  return 0
 }
 exit [main $::argc $::argv]

schlenk For a tripwire like tool, which can also check differences between directories see:

 #!/bin/sh
 #
 # stolperdrähtchen einfaches Skript in der Tradition von Tripwire
 #
 # -*- tcl -*- \
 exec tclsh "$0" ${1+"$@"}

 package require fileutil
 package require logger
 package require cmdline
 package require md5 2
 set version 0.2
 # Options 
 set subcmd [list sichern testen] 
 set options { 
   {datenbank.arg "stolper.dat" \
   {Datei in der die Hashes und andere Daten gespeichert werden sollen}}
   {verzeichnis.arg "." \
   {Verzeichnis das Bearbeitet werden soll}}
   {loglevel.arg "info" \
   {Welche Logmeldungen ausgegeben werden sollen \
   (debug,info, notice,warn,error,critical)}}
   {rekursiv \
   {Rekursiv die Unterverzeichnisse durchsuchen}}
 }
 set usage " sichern | testen\nOptionen:"
 proc optionenFehler {fehler {kopfzeile {}} } {
  global options
  if {![string equal $kopfzeile ""]} { 
   puts stderr $kopfzeile
  }
  puts stderr "[info script] sichern ?optionen?"
  puts stderr "[info script] testen ?optionen?"
  puts stderr [::cmdline::usage $options "Optionen:" ]

  exit $fehler
 }
 if {[llength $argv] > 0} {
   set cmd [lindex $argv 0]
   if {[lsearch -exact $subcmd $cmd] == -1} {
     optionenFehler 1 "Ungültiges Kommando \"$cmd\"."
   } 
   set argv [lrange $argv 1 end]
 } else {
   optionenFehler 2 
   exit 2
 }
 # Die Optionen auswerten
 if {[catch {::cmdline::getoptions argv $options $usage} opts ]} {
   # Ein Fehler ist aufgetreten
   puts stderr $opts
   exit 1
 }
 # Argumente auswerten
 foreach {option wert} $opts {
   puts stdout "Option: $option Wert: $wert"
   switch -glob -- $option {
        d* {
          if {![file exists $wert] && [string equal $cmd testen]} {
            puts stderr "Fehler: Datenbank \"$wert\" existiert nicht."
            exit 3
          } 
          set Config(DB) $wert
        }
        v* {
          if {![file isdirectory $wert]} {
            puts stderr "Fehler: \"$wert\" ist kein existierendes Verzeichnis."
            exit 4
          }
          set Config(Startverzeichnis) [file normalize $wert]
        }
        logl* {
          if {[lsearch -exact [::logger::levels] $wert]==-1} {
            puts stderr "Fehler: Unbekannter loglevel \"$wert\" ."
            exit 5
          }
          set Config(Loglevel) $wert
        }
        r* {
          set Config(rekursiv) $wert
        }
        default {}
        }
 }
 unset opts

 set log [::logger::init stolper]
 ${log}::setlevel $Config(Loglevel)

 proc testen {log datenbank start rekursiv } {
   if {[catch {open $datenbank} fd]} {
     ${log}::critical "Datenbank \"$datenbank\" \
                       konnte nicht geöffnet werden."
     exit 5
   }
   set line [gets $fd]
   if {[string compare "# Stolperdrähtchen $::version" $line]} {
      ${log}::critical "Datenbank \"$datenbank\" ist keine \
                        Stolperdrähtchen $::version Datenbank." 
      exit 6
   }
   set line [gets $fd]
   set verzeichnis ""
   regexp {# Startverzeichnis:\t(.*)} $line -> verzeichnis
   if {[string compare $verzeichnis $start]} {
      ${log}::critical "Unterschiedliche Startverzeichnisse \
                        Datenbank benutzt \"$verzeichnis\"."
      exit 7
   }
   set line [gets $fd]
   set seconds 0
   regexp {# Erstellt an:\t([0-9]+)} $line -> seconds
   ${log}::info "Datenbank erstellt: [clock format $seconds]"
   set line [gets $fd]
   set dbrekursiv 0
   regexp {# Rekursiv:\t([01])} $line -> dbrekursiv
   # Dateiliste erstellen
   ${log}::info "Dateiliste erstellen"
   if {!$rekursiv} {
     set dateien [glob -nocomplain -directory $start *]
   } else {
     set dateien [::fileutil::find $start]
   }
   # Infos aus der Datenbank einlesen
   set maxlen 0 
   while {![eof $fd] && [gets $fd line]} {
     if {[regexp {([^\t]+)\tmtime: (-?[0-9]+|no)\tMD5: (.+)} $line -> name mtime md5]} {
       set dbmtime($name) $mtime
       set dbmd5($name) $md5 
       if {[string length $name] > $maxlen} {set maxlen [string length $name]}
     } 
   }
   ${log}::notice "[array size dbmtime] Dateiinfos aus Datenbank eingelesen"
   close $fd
   puts stdout "Veränderte oder neue Dateien:"
   foreach datei [lsort -ascii $dateien] {
     if {![file isfile $datei]} {continue}
     if {[string length $datei] > $maxlen } {set maxlen $datei}
     set md5change 0
     set mtimechange 0
     set neu 0
     if {[catch {file mtime $datei} mtime]} {
       set mtime no
     }
     set md5 [::md5::md5 -hex -filename $datei]
     if {[info exists dbmtime($datei)]} {
       if {[string compare -nocase $md5 $dbmd5($datei)]} {
         set md5change 1
       }
       if {$mtime != $dbmtime($datei)} {
         set mtimechange 1
       }
       unset dbmtime($datei)
       unset dbmd5($datei)
     } else {
       set neu 1
     }
     if {$mtimechange || $md5change || $neu} {
       puts -nonewline stdout "[format "%-[expr {$maxlen+5}]s" $datei]"
       if {$neu} {
         puts stdout "NEU"
         continue
       } 
       if {$mtimechange} {
         puts -nonewline stdout "MTIME "
       }
       if {$md5change} {
         puts -nonewline stdout "MD5 "
       }
       puts stdout ""
     }
   }
   if {[array size dbmtime]} {
     foreach datei [array keys dbmtime] {
       puts stdout "[format "%-[expr {$maxlen+5}]s\tFEHLT" $datei]"
     }
   }
 }

 proc sichern {log datenbank start rekursiv } {
   # Dateiliste erstellen
   ${log}::info "Dateiliste erstellen"
   if {!$rekursiv} {
     set dateien [glob -nocomplain -directory $start *]
   } else {
     set dateien [::fileutil::find $start]
   } 
   ${log}::info "[llength $dateien] Dateien gefunden"
   # Datenbank Datei öffnen
   if {[catch {open $datenbank {CREAT WRONLY}} fd]} {
     ${log}::critical "Datenbankdatei \"$datenbank\" \
                       konnte nicht erstellt werden."
     exit 5
   }
   ${log}::notice "Datenbankdatei \"$datenbank\" geöffnet."
   puts $fd "# Stolperdrähtchen $::version"
   puts $fd "# Startverzeichnis:\t$start"
   puts $fd "# Erstellt an:\t[clock seconds]"
   puts $fd "# Rekursiv:\t$rekursiv"
   # Dateiattribute bestimmen
   foreach datei [lsort -ascii $dateien] {
     if {![file isfile $datei]} {continue}
     if {[catch {file mtime $datei} mtime]} {
       set mtime "no"
     }
     set md5 [::md5::md5 -hex -filename $datei]
     puts $fd [format "%s\tmtime: %s \tMD5: %s" $datei $mtime $md5]
     ${log}::info "Datei bearbeitet $datei"
   }
   close $fd
   ${log}::notice "Datenbank \"$datenbank\" fertiggestellt."
 }

 $cmd $log $Config(DB) $Config(Startverzeichnis) $Config(rekursiv) 

[ Category File ]