if 0 { [KBK]: The ''longest common subsequence problem'' is: Given two lists L1 and L2, compute the largest set of ordered pairs {x1,y1), (x2,y2), ..., (xn, yn) such that x1 < x2 < ... < xn y1 < y2 < ... < yn and L1[x1] = L2[y1] ; L1[x2] = L2[y2] ; ... L1[xn] = L2[yn] Solving this problem is critical to the implementation of [diff in Tcl]. Here is one Tcl implementation of the "folklore algorithm." Note that Hunt and McIlroy have published a much better algorithm (used in the Unix 'diff' command) that is implemented at [diff in Tcl]. This page is here to hold some of the historical discussion. ---- } ====== namespace eval list { } namespace eval list::longestCommonSubsequence { namespace export compare } # Internal procedure that indexes into the 2-dimensional array t, # which corresponds to the sequence y, looking for the (i,j)th element. proc list::longestCommonSubsequence::Index { t y i j } { set indx [expr { ([llength $y] + 1) * ($i + 1) + ($j + 1) }] return [lindex $t $indx] } # Internal procedure that implements Levenshtein to derive the longest # common subsequence of two lists x and y. proc list::longestCommonSubsequence::ComputeLCS { x y } { set t [list] for { set i -1 } { $i < [llength $y] } { incr i } { lappend t 0 } for { set i 0 } { $i < [llength $x] } { incr i } { lappend t 0 for { set j 0 } { $j < [llength $y] } { incr j } { if { [string equal [lindex $x $i] [lindex $y $j]] } { set lastT [Index $t $y [expr { $i - 1 }] [expr {$j - 1}]] set nextT [expr {$lastT + 1}] } else { set lastT1 [Index $t $y $i [expr { $j - 1 }]] set lastT2 [Index $t $y [expr { $i - 1 }] $j] if { $lastT1 > $lastT2 } { set nextT $lastT1 } else { set nextT $lastT2 } } lappend t $nextT } } return $t } # Internal procedure that traces through the array built by ComputeLCS # and finds a longest common subsequence -- specifically, the one that # is lexicographically first. proc list::longestCommonSubsequence::TraceLCS { t x y } { set trace {} set i [expr { [llength $x] - 1 }] set j [expr { [llength $y] - 1 }] set k [expr { [Index $t $y $i $j] - 1 }] while { $i >= 0 && $j >= 0 } { set im1 [expr { $i - 1 }] set jm1 [expr { $j - 1 }] if { [Index $t $y $i $j] == [Index $t $y $im1 $jm1] + 1 && [string equal [lindex $x $i] [lindex $y $j]] } { lappend trace xy [list $i $j] set i $im1 set j $jm1 } elseif { [Index $t $y $im1 $j] > [Index $t $y $i $jm1] } { lappend trace x $i set i $im1 } else { lappend trace y $j set j $jm1 } } while { $i >= 0 } { lappend trace x $i incr i -1 } while { $j >= 0 } { lappend trace y $j incr j -1 } return $trace } # list::longestCommonSubsequence::compare -- # # Compare two lists for the longest common subsequence # # Arguments: # x, y - Two lists of strings to compare # matched - Callback to execute on matched elements, see below # unmatchedX - Callback to execute on unmatched elements from the # first list, see below. # unmatchedY - Callback to execute on unmatched elements from the # second list, see below. # # Results: # None. # # Side effects: # Whatever the callbacks do. # # The 'compare' procedure compares the two lists of strings, x and y. # It finds a longest common subsequence between the two. It then walks # the lists in order and makes the following callbacks: # # For an element that is common to both lists, it appends the index in # the first list, the index in the second list, and the string value of # the element as three parameters to the 'matched' callback, and executes # the result. # # For an element that is in the first list but not the second, it appends # the index in the first list and the string value of the element as two # parameters to the 'unmatchedX' callback and executes the result. # # For an element that is in the second list but not the first, it appends # the index in the second list and the string value of the element as two # parameters to the 'unmatchedY' callback and executes the result. proc list::longestCommonSubsequence::compare { x y matched unmatchedX unmatchedY } { set t [ComputeLCS $x $y] set trace [TraceLCS $t $x $y] set i [llength $trace] while { $i > 0 } { set indices [lindex $trace [incr i -1]] set type [lindex $trace [incr i -1]] switch -exact -- $type { xy { set c $matched eval lappend c $indices lappend c [lindex $x [lindex $indices 0]] uplevel 1 $c } x { set c $unmatchedX lappend c $indices lappend c [lindex $x $indices] uplevel 1 $c } y { set c $unmatchedY lappend c $indices lappend c [lindex $y $indices] uplevel 1 $c } } } return } ====== # With this code in hand, we can now write the external parts of a '''diff''' command. The various options of '''diff''' alter how it ''displays'' the comparison, but not its fundamental operation. Here's an external wrapper that gives very simple-minded output. ====== namespace import list::longestCommonSubsequence::compare proc umx { index value } { variable lastx variable xlines append xlines "< " $value \n set lastx $index } proc umy { index value } { variable lasty variable ylines append ylines "> " $value \n set lasty $index } proc matched { index1 index2 value } { variable lastx variable lasty variable xlines variable ylines if { [info exists lastx] && [info exists lasty] } { puts "[expr { $lastx + 1 }],${index1}c[expr {$lasty + 1 }],${index2}" puts -nonewline $xlines puts "----" puts -nonewline $ylines } elseif { [info exists lastx] } { puts "[expr { $lastx + 1 }],${index1}d${index2}" puts -nonewline $xlines } elseif { [info exists lasty] } { puts "${index1}a[expr {$lasty + 1 }],${index2}" puts -nonewline $ylines } catch { unset lastx } catch { unset xlines } catch { unset lasty } catch { unset ylines } } # Really, we should read the first file in like this: # set f0 [open [lindex $argv 0] r] # set x [split [read $f0] \n] # close $f0 # But I'll just provide some sample lines: set x {} for { set i 0 } { $i < 10 } { incr i } { lappend x a r a d e d a b r a x } # The second file, too, should be read in like this: # set f1 [open [lindex $argv 1] r] # set y [split [read $f1] \n] # close $f1 # Once again, I'll just do some sample lines. set y {} for { set i 0 } { $i < 10 } { incr i } { lappend y a b r a c a d a b r a } compare $x $y matched umx umy matched [llength $x] [llength $y] {} ====== if 0 { <>Algorithm | Application }