Version 5 of Diffs Code Module in progress

Updated 2003-11-08 10:50:28

A suggested first step

Is it possible to read the Python code below and describe, in plain words, what the algorithm does? For example: "Step one: define the variables."


def diff(s1, s2):
    """Return an HTML table describing the differences between two texts.

    Both texts are HTML-escaped ('&' and '<' only), split into lines and
    compared line by line with difflib.SequenceMatcher.  Every run of
    lines that differs becomes two table rows: a title row giving the
    1-based line numbers where the run starts on each side, and a content
    row with the removed text on the left and the added text on the right.
    When the two sides of a run are at least 50% similar
    (SequenceMatcher.ratio()), only the differing characters are wrapped
    in <span class='diff-removed'> / <span class='diff-added'>; otherwise
    each whole side is wrapped.

    Parameters:
        s1 -- the old text (string).
        s2 -- the new text (string).

    Returns:
        '<strong>No differences.</strong>' when both texts consist of the
        same lines (line terminators are dropped by splitlines(), so they
        are not compared); otherwise a string holding a
        "<table class='diff'>" element terminated by a newline.
    """
    from difflib import SequenceMatcher

    # '&' must be escaped first, otherwise the '&' introduced by '&lt;'
    # would itself be escaped again.
    s1 = s1.replace('&', '&amp;').replace('<', '&lt;')
    s2 = s2.replace('&', '&amp;').replace('<', '&lt;')

    seq1 = s1.splitlines()
    seq2 = s2.splitlines()

    linematch = SequenceMatcher(None, seq1, seq2).get_matching_blocks()

    # A single matching block that covers every line means the texts agree.
    if len(seq1) == len(seq2) and linematch[0] == (0, 0, len(seq1)):
        return '<strong>No differences.</strong>'

    # (index into seq1, index into seq2) just past the previous match.
    lastmatch = (0, 0)
    rows = ["<table class='diff'>\n"]

    # get_matching_blocks() ends with a zero-length block at
    # (len(seq1), len(seq2)), so trailing differences are reported too.
    for match in linematch:
        if lastmatch == tuple(match[0:2]):
            # Nothing lies between the previous match and this one.
            lastmatch = (match[0] + match[2], match[1] + match[2])
            continue

        rows.append(
            "<tr><td colspan='2' class='diff-title'><strong>"
            + "Line " + str(lastmatch[0] + 1) + ", removed:"
            + "</strong></td><td colspan='2' class='diff-title'><strong>"
            + "Line " + str(lastmatch[1] + 1) + ", added:"
            + "</strong></td></tr>\n"
        )

        # The unmatched lines on each side, rejoined into one string.
        leftpane = '\n'.join(seq1[lastmatch[0]:match[0]])
        rightpane = '\n'.join(seq2[lastmatch[1]:match[1]])

        # NOTE(review): this character-level comparison runs on the
        # already-escaped text, so a span boundary could in principle fall
        # inside an entity such as '&amp;' -- confirm whether that matters.
        charobj = SequenceMatcher(None, leftpane, rightpane)

        if leftpane == '' and rightpane == '':
            # Only empty lines differ; treat the two sides as identical.
            ratio = 1.0
        else:
            ratio = charobj.ratio()

        if ratio < 0.5:
            # Insufficient similarity: mark each whole side as changed.
            leftresult = ''
            rightresult = ''
            if leftpane:
                leftresult = ("<span class='diff-removed'>"
                              + leftpane + "</span>")
            if rightpane:
                rightresult = ("<span class='diff-added'>"
                               + rightpane + "</span>")
        else:
            # Some similarities: mark up only the characters that changed.
            charlast = (0, 0)
            leftparts = []
            rightparts = []
            for thismatch in charobj.get_matching_blocks():
                if thismatch[0] != charlast[0]:
                    leftparts.append(
                        "<span class='diff-removed'>"
                        + leftpane[charlast[0]:thismatch[0]]
                        + "</span>"
                    )
                if thismatch[1] != charlast[1]:
                    rightparts.append(
                        "<span class='diff-added'>"
                        + rightpane[charlast[1]:thismatch[1]]
                        + "</span>"
                    )
                # The matching text itself is copied through unmarked.
                leftparts.append(
                    leftpane[thismatch[0]:thismatch[0] + thismatch[2]])
                rightparts.append(
                    rightpane[thismatch[1]:thismatch[1] + thismatch[2]])
                charlast = (thismatch[0] + thismatch[2],
                            thismatch[1] + thismatch[2])
            leftresult = ''.join(leftparts)
            rightresult = ''.join(rightparts)

        rows.append(
            "<tr><td colspan='2' class='diff-removed'>"
            + leftresult.replace('\n', '<br />\n')
            + "</td><td colspan='2' class='diff-added'>"
            + rightresult.replace('\n', '<br />\n')
            + "</td></tr>\n"
        )

        lastmatch = (match[0] + match[2], match[1] + match[2])

    rows.append('</table>\n')
    return ''.join(rows)

 # Command-line driver: read the two files named by the first and second
 # command-line arguments and print their HTML diff to standard output.
 import sys

 # open() replaces the Python 2-only file() builtin; the with-blocks close
 # each file even if reading it raises.
 with open(sys.argv[1]) as file1:
     str1 = file1.read()
 with open(sys.argv[2]) as file2:
     str2 = file2.read()

 # Parenthesised so the line is valid under both Python 2 and Python 3.
 print(diff(str1, str2))

What on earth is the use of this page? -jcw

Don't you know??? There is a huge need for a diffs module!