Spell correcter

Prompted by a question from 'AurovilleRadio' on comp.lang.tcl, here is a direct port to Tcl of Norvig's Python spell-correcter at http://norvig.com/spell-correct.html .

Disclaimer1: no effort is made to mimic Python's very expressive (should I say terse ?) style based on its powerful list comprehensions and other functional tools. Only the task at hand is aimed for.

Disclaimer2: yes, it is slow. Feel free to optimize. (note there's a beginning in that direction by using string when possible instead of the splits of the original)

Usage: spell-correct SOME-TEXT-FULL-OF-WORDS

Then type misspelled words, one per line (non-letters are collapsed anyway), and see the one-line outputs:

 * DICT : word was in dictionary
 * EDIT1: word was at edit distance 1 from dictionary
 * EDIT2: word was at edit distance 2 from dictionary
 * NO-LUCK
#! /bin/sh
#\
exec tclsh $0 "$@"

if {$argc<1} {puts stderr "Usage: [file tail $::argv0] <wordsfile> \[<wordsfile> ...\]";exit 1}

foreach fn $argv {
    puts stderr " (loading $fn)"
    set ff [open $fn r]
    set x [read $ff]
    close $ff
    regsub -all {[^a-z]+} [string tolower $x] \  x
    foreach w $x {
        if {[info exists model($w)]} {
            incr model($w)
        } else {
            set model($w) 1
        }
    }
}

set alphabet [split abcdefghijklmnopqrstuvwxyz ""]

proc splits w {
    set out {}
    set n [string length $w]
    for {set i 0} {$i<=$n} {incr i} {
        lappend out [list [string range $w 0 $i-1] [string range $w $i $n]]
    }
    return $out
}

proc deletes w {
    set out {}
    set n [string length $w]
    for {set i 0} {$i<$n} {incr i} {
        lappend out [string replace $w $i $i]
    }
    return $out
}        

proc transposes w {
    set out {}
    set n [expr {[string length $w]-1}]
    for {set i 0} {$i<$n} {incr i} {
        lappend out [string replace $w $i $i+1 [string index $w $i+1][string index $w $i]]
    }
    return $out
}        

proc replaces w {
    set out {}
    set n [string length $w]
    for {set i 0} {$i<$n} {incr i} {
        foreach a $::alphabet {
            lappend out [string replace $w $i $i $a]
        }
    }
    return $out
}

proc inserts w {
    set out {}
    set n [string length $w]
    for {set i 0} {$i<=$n} {incr i} {
        foreach a $::alphabet {
            lappend out [string range $w 0 $i-1]$a[string range $w $i $n]
        }
    }
    return $out
}

proc edit1 {w vtab} {
    upvar $vtab tab

    foreach x [deletes $w] {set tab($x) 1}
    foreach x [transposes $w] {set tab($x) 1}
    foreach x [replaces $w] {set tab($x) 1}
    foreach x [inserts $w] {set tab($x) 1}
}


fconfigure stdin -translation binary
fconfigure stdout -translation binary -buffering line

puts stderr " (now processing stdin)"
while {[gets stdin line]>=0} {
    regsub -all {[^a-z]+} [string tolower $line] "" w
    if {[info exists model($w)]} {
        puts "DICT        $w"
        continue
    }
    array unset tab
    edit1 $w tab
    set best 0
    foreach x [array names tab] {
        if {[info exists model($x)]} {
            set v $model($x)
            if {$v>$best} {
                set best $v
                set bestx $x
            }
        }
    }
    if {$best} {
        puts "EDIT1        $bestx"
        continue
    }
    array unset tab2
    foreach x [array names tab] {
        edit1 $x tab2
    }
    set best 0
    foreach x [array names tab2] {
        if {[info exists model($x)]} {
            set v $model($x)
            if {$v>$best} {
                set best $v
                set bestx $x
            }
        }
    }
    if {$best} {
        puts "EDIT2        $bestx"
        continue
    }
    puts "NO-LUCK"
}