Prompted by a question from 'AurovilleRadio' on comp.lang.tcl, here is a direct port to Tcl of Norvig's Python spell-correcter at http://norvig.com/spell-correct.html .
Disclaimer1: no effort is made to mimic Python's very expressive (should I say terse ?) style based on its powerful list comprehensions and other functional tools. Only the task at hand is aimed for.
Disclaimer2: yes, it is slow. Feel free to optimize. (note there's a beginning in that direction by using string when possible instead of the splits of the original)
Usage: spell-correct SOME-TEXT-FULL-OF-WORDS
Then type misspelled words, one per line (non-letters are collapsed anyway), and see the one-line outputs:
* DICT : word was in dictionary * EDIT1: word was at edit distance 1 from dictionary * EDIT2: word was at edit distance 2 from dictionary * NO-LUCK
#! /bin/sh #\ exec tclsh $0 "$@" if {$argc<1} {puts stderr "Usage: [file tail $::argv0] <wordsfile> \[<wordsfile> ...\]";exit 1} foreach fn $argv { puts stderr " (loading $fn)" set ff [open $fn r] set x [read $ff] close $ff regsub -all {[^a-z]+} [string tolower $x] \ x foreach w $x { if {[info exists model($w)]} { incr model($w) } else { set model($w) 1 } } } set alphabet [split abcdefghijklmnopqrstuvwxyz ""] proc splits w { set out {} set n [string length $w] for {set i 0} {$i<=$n} {incr i} { lappend out [list [string range $w 0 $i-1] [string range $w $i $n]] } return $out } proc deletes w { set out {} set n [string length $w] for {set i 0} {$i<$n} {incr i} { lappend out [string replace $w $i $i] } return $out } proc transposes w { set out {} set n [expr {[string length $w]-1}] for {set i 0} {$i<$n} {incr i} { lappend out [string replace $w $i $i+1 [string index $w $i+1][string index $w $i]] } return $out } proc replaces w { set out {} set n [string length $w] for {set i 0} {$i<$n} {incr i} { foreach a $::alphabet { lappend out [string replace $w $i $i $a] } } return $out } proc inserts w { set out {} set n [string length $w] for {set i 0} {$i<=$n} {incr i} { foreach a $::alphabet { lappend out [string range $w 0 $i-1]$a[string range $w $i $n] } } return $out } proc edit1 {w vtab} { upvar $vtab tab foreach x [deletes $w] {set tab($x) 1} foreach x [transposes $w] {set tab($x) 1} foreach x [replaces $w] {set tab($x) 1} foreach x [inserts $w] {set tab($x) 1} } fconfigure stdin -translation binary fconfigure stdout -translation binary -buffering line puts stderr " (now processing stdin)" while {[gets stdin line]>=0} { regsub -all {[^a-z]+} [string tolower $line] "" w if {[info exists model($w)]} { puts "DICT $w" continue } array unset tab edit1 $w tab set best 0 foreach x [array names tab] { if {[info exists model($x)]} { set v $model($x) if {$v>$best} { set best $v set bestx $x } } } if {$best} { puts "EDIT1 $bestx" continue } array unset tab2 foreach x [array names tab] { edit1 $x tab2 } set best 0 foreach x [array names tab2] { if {[info exists model($x)]} { set v $model($x) if {$v>$best} { set best $v set bestx $x } } } if {$best} { puts "EDIT2 $bestx" continue } puts "NO-LUCK" }