# This page is based on http://www.paulgraham.com/spam.html but it is still
# missing major pieces of functionality (like the code to build the frequency
# tables from the message corpuses, the tables probably should not be
# recomputed every time, etc.) Enjoy! ''[DKF]''
# ----
#
# Global state expected by this code (it is NOT built here):
#   goodTable  - array: token -> occurrence count in the non-spam corpus
#   badTable   - array: token -> occurrence count in the spam corpus
#   goodCount  - number of messages in the non-spam corpus
#   badCount   - number of messages in the spam corpus

# A token is a run of word characters, dashes, apostrophes and dollar signs.
set WordRE {[-\w'$]+}

# countwords --
#   Adds the tokens found in $string to a frequency table.
#
# Arguments:
#   table   Name of an array variable in the caller's scope; element
#           t(token) is incremented once for every occurrence of token.
#   string  The text to tokenise.
#
# Results:
#   None; the caller's array is updated in place.
proc countwords {table string} {
    global WordRE
    upvar 1 $table t
    set i 0
    while {[regexp -indices -start $i $WordRE $string match]} {
        foreach {first last} $match break
        set word [string range $string $first $last]
        if {[catch {incr t($word)}]} {
            set t($word) 1
        }
        # Resume scanning AFTER the token.  Restarting at its last character
        # would match that character again and never terminate.
        set i [expr {$last + 1}]
    }
}

# genprob --
#   Estimates the probability that a message containing $word is spam.
#
# Arguments:
#   word    The token to look up in the global goodTable/badTable arrays.
#
# Results:
#   .2 for a token that appears in neither table, .5 (neutral) for a token
#   seen too rarely to be trusted, otherwise a value clamped to [.01, .99].
proc genprob {word} {
    upvar #0 goodTable good badTable bad
    global goodCount badCount

    # Occurrences in good mail are doubled to bias against false positives.
    set g 0
    if {[info exists good($word)]} {
        set g [expr {$good($word) * 2}]
    }
    set b 0
    if {[info exists bad($word)]} {
        set b $bad($word)
    }

    if {$g == 0 && $b == 0} {
        # Not seen before
        return .2
    }
    if {$g + $b < 5} {
        # Not frequent enough to say anything.  A neutral .5 leaves the
        # combined probability untouched and has zero "interest", which is
        # equivalent to ignoring the token; returning 0 would instead force
        # every message containing a rare word to be classified as non-spam.
        return .5
    }

    # Guard against empty corpora so we never divide by zero.
    set bfreq 0.
    if {$badCount > 0} {
        set bfreq [min 1. [expr {double($b) / $badCount}]]
    }
    set gfreq 0.
    if {$goodCount > 0} {
        set gfreq [min 1. [expr {double($g) / $goodCount}]]
    }
    if {$gfreq + $bfreq == 0} {
        return .5
    }
    return [max .01 [min .99 [expr {$bfreq / ($gfreq + $bfreq)}]]]
}

# combine --
#   Combines individual token probabilities into one spam probability:
#   prod(p) / (prod(p) + prod(1 - p)).
#
# Arguments:
#   probs   List of probabilities, each in [0, 1].
#
# Results:
#   The combined probability; .5 for an empty list or when the evidence is
#   perfectly contradictory (both products are zero).
proc combine {probs} {
    set p1 1.
    set p2 1.
    foreach prob $probs {
        set p1 [expr {$p1 * $prob}]
        set p2 [expr {$p2 * (1. - $prob)}]
    }
    set total [expr {$p1 + $p2}]
    if {$total == 0} {
        # Avoid 0/0 when the list contains both a 0 and a 1.
        return .5
    }
    return [expr {$p1 / $total}]
}

# min, max --
#   Two-argument numeric minimum / maximum.
proc min {x y} {expr {$x<$y ? $x : $y}}
proc max {x y} {expr {$x>$y ? $x : $y}}

# isSpam --
#   Classifies a message using the global frequency tables.
#
# Arguments:
#   message  The text of the message.
#
# Results:
#   1 if the combined probability of the (at most) fifteen most interesting
#   distinct tokens exceeds .9, otherwise 0.  A message without any token is
#   never spam.
proc isSpam {message} {
    global WordRE

    # Collect the distinct tokens of the message as array keys.
    set i 0
    while {[regexp -indices -start $i $WordRE $message match]} {
        foreach {first last} $match break
        set t([string range $message $first $last]) {}
        set i [expr {$last + 1}]
    }

    # Pair every token's probability with its distance from the neutral .5.
    set magic {}
    foreach word [array names t] {
        set p [genprob $word]
        lappend magic [list [expr {abs($p - .5)}] $p]
    }

    # Keep the fifteen tokens farthest from neutral (most interesting first).
    set interesting {}
    foreach l [lrange [lsort -real -decreasing -index 0 $magic] 0 14] {
        lappend interesting [lindex $l 1]
    }
    return [expr {[combine $interesting] > .9}]
}