# Markov Chains in Tcl set matrix [dict create] set eolchars {. ? ; ! @} set breaks {: ,} set junk {/} set start {} set startword 1 set whatever {well 1 "I think" 1 could 1 may 1 wonder 1} set inp [open "Midsummer Night's Dream.txt"] set filebuf {} proc getword {} { global filebuf inp if {[llength $filebuf] == 0} {gets $inp filebuf; set filebuf [split $filebuf " "]} set result [lindex $filebuf 0] set filebuf [lrange $filebuf 1 end] return $result } set w1 [getword] set w2 [getword] set w3 [getword] set w4 [getword] set pocket "" set actors {} while {![eof $inp]} { puts -nonewline . if {$pocket eq ""} { set next [getword] if {[set ch [string index $next end]] in $breaks} { set pocket $ch\n set next [string range $next 0 end-1] } } else { set next $pocket set pocket "" } set next [string trim $next] #puts $next # skip blank lines or lines with just one word if {$next eq ""} continue if {($next eq "ACT")||($next eq "SCENE")||([string index $next 0] eq "/")} continue if {[string index $next end] in $junk} continue if {[string index $next 0] eq "*"} { set next [string tolower $next]; set next [string toupper $next 0] if {[lsearch $actors $next]==-1} {lappend actors $next} continue } regsub \; $next @ next # linklen 4 if {[catch {set _index [dict get $matrix "$w1 $w2 $w3 $w4"]}]!=0} {set _index [dict create]} dict incr _index $; dict incr _index _total; dict set matrix "$w1 $w2 $w3 $w4" ${_index} # linklen 3 if {[catch {set _index [dict get $matrix "$w2 $w3 $w4"]}]!=0} {set _index [dict create]} dict incr _index $next; dict incr _index _total; dict set matrix "$w2 $w3 $w4" ${_index} # linklen 2 if {[catch {set _index [dict get $matrix "$w3 $w4"]}]!=0} {set _index [dict create]} dict incr _index $next; dict incr _index _total; dict set matrix "$w3 $w4" ${_index} # linklen 1 if {[catch {set _index [dict get $matrix $w4]}]!=0} {set _index [dict create]} dict incr _index $next; dict incr _index _total; dict set matrix $w4 ${_index} if {$startword} { if {[string index $w1 0] in {A B C D E F G H I J K L M N O P Q R S T U V W X Y Z}} { if {[string index $w1 end] ni $eolchars} { if {[lsearch $start $w1] == -1} {lappend start $w1} } } set startword 0 } set w1 $w2 set w2 $w3 set w3 $w4 set w4 $next if {[string index $next end] in $eolchars} {set startword 1} } close $inp puts "" set startlen [llength $start] puts "MATRIX IS\n$matrix\n\n\n" #puts "Start: $start" proc gen {} { global start matrix startlen workstr whatever eolchars breaks set workstr [lindex $start [expr {int(rand()*$startlen)}]] set result $workstr while 1 { while 1 { if {[catch {set choices [dict get $matrix $workstr]}] != 0} { if {[llength $workstr]>0} { set workstr [lrange $workstr 1 end] continue } set choices $whatever } break } #puts "got hit on '$workstr', choices='$choices'" # choices is our dict with words and counts # _index contains the number of choices total set max [dict get $choices _total] #puts "max=$max" dict unset choices _total set choice [expr {int(rand()*$max)}] #puts "select $choice" dict for {word number} $choices { #puts "word=$word number=$number" incr choice -$number #puts "choice=$choice" if {$choice <= 0} { lappend result $word #puts "result now '$result'" if {[string index $word end] in $eolchars} { foreach ch $breaks {regsub -all " $ch" $result $ch result} regsub -all @ $result \; result #puts "returning '$result'"; return $result } lappend workstr $word if {[llength $workstr]>4} {set workstr [lrange $workstr 1 end]} } } } } for {set i 0} {$i < 10} {incr i} {puts "$i: [gen]"} exit