** A simple arithmetic expression parser ** *** Description *** [DKF]: Here's a simple example of how to do lexing and parsing with [coroutine]s. The lexer makes a lot of use of [switch]'s `-regexp` option, and is run in a coroutine so that it is easier to connect to the parser (which is ''not'' a coroutine). This code was originally developed by [DKF] for [http://rosettacode.org/wiki/EBNF_parser#Tcl%|%Rosetta Code]. *** Code *** ====== package require Tcl 8.6 # Utilities to make the coroutine easier to use proc provide args {while {![yield $args]} {yield}} proc next lexer {$lexer 1} proc pushback lexer {$lexer 0} # Lexical analyzer coroutine core proc lexer {str} { yield [info coroutine] set symbols {+ PLUS - MINUS * MULT / DIV ( LPAR ) RPAR} set idx 0 while 1 { switch -regexp -matchvar m -- $str { {^\s+} { # No special action for whitespace } {^([-+*/()])} { provide [dict get $symbols [lindex $m 1]] [lindex $m 1] $idx } {^(\d+)} { provide NUMBER [lindex $m 1] $idx } {^$} { provide EOT "EOT" $idx return } . { provide PARSE_ERROR [lindex $m 0] $idx } } # Trim the matched string set str [string range $str [string length [lindex $m 0]] end] incr idx [string length [lindex $m 0]] } } # Utility functions to help with making an LL(1) parser; ParseLoop handles # EBNF looping constructs, ParseSeq handles sequence constructs. proc ParseLoop {lexer def} { upvar 1 token token payload payload index index foreach {a b} $def { if {$b ne "-"} {set b [list set c $b]} lappend m $a $b } lappend m default {pushback $lexer; break} while 1 { lassign [next $lexer] token payload index switch -- $token {*}$m if {[set c [catch {uplevel 1 $c} res opt]]} { dict set opt -level [expr {[dict get $opt -level]+1}] return -options $opt $res } } } proc ParseSeq {lexer def} { upvar 1 token token payload payload index index foreach {t s} $def { lassign [next $lexer] token payload index switch -- $token $t { if {[set c [catch {uplevel 1 $s} res opt]]} { dict set opt -level [expr {[dict get $opt -level]+1}] return -options $opt $res } } EOT { throw SYNTAX "end of text at position $index" } default { throw SYNTAX "\"$payload\" at position $index" } } } # Main parser driver; contains "master" grammar that ensures that the whole # text is matched and not just a prefix substring. Note also that the parser # runs the lexer as a coroutine (with a fixed name in this basic demonstration # code). proc parse {str} { set lexer [coroutine l lexer $str] try { set parsed [parse.expr $lexer] ParseLoop $lexer { EOT { return $parsed } } throw SYNTAX "\"$payload\" at position $index" } trap SYNTAX msg { return -code error "syntax error: $msg" } finally { catch {rename $lexer ""} } } # Now the descriptions of how to match each production in the grammar... proc parse.expr {lexer} { set expr [parse.term $lexer] ParseLoop $lexer { PLUS - MINUS { set expr [list $token $expr [parse.term $lexer]] } } return $expr } proc parse.term {lexer} { set term [parse.factor $lexer] ParseLoop $lexer { MULT - DIV { set term [list $token $term [parse.factor $lexer]] } } return $term } proc parse.factor {lexer} { ParseLoop $lexer { NUMBER { return $payload } MINUS { ParseSeq $lexer { NUMBER {return -$payload} } } LPAR { set result [parse.expr $lexer] ParseSeq $lexer { RPAR {return $result} } break } EOT { throw SYNTAX "end of text at position $index" } } throw SYNTAX "\"$payload\" at position $index" } # Demonstration code puts [parse "1 - 2 - -3 * 4 + 5"] puts [parse "1 - 2 - -3 * (4 + 5)"] ====== Which produces this output: ======none PLUS {MINUS {MINUS 1 2} {MULT -3 4}} 5 MINUS {MINUS 1 2} {MULT -3 {PLUS 4 5}} ====== ** Parsing expressions with `typedlist` ** *** Description *** [FM] This is an experimentation to parse with coroutine. It uses the [typedlist] package. *** Code *** ====== #! /bin/sh # this is a -*-Tcl-*- file package require typedlist typedlist create BNF::nombre coroutine nombre apply {{} { while 1 { if {[regexp {[0-9]} [set n [yield]]]} { if {[info exist ::String($::Instruction)] && \ [[lindex $::String($::Instruction) end] type] eq "nombre"} { [lindex $::String($::Instruction) end] lappend $n } else { lappend ::String($::Instruction) [BNF::nombre $n] } } } }} typedlist create BNF::mot coroutine mot apply {{} { while 1 { set n [yield] if {[regexp {[a-zA-Z]} $n]} { if {[info exist ::String($::Instruction)]} { if {[[lindex $::String($::Instruction) end] type] eq "mot"} { [lindex $::String($::Instruction) end] lappend $n } elseif {[[lindex $::String($::Instruction) end] type] eq "var"} { [lindex $::String($::Instruction) end] lappend $n } else { lappend ::String($::Instruction) [BNF::mot $n] } } else { lappend ::String($::Instruction) [BNF::mot $n] } } elseif {[regexp {[0-9]} $n]} { if {[info exist ::String($::Instruction)]} { if {[[lindex $::String($::Instruction) end] type] eq "mot"} { [lindex $::String($::Instruction) end] lappend $n } elseif {[[lindex $::String($::Instruction) end] type] eq "var"} { [lindex $::String($::Instruction) end] lappend $n } } } } }} typedlist create BNF::égal coroutine égal apply {{} { while 1 { if {[regexp {=} [set n [yield]]]} { lappend ::String($::Instruction) [BNF::égal $n] } } }} typedlist create BNF::plus coroutine plus apply {{} { while 1 { if {[regexp {\+} [set n [yield]]]} { lappend ::String($::Instruction) [BNF::plus $n] } } }} typedlist create BNF::fois coroutine fois apply {{} { while 1 { if {[regexp -- {\*} [set n [yield]]]} { lappend ::String($::Instruction) [BNF::fois $n] } } }} typedlist create BNF::moins coroutine moins apply {{} { while 1 { if {[regexp -- {-} [set n [yield]]]} { lappend ::String($::Instruction) [BNF::moins $n] } } }} typedlist create BNF::rapport coroutine rapport apply {{} { while 1 { if {[regexp -- {/} [set n [yield]]]} { lappend ::String($::Instruction) [BNF::rapport $n] } } }} typedlist create BNF::ParenthèseOuverte coroutine ParenthèseOuverte apply {{} { while 1 { if {[regexp -- {\(} [set n [yield]]]} { lappend ::String($::Instruction) [BNF::ParenthèseOuverte $n] } } }} typedlist create BNF::ParenthèseFermée coroutine ParenthèseFermée apply {{} { while 1 { if {[regexp -- {\)} [set n [yield]]]} { lappend ::String($::Instruction) [BNF::ParenthèseFermée $n] } } }} typedlist create BNF::var coroutine var apply {{} { while 1 { set n [yield] if {[regexp {\$} $n]} { lappend ::String($::Instruction) [BNF::var $n] } } }} typedlist create BNF::FinDeInstruction coroutine FinDeInstruction apply {{} { while 1 { set n [yield] if {[regexp {\;} $n] || [regexp {\n} $n]} { incr ::Instruction } } }} typedlist create BNF::point coroutine point apply {{} { while 1 { set n [yield] if {[regexp {\.} $n]} { if {[info exist ::String($::Instruction)] && \ [[lindex $::String($::Instruction) end] type] eq "nombre"} { [lindex $::String($::Instruction) end] lappend $n } else { lappend ::String($::Instruction) [BNF::nombre $n] } } } }} typedlist create BNF::sousScript coroutine sousScript apply {{} { while 1 { set n [yield] if {[regexp {\[} $n]} { if {$::sousScript == 0} { lappend ::String($::Instruction) [BNF::sousScript $n] } else { [lindex $::String($::Instruction) end] lappend $n } incr ::sousScript } elseif {[regexp {\]} $n]} { [lindex $::String($::Instruction) end] lappend $n incr ::sousScript -1 } elseif {$::sousScript >= 1} { ToutVenant $n } } }} typedlist create BNF::FinDeMot coroutine FinDeMot apply {{} { while 1 { set n [yield] if {[regexp {[ \t]} $n]} { lappend ::String($::Instruction) [BNF::FinDeMot $n] } } }} typedlist create BNF::ToutVenant coroutine ToutVenant apply {{} { set i 0 set N {} set ::SousExpression 0 while 1 { set n [yield] if {$n eq " " && $i == 0} { lappend N " " incr i } elseif {$n eq "\{" && $i == 1} { lappend N \{ incr i } elseif {$n eq "=" && $i == 2} { lappend N = incr i } elseif {$n eq "\}" && $i == 3} { lappend N \} incr i } elseif {$n eq "(" && $i == 4} { set ::SousExpression 1 set ::Parenthèse 1 [lindex $::String($::Instruction) end] lappend \[ e x p r { } \{ $n set N {} set i 5 } elseif {$::SousExpression} { sousExpression $n if {!$::SousExpression} { set i 1 } } else { [lindex $::String($::Instruction) end] lappend {*}$N $n set N {} } } }} typedlist create BNF::sousExpression coroutine sousExpression apply {{} { set i 0 while 1 { set n [yield] if {$n eq ")" && ${::Parenthèse} == 1} { [lindex $::String($::Instruction) end] lappend $n \} \] set ::SousExpression 0 } elseif {$n eq ")" && ${::Parenthèse} > 1} { [lindex $::String($::Instruction) end] lappend $n incr ::Parenthèse -1 } elseif {$n eq "("} { [lindex $::String($::Instruction) end] lappend $n incr ::Parenthèse } else { [lindex $::String($::Instruction) end] lappend $n } } }} proc lexpr {sz} { catch {array unset ::String} array set ::String {} set ::Instruction 0 set ::sousScript 0 # lexer foreach c [split $sz ""] { sousScript $c if {$::sousScript} continue mot $c égal $c var $c nombre $c plus $c fois $c moins $c rapport $c point $c ParenthèseOuverte $c ParenthèseFermée $c FinDeInstruction $c } set Res [] # interpreteur for {set i 0} {$i <= $::Instruction} {incr i} { set res [] set var [] set Silencieux 0 if {![info exist ::String($i)]} continue if {[set range [lsearch $::String($i) *::égal:*]] ne {}} { foreach e [lrange $::String($i) 0 ${range}-1] { append var [$e get] } set ::String($i) [lrange $::String($i) ${range}+1 end] if {$range == 0} { set Silencieux 1 } } foreach e $::String($i) { append res [join [$e get] ""] } if {$var ne {}} { set $var [expr $res] } elseif {$Silencieux} { eval subst $res } else { lappend Res [expr $res] } } return $Res } # Test lexpr { =[wm withdraw . toplevel [set t .top_tmp_] wm geom $t +100+100 wm attributes $t -alpha 0.0 wm state $t zoomed update idletask lassign [split [winfo geometry $t] x+] W H X Y] w=[winfo width .]; h=[winfo height .] =[wm geom . +{=}(($W-$w)/2+$X)+{=}(($H-$h)/2+$Y) wm deiconify . destroy $t update idletask] } ====== the `lexpr` proc here do : * if the line begin with `v =`, `v` is seen as a variable and so is set to the value calculate after the equal sign (variable is only one letter long for the moment) * if the line begin with `=`, the action isn't return as result of the command (else it will be appended to a list which will be return) * what is between `[[` and `]]` is a seen as a subscript to be evaluated * the construct `{=}( ... )` is seen as an expression to be calculated in a subscript. <>String Processing | Example |Parsing