Version 2 of parsing expressions

Updated 2005-01-23 15:26:32

SS 23Jan2004 - In order to add the expr command to Jim (A small-footprint Tcl implementation I'm working on) I had to write a compiler able to turn mathematical expressions into bytecode for a stack-based machine. Before to write it in C, I wrote a prototype in Tcl that can be useful. AM is also working on something like this, similar in design to the Tcl's expr parser itself. This one is a bit different. It generates a stack program from the expr representation, and optionally can turn the stack program into a Tcl program (i.e. a parse tree). For Jim the last part is not useful, but I added it for completeness. This code does not check at all if the input expression is correct. It's just a prototype, I'm going to write the real version in C.

Example output:

 Exp: 1+2*3
 Rpn: 1 2 3 * +
 Tcl: [+ 1 [* 2 3]]

 Exp: 1*2+3
 Rpn: 1 2 * 3 +
 Tcl: [+ [* 1 2] 3]

 Exp: ((1*(2+3)*4)+5)*2
 Rpn: 1 2 3 + * 4 * 5 + 2 *
 Tcl: [* [+ [* [* 1 [+ 2 3]] 4] 5] 2]

 Exp: 0-1+5
 Rpn: 0 1 - 5 +
 Tcl: [+ [- 0 1] 5]

 Exp: 2*0-1+5
 Rpn: 2 0 * 1 - 5 +
 Tcl: [+ [- [* 2 0] 1] 5]

 Exp: 1+2*3+4*5+6
 Rpn: 1 2 3 * + 4 5 * + 6 +
 Tcl: [+ [+ [+ 1 [* 2 3]] [* 4 5]] 6]

Exp is the input expression, Rpn is the generated RPN program, Tcl is the RPN program translated into a Tcl program.

And That's the code:

 # Expression parser in Tcl.
 # Copyright (C) 2005 Salvatore Sanfilippo
 # Under the BSD license.

 # This list represent the operators.
 # is composed of groups of three elements:
 # The operator name, precedente, arity.
 set ExprOperators {
     - 0 2
     + 0 2
     \* 1 2
     / 1 2
 }

 proc ExprOperatorPrecedence op {
     foreach {name prec arity} $::ExprOperators {
         if {$name eq $op} {return $prec}
     }
     return -1
 }

 proc ExprOperatorArity op {
     foreach {name prec arity} $::ExprOperators {
         if {$name eq $op} {return $arity}
     }
     return -1
 }

 proc ExprIsOperator op {
     expr {[ExprOperatorPrecedence $op] != -1}
 }

 proc ExprGetToken exprVar {
     upvar 1 $exprVar expression
     set expression [string trim $expression]
     if {[regexp {(^[0-9]+)(.*)} $expression -> tok exprRest]} {
         set res [list operand $tok]
         set expression $exprRest
     } elseif {[ExprIsOperator [string index $expression 0]]} {
         set res [list operator [string index $expression 0]]
         set expression [string range $expression 1 end]
     } elseif {[string index $expression 0] eq "("} {
         set res [list substart {}]
         set expression [string range $expression 1 end]
     } elseif {[string index $expression 0] eq ")"} {
         set res [list subend {}]
         set expression [string range $expression 1 end]
     } else {
         return -code error \
             "default reached in ExprGetToken. String: '$expression'"
     }
     return $res
 }

 proc ExprTokenize expression {
     set tokens {}
     while {[string length [string trim $expression]]} {
         lappend tokens [ExprGetToken expression]
     }
     return $tokens
 }

 proc ExprPop listVar {
     upvar 1 $listVar list
     set ele [lindex $list end]
     set list [lindex [list [lrange $list 0 end-1] [set list {}]] 0]
     return $ele
 }

 proc ExprPush {listVar element} {
     upvar 1 $listVar list
     lappend list $element
 }

 proc ExprPeek listVar {
     upvar 1 $listVar list
     lindex $list end
 }

 proc ExprTokensToRPN tokens {
     set rpn {}
     set stack {}
     foreach t $tokens {
         foreach {type token} $t {}
         if {$type eq {operand}} {
             ExprPush rpn $token
         } elseif {$type eq {operator}} {
             while {[llength $stack] && \
                     [ExprOperatorPrecedence [ExprPeek stack]] >= \
                     [ExprOperatorPrecedence $token]} \
             {
                 ExprPush rpn [ExprPop stack]
             }
             ExprPush stack $token
         } elseif {$type eq {substart}} {
             ExprPush stack "("
         } elseif {$type eq {subend}} {
             while 1 {
                 set op [ExprPop stack]
                 if {$op eq "("} break
                 ExprPush rpn $op
             }
         }
     }
     while {[llength $stack]} {
         ExprPush rpn [ExprPop stack]
     }
     return $rpn
 }

 proc ExprToRpn expression {
     set tokens [ExprTokenize $expression]
     ExprTokensToRPN $tokens
 }

 proc ExprRpnToTcl rpn {
     set stack {}
     foreach item $rpn {
         if {[ExprIsOperator $item]} {
             set arity [ExprOperatorArity $item]
             set operators [lrange $stack end-[expr {$arity-1}] end]
             set stack [lrange $stack 0 end-$arity]
             while {$arity} {ExprPop rpn; incr arity -1}
             set item "$item "
             foreach operator $operators {
                 append item "$operator "
             }
             set item [string range $item 0 end-1]
             ExprPush stack "\[$item\]"
         } else {
             ExprPush stack $item
         }
     }
     return [lindex $stack 0]
 }

 proc ExprTest {} {
     set expressions {
         {1+2*3}
         {1*2+3}
         {((1*(2+3)*4)+5)*2}
         {0-1+5}
         {2*0-1+5}
         {1+2*3+4*5+6}
     }
     foreach e $expressions {
         set rpn [ExprToRpn $e]
         set tcl [ExprRpnToTcl $rpn]
         puts "Exp: $e"
         puts "Rpn: $rpn"
         puts "Tcl: $tcl"
         puts {}
     }
 }

 ExprTest