Version 0 of Playing with parsing

Updated 2008-03-17 09:49:03 by suchenwi

Richard Suchenwirth 2008-03-17 - Once again, I had some waiting time, and wanted to experiment with Tcl - in this case some toys to

  • parse infix expressions like 1+(2*3)
  • convert them to some RPN-like notation, like {push 1} {push 2} {push 3} * +
  • execute the resulting "code", so that the result is in this case 7

What is not covered is the precedence of operators. The code below associates to the left if not parenthesized, so 2+3*4 gives 20...

#-------------- Testing framework first, so we can test immediately set verbose expr {[lsearch $argv -v >= 0}] set N 0 set Nfail 0 proc e.g. {cmd -> expected} {

    incr ::N
    catch {uplevel 1 $cmd} res
    if {$res ne $expected} {
        puts [list ($::N) $cmd -> $res ***** expected $expected]
        incr ::Nfail
    } elseif $::verbose {puts [list ($::N) $cmd -> $res]}

} #------------------------------------------------------- real stuff

proc typeof char {

    switch -- $char {
        0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - . {return digit}
        + - "-" - * - / - ( - ) {return op}
        default {error "bad character $char"}
    }

} e.g. {typeof 7} -> digit e.g. {typeof .} -> digit e.g. {typeof *} -> op e.g. {typeof ?} -> {bad character ?}

#---------------------------------------------------------------- proc tokenize str {

    set res ""
    set previous ""
    set parens 0
    foreach char [split $str ""] {
        if {$char eq "("} {incr parens; append res " "}
        if {$char eq ")"} {incr parens -1}
        if {$char eq " "} continue
        set type [typeof $char]
        if {$previous ne "" && $type ne $previous} {append res " "}
        set previous $type
        append res $char
        if {$char eq ")"} {append res " "}
    }
    if $parens {error "unmatched parens $parens"}
    string trim $res

} e.g. {tokenize 2+3} -> {2 + 3} e.g. {tokenize "2 + 3"} -> {2 + 3} e.g. {tokenize "2 + (3)"} -> {2 + ( 3 )} e.g. {tokenize 12+43} -> {12 + 43} e.g. {tokenize (} -> {unmatched parens 1} e.g. {tokenize )} -> {unmatched parens -1}

#----------------------------------------------------------------- Stack proc pop {{_stack ""}} {

    if {$_stack eq ""} {set _stack ::S} ;# default: global stack
    upvar 1 $_stack stack
    K [lindex $stack end] [set stack [lrange $stack 0 end-1]]

} proc K {a b} {set a}

proc push what {lappend ::S $what}

e.g. {push hello; set S} -> hello e.g. {pop; set S} -> {}

#----------------------------------------------------------------- Parser proc parse tokens {

    #puts [info level 0]
    set res {}
    set opstack {}
    set braces 0
    set recurs {}
    foreach token $tokens {
        if $braces {
            lappend recurs $token
            if {$token eq "("} {incr braces}
            if {$token eq ")"} {
                incr braces -1
                if {$braces == 0} {
                    eval lappend res [parse [lrange $recurs 0 end-1]]
                    set recurs {}
                }
            }
        } else {
            if [string is double -strict $token] {
                lappend res [list push $token] 
                if {[llength $opstack]} {lappend res [pop opstack]}
            } elseif {$token eq "("} {
                incr braces
            } elseif {[typeof $token] eq "op"} {
                lappend opstack $token
            }
        }
        #puts token:$token,braces:$braces,recurs:$recurs
    }
    while {[llength $opstack]} {lappend res [pop opstack]}
    set res

} e.g. {parse "2 + 3"} -> {{push 2} {push 3} +} e.g. {parse "2 + 3 + 4"} -> {{push 2} {push 3} + {push 4} +} e.g. {parse "( 2 + 3 ) * 4"} -> {{push 2} {push 3} + {push 4} *}

#---------------------------------------------------------------- foreach op {+ - * /} {

    proc $op {} "set a \[pop\]; push \[expr {\[pop\] $op \$a}\]"

} e.g. {info body +} -> {set a pop; push expr {[pop + $a}]}

#----------------------------------------------------------- RPN evaluator proc rpn cmdlist {

    if $::verbose {puts $cmdlist}
    global S
    foreach cmd $cmdlist {eval $cmd}
    if {[llength $S] != 1} {error "bad stack: $S"}
    pop

} e.g. {rpn {{push 6} {push 7} *}} -> 42

#---------------------------------------------------------------- proc i2r {expr} {rpn [parse [tokenize $expr]}

e.g. {i2r 2+3} -> 5 e.g. {i2r 2/3} -> 0 e.g. {i2r 1/2.} -> 0.5 e.g. {i2r 1./2} -> 0.5 e.g. {i2r 2/0} -> "divide by zero" e.g. {i2r 2/3.} -> 0.666666666667 e.g. {i2r (2+3)*4} -> 20 e.g. {i2r 2+3*4} -> 20 ;# yes... left-associative, no precedence e.g. {i2r 2+(3*4)} -> 14 e.g. {i2r 2+3+4} -> 9 e.g. {i2r (1+2)*(3+4)} -> 21 e.g. {i2r 2*3+4} -> 10 e.g. {i2r (2*3)+4} -> 10 e.g. {i2r (2*3)+(4)} -> 10 e.g. {i2r 3*(3+4)} -> 21 e.g. {i2r {3 + ( 4 * ( 5 + 6 ) )}} -> 47

#------------------------------ Final tally incr N -1 ;# last call to e.g. will incr again puts --- e.g. {list $Nfail / $N = [format %.2f [expr 100.*$Nfail/$N] %} -> 0 puts "" foreach arg $argv {e.g. $arg -> ?}