Version 1 of Playing with parsing

Updated 2008-03-17 09:49:57 by suchenwi

Richard Suchenwirth 2008-03-17 - Once again, I had some waiting time, and wanted to experiment with Tcl - in this case some toys to

  • parse infix expressions like 1+(2*3)
  • convert them to some RPN-like notation, like {push 1} {push 2} {push 3} * +
  • execute the resulting "code", so that the result is in this case 7

What is not covered is the precedence of operators. The code below associates to the left if not parenthesized, so 2+3*4 gives 20...

#-------------- Testing framework first, so we can test immediately
set verbose [expr {[lsearch $argv -v] >= 0}]
set N 0
set Nfail 0
proc e.g. {cmd -> expected} {
    incr ::N
    catch {uplevel 1 $cmd} res
    if {$res ne $expected} {
	puts [list ($::N) $cmd -> $res ***** expected $expected]
	incr ::Nfail
    } elseif $::verbose {puts [list ($::N) $cmd -> $res]}
}
#------------------------------------------------------- real stuff

proc typeof char {
    switch -- $char {
	0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - . {return digit}
	+ - "-" - * - / - ( - ) {return op}
	default {error "bad character $char"}
    }
}
e.g. {typeof 7} -> digit
e.g. {typeof .} -> digit
e.g. {typeof *} -> op
e.g. {typeof ?} -> {bad character ?}

#----------------------------------------------------------------
proc tokenize str {
    set res ""
    set previous ""
    set parens 0
    foreach char [split $str ""] {
	if {$char eq "("} {incr parens; append res " "}
	if {$char eq ")"} {incr parens -1}
	if {$char eq " "} continue
	set type [typeof $char]
	if {$previous ne "" && $type ne $previous} {append res " "}
	set previous $type
	append res $char
	if {$char eq ")"} {append res " "}
    }
    if $parens {error "unmatched parens $parens"}
    string trim $res
}
e.g. {tokenize 2+3} -> {2 + 3}
e.g. {tokenize "2 + 3"} -> {2 + 3}
e.g. {tokenize "2 + (3)"} -> {2 + ( 3 )}
e.g. {tokenize 12+43} -> {12 + 43}
e.g. {tokenize (} -> {unmatched parens 1}
e.g. {tokenize )} -> {unmatched parens -1}


#----------------------------------------------------------------- Stack
proc pop {{_stack ""}} {
    if {$_stack eq ""} {set _stack ::S} ;# default: global stack
    upvar 1 $_stack stack
    K [lindex $stack end] [set stack [lrange $stack 0 end-1]]
}
proc K {a b} {set a}

proc push what {lappend ::S $what}

e.g. {push hello; set S} -> hello
e.g. {pop; set S} -> {}

#----------------------------------------------------------------- Parser
proc parse tokens {
    #puts [info level 0]
    set res {}
    set opstack {}
    set braces 0
    set recurs {}
    foreach token $tokens {
	if $braces {
	    lappend recurs $token
	    if {$token eq "("} {incr braces}
	    if {$token eq ")"} {
		incr braces -1
		if {$braces == 0} {
		    eval lappend res [parse [lrange $recurs 0 end-1]]
		    set recurs {}
		}
	    }
	} else {
	    if [string is double -strict $token] {
		lappend res [list push $token] 
		if {[llength $opstack]} {lappend res [pop opstack]}
	    } elseif {$token eq "("} {
		incr braces
	    } elseif {[typeof $token] eq "op"} {
		lappend opstack $token
	    }
	}
	#puts token:$token,braces:$braces,recurs:$recurs
    }
    while {[llength $opstack]} {lappend res [pop opstack]}
    set res
}
e.g. {parse "2 + 3"} -> {{push 2} {push 3} +}
e.g. {parse "2 + 3 + 4"} -> {{push 2} {push 3} + {push 4} +}
e.g. {parse "( 2 + 3 ) * 4"} -> {{push 2} {push 3} + {push 4} *}

#----------------------------------------------------------------
foreach op {+ - * /} {
    proc $op {} "set a \[pop\]; push \[expr {\[pop\] $op \$a}\]"
}
e.g. {info body +} -> {set a [pop]; push [expr {[pop] + $a}]}

#----------------------------------------------------------- RPN evaluator
proc rpn cmdlist {
    if $::verbose {puts $cmdlist}
    global S
    foreach cmd $cmdlist {eval $cmd}
    if {[llength $S] != 1} {error "bad stack: $S"}
    pop
}
e.g. {rpn {{push 6} {push 7} *}} -> 42

#----------------------------------------------------------------
proc i2r {expr} {rpn [parse [tokenize $expr]]}

e.g. {i2r 2+3}     -> 5
e.g. {i2r 2/3}     -> 0
e.g. {i2r 1/2.}    -> 0.5
e.g. {i2r 1./2}    -> 0.5
e.g. {i2r 2/0}     -> "divide by zero"
e.g. {i2r 2/3.}    -> 0.666666666667
e.g. {i2r (2+3)*4} -> 20
e.g. {i2r 2+3*4}   -> 20 ;# yes... left-associative, no precedence
e.g. {i2r 2+(3*4)}   -> 14
e.g. {i2r 2+3+4}   -> 9
e.g. {i2r (1+2)*(3+4)} -> 21
e.g. {i2r 2*3+4}   -> 10
e.g. {i2r (2*3)+4}   -> 10
e.g. {i2r (2*3)+(4)}   -> 10
e.g. {i2r 3*(3+4)}   -> 21
e.g. {i2r {3 + ( 4 * ( 5 + 6 ) )}} -> 47


#------------------------------ Final tally
incr N -1 ;# last call to e.g. will incr again
puts ---
e.g. {list $Nfail / $N = [format %.2f [expr 100.*$Nfail/$N]] %} -> 0
puts ""
foreach arg $argv {e.g. $arg -> ?}