Version 1 of Operator precedence expression parser

Updated 2012-09-13 13:06:21 by jbr

===== # This code is an operator precedence parser which uses an operator table and a variation

 # on the shunting yard parser algorithm to convert an infix expression to a series of 
 # prefix operator callbacks.
 #
 #
 namespace eval expression {
    # This is the operator precedence table for C++.  The dot operator is effectivly
    # commented out by "renameing" it ".###".  The lexical "analyzer" is too primitive 
    # to destinguish floating point numbers and a real . operator usage.
    #
    # The conditional, "?:" and sequence operator, "," are missing.
    #
    # precedence values are spaced out by multiplying the nominal
    # C++ precedence by 10.  This should allow applications to add other
    # operators as needed at any preceduce required.
    #
    # The unary * has been changes to nulary to support tna
    #
    # The index [] operator is nary and treated similarly to a function call to "indx" instead
    # of "call".
    #
    # dolar "$" is added for eventaul support of tcl variables in tna.
    #
    set optable {
:
{ 10 2 left name }
        ++        {   20                 1        left        inc   }
        --        {   20                 1        left        dec   }
         .###        {   20                 2        left        dot   }
            ->        {   20                 2        left        arrow }
        ++u        {   30                 1        right        uinc  }
        --u        {   30                 1        right        udec  }
         -u        {   30                 1        right        usub  }
         +u        {   30                 1        right        uadd  }
         *u        {   30                 0        right        deref }
         $u        {   30                 1        right        dolar }
         &u        {   30                 1        right        refer }
  • { 50 2 left mul }
         /        {   50                 2        left        div   }
         %        {   50                 2        left        mod   }
         +        {   60                 2        left        add   }
         -        {   60                 2        left        sub   }
        <<        {   70                 2        left        shl   }
        >>        {   70                 2        left        shr   }
        >        {   80                 2        left        gt    }
        <        {   80                 2        left        lt    }
        <=        {   80                 2        left        lte   }
        >=        {   80                 2        left        gte   }
        ==        {   90                 2        left        equ   }
        !=        {   90                 2        left        neq   }
         &        {  100                 2        left        band  }
         ^        {  110                 2        left        bxor  }
         |        {  120                 2        left        bor   }
        &&        {  130                 2        left        land  }
        ||        {  140                 2        left        lor   }
         ?        {  150                 2        right        hook  }
         =        {  160                 2        right        assign}
        +=        {  160                 2        right        addasn}
        -=        {  160                 2        right        subasn}
        *=        {  160                 2        right        mulasn}
        /=        {  160                 2        right        divasn}
        %=        {  160                 2        right        modasn}
        <<=        {  160                 2        right        shlasn}
        >>=        {  160                 2        right        shrasn}
        &=        {  160                 2        right        bndasn}
        ^=        {  160                 2        right        bxrasn}
        |=        {  160                 2        right        borasn}

        [        { 1000                 0        {} indx        }
        ]        { 1000                 0        {} none        }
        (        { 1000                 0        {} call        }
        )        { 1000                 0        {} none        }
        ,        { 1000                 0        {} none        }
        ;       { 1000                 0        {} semi }
    }


    # A little helper to sort a precedence table in to a [string map] mapping that will be used
    # for "lexical analysis".
    #
    proc prep-tokens { tokens } {
        variable opers

        foreach token [dict keys $tokens] {
            set opers([lindex [dict get $tokens $token] 3]) $token

            set token [string map { u {} } $token]
            lappend map $token " $token "
        }
        lappend map "\n" " ; "
        lsort -stride 2 -command lencmp [lsort -stride 2 -u $map]
    }

        proc lencmp { a b } {
            return [expr [string len $b] - [string len $a]]
        }

    # Accessors for the operator table data structure.
    #
    proc prec { tok } {                        # Return the precedence of an operator.
        upvar optable optable

        set reply 0
        catch { set reply [lindex [dict get $optable $tok] 0] }
        return $reply
    }
    proc arity { tok } {                # Return the arity of an operator.
        upvar optable optable

        set reply 0
        catch { set reply [lindex [dict get $optable $tok] 1] }
        return $reply
    }
    proc assoc { tok } {                # Return the assiciativity of an operator.
        upvar optable optable

        set reply 0
        catch { set reply [lindex [dict get $optable $tok] 2] }
        return $reply
    }
    proc name { tok } {                        # Return the name of an operator.
        upvar optable optable

        set reply 0
        catch { set reply [lindex [dict get $optable $tok] 3] } reply; # puts $reply
        return $reply
    }

    # Stack operators.
    #
    proc push { stkName value }   { upvar $stkName stk;   lappend stk $value }
    proc pop  { stkName { n 1 } } { upvar $stkName stk;
        set x [expr $n-1]
        set top [lrange $stk end-$x end]
        set stk [lrange $stk      0 end-$n]
        if { $n == 1 } {
            return [join $top]
        } else {
            return $top
        }
    }
    proc top  { stkName }         { upvar $stkName stk;   return [lindex $stk end] }
    proc pull { stkName }         { upvar $stkName stk;   set stk [lassign $stk top]; return $top }
    proc next { stkName }         { upvar $stkName stk;   return [lindex $stk 0] }

    # Here is the parser.  You pass in the input string, the token map, the operator precedence table and
    # a script prefix with will be called as each subexpresion in the input is recognized.
    #
    proc parse { input tokenmap optable prefix } {
        set operator {}                                        ; # Stacks
        set operands {}
        set parens   {}
        set prv {}                                        ; # The previous token.

        set input [string map $tokenmap $input]                ; # Lexing done by mapping spaces around the operators!

        #puts $input

        try {
            while { [llength $input] } {
                set tok [pull input]

                if { $tok eq ";" } {
                    while { [top operator] ne {} } {
                        push operands [{*}$prefix [name [set op [pop operator]]] {*}[pop operands [arity $op]]]
                    }
                    {*}$prefix [name ";"]

                    # Reset everyone
                    #
                    set operator {}                        ; # Stacks
                    set operands {}
                    set parens   {}
                    set prv {}                                ; # The previous token.

                    continue
                }

                if { [prec ${tok}u] && $prv ne ")" && ($prv eq {} || [prec $prv]) } {
                    set tok ${tok}u
                }

                #puts "$tok [prec $tok], op: $operator args: $operands"


                if { ![prec $tok] } {                                                        ; # Push operand
                    if { $prv ne {} && ![prec $prv] } { error "unexpected token : $prv _@_ $tok" }

                    push operands $tok
                } elseif {  $tok eq "("
                        ||  $tok eq {[}
                        ||  [top operator] eq {}
                        ||  [prec $tok] <  [prec [top operator]]
                        || ([prec $tok] <= [prec [top operator]]
                         && [assoc $tok] eq "right" ) } {

                    if { $tok eq "(" || $tok eq {[} } {
                        if { $tok eq {[} || ($prv ne {} && ![prec $prv]) } {
                            if { [next input] eq ")" }        { push parens 0                ; # Function call or Index
                            } else                         { push parens 1   }
                        } else                                 { push parens "(" }        ; # Normal expression paren
                    } else {
                        if { [prec ${tok}u] && ($prv eq {} || $prv eq "," || $prv eq "(" || $prv eq {[}) } {
                            set tok ${tok}u
                        }
                    }

                    push operator $tok

                } elseif { $tok eq ")" || $tok eq {]} || $tok eq "," } {        ; # Function call, Index or comma
                    switch [top parens] {
                     {}  { error "unexpected \")\" : $prv $tok [next input]"        }

                     "(" {                                                        ; # Close paren on normal expression paren
                        while { [top operator] ne "(" } {
                            push operands [{*}$prefix [name [set op [pop operator]]] {*}[pop operands [arity $op]]]
                        }
                        pop operator
                        pop parens
                     }
                     default {                                                                ; # Function call, Index or comma
                        if { $tok eq "," } { push parens [expr [pop parens]+1] }        ; # Incr function nargs

                        while { [top operator] ne "(" && [top operator] ne {[} } {        ; # Output function arg
                            push operands [{*}$prefix [name [set op [pop operator]]] {*}[pop operands [arity $op]]]
                        }
                        if { $tok eq ")" || $tok eq "]" } {                                ; # Output function call or Index
                            push operands [{*}$prefix [name [pop operator]] {*}[pop operands [expr [pop parens]+1]]]
                            pop  parens
                        }
                     }
                    }
                } else {
                    while { [top operator] ne {} && [prec $tok] >= [prec [top operator]] } {
                        push operands [{*}$prefix [name [set op [pop operator]]] {*}[pop operands [arity $op]]]
                    }
                    push operator $tok
                }

                set prv $tok

            }
            if { $parens ne {} } { error "parens not balanced" }

            while { [top operator] ne {} } {
                push operands [{*}$prefix [name [set op [pop operator]]] {*}[pop operands [arity $op]]]
            }
        } on error message {
            puts $::errorInfo
            error "parse error at: $prv _@_ $tok [next input] : $message"

        }

        pop operands
    }
 }

=====