Version 0 of Naive Tcl parser

Updated 2013-08-01 07:33:25 by CMcC

Thinking about the dodekalogue, so I threw this together. It's a Tcl parser in Tcl. It tries to follow the dodekalogue assiduously.

# tclParser.tcl - a naive first principles tcl parser
source Debug.tcl
package require Debug

Debug define parser
Debug define brace
Debug define quote
Debug define pch

oo::class create Parser {
    variable rest next accum done count endc escaped
    method next {} {
        incr count
        set next [string index $rest 0]
        append accum $next
        set rest [string range $rest 1 end]
        set done [expr {[string length $rest] == 0}]
        Debug.pch {next '$next'}
        return $next
    }

    method pushback {char} {
        set rest $char$rest
    }

    method skip {{chars 1}} {
        incr count $chars
        set result [string range $rest 0 $chars-1]
        set rest [string range $rest $chars end]
        set done [expr {[string length $rest] == 0}]
        Debug.pch {skip $chars '$result'}
        return $result
    }

    method lookahead {{chars 1}} {
        set done [expr {[string length $rest] <= $chars}]
        return [string range $rest 0 $chars-1]
    }

    method inject {ch} {
        append accum $ch
    }
    
    method consume {} {
        set a $accum
        set accum ""
        Debug.pch {consume '$a'}
        return $a
    }
    
    method comment {} {
        while {!$done && [my skip] ne "\n"} {}
    }
    
    method bsnl {} {
        if {[my lookahead] eq "\n"} {
            while {[string is space -strict [my lookahead]]} {my skip}
            my inject " "
            Debug.parser {bsnl: [my lookahead]}
        }
    }
        
    method goatse {eoc} {
        my skip 3
        return [list goatse [my cword $eoc]]
    }
        
    method brace {} {
        #Debug.parser {brace: '[my lookahead]'}
        set level -1
        while {[set ch [my lookahead]] ne ""} {
            switch -exact -- $ch {
                \{ {
                    my next
                    incr level
                    Debug.brace {brace: in $level '[string range $accum end-10 end]'}
                }
                \} {
                    my next
                    incr level -1
                    Debug.brace {brace: out $level '[string range $accum end-10 end]'}
                    if {$level < 0} {
                        set result [string range [my consume] 1 end-1]        ;# strip braces
                        Debug.parser {brace: complete '$result'}
                        return [list brace $result]
                    }
                }
                \\ {
                    if {[my lookahead 2] eq "\\\n"} {
                        Debug.brace {got bsnl: '$accum'}
                        my skip        ;# skip the backslash
                        my bsnl        ;# do \\n subst
                    } elseif {[my lookahead 2] eq "\\\\"} {
                        my next
                        my next
                    } elseif {[my lookahead 2] eq "\\\{"} {
                        my next
                        my next
                    } elseif {[my lookahead 2] eq "\\\}"} {
                        my next
                        my next
                    } else {
                        my next
                    }
                }
                default {
                    my next
                }
            }
        }
        error "unmatched brace '[my consume]'"
    }

    method skipws {} {
        while {[string is space -strict [string index $rest 0]]} {
            my skip
        }
    }

    method eocommand {eoc} {
        return [expr {[my lookahead] in $eoc}]
    }

    # skipcws - skip whitespace terminating on end of command
    method skipcws {eoc} {
        while {[string is space -strict [string index $rest 0]]} {
            if {[my eocommand $eoc]} {
                return -code break "end of command"
            }
            my skip
        }
    }

    # skipclass - skip and return a contiguous prefix matching a class
    method skipclass {cc {end end}} {
        string is $cc -strict -failindex fail [string range $rest 0 $end]
        if {[info exists fail]} {
            set result [string range $rest 0 $fail-1]
            my skip $fail
        } else {
            # the entire $rest matches class
            set result [string range $rest 0 end]
            my skip [string length $result]
        }
        return $result
    }

    # char - gather a character with backslash substitution
    method char {} {
        Debug.pch {char: '[my lookahead]'}
        set escaped 0
        if {[my lookahead] eq "\\"} {
            my skip
            set escaped 1
            switch -exact -- [my lookahead] {
                a - b - f - n - r - t - v {
                    my skip; my inject [subst \\[my lookahead]]
                }
                \n {
                    my skip; my bsnl; set next " "
                }
                \\ {
                    my skip; my inject [set next \\]
                    }
                x {
                    my skip;        ;# skip the literal "x"
                    set hex [my skipclass xdigit]
                    set hex [string index $hex end-1][string index $hex end]
                    my inject [set next [binary decode hex $hex]]
                }
                u {
                    my skip
                    string is xdigit -strict -failindex hexl $rest
                    if {![info exists hexl] || $hexl > 4} {
                        set hexl 4
                    }
                    set hex [my skip $hexl]
                    my inject [set next [subst \\$hex]]
                    unset hexl
                }
                default {
                    my next
                }
            }
        } else {
            my next
        }
        return $next
    }

    method bracket {} {
        set commands {}
        set eoc {\n ; "" \]}
        while {!$done && ![my eocommand $eoc]} {
            set command [my command $eoc]
            if {[llength $command]} {
                lappend commands $command
            }
            if {[my skip] eq "\]"} {
                break
                puts stderr "EOC in bracket: '$endc'"
            }
        }
        puts stderr "BRACKET [llength $commands]: $commands"
        if {[llength $commands] == 1} {
            return [lindex $commands 0]
        } else {
            return [list script $commands]
        }
    }
    
    method var {} {
        # ${name} form
        if {[my lookahead] eq "\{"} {
            while {!$done && [my char] != "\}"} {}
            set var [string range [my consume] 0 end-1]
            return [list var $var]
        }

        # $name or $name(index) form
        set var ""
        while {!$done &&
               [string is alnum -strict [set ch [my char]]]
               || $ch eq ":"
               || $ch eq "_"} {
            append var $ch
        }

        if {$ch ne "("} {
            my pushback $ch
            my consume
            return [list var $var]
        }

        set index ""
        while (!$done) {
            if {[set ch [my char]] eq ")" && !$escaped} break
            append index $ch
        }

        my consume
        return [list var $var $index]
    }

    method quote {} {
        if {[my next] ne "\""} {
            error "parsing quote without quote"
        }
        my consume

        while {!$done} {
            # accumulate a word character
            switch -exact -- [my char] {
                \" {
                    # got close quote
                    set accum [string range $accum 0 end-1]
                    if {$accum ne ""} {
                        lappend result [list literal $accum]
                        my consume
                    }
                    break
                }

                \[ {
                    if {$accum ne "\["} {
                        lappend result [list literal [string range [my consume] 0 end-1]]
                    } else {
                        my consume        ;# delete open bracket
                    }
                    lappend result [my bracket]
                    puts stderr "POSTB $result"
                }

                \$ {
                    if {$accum ne "\$"} {
                        lappend result [list literal [string range [my consume] 0 end-1]]
                    }
                    lappend result [my var]
                }

                default {
                }
            }
            Debug.quote {'$accum'}
        }

        if {$done} {
            error "no close quote"
        }

        Debug.parser {quote: '$result'}
        if {[llength $result] == 1} {
            return [lindex $result 0]
        } else {
            return [list compound {*}$result]
        }
    }

    method word {eoc} {
        set result {}
        while {!$done
               && [set la [my lookahead]] ni $eoc
               && ![string is space -strict $la]
           } {
            # accumulate a word character
            switch -exact -- [set ch [my char]] {
                \[ {
                    if {$accum ne "\["} {
                        lappend result [list literal [string range [my consume] 0 end-1]]
                    } else {
                        my consume        ;# delete open bracket
                    }
                    lappend result [my bracket]
                    puts stderr "POSTB $result"
                }
                \$ {
                    if {$accum ne "\$"} {
                        lappend result [list literal [string range [my consume] 0 end-1]]
                    }
                    lappend result [my var]
                }
                default {
                }
            }
        }

        if {$accum ne ""} {
            lappend result [list literal [my consume]]
        }

        Debug.parser {word: '$result' / '$next' / $la}
        if {[llength $result] == 1} {
            return [lindex $result 0]
        } else {
            return [list compound {*}$result]
        }
    }

    method cword {eoc} {
        try {
            my skipcws $eoc        ;# skip whitespace and break on end of command
        } on break {e eo} {
            # end of command
            return -code break "end of command"
        }

        #Debug.parser {cword: '[my lookahead]'}
        if {[my lookahead 3] eq "{*}"} {
            tailcall my goatse $eoc
        }

        switch -exact -- [my lookahead] {
            \{ {
                tailcall my brace
            }

            \" {
                tailcall my quote
            }

            default {
                tailcall my word $eoc
            }
        }
    }

    method command {{eoc {\n ; ""}}} {
        # skip leading comments
        my skipws        ;# skip plain whitespace
        while {[my lookahead] eq "#"} {
            my comment
            my skipws        ;# skip all whitespace
        }

        if {$done} {
            return {}
        }

        set result {}
        while {!$done && ![my eocommand $eoc]} {
            lappend result [my cword $eoc]
            puts stderr "post CWORD '[my lookahead]'"
        }
        puts stderr "EOC c: '$endc'"

        if {[llength $result]} {
            return [list command {*}$result]
        } else {
            return {}
        }
    }
    

    method parse {script} {
        set rest $script
        while {!$done} {
            lappend commands [my command]
            my skip        ;# consume the end of command character
        }
        return [list script {*}$commands]
    }

    constructor {args} {
        set done 0
        set endc ""
        set escaped 0
    }
}

Debug on parser
Debug off brace
Debug off quote
Debug "off" pch

set tclp [Parser new]
puts [$tclp parse [read stdin]]

exit

#set test "quote[test]passed"
#set test word[test]passed
#set test test$test(that)those
set test [list {*}$moop]