Version 1 of Naive Tcl parser

Updated 2013-08-01 07:36:14 by CMcC

Thinking about the dodekalogue, so I threw this together. It's a Tcl parser in Tcl. It tries to follow the dodekalogue assiduously.

# tclParser.tcl - a naive first principles tcl parser
oo::class create Parser {
    variable rest next accum done count endc escaped
    method next {} {
        incr count
        set next [string index $rest 0]
        append accum $next
        set rest [string range $rest 1 end]
        set done [expr {[string length $rest] == 0}]

        return $next
    }

    method pushback {char} {
        set rest $char$rest
    }

    method skip {{chars 1}} {
        incr count $chars
        set result [string range $rest 0 $chars-1]
        set rest [string range $rest $chars end]
        set done [expr {[string length $rest] == 0}]

        return $result
    }

    method lookahead {{chars 1}} {
        set done [expr {[string length $rest] <= $chars}]
        return [string range $rest 0 $chars-1]
    }

    method inject {ch} {
        append accum $ch
    }
    
    method consume {} {
        set a $accum
        set accum ""

        return $a
    }
    
    method comment {} {
        while {!$done && [my skip] ne "\n"} {}
    }
    
    method bsnl {} {
        if {[my lookahead] eq "\n"} {
            while {[string is space -strict [my lookahead]]} {my skip}
            my inject " "
        }
    }
        
    method goatse {eoc} {
        my skip 3
        return [list goatse [my cword $eoc]]
    }
        
    method brace {} {
        set level -1
        while {[set ch [my lookahead]] ne ""} {
            switch -exact -- $ch {
                \{ {
                    my next
                    incr level
                }
                \} {
                    my next
                    incr level -1
                    if {$level < 0} {
                        set result [string range [my consume] 1 end-1]        ;# strip braces
                        return [list brace $result]
                    }
                }
                \\ {
                    if {[my lookahead 2] eq "\\\n"} {
                        my skip        ;# skip the backslash
                        my bsnl        ;# do \\n subst
                    } elseif {[my lookahead 2] eq "\\\\"} {
                        my next
                        my next
                    } elseif {[my lookahead 2] eq "\\\{"} {
                        my next
                        my next
                    } elseif {[my lookahead 2] eq "\\\}"} {
                        my next
                        my next
                    } else {
                        my next
                    }
                }
                default {
                    my next
                }
            }
        }
        error "unmatched brace '[my consume]'"
    }

    method skipws {} {
        while {[string is space -strict [string index $rest 0]]} {
            my skip
        }
    }

    method eocommand {eoc} {
        return [expr {[my lookahead] in $eoc}]
    }

    # skipcws - skip whitespace terminating on end of command
    method skipcws {eoc} {
        while {[string is space -strict [string index $rest 0]]} {
            if {[my eocommand $eoc]} {
                return -code break "end of command"
            }
            my skip
        }
    }

    # skipclass - skip and return a contiguous prefix matching a class
    method skipclass {cc {end end}} {
        string is $cc -strict -failindex fail [string range $rest 0 $end]
        if {[info exists fail]} {
            set result [string range $rest 0 $fail-1]
            my skip $fail
        } else {
            # the entire $rest matches class
            set result [string range $rest 0 end]
            my skip [string length $result]
        }
        return $result
    }

    # char - gather a character with backslash substitution
    method char {} {
        set escaped 0
        if {[my lookahead] eq "\\"} {
            my skip
            set escaped 1
            switch -exact -- [my lookahead] {
                a - b - f - n - r - t - v {
                    my skip; my inject [subst \\[my lookahead]]
                }
                \n {
                    my skip; my bsnl; set next " "
                }
                \\ {
                    my skip; my inject [set next \\]
                    }
                x {
                    my skip;        ;# skip the literal "x"
                    set hex [my skipclass xdigit]
                    set hex [string index $hex end-1][string index $hex end]
                    my inject [set next [binary decode hex $hex]]
                }
                u {
                    my skip
                    string is xdigit -strict -failindex hexl $rest
                    if {![info exists hexl] || $hexl > 4} {
                        set hexl 4
                    }
                    set hex [my skip $hexl]
                    my inject [set next [subst \\$hex]]
                    unset hexl
                }
                default {
                    my next
                }
            }
        } else {
            my next
        }
        return $next
    }

    method bracket {} {
        set commands {}
        set eoc {\n ; "" \]}
        while {!$done && ![my eocommand $eoc]} {
            set command [my command $eoc]
            if {[llength $command]} {
                lappend commands $command
            }
            if {[my skip] eq "\]"} {
                break
            }
        }

        if {[llength $commands] == 1} {
            return [lindex $commands 0]
        } else {
            return [list script $commands]
        }
    }
    
    method var {} {
        # ${name} form
        if {[my lookahead] eq "\{"} {
            while {!$done && [my char] != "\}"} {}
            set var [string range [my consume] 0 end-1]
            return [list var $var]
        }

        # $name or $name(index) form
        set var ""
        while {!$done &&
               [string is alnum -strict [set ch [my char]]]
               || $ch eq ":"
               || $ch eq "_"} {
            append var $ch
        }

        if {$ch ne "("} {
            my pushback $ch
            my consume
            return [list var $var]
        }

        set index ""
        while (!$done) {
            if {[set ch [my char]] eq ")" && !$escaped} break
            append index $ch
        }

        my consume
        return [list var $var $index]
    }

    method quote {} {
        if {[my next] ne "\""} {
            error "parsing quote without quote"
        }
        my consume

        while {!$done} {
            # accumulate a word character
            switch -exact -- [my char] {
                \" {
                    # got close quote
                    set accum [string range $accum 0 end-1]
                    if {$accum ne ""} {
                        lappend result [list literal $accum]
                        my consume
                    }
                    break
                }

                \[ {
                    if {$accum ne "\["} {
                        lappend result [list literal [string range [my consume] 0 end-1]]
                    } else {
                        my consume        ;# delete open bracket
                    }
                    lappend result [my bracket]
                }

                \$ {
                    if {$accum ne "\$"} {
                        lappend result [list literal [string range [my consume] 0 end-1]]
                    }
                    lappend result [my var]
                }

                default {
                }
            }
        }

        if {$done} {
            error "no close quote"
        }

        if {[llength $result] == 1} {
            return [lindex $result 0]
        } else {
            return [list compound {*}$result]
        }
    }

    method word {eoc} {
        set result {}
        while {!$done
               && [set la [my lookahead]] ni $eoc
               && ![string is space -strict $la]
           } {
            # accumulate a word character
            switch -exact -- [set ch [my char]] {
                \[ {
                    if {$accum ne "\["} {
                        lappend result [list literal [string range [my consume] 0 end-1]]
                    } else {
                        my consume        ;# delete open bracket
                    }
                    lappend result [my bracket]
                }
                \$ {
                    if {$accum ne "\$"} {
                        lappend result [list literal [string range [my consume] 0 end-1]]
                    }
                    lappend result [my var]
                }
                default {
                }
            }
        }

        if {$accum ne ""} {
            lappend result [list literal [my consume]]
        }

        if {[llength $result] == 1} {
            return [lindex $result 0]
        } else {
            return [list compound {*}$result]
        }
    }

    method cword {eoc} {
        try {
            my skipcws $eoc        ;# skip whitespace and break on end of command
        } on break {e eo} {
            # end of command
            return -code break "end of command"
        }

        if {[my lookahead 3] eq "{*}"} {
            tailcall my goatse $eoc
        }

        switch -exact -- [my lookahead] {
            \{ {
                tailcall my brace
            }

            \" {
                tailcall my quote
            }

            default {
                tailcall my word $eoc
            }
        }
    }

    method command {{eoc {\n ; ""}}} {
        # skip leading comments
        my skipws        ;# skip plain whitespace
        while {[my lookahead] eq "#"} {
            my comment
            my skipws        ;# skip all whitespace
        }

        if {$done} {
            return {}
        }

        set result {}
        while {!$done && ![my eocommand $eoc]} {
            lappend result [my cword $eoc]
        }

        if {[llength $result]} {
            return [list command {*}$result]
        } else {
            return {}
        }
    }
    

    method parse {script} {
        set rest $script
        while {!$done} {
            lappend commands [my command]
            my skip        ;# consume the end of command character
        }
        return [list script {*}$commands]
    }

    constructor {args} {
        set done 0
        set endc ""
        set escaped 0
    }
}

set tclp [Parser new]
puts [$tclp parse [read stdin]]

exit

#set test "quote[test]passed"
#set test word[test]passed
#set test test$test(that)those
set test [list {*}$moop]