Version 1 of SiCL - A Simple Command Language

Updated 2014-12-12 11:15:42 by nem

NEM 2014-12-12: (Page in progress - dodgy wifi so saving regularly...)

# sicl.tcl --
# vim: ft=tcl tw=120 sw=4 ts=8 expandtab
#
#       Implements SiCL the "Simple" Command Language. This is basically just
#       a minor repackaging of Tcl with slightly more syntax. I find it
#       pleasing, you may well hate it.
#
# Copyright 2014 Neil Madden.
# License: Tcl-style
#

package require Tcl                 8.6
package require grammar::aycock     1.0

package provide sicl                0.1

namespace eval ::sicl {
    namespace export parse lex eval console
    namespace ensemble create

    namespace import ::grammar::aycock::parser

    proc debug args { }
    #proc debug msg { puts stdout [uplevel 1 [list subst $msg]] }

    variable parser [parser [regsub -all -- {\#[^\n]*\n} {

        # A program is a sequence of commands
        program     ::=     commands                    { _ 0 }
        # Commands are separated by new-lines or semi-colons 
        commands    ::=     command                     { set _ }
        commands    ::=     command NEWLINE commands    { linsert [_ 2] 0 [_ 0] }
        commands    ::=     command SEMICOLON commands  { linsert [_ 2] 0 [_ 0] }
        commands    ::=                                 {}
        # A command is either a comment (ignored) or a sequence of words
        command     ::=     COMMENT                     {}
        command     ::=     words                       { join [_ 0] }
        words       ::=     word                        { set _ }
        words       ::=     word words                  { linsert [_ 1] 0 [_ 0] }

        # A word is one of:
        #   A string, either in double quotes, braces or as a bare word
        #       delimited by whitespace (as in Tcl)
        #   A sub-command: [command] (as in Tcl)
        #   An expression: (...)
        #   A variable substitution: $name / ${name} (as in Tcl, minus arrays)
        #   A script, beginning with ':' and ending with the keyword (gasp)
        #   'end': : ... end
        word        ::=     STRING                      { _ 0 }
        word        ::=     LPAREN expr RPAREN          { _ 1 }
        word        ::=     LBRACK command RBRACK       { join $_ "" }
        word        ::=     VARIABLE                    { _ 0 } 
        word        ::=     COLON block                 { _ 1 }
        # Allow other symbols as literal strings here
        word        ::=     COMMA                       { _ 0 }

        # A block of commands starts with a colon and then either:
        #   - a new-line followed by a series of commands terminated by an END token (---+)
        #   - a single command up to the end of the current line
        block       ::=     NEWLINE commands END        { format {{%s}} [join [_ 1] "; "] }
        block       ::=     command                     { format {{%s}} [_ 0]  }

        expr        ::=     expression                  { _ 0 }
        expr        ::=     dict                        { format {[dict create %s]} [join [_ 0]] }
        expr        ::=     list                        { format {[list %s]} [join [_ 0]] }

        dict        ::=     dictEntries                 { _ 0 }
        list        ::=     listEntries                 { _ 0 }

        dictEntries ::=     dictEntry                   { _ 0 }
        dictEntries ::=     dictEntries COMMA dictEntry { linsert [_ 0] end [_ 2 0] [_ 2 1] }
        dictEntry   ::=     STRING COLON expression     { list [_ 0] [_ 2] }

        listEntries ::=     listEntry                   { set _ }
        listEntries ::=     listEntries COMMA listEntry { linsert [_ 0] end [_ 2] }
        listEntry   ::=     expression                  { _ 0 }

        expression  ::=     STRING                      { _ 0 }
        expression  ::=     factors                     { format {[expr {%s}]} [join [_ 0] ""] }

        factors     ::=     factor factors              { linsert [_ 1] 0 [_ 0] }
        factors     ::=                                 {}

        factor      ::=     STRING                      { _ 0 }
        factor      ::=     COLON                       { _ 0 }
        factor      ::=     STRING LPAREN stuff RPAREN  { join [list [_ 0] [_ 1] [join [_ 2] ""] [_ 3]] "" }
        factor      ::=     VARIABLE                    { _ 0 }
        factor      ::=     LBRACK stuff RBRACK         { join $_ ""}
        factor      ::=     LPAREN expr RPAREN          { _ 1 }

        stuff       ::=     thing stuff                 { linsert [_ 1] 0 [_ 0] }
        stuff       ::=                                 {}
        thing       ::=     STRING                      { _ 0 }
        thing       ::=     LPAREN expression RPAREN    { join $_ "" }
        thing       ::=     LBRACK stuff RBRACK         { join $_ "" }
        thing       ::=     COMMA                       { _ 0 }
        thing       ::=     COLON                       { _ 0 }
        thing       ::=     VARIABLE                    { _ 0 }
        thing       ::=     COMMENT                     { _ 0 }
        thing       ::=     NEWLINE                     { _ 0 }
        thing       ::=     SEMICOLON                   { _ 0 }

    } ""]]

    # Helper procedure for the grammar (dirty hack)
    proc ${parser}::_ args { upvar 1 _ _; lindex $_ {*}$args }


    variable actions [list]
    variable alts   [list]
    proc token {name re} {
        variable actions
        variable alts
        lappend alts $re
        lappend actions "^$re\$" [string map [list TOKEN $name] {
            debug {token -> TOKEN $token}
            lappend types  TOKEN
            lappend values $token
        }]
    }

    proc ignore re {
        variable actions
        variable alts

        lappend alts $re
        lappend actions "^$re\$" { debug {Ignoring $token}; continue }
    }

    ignore              {[ \t\r\f\v]+}
    token NEWLINE       {\n}
    token COMMENT       {\#[^\n]*\n?}
    token LPAREN        {\(}
    token RPAREN        {\)}
    token LBRACK        {\[}
    token RBRACK        {\]}
    token COMMA         {,}
    token COLON         {:}
    token SEMICOLON     {;}
    token END           {---+}
    token VARIABLE      {\$(?:[a-zA-Z0-9_:]+|\{(?:[^\}\\]|\\.)+\})}
    token STRING        {(?:"(?:[^"\\]|\\.)*"|\{(?:[^\}\\]|\\.)*\}|(?:[^\"\s();,\[\]:$]|:[^\w\s\"\{])+)}

    # Parses a SiCL script into an equivalent Tcl script
    proc parse script {
        variable parser
        lassign [lex $script] types values
        join [$parser parse $types $values] \n
    }

    # Tokenizes a script, returning two list: a list of tokens and a list of
    # values.
    proc lex script {
        variable alts
        variable actions
    
        debug {[join $alts "\n|  "]}

        regsub -all {\\\n} $script {} script
        set tokens [regexp -all -inline [join $alts "|"] $script]

        set types [list]
        set values [list]
        foreach token $tokens {
            debug {Matching token: |$token|}
            switch -regexp -- $token [linsert $actions end default { error "unknown token \"$token\"" }]
        }

        return [list $types $values]
    }

    proc eval script {
        set tcl [parse $script]
        debug {TCL: $tcl}
        uplevel #0 $tcl
    }

    proc console {{in stdin} {out stdout} {prompt "<- "}} {
        while {true} {
            puts -nonewline $out $prompt
            flush $out
            set script ""
            while {[gets $in line] > 0} {
                append script $line\n
            }

            if {[string trim $script] ne ""} {
                try {
                    eval $script
                } on ok result {
                    if {$result ne ""} { puts $out $result }
                } on error msg {
                    puts $out "ERROR: $msg"
                }
            }

            if {[eof $in]} { break }
        }
    }
}

if {[info exists ::argv0] && $::argv0 eq [info script]} {
    sicl console
}