Version 2 of SiCL - A Simple Command Language

Updated 2014-12-12 11:26:23 by nem

NEM 2014-12-12: SiCL is the "Simple" Command Language (or Silly depending on your point of view). It is a highly experimental pre-processor that translates a language that is a bit like Tcl with a more conventional syntax into equivalent Tcl scripts.

A SiCL script is a sequence of commands separated by newlines or semicolons (as in Tcl). A command is a sequence of words (not necessarily separated by whitespace). A word is one of:

  • A string: foo, 12, 3.145, "foo bar", {foo bar}
  • A variable reference: $foo, ${foo} (Note: no arrays)
  • A sub-command: [foo bar\]
  • An expression enclosed in parentheses: (foo)
  • A block of SiCL commands starting with a colon and ending either with a newline, or (if a newline immediately follows the colon) a sequence of three or more dashes (---).

A SiCL expression is one of:

  • A string (as above) - this evaluates to itself
  • A mathematical expression - compiles to [expr {...}\]
  • A list of expressions separated by commas: e.g., (x, y, sin(0.5)*2) --> [list x y [expr {sin(0.5)*2}\]\]
  • A dictionary of expressions separated by commas, where key/value pairs are separated by colons: (x: 1, y: 2) --> [dict create x 1 y 2\]

Examples:

# Avoid problems with double-substitution:
proc when(cond, then, (_else: "else"), (else: "")): if {$cond} $then else $else
for {set x 0} {$x < 10} {incr x}:
    when ($x < 5): puts "Small"
    when ($x > 5):
        puts "Big"
    ---
    when ($x == 5):
        puts "Just right"
    ---
---

Here is the code. This is very experimental. It may break your programs and/or your mind.

# sicl.tcl --
# vim: ft=tcl tw=120 sw=4 ts=8 expandtab
#
#       Implements SiCL the "Simple" Command Language. This is basically just
#       a minor repackaging of Tcl with slightly more syntax. I find it
#       pleasing, you may well hate it.
#
# Copyright 2014 Neil Madden.
# License: Tcl-style
#

package require Tcl                 8.6
package require grammar::aycock     1.0

package provide sicl                0.1

namespace eval ::sicl {
    namespace export parse lex eval console
    namespace ensemble create

    namespace import ::grammar::aycock::parser

    proc debug args { }
    #proc debug msg { puts stdout [uplevel 1 [list subst $msg]] }

    variable parser [parser [regsub -all -- {\#[^\n]*\n} {

        # A program is a sequence of commands
        program     ::=     commands                    { _ 0 }
        # Commands are separated by new-lines or semi-colons 
        commands    ::=     command                     { set _ }
        commands    ::=     command NEWLINE commands    { linsert [_ 2] 0 [_ 0] }
        commands    ::=     command SEMICOLON commands  { linsert [_ 2] 0 [_ 0] }
        commands    ::=                                 {}
        # A command is either a comment (ignored) or a sequence of words
        command     ::=     COMMENT                     {}
        command     ::=     words                       { join [_ 0] }
        words       ::=     word                        { set _ }
        words       ::=     word words                  { linsert [_ 1] 0 [_ 0] }

        # A word is one of:
        #   A string, either in double quotes, braces or as a bare word
        #       delimited by whitespace (as in Tcl)
        #   A sub-command: [command] (as in Tcl)
        #   An expression: (...)
        #   A variable substitution: $name / ${name} (as in Tcl, minus arrays)
        #   A script, beginning with ':' and ending with the keyword (gasp)
        #   'end': : ... end
        word        ::=     STRING                      { _ 0 }
        word        ::=     LPAREN expr RPAREN          { _ 1 }
        word        ::=     LBRACK command RBRACK       { join $_ "" }
        word        ::=     VARIABLE                    { _ 0 } 
        word        ::=     COLON block                 { _ 1 }
        # Allow other symbols as literal strings here
        word        ::=     COMMA                       { _ 0 }

        # A block of commands starts with a colon and then either:
        #   - a new-line followed by a series of commands terminated by an END token (---+)
        #   - a single command up to the end of the current line
        block       ::=     NEWLINE commands END        { format {{%s}} [join [_ 1] "; "] }
        block       ::=     command                     { format {{%s}} [_ 0]  }

        expr        ::=     expression                  { _ 0 }
        expr        ::=     dict                        { format {[dict create %s]} [join [_ 0]] }
        expr        ::=     list                        { format {[list %s]} [join [_ 0]] }

        dict        ::=     dictEntries                 { _ 0 }
        list        ::=     listEntries                 { _ 0 }

        dictEntries ::=     dictEntry                   { _ 0 }
        dictEntries ::=     dictEntries COMMA dictEntry { linsert [_ 0] end [_ 2 0] [_ 2 1] }
        dictEntry   ::=     STRING COLON expression     { list [_ 0] [_ 2] }

        listEntries ::=     listEntry                   { set _ }
        listEntries ::=     listEntries COMMA listEntry { linsert [_ 0] end [_ 2] }
        listEntry   ::=     expression                  { _ 0 }

        expression  ::=     STRING                      { _ 0 }
        expression  ::=     factors                     { format {[expr {%s}]} [join [_ 0] ""] }

        factors     ::=     factor factors              { linsert [_ 1] 0 [_ 0] }
        factors     ::=                                 {}

        factor      ::=     STRING                      { _ 0 }
        factor      ::=     COLON                       { _ 0 }
        factor      ::=     STRING LPAREN stuff RPAREN  { join [list [_ 0] [_ 1] [join [_ 2] ""] [_ 3]] "" }
        factor      ::=     VARIABLE                    { _ 0 }
        factor      ::=     LBRACK stuff RBRACK         { join $_ ""}
        factor      ::=     LPAREN expr RPAREN          { _ 1 }

        stuff       ::=     thing stuff                 { linsert [_ 1] 0 [_ 0] }
        stuff       ::=                                 {}
        thing       ::=     STRING                      { _ 0 }
        thing       ::=     LPAREN expression RPAREN    { join $_ "" }
        thing       ::=     LBRACK stuff RBRACK         { join $_ "" }
        thing       ::=     COMMA                       { _ 0 }
        thing       ::=     COLON                       { _ 0 }
        thing       ::=     VARIABLE                    { _ 0 }
        thing       ::=     COMMENT                     { _ 0 }
        thing       ::=     NEWLINE                     { _ 0 }
        thing       ::=     SEMICOLON                   { _ 0 }

    } ""]]

    # Helper procedure for the grammar (dirty hack)
    proc ${parser}::_ args { upvar 1 _ _; lindex $_ {*}$args }


    variable actions [list]
    variable alts   [list]
    proc token {name re} {
        variable actions
        variable alts
        lappend alts $re
        lappend actions "^$re\$" [string map [list TOKEN $name] {
            debug {token -> TOKEN $token}
            lappend types  TOKEN
            lappend values $token
        }]
    }

    proc ignore re {
        variable actions
        variable alts

        lappend alts $re
        lappend actions "^$re\$" { debug {Ignoring $token}; continue }
    }

    ignore              {[ \t\r\f\v]+}
    token NEWLINE       {\n}
    token COMMENT       {\#[^\n]*\n?}
    token LPAREN        {\(}
    token RPAREN        {\)}
    token LBRACK        {\[}
    token RBRACK        {\]}
    token COMMA         {,}
    token COLON         {:}
    token SEMICOLON     {;}
    token END           {---+}
    token VARIABLE      {\$(?:[a-zA-Z0-9_:]+|\{(?:[^\}\\]|\\.)+\})}
    token STRING        {(?:"(?:[^"\\]|\\.)*"|\{(?:[^\}\\]|\\.)*\}|(?:[^\"\s();,\[\]:$]|:[^\w\s\"\{])+)}

    # Parses a SiCL script into an equivalent Tcl script
    proc parse script {
        variable parser
        lassign [lex $script] types values
        join [$parser parse $types $values] \n
    }

    # Tokenizes a script, returning two list: a list of tokens and a list of
    # values.
    proc lex script {
        variable alts
        variable actions
    
        debug {[join $alts "\n|  "]}

        regsub -all {\\\n} $script {} script
        set tokens [regexp -all -inline [join $alts "|"] $script]

        set types [list]
        set values [list]
        foreach token $tokens {
            debug {Matching token: |$token|}
            switch -regexp -- $token [linsert $actions end default { error "unknown token \"$token\"" }]
        }

        return [list $types $values]
    }

    proc eval script {
        set tcl [parse $script]
        debug {TCL: $tcl}
        uplevel #0 $tcl
    }

    proc console {{in stdin} {out stdout} {prompt "<- "}} {
        while {true} {
            puts -nonewline $out $prompt
            flush $out
            set script ""
            while {[gets $in line] > 0} {
                append script $line\n
            }

            if {[string trim $script] ne ""} {
                try {
                    eval $script
                } on ok result {
                    if {$result ne ""} { puts $out $result }
                } on error msg {
                    puts $out "ERROR: $msg"
                }
            }

            if {[eof $in]} { break }
        }
    }
}

if {[info exists ::argv0] && $::argv0 eq [info script]} {
    sicl console
}