Version 1 of ptparser

Updated 2004-03-23 19:48:16

SS 23Mar2004 - ptparser is a Tcl parser in pure Tcl. It comes from the Sugar macro system, but can be useful alone. The following is the code and a very simple example that shows how to use it.

 # ptparser - A Tcl parser in pure Tcl.
 # Copyright (C) 2004 Salvatore Sanfilippo
 # Under the same license as Tcl version 8.4

 # The parser. It does not discard info about space and comments.
 # The return value is the "type" of the token (EOF EOL SPACE TOKEN).
 #
 # It may be interesting to note that this is half of a simple
 # Tcl interpreter.
 #
 # The fact that it is still so simple, compared to what it can
 # be in Python just to say one (much worst in Perl), it's an advice
 # that to add syntax to Tcl is a bad idea.
 namespace eval ptparser {}

 # Initialize the state of the interpreter.
 # Currently this parser is mostly stateless, it only needs
 # to save the type of the last returned token to know
 # if something starting with '#' is a comment or not.
 proc ptparser::parserInitState statevar {
     upvar $statevar state
     set state [list EOL]
 }

 proc ptparser::parser {text tokenvar indexvar statevar {dosubst 0}} {
     upvar $tokenvar token $indexvar i $statevar state
     set token {}
     set inside {}
     set dontstop $dosubst
     while 1 {
         # skip spaces
         while {!$dontstop && [string match "\[ \t\]" [string index $text $i]]} {
             append token [string index $text $i]
             incr i
         }
         # skip comments
         if {$state eq {EOL} && !$dontstop && [string equal [string index $text $i] #]} {
             while {[string length [string index $text $i]] &&
                   ![string match [string index $text $i] \n]} \
             {
                 append token [string index $text $i]
                 incr i
             }
         }
         # return a SPACE token if needed
         if {[string length $token]} {return [set state SPACE]}
         # check for special conditions
         if {!$dontstop} {
             switch -exact -- [string index $text $i] {
                 {} {return [set state EOF]}
                 {;} -
                 "\n" {
                     append token [string index $text $i]
                     incr i
                     return [set state EOL]
                 }
             }
         }
         # main parser loop
         while 1 {
             switch -exact -- [string index $text $i] {
                 {} break
                 { } -
                 "\t" -
                 "\n" -
                 ";" {
                     if {!$dontstop} {
                         break;
                     }
                 }
                 \\ {
                     incr i
                     append token \\ [string index $text $i]
                     incr i
                     continue
                 }
                 \" {
                     if {[string equal $inside {}]} {
                         incr dontstop
                         set inside \"
                         append token \"
                         incr i
                         continue
                     } elseif {[string equal $inside \"]} {
                         incr dontstop -1
                         set inside {}
                         append token \"
                         incr i
                         continue
                     }
                 }
                 "\{" {
                     if {[string equal $inside {}]} {
                         incr dontstop
                         set inside "\{"
                         append token "\{"
                         incr i
                         continue
                     } elseif {[string equal $inside "\{"]} {
                         incr dontstop
                     }
                 }
                 "\}" {
                     if {[string equal $inside "\{"]} {
                         incr dontstop -1
                         if {$dontstop == 0} {
                             set inside {}
                             append token "\}"
                             incr i
                             continue
                         }
                     }
                 }
                 \$ {
                     if {![string equal $inside "\{"]} {
                         if {![string equal [string index $text [expr {$i+1}]] $]} {
                             set res [ptparser::substVar $text i]
                             append token "$$res"
                             continue
                         }
                     }
                 }
                 \[ {
                     if {![string equal $inside "\{"]} {
                         set res [ptparser::substCmd $text i]
                         append token "\[$res\]"
                         continue
                     }
                 }
             }
             append token [string index $text $i]
             incr i
         }
         return [set state TOK]
     }
 }

 # Actually does not really substitute commands. You can
 # perform a real substitution if you like.
 proc ptparser::substCmd {text indexvar} {
     upvar $indexvar i
     set go 1
     set cmd {}
     incr i
     while {$go} {
         switch -exact -- [string index $text $i] {
             {} break
             \[ {incr go}
             \] {incr go -1}
         }
         append cmd [string index $text $i]
         incr i
     }
     string range $cmd 0 end-1
 }

 # Get the control when a '$' (not followed by $) is encountered,
 # extract the name of the variable, and return it.
 proc ptparser::substVar {text indexvar} {
     upvar $indexvar i
     set dontstop 0
     set varname {}
     incr i
     while {1} {
         switch -exact -- [string index $text $i] {
             \[ -
             \] -
             "\t" -
             "\n" -
             "\"" -
             \; -
             \{ -
             \} -
             \$ -
             ( -
             ) -
             { } -
             "\\" -
             {} {
                 if {!$dontstop} {
                     break
                 }
             }
             ( {incr dontstop}
             ) {incr dontstop -1}
             default {
                 append varname [string index $text $i]
             }
         }
         incr i
     }
     return $varname
 }

 #################### TEST CODE #######################

 ptparser::parserInitState state
 set script [info body ptparser::parserInitState]
 set index 0
 while 1 {
     set type [ptparser::parser $script token index state]
     puts "$type \"$token\""
     if {$type eq {EOF}} break
 }

The output generated by the TEST CODE above can be useful to understand what's the parser output before to download/try it.

 EOL "
 "
 SPACE "    "
 TOK "upvar"
 SPACE " "
 TOK "$statevar"
 SPACE " "
 TOK "state"
 EOL "
 "
 SPACE "    "
 TOK "set"
 SPACE " "
 TOK "state"
 SPACE " "
 TOK "[list EOL]"
 EOL "
 "
 EOF ""