Version 9 of xsource

Updated 2008-02-23 07:15:20 by sarnold

Sarnold on 2008-02-10: Here is an extended Tcl-syntax preprocessor. It allows the use of expressions anywhere in a Tcl source file.

Usage:

source xsource.tcl
namespace import xsource::*
xsource ?-force? filename

The xsource command creates a new file named 'origname__x.tcl'.

Put the following in a file:

 set x 1
 set y $x+1

And it should translate it into:

 set x 1
 set y [expr {$x+1}]

2008-02-13: This version is to be considered quite stable, although it still has serious bugs. -- Sarnold

2008-02-14: Update: the source code can process itself without errors. -- Sarnold


#===================================================#
#  xsource : an extended syntax parser for Tcl		#
#													#
# Author: Stephane Arnold 2008						#
# License: BSD-style Tcl license					#
#===================================================#

namespace eval xsource {
	namespace export xsource parse
	
	
	proc xsource {args} {
		set syntax "syntax : xsource ?-force? file"
		set fname [lindex $args end]
		set oname ${fname}__x.tcl
		if {[llength $args]==2} {
			assert {[lindex $args 0] eq "-force"} $syntax
			transform $fname $oname
			return [source $oname]
		}
		assert {[llength $args] == 1} $syntax
		if {![file exists $oname] || [file mtime $fname] > [file mtime $oname]} {
			transform $fname $oname
		}
		source $oname
	}
	
	proc transform {in out} {
		assert {![file exists $out] || [file writable $out]} "file $out not writable"
		set r ""
		set fd [open $in]
		while {![eof $fd]} {append r [read $fd]}
		close $fd
		set fd [open $out w]
		puts $fd [parse [string map {\r\n \n} $r]]
		close $fd
	}
	
	proc assert {e {msg {assertion failed}}} {
		if {![uplevel 1 expr [list $e]]} {error $msg}
	}
	# returns yes if index pos of string text is escaped
	proc escaped {text pos} {
		if {$pos == 0} {return no}
		set new [expr {$pos -1}]
		while {[string index $text $new] eq "\\" && $new >0} {
			incr new -1
		}
		expr {($pos-$new)%2==0}
	}
	
	proc flush {vcmd vexpr} {
		upvar 1 $vcmd cmd $vexpr expr
		if {$cmd ne ""} {lappend expr txt $cmd}
		set cmd ""
	}
	
	proc normalize {expr cmd} {
		if {$cmd ne ""} {lappend expr txt $cmd}
		set expr [splitblanks $expr]
		norm2 $expr
	}
	
	proc norm2 {expr} {
		foreach {blanks expr} [shiftBlanks $expr] break
		# returns the string [expr {...}] where ... is the string result of the normalization
		if {[catch {
			foreach {e rest} [_normalize $expr] break
		}]} {return $blanks[toString $expr]}
		if {$e eq ""} {return $blanks[toValue $rest]}
		return "$blanks\[expr {$e}\][toValue $rest]"
	}
	
	proc shiftBlanks {expr} {
		set blanks ""
		set i 0
		foreach {type val} $expr {
			if {$type eq "txt" && [string trim $val] eq ""} {
				append blanks $val
				incr i 2
			} else {
				break
			}
		}
		list $blanks [lrange $expr $i end]
	}	
	
	proc toValue {expr} {
		if {![llength $expr]} {return ""}
		set r [getValue [lindex $expr 0] [lindex $expr 1]]
		return $r[norm2 [lrange $expr 2 end]]
	}
	
	proc toString {expr} {
		if {![llength $expr]} {return ""}
		set r [getValue [lindex $expr 0] [lindex $expr 1]]
		return $r[toValue [lrange $expr 2 end]]
	}
	
	proc splitblanks expr {
		set res [list]
		foreach {type val} $expr {
			if {$type eq "txt"} {
				if {[string trim $val] eq ""} {
					lappend res txt $val
				} elseif {![string equal [string trim $val] $val]} {
					set left [string length [string trimleft $val]]
					set right [string length [string trimright $val]]
					set in [string range $val end-[incr left -1] [incr right -1]]
					lappend res txt [string range $val 0 end-[incr left]]
					lappend res txt $in txt [string range $val [incr right] end]
				} else {
					lappend res txt $val
				}
			} else {lappend res $type $val}
		}
		set res
	}
	
	# TODO: parse math expressions
	proc _normalize {expr} {
		# the math parser
		if {![llength $expr]} {return [list "" ""]}
		foreach {type val} $expr {break}
		if {$type eq "txt"} {
			assert {[string is integer $val] || [string is double $val]}
		}
		foreach {bexpr rest} [getExpr $expr] break
		if {[llength $rest] >= [llength $expr]-2} {return [list "" $expr]}
		list $bexpr $rest
	}
	
	proc in {a l} {expr {[lsearch $l $a]>=0}}
	
	proc isValue {type val} {
		in $type {var txt param}
	}
	
	proc getValue {type val} {
		switch -- $type {
			var {
				return \$$val
			}
			txt - op {
				return $val
			}
			param {
				return \[$val\]
			}
			open {return "("}
			close {return ")"}
			default {error "not a value : $type"}
		}
	}
	
	proc getExpr {expr {index no} {unary no}} {
		set res ""
		set rest ""
		set bexpr [trim $expr]
		if {![llength $bexpr]} {return [list "" $expr]}
		foreach {type val} $bexpr break
		if {![isValue $type $val]} {
			# unary operator
			if {$type eq "open"} {
				foreach {a expr} [getExpr [lrange $bexpr 2 end] yes] break
				append res "($a"
			} elseif {$type eq "close"} {
				return [list ")" [lrange $bexpr 2 end]]
			} {
				assert {!$unary}
				assert {$type eq "op"}
				assert {[in $val {- ! ~}]}
				foreach {expr rest} [getExpr [lrange $bexpr 2 end] $index yes] break
				return [list $val$expr $rest]
			}
		}
		append res [getValue $type $val]
		set bexpr [trim [set expr [lrange $bexpr 2 end]]]
		switch -- $type {
			var - txt {
				foreach {type val} $bexpr break
				if {$type eq "open"} {
					foreach {a expr} [getExpr [lrange $bexpr 2 end] yes] break
					append res "($a"
				} elseif {$type eq "close" && $index} {
					return [list "$res)" [lrange $bexpr 2 end]]
				}
			}
		}
		if {[llength $bexpr]==0 || [lindex $bexpr 0] ne "op"} {
			return [list $res $expr]
		}
		if {[catch {
			foreach {a bexpr} [getOperator $bexpr $index] break
			append res $a
		} dummy]} {
			return [list $res $expr]
		}
		foreach {a expr} [getExpr $bexpr $index] break
		list $res$a $expr
	}
		
	proc getOperator {expr {inindex no}} {
		foreach {type val} [set expr [trim $expr]] break
		assert {$type eq "op"}
		assert {$val ne "," || $inindex} "comma not allowed here"
		set res $val
		set bexpr [trim [set expr [lrange $expr 2 end]]]
		if {![llength $bexpr]} {return [list $res $expr]}
		foreach {type val} $bexpr break
		if {$type eq "op"} {
			if {[in $val {- ! ~}]} {return [list "$res $val" [lrange $bexpr 2 end]]}
			assert [in $res$val {== << >> <= >= != && ||}]
			return [list $res$val [lrange $bexpr 2 end]]
		}
		list $res $expr
	}
	
	proc trim {expr} {
		if {[llength $expr]==0} {return $expr}
		foreach {type val} $expr {break}
		if {$type eq "txt" && [string trim $val] eq ""} {
			set expr [trim [lrange $expr 2 end]]
		}
		set expr
	}
	
	proc dputs {a} {puts $a}
		
	# a full Tcl-syntax parser
	proc parse {text {char ""} {up ""} {start no}} {
		if {$up ne ""} {upvar i $up} else {set i 0}
		# if we have an expression right at the start
		# when we parse "if {$i < 0}" it
		# must NOT be translated to "if {[expr {$i < 0}]}"
		# start is set to TRUE when we are at the beginning of a braced
		# expression before the line ending
		# init some context variables
		set beforecmd yes
		set result ""
		set expr [list]
		set cmd ""; # the current statement
		set inexpr no; # we are not inside a math expression
		set finish no; # a flag to jump to flushing text
		set return no; # a flag to jump to return
		# TODO...
		for {} {$i < [string length $text]} {incr i} {
			set c [string index $text $i]
			if {[escaped $text $i]} {
				if {$inexpr} {
					set finish yes
				}
			} else {
			switch -- $c {
				\" {
					set beforecmd no
					# inside a string
					set j $i
					incr i
					while {[string index $text $i] ne "\"" && ![escaped $text $i]} {incr i}
					if {$inexpr} {
						flush cmd expr
						lappend expr txt [string range $text $j $i]
						set finish yes
					} else {
						append cmd [string range $text $j $i]
					}
					set c ""
				}
				\{ {
					if {$inexpr} {
						append result [normalize $expr $cmd]
						set inexpr no
						set expr [list]
						set cmd ""
					}
					set beforecmd no
					incr i
					append cmd $c
					append cmd [parse $text \} i yes]
					if {[string index $text $i] eq "\}"} {
						append cmd \}
					}
					set c ""
				}
				\} {
					set return yes
				}
				\[ {
					set beforecmd no
					if {$inexpr} {
						flush cmd expr
						incr i
						lappend expr param [parse $text \] i $start]
						#incr i
						set c ""
					} elseif {!$start} {
						append result $cmd
						set inexpr yes
						set cmd ""
						incr i
						set expr [list param [parse $text \] i $start]]
						set c ""
					} else {
						incr i
						append cmd $c
						append cmd [parse $text \] i $start]\]
						set c ""
					}
				}
				\] {
					set return yes
				}
				"(" {
					set beforecmd no
					# math function call
					if {$inexpr} {
						flush cmd expr
						lappend expr open ""
					}
				}
				")" {
					if {$inexpr} {
						flush cmd expr
						lappend expr close ""
					}
				}
				\# {
					if {$beforecmd} {
						set j $i
						incr i
						set c [string index $text $i]
						while {($c ne "\r" && $c ne "\n")|| [escaped $text $i]} {
							incr i
							set c [string index $text $i]
						}
						if {$inexpr} {
							flush cmd expr
							lappend expr txt [string range $text $j $i]
						} else {
							append cmd [string range $text $j $i]
						}
						set c ""
					}
				}
				\r - \n - ; {
					set finish yes
					set beforecmd yes
					set start no
				}
				\$ {
					if {!$start} {
						incr i
						set varname [string range $text $i [expr {$i+100}]]
						# it must be a very long variable name to fool me...
						if {[regexp {^\{[a-zA-Z0-9:_]+\}} $varname name] || [regexp {^[a-zA-Z0-9:_]+} $varname name]} {
							if {$inexpr} {
								flush cmd expr
							} else {
								append result $cmd
								set inexpr yes
								set expr [list]
								set cmd ""
							}
							lappend expr var $name
							incr i [string length $name]
							incr i -1
						}
					}
				}
				+ - - - / - \* - % - & - | - ^ - < - > - = - ! - , - ~ {
					# an arithmetic operator or somewhat else ressembling
					if {$inexpr} {
						flush cmd expr
						lappend expr op $c
					} elseif {$c eq "-" && !$start && [string index $text [expr {$i+1}]] ne " "} {
						append result $cmd
						set inexpr yes
						set cmd ""
						set expr {op -}
					}
				}
				default {
					if {$inexpr} {
						if {![string match {[0-9a-zA-Z_. \t]} $c]} {
							set finish yes
						}
						if {[string trim $c] eq ""} {flush cmd expr}
						append cmd $c
					} elseif {[string match {[0-9]} $c] && !$start} {
						# begin expression
						append result $cmd
						set inexpr yes
						set cmd $c
					}
				}
			}
			}
			if {$return && [string equal $c $char]} {
				if {$inexpr} {return $result[normalize $expr $cmd]}
				return $result$cmd
			} else {
				set return no
			}
			if {$inexpr && $finish} {
				append result [normalize $expr $cmd]
				set inexpr no
				set expr [list]
				set cmd $c
			} elseif {!$inexpr} {
				append cmd $c
			}
			set finish no
		}
		if {$inexpr} {set cmd [normalize $expr $cmd]}
		return $result$cmd
	}
}