Version 20 of Byticle

Updated 2007-12-23 22:47:11 by sarnold

Byticle is a codename for a fictive programming language. The goal is to build a language in which there is *quite* no keyword, with a generalized infix notation.


Larry Smith Like ee? [L1 ]

Lars H: Or infix?


The relation with Tcl is the minimalism of the approach, the fact that tokens have spaces between them. It was inspired by Scheme because it will be functional, enforcing recursivity, but with infix notation instead of prefix.


2007-12-03 This project is being revitalized. -- Sarnold


The source

package provide byticle 0.3
namespace eval byticle {
	namespace export execname check get assert isnil
	variable binary
	variable unary
	variable functions
	variable params 0
	
	array unset binary *
	array unset unary *
	array unset functions *
	
	proc assert {expr {msg "assertion failed"}} {
		if {![uplevel 1 expr $expr]} {error $msg}
	}
	
	proc isnil {x} {string equal $x {nil nil}}
	
	# registers 'proc' as the implementation of 'name' with params types as 'params'
	proc register {name params proc} {
		assert {[llength $params]<3}
		register_[llength $params] $name $params $proc
	}
	
	proc register_0 {name params proc} {
		variable functions
		set functions($name) $proc
	}
	
	proc register_1 {name params proc} {
		variable unary
		set unary($name,$params) $proc
	}
	
	proc register_2 {name params proc} {
		variable binary
		set binary($name,[join $params ,]) $proc
	}
	
	proc check {type value} {
		if {$type eq "*"} {return $value}
		assert {[string equal [lindex $value 0] $type]} "type expected to be $type"
		lindex $value 1
	}
	
	proc max {a b} {
		expr {$a>$b ? $a : $b}
	}
	
	proc updateParam {p} {
		variable params
		switch -- $p {
			X - `X {
				set params [max $params 1]
			}
			Y - `Y {
				set params [max $params 2]
			}
		}
	}
	
	proc paramtype x {
		assert {[regexp {^\w+$} $x]} "invalid syntax in type name $x"
		if {[in2 $x {expr pexpr eval open close name param func}]} {
			error "$x is a reserved type name"
		}
		assert {![string is integer [string index $x 0]]} "type names cannot start with a digit"
	}
	
	proc parseparams {body} {
		assert {[regexp {\s*\(\s*(\w+\s+)?(\w+)?\s*\)} $body t a b]} "missing function parameters"
		regexp {\w+} $a a
		if {$a eq ""} {set params $b} else {set params [list $a $b]}
		foreach x $params {paramtype $x}
		list $params [string range $body [string length $t] end]
	}
	
	proc parse {body} {
		set result [list]
		while {$body ne ""} {
			foreach {id token sbody} [lex $body] {break}
			if {$id eq "def"} {
				foreach {id name sbody} [lex $sbody] {break}
				assert {[in2 $id name]}
				foreach {params body} [parseparams $sbody] {break}
				foreach {statement body} [_parse $body yes] break
				lappend result definition [list $name $params $statement]
			} else {
				foreach {statement body} [_parse $body] break
				lappend result statement $statement
			}
		}
		return $result
	}
	
	proc in2 {elt list} {
		expr {[lsearch -exact $list $elt]>=0}
	}
	
	proc params {params token} {
		array set t2p {X X Y Y `X X `Y Y}
		set param $t2p($token)
		if {[in2 $param $params]} {
			lappend params $param
		}
		return $params
	}
	
	proc isValue {id} {
		in2 $id {nil boolean param int real string func}
	}
	
	proc isFunc {id} {
		in2 $id {name fparam}
	}
	
	proc priority {token} {
		# lowest priority
		if [in2 $token {then else}] {return 1}
		if [in2 $token {and or}] {return 2}
		if [in2 $token {> < <= >= = !=}] {return 3}
		if [in2 $token {& | ^}] {return 4}
		if [in2 $token {<< >>}] {return 5}
		if [in2 $token {+ -}] {return 6}
		if [in2 $token {* / %}] {return 7}
		# highest priority (power)
		if {$token eq "**"} {return 8}
		#default
		return 4
	}

	proc _parseOpen {bodyvar} {
		upvar 1 $bodyvar body
		set result ""
		while {$body ne ""} {
			foreach {id token body} [lex $body] {break}
			if {$id eq "end"} break
			if {[isValue $id]} {
				lappend result expr [list $id $token] [_parseNextExpr body yes]
				rework-priority result
			} elseif {[isFunc $id]} {
				lappend result $id $token [_parseExpr body yes]
			} else {
				switch $id {
					eval {
						lappend result eval [_parseOpen body] [_parseExpr body yes]
						rework-priority result
					}
					open {
						lappend result pexpr [_parseOpen body] [_parseNextExpr body yes]
					}
					close {
						return $result
					}
					default {
						if {$id eq "eos"} {error "unbalanced open parenthesis"}
						error "unknown id : $id"
					}
				}
			}
		}
		error "unbalanced open parenthesis"
	}
	
	proc _parseNextExpr {bodyvar {close no}} {
		upvar 1 $bodyvar body
		set result ""
		while {$body ne ""} {
			foreach {id token body} [lex $body] {break}
			if {$id eq "end"} break
			if {[isValue $id] || $id eq "open"} {
				error "operator expected at: $token"
			} elseif {[isFunc $id]} {
				lappend result $id $token [_parseExpr body $close]
			} else {
				switch $id {
					eval {
						lappend result eval [_parseOpen body] [_parseExpr body $close]
					}
					close {
						treatclose body
						if {$close} {return $result}
						error "unmatched close parenthesis"
					}
					eos {
						if {$close} {error "unbalanced open parenthesis"}
						treateos body
						return $result
					}
					default {
						error "unknown id : $id"
					}
				}
			}
		}
		if {$close} {error "unbalanced open parenthesis"}
		return $result
	}
	
	proc _parseExpr {bodyvar {close no}} {
		upvar 1 $bodyvar body
		set result ""
		while {$body ne ""} {
			foreach {id token body} [lex $body] {break}
			if {$id eq "end"} break
			if {[isValue $id]} {
				lappend result expr [list $id $token] [_parseNextExpr body $close]
				rework-priority result
			} elseif {[isFunc $id]} {
				lappend result $id $token [_parseExpr body $close]
			} else {
				switch $id {
					eval {
						lappend result eval [_parseOpen body] [_parseExpr body $close]
						rework-priority result
					}
					open {
						lappend result pexpr [_parseOpen body] [_parseNextExpr body $close]
					}
					close {
						treatclose body
						if {$close} {return $result}
						error "unmatched close parenthesis"
					}
					eos {
						if {$close} {error "unbalanced open parenthesis"}
						treateos body
						return $result
					}
					default {
						error "unknown id : $id"
					}
				}
			}
		}
		if {$close} {error "unbalanced open parenthesis"}
		return $result
	}
	
	proc prio {tree} {
		# default priority
		set default 4
		switch -- [lindex $tree 0] {
			expr {
				switch -- [lindex $tree 2 0] {
					name {return [priority [lindex $tree 2 1]]}
					lambda {return $default}
					default {return -1}
				}
			}
			default {return -1}
		}
	}
	
	proc rework-priority {treevar} {
		upvar 1 $treevar tree
		set prio [prio $tree]
		#puts "$prio $tree"
		if {$prio == -1} {return}
		set innerprio [prio [lindex $tree end end]]
		# priority not applicable
		if {$innerprio == -1} {return}
		if {$innerprio > $prio} {
			lset tree end end [list pexpr [lindex $tree end end] {}]
			#puts "rework: $tree"
		}
	}
	
	proc treatclose {var} {
		upvar 1 $var body
		set body )$body
	}
	
	proc treateos {var} {
		upvar 1 $var body
		set body \;$body
	}
		
	proc _parse {body {define no}} {
		set result ""
		variable params
		set params 0
		while {$body ne ""} {
			foreach {id token body} [lex $body] {break}
			if {$id eq "end"} break
			if {[isValue $id]} {
				lappend result expr [list $id $token] [_parseNextExpr body]
				rework-priority result
			} elseif {[isFunc $id]} {
				lappend result $id $token [_parseExpr body]
			} else {
				switch $id {
					eval {
						lappend result eval [_parseOpen body] [_parseExpr body]
						rework-priority result
					}
					open {
						lappend result pexpr [_parseOpen body] [_parseNextExpr body]
					}
					eos {
						if {!$define && $params} {error "X and Y are not allowed outside definitions"} 
						return [list $result $body]
					}
					default {
						if {$id eq "close"} {error "unmatched close parenthesis"}
						error "unknown id : $id"
					}
				}
			}
		}
		if {!$define && $params} {error "X and Y are not allowed outside definitions"}
		list $result $body
	}
	
	proc _next {statement} {
		if {$statement eq ""} {return ""}
		foreach {type first} $statement break
		switch -- $type {
			name {
				return [_exec $statement]
			}
			expr {
				return $first
			}
			pexpr {
				return [_exec [lindex $statement 1]]
			}
			default {
				return $statement
			}
		}
	}
			
	proc _nextstatement {statement} {
		if {$statement eq ""} {return ""}
		foreach {type first} $statement break
		switch -- $type {
			name {
				return ""
			}
			expr - pexpr {
				return [lindex $statement 2]
			}
			default {
				return {nil nil}
			}
		}
	}
	
	proc _getfunc {name argc val vnext vlazy} {
		upvar $val value
		upvar $vnext next
		upvar $vlazy lazy
		getfunc_$argc $name value next lazy
	}
	
	proc getfunc_0 {name value next lazy} {
		variable functions
		assert {[info exists functions($name)]} "no such function: $name"
		set functions($name)
	}
	
	proc ary {var key} {
		upvar $var array
		llength [array names array $key]
	}
	
	proc getfunc_1 {name val vnext lazy} {
		variable unary
		assert {[ary unary [unglob $name],*]} "no such unary operator: $name"
		upvar $vnext next
		set next [_exec $next]
		if {[info exists unary($name,[lindex $next 0])]} {
			foreach {type next} $next break
			return $unary($name,$type)
		}
		assert {[info exists unary($name,T)]} "no matching unary operator: $name ([lindex $next 0])"
		return $unary($name,T)
	}
	
	proc getfunc_2 {name val vnext vlazy} {
		variable binary
		upvar $vlazy lazy
		set lazy true
		set msg "no such binary operator: $name"
		assert {[ary binary [unglob $name],*]} $msg
		upvar $val value $vnext next
		foreach {ltype value} $value break
		if {[ary binary [unglob $name,$ltype],*]} {
			if {[ary binary [unglob $name,$ltype],*]==1 && [info exists binary($name,$ltype,T)]} {
				# lazy evaluation of right-side expressions
				return $binary($name,$ltype,T)
			}
			foreach {rtype next} [_exec $next] break
			set lazy false
			if {[info exists binary($name,$ltype,$rtype)]} {
				return $binary($name,$ltype,$rtype)
			}
			if {[info exists binary($name,$ltype,T)]} {
				return $binary($name,$ltype,T)
			}
		}
		if {$lazy} {
			set value [list $ltype $value]
			if {[ary binary [unglob $name,T],*]==1 && [info exists binary($name,T,T)]} {
				# allows for lazy evaluation
				return $binary($name,T,T)
			}
			foreach {rtype next} [_exec $next] break
			set lazy true
		}
		if {[info exists binary($name,T,$rtype)]} {
			return $binary($name,T,$rtype)
		}
		assert {[info exists binary($name,T,T)]} $msg
		set next [list $rtype $next]
		return $binary($name,T,T)
	}
	
	proc execname {name args} {
		switch [llength $args] {
			0 {
				set s [list name $name {}]
			}
			1 {
				set s [list name $name [lindex $args 0]]
			}
			2 {
				set s [list expr [lindex $args 0] [list name $name [lindex $args 1]]]
			}
			default {error "incorrect argument number"}
		}
		return [_exec $s]
	}
	
	proc _exec {statement} {
		variable internals
		variable userdefined
		set value ""
		variable context
		while {[llength $statement]} {
			foreach {type first} $statement break
			switch -- $type {
				name {
					#puts "$first $statement"
					set next [_next [lindex $statement 2]]
					#puts $next,$value
					set argc [expr {($value eq "")? (([llength $next])?1:0):2}]
					set lazy true
					foreach {func fname} [_getfunc $first $argc value next lazy] break
					switch -- $func {
						func {
							#puts "$fname $argc $value $next"
							switch $argc {
								0 {set value [$fname]}
								1 {set value [$fname $next]}
								2 {set value [$fname $value $next]}
							}
							set statement [_nextstatement [lindex $statement 2]]
						}
						proc {
							switch $argc {
								0 {
									lappend context "" ""
								}
								1 {
									lappend context $next ""
								}
								2 {
									lappend context $value $next
								}
							}
							# saves the context
							lappend context $lazy [_nextstatement [lindex $statement 2]]
							# executes the proc's body
							puts $fname
							set value [_exec $fname]
							puts $context
							set statement [lindex $context end]
							# restores the context
							set context [lrange $context 0 end-4]
						}
					}
				}
				expr {
					set value [_exec $first]
					set statement [lindex $statement 2]
				}
				pexpr {
					set value [_exec $first]
					set statement [lindex $statement 2]
				}
				param {
					switch -- $first {
						X {
							return [lindex $context end-3]
						}
						Y {
							if {[lindex $context end-1]} {
								# lazy evaluation
								return [_exec [lindex $context end-2]]
							}
							return [lindex $context end-2]
						}
					}
				}
				func {
					return $statement
				}
				default {
					return $statement
				}
			}
		}
		return $value
	}
				
			
	proc execute {body} {
		variable builtins
		variable userdefined
		set value ""
		foreach {type statement} [parse $body] {
			switch -- $type {
				statement {
					set value [_exec $statement]
				}
				definition {
					foreach {name params definition} $statement break
					register $name $params [list proc $definition]
				}
				default {
					error "unknown type $type"
				}
			}
		}
		set value
	}
	
	proc unglob {x} {
		string map {* \\* ? \\? [ \\[ ] \\]} $x
	}
	
	proc get {type object} {
		check $type [_exec $object]
	}
	
	# the lexer
	proc lex {body} {
		set keywords {def define lambda lambda nil nil 
			boolean true boolean false 
			open ( close ) eos ; eval `(}
		foreach var {X Y A B} {lappend keywords param $var fparam `$var}
		set patterns {
			real {[+\-]?[0-9]+\.[0-9]+([eE][-+]?[0-9]+)?}
			int {[+\-]?[0-9]+}
			string {"([^"]*\\")*[^"]*"}
			name {[A-Za-z0-9+\-\*/%~\._!<>=@\|]+}
			func {`[A-Za-z0-9+\-\*/%~\._!<>=@\|]+}
		}
		set body [string trimleft $body " \t\n\r"]
		while {[string index $body 0] eq "#"} {
			set body [regsub {#.*$} $body ""]
			set body [string trimleft $body " \t\n\r"]
		}
		foreach {id k} $keywords {
			if {[string first $k $body]==0} {
				updateParam $k
				return [list $id $k [string range $body [string length $k] end]]
			}
		}
		foreach {id pat} $patterns {
			set patb "^${pat}\[ \\t\]+"
			if {[regexp $patb $body] || [regexp "^${pat}\\)" $body] || [regexp "^${pat};?" $body]} {
				regexp "^$pat" $body token
				set len [string length $token]
				if {$id eq "string"} {
					set token [string range $token 1 end-1]
				}
				return [list $id $token [string range $body $len end]]
			}
		}
		if {[regexp {^\s*$} $body]} {
			return [list end "" ""]
		}
		error "syntax error : $body"
	}
}

namespace eval byticle::funcs {
	namespace import ::byticle::*
	proc tonumber {x} {
		if {[string is integer $x]} {return [list int $x]}
		list real $x
	}
	proc bool x {list bool [expr {$x ? "true" : "false"}]}
	proc + {a b} {
		tonumber [expr {$a + $b}]
	}
	proc - {a b} {
		tonumber [expr {$a - $b}]
	}
	proc unary- {a} {
		tonumber [expr {-$a}]
	}
	proc * {a b} {
		tonumber [expr {$a * $b}]
	}
	proc / {a b} {
		tonumber [expr {$a / $b}]
	}
	proc % {a b} {
		list int [expr {$a % $b}]
	}
	proc fmod {a b} {
		list real [expr {fmod($a, $b)}]
	}
	proc puts_cmd {a} {
		puts -nonewline $a
		list string $a
	}
	proc newline {} {
		puts ""
		list nil nil
	}
	proc car {a} {
		lindex $a 0
	}
	proc cdr {a} {
		lindex $a 1
	}
	proc cons {a b} {
		list list [list $a [get * $b]]
	}
	proc cons1 a {list list [list $a {nil nil}]}
	proc snoc {a b} {
		list list [list [get * $b] $a]
	}
	proc snoc1 a {list list [list {nil nil} $a]}
	proc real x {list real $x}
	proc int x {list int $x}
	proc tostr x {list string $x}
	proc i2r {x} {
		list int [expr {int($x)}]
	}
	proc I {x} {set x}
	proc K {x y} {
		get * $y
		set x
	}
	proc L {x y} {
		set y [get func $y]
		execname [string range $y 1 end] $x
		return $x
	}
	foreach t {< <= > >= = ~=} op {< <= > >= == !=} {
		proc $t {x y} [string map [list %OP $op] {bool [expr {$x %OP $y}]}]
		proc str$t {x y} [string map [list %OP $op] {bool [expr {[string compare $x $y] %OP 0}]}]
	}
	
	proc register {name params proc} {
		byticle::register $name $params [list func ::byticle::funcs::$proc]
	}
}
# The Kombinator
byticle::funcs::register K {T T} K
byticle::funcs::register L {T T} L
# The Identity operator
byticle::funcs::register I T I
# Number conversions
byticle::funcs::register real real real
byticle::funcs::register real string real
byticle::funcs::register real int  real
# Integer
byticle::funcs::register int int int
byticle::funcs::register int string int
byticle::funcs::register int real i2r
# Number to string
byticle::funcs::register string int tostr
byticle::funcs::register string real tostr

foreach proc {+ - * / > < <= >= = ~=} {
	foreach x {int real} {
		byticle::funcs::register $proc [list $x $x] $proc
	}
}
foreach proc {< <= > >= = ~=} {byticle::funcs::register $proc {string string} str$proc}
byticle::funcs::register - int unary-
byticle::funcs::register - real unary-
byticle::funcs::register % {int int} %
byticle::funcs::register % {real real} fmod

# list functions
byticle::funcs::register car {list} car
byticle::funcs::register cdr {list} cdr
byticle::funcs::register cons {T T} cons
byticle::funcs::register cons T cons1
byticle::funcs::register ~ {T T} snoc
byticle::funcs::register ~ T snoc1

byticle::funcs::register puts {string} puts_cmd
byticle::funcs::register newline {} newline

proc e x {byticle::execute $x}

Example

Byticle has cons, car and cdr, right from LISP. The syntax is simple: operations are evaluated left-to-right, except when parentheses or predefined priorities (like * over +) apply.

When an operator follows another operator, or the beginning of an expression, it is treated as unary. cons, +, * and / are binary operators. car and cdr are exclusively unary, while for -, it is unary or binary, depending of the context. You may put parentheses when necessary for the comprehension, but predefined priorities should allow code to behave like it looks (at least, I hope so).

I also added to the language two well-known lambda-calculus combinators: K and I.

e {1 + 2 * 3}
e {(1 + 2) * 3}
e {car cdr (1 cons (2 cons 3))}
e {puts "Hello, world!"; newline}
e {puts "Hello, " K puts "world!"}

2007-12-23 - Now user functions can be created, with the following syntax:

 'define' name (typedefs) body;

typedefs contains zero to two type names. The special type name 'T' denotes generic type-matching. In body, 'X' and 'Y' can be used to define resp. left and right operands. The right operand can be evaluated lazily in some circunstances [TODO: explain it].

e {define add (T T) X + Y;}
e {1 add 2}