Coop2 - A Compiler and Language

George Peter Staplin Dec 6, 2007 - Years ago I created something called coop. It started out as a C precompiler that added some unique keywords for dealing with single and double linked lists. I've grown tired of maintaining and porting that compiler, and the lex and yacc code, so I created Coop2 (this time in Tcl). Oddly Coop2 doesn't yet have any commands for dealing with linked lists.

This Coop2 compiler has some features I wanted to play around with, and it generates C code.

Dec 11, 2007 after many revisions I'm liking this more... I updated this to the latest code. -GPS

 external function puts int
 external function printf int

 structure rectangle
 property rectangle int x y width height update

 define rectangle initsize {int:width ; int:height} {
         self width width
         self height height
         printf "initializing size to %d %d\n" width height
 }

 define rectangle move {int:x ; int:y} {
         self x x
         self y y        
         printf "moving to %d %d\n" x y
 }

 function main {int:argc ; char *[]:argv} {
         declare-object r rectangle
         set r [object rectangle]
         if {>? argc 2} {
                 puts "MAGIC"
                 while {>? argc 0} {
                         puts "COUNTDOWN"
                         set argc [- argc 1]
                 }        
         }

         r initsize 200 400
         r move 1 2

         r destroy

         return 0
 } int

 # Coop2 (a compiler for the Coop2 language)
 # By George Peter Staplin
 # Based on svn revision 486.

 # This assumes that all pointers are of equal size.
 # People using segmented architectures thus can't use this, but Tcl
 # doesn't run on such systems either AFAIK.

 array set ::structures {}
 array set ::functions {}
 array set ::methods {}
 array set ::externals {}
 array set ::corewords {}
 array set ::frame {}

 set ::max_type_size 8
 set ::local_count 0
 set ::method_count 0
 set ::current_structure ""

 set ::init_code {
 #include <stdio.h>
 #include <stdlib.h>

 static void *coopobject (size_t s) {
     void *p = malloc (s);
     if (NULL == p) {
         perror ("coopobject - malloc");
         abort ();
     }
     return p;
 }

 static void *coopdestroy (void *o) {
     /*TODO work with the _references slot to make this automatic. */
     free (o);
     return NULL;
 }
 }

 #This is used to create core words/commands.
 proc core {name argpat body} {
     global corewords
     set corewords($name) coop$name
     proc coop$name $argpat $body
 }

 #All core words return a 2 element list.  
 #The 2 element lists consists of an optional type name, and a value.
 #Words that don't care about the type should just return [list "" $value].
 foreach op [list + - * / %] {
     core $op {a b} [string map [list OP $op] {
         global local_count

         lassign $a atype a
         lassign $b btype b

         incr local_count
         set tmp cooptmp$local_count
         code "int $tmp = ($a OP $b);"
         return [list "" $tmp]
     }]
 }

 #I like a ? suffix for operators like these:
 foreach {coreop impop} [list >? > <? < =? = !=? !=] {
     core $coreop {a b} [string map [list IMPOP $impop] {
         global local_count
         lassign $a atype a
         lassign $b btype b
         incr local_count
         set tmp cooptmp$local_count
         code "int $tmp = ($a IMPOP $b);"
         return [list "" $tmp]
     }]
 }

 core break {} {
     code "break;"
     return [list "" ""]
 }

 if 0 {
     cond value {
         {>? value 2} { puts "value is > 2" }
         {<? value 1} { puts "value is less than 1" }
         {!=? value 6} { puts "it's not 6!" }
     }
 }

 core cond {value body} {
     #MORE THOUGHT REQUIRED...
     return [list "" ""]
 }

 core continue {} {
     code "continue;"
     return [list "" ""]
 }

 core declare {type args} {
     global frame

     set type [lindex $type 1]
     foreach v $args {
         code "$type [lindex $v 1];"
         set frame($v) $type
     }
     return [list "" ""]
 }

 core declare-object {obj s} {
     global frame

     lassign $obj objtype objname

     set frame($objname) [lindex $s 1]

     code "void *$objname = NULL;"

     return [list "" ""]
 }


 #This is used to define a method.
 core define {s name arglist body} {
     global structures max_type_size methods method_count current_structure
     array unset frame *

     lassign $s stype sname
     lassign $name nametype n

     if {![info exists structures($sname)]} {
         return -code error "structure to define this method for doesn't exist"
     }

     set current_structure $sname


     incr method_count
     set func coopmethod$method_count
     lappend structures($sname,methods) [list $n $func]
     set structures($sname,method,$n) $n
     set structures($sname,method,$n,arguments) [evaluate-parameters [lindex $arglist 1]]
     incr structures($sname,size) $max_type_size

     code "/* Implementation of $n */"
     code "static void * $func (void *self, $structures($sname,method,$n,arguments)) \{"

     lassign $body btype b

     if {"BLOCK" ne $btype} {
         return -code error "expected a block as the final argument to define -- $btype"
     }

     set i 0
     evaluate [parse $b i]

     code "return self;"
     code "\}"

     set current_structure ""

     return [list "" ""]
 }

 core external {type name returntype} {
     global externals

     lassign $name ntype n
     set externals($n) [lindex $type 1]
     set externals($n,returntype) [lindex $returntype 1]
     core $n args [subst -nocommands -nobackslashes {
         return [external-callback $n [set args]]
     }]
 }

 core function {name arglist body returntype} {
     global functions frame

     array unset frame *

     set functions($name) [lindex $name 1]
     set functions($name,args) [set args [lindex $arglist 1]]
     set functions($name,returntype) [lindex $returntype 1]

     # This builds up the frame.
     set functions($name,cargs) [evaluate-parameters $args]

     lassign $body type b
     if {"BLOCK" ne $type} {
         return -code error "function expected a block but received a $type"
     }

     code "$functions($name,returntype) $functions($name) ($functions($name,cargs)) \{"
     set i 0
     evaluate [parse $b i]
     code "\}"

     return [list "" ""]
 }                          

 core if {cond block} {
     lassign $cond condtype c
     lassign $block btype b

     if {"BLOCK" ne $condtype} {
         return -code error "if expects a block 1st argument -- $condtype"
     }

     if {"BLOCK" ne $btype} {
         return -code error "if expects a block 2nd argument -- $btype"
     }

     set i 0
     set tmp [evaluate [parse $c i]]

     code "if ([lindex $tmp 1]) \{"
     set i 0
     evaluate [parse $b i]
     code "\}"

     return [list "" ""]
 }

 core object s {
     global local_count structures max_type_size

     lassign $s stype sname

     if {![info exists structures($sname)]} {
         return -code error "invalid structure: $sname"
     }

     if {"" ne $stype} {
         return -code error "invalid structure name type: $stype"
     }

     incr local_count
     set tmp cooptmp$local_count
     code "void *$tmp = coopobject ($structures($sname,size));"
     code "coopinit_$sname ($tmp);"

     set result $tmp

     if {!$structures($sname,generated)} {
         code-head "static void coopinit_$sname (void *obj);"
         code-tail "static void coopinit_$sname (void *obj) \{"

         set offset 0

         foreach m $structures($sname,methods) {
             lassign $m name impfunc
             set tmp coopfun$offset

             code-tail "void **$tmp = (void **)(((unsigned char *)obj) + $offset); *$tmp = (void *) $impfunc;"
             incr offset $max_type_size
         }

         foreach p $structures($sname,properties) {
             lassign $p type name

             set tmp coopprop$offset

              code-tail "$type *$tmp = (void *)(((unsigned char *)obj) + $offset); *$tmp = 0;"
             incr offset $max_type_size
         }

         set structures($sname,generated) 1
         code-tail "\}"
     }

     return [list $sname $result]
 }

 core property {s type args} {
     global structures max_type_size

     set s [lindex $s 1]

     if {![info exists structures($s)]} {
         return -code error "invalid structure $s"
     }

     foreach i $args {
         lappend structures($s,properties) [list [lindex $type 1] [lindex $i 1]]
         incr structures($s,size) $max_type_size
     }
     return [list "" ""]
 }

 core return {value} {
     lassign $value type v
     code "return $v;"
     return ""
 }

 core self {args} {
     global current_structure 

     dispatch $current_structure self $args
 }


 proc dispatch {s objname arglist} {
     global local_count
     switch -- [llength $arglist] {
         0 {
             return -code error "invalid arguments to $objname.  Should be: $objname key ?value?"
         }

         1 {
             # [obj key]
             lassign [lindex $arglist 0] type key
             set look [lookup-offset $s $key]
             lassign $look type proptype offset

             incr local_count
             set tmp cooptmp$local_count

             if {"method" eq $type} {
                 #The user is trying to retrieve the address of a method.
                 code "void *(*$tmp) = (void *)(((unsigned char *)$objname) + $offset);"
                 return [list "method" $tmp]
             } else {
                 #The user is trying to get the value of a property.
                 code "$proptype *$tmp = (void *)(((unsigned char *)$objname) + $offset);"
                 return [list $proptype (*$tmp)]
             }

             return $tmp
         }

            default {
             # [obj method x y ...]
             set key [lindex $arglist 0]
             lassign $key ktype key

             set look [lookup-offset $s $key]
             lassign $look type proptype offset

             incr local_count
             set tmp cooptmp$local_count

             if {"method" eq $type} {
                 #This is a method call.
                 code "void *(*$tmp)() = *(void **)(((unsigned char *)$objname) + $offset);"

                 set code "$tmp ($objname,"

                 foreach value [lrange $arglist 1 end] {
                     lassign $value vtype v
                     if {"QUOTE" eq $vtype} {
                         set v \"$v\"
                     }
                     append code "$v,"
                 }
                 #Trim trailing ,
                 set code [string range $code 0 end-1]
                 append code ");"
                 #Write the code.
                 code $code

                 return [list "" ""]
             } else {
                 #This is probably the setting of a property.
                 # First make sure it just has one value.

                 if {2 != [llength $arglist]} {
                     return -code error "too many values for property set of structure $s and key $key"
                 }

                 set value [lindex $arglist 1]
                 lassign $value vtype v
                 if {"QUOTE" eq $vtype} {
                     set v \"$v\"
                 }

                 code "$proptype *$tmp = (void *)(((unsigned char *)$objname) + $offset);"
                 code "*$tmp = $v;"
                 return $value
             }         
         }
     }

     return [list "" ""]
 }

 core set {var value} {
     lassign $var vtype v
     lassign $value valtype val

     if {"QUOTE" eq $valtype} {
         code "$v = \"$val\";
     } else {
         code "$v = $val;"
     }
     return $var
 }

 core structure s {
     global structures max_type_size

     lassign $s type s
     if {"" ne $type} {
         return -code error "expected a string but received type: $type -- $s"
     }

     set structures($s) $s
     set structures($s,methods) [list [list destroy coopdestroy]]
     # Property offsets come after methods.
     set structures($s,properties) [list]
     # All members of the structure are a multiple of this.  We start off with 1 method.
     set structures($s,size) $max_type_size
     # This is set to 1 when the initialization function has been created.
     set structures($s,generated) 0
     return ""
 }

 core while {cond body} {
     global local_count

     lassign $cond condtype c
     lassign $body btype b

     if {"BLOCK" ne $condtype} {
         return -code error "while expects a block for the cond argument -- $condtype"
     }

     if {"BLOCK" ne $btype} {
         return -code error "while expects a block for the 2nd argument -- $btype"
     }

     incr local_count
     set label "cooplabel$local_count"
     code ${label}:
     code "\{"
     set i 0
     set tmp [evaluate [parse $c i]]

     code "if ([lindex $tmp 1]) \{"
     set i 0
     evaluate [parse $b i]
     code "goto $label;"
     code "\}"
     code "\}"

     return [list "" ""]
 }


 set ::code_head ""
 set ::code_body ""
 set ::code_tail ""

 proc code c {
     global code_body
     append code_body "$c\n"
 }

 proc code-head c {
     global code_head
     append code_head "$c\n"
 }

 proc code-tail c {
     global code_tail
     append code_tail "$c\n"
 }


 proc evaluate-parameters {argpat} {
     global frame
     set cargs ""
     foreach decl [split $argpat \;] {
         lassign [split $decl :] type var
         if {[regexp {(.+)(\[.*\])} $type all type ar]} {
             append var $ar
         }
         set frame($var) $type
         append cargs "$type $var,"
     }
     # Trim the trailing ,
     return [string range $cargs 0 end-1]
 }

 proc evaluate-word {argpat} {
     global corewords frame current_structure

     set cw [lindex [lindex $argpat 0] 1]
     puts "CW:$cw"

     if {![info exists corewords($cw)]} {
         if {[info exists frame($cw)]} {
             return [dispatch $frame($cw) $cw [lrange $argpat 1 end]]
         } else {
             return -code error "invalid command $cw"
         }
     }
     return [uplevel \#0 $corewords($cw) [lrange $argpat 1 end]]
 }

 proc evaluate words {
     global corewords
     set args [list]
     set results [list]

     foreach obj $words {
         lassign $obj type value
         if {"RECURSE" eq $type} {
             lappend args [evaluate $value]
         } elseif {"EOC" eq $type} {
             if {[llength $args]} {
                 lappend results [evaluate-word $args]
                 set args [list]
             }
         } else {
             lappend args $obj
         }
     }

     if {[llength $args]} {
         lappend results [evaluate-word $args]
     }

     return [lindex $results end]
 }

 proc external-callback {name arglist} {
     global externals local_count

     incr local_count

     set tmp cooptmp$local_count

     if {"void" eq $externals($name,returntype)} {
         code "$name ("
         set result [list "" ""]
     } else {
         code "$externals($name,returntype) $tmp = $name ("
         set result [list "" $tmp]
     }

     set code ""
     foreach a $arglist {
         lassign $a type val
         if {"QUOTE" eq $type} {
             append code \"$val\",
         } else {
             append code $val,
         }
     }
     set code [string range $code 0 end-1]
     code $code
     code ");"

     return $result
 }

 #This looks up the offset for a structure's member.
 #This also returns the type of the member.
 proc lookup-offset {s key} {
     global structures max_type_size

     # Methods come first in a structure.
     set offset 0
     foreach m $structures($s,methods) {
         lassign $m method impfunc
         if {$key eq $method} {
             return [list method "" $offset]
         }

         incr offset $max_type_size
     }

     foreach p $structures($s,properties) {
         lassign $p type name
         if {$key eq $name} {
             return [list property $type $offset]
         }
         incr offset $max_type_size
     }
     return -code error "$key isn't in structure $s"
 }


 proc compile {file code} {
     global init_code code_head code_body code_tail

     set i 0
     puts [evaluate [parse $code i]]

     set outchan [open $file w]
     puts $outchan $init_code
     puts $outchan $code_head
     puts $outchan $code_body
     puts $outchan $code_tail
     puts "wrote $file"
 }

 proc parse {script ivar} {
     upvar $ivar i
     set words [list]
     set word ""
     set state 0
     set lastchar ""
     set bracecount 0
     set type ""

     #This is a little state machine.  It's used to parse code bodies.

     for {} {$i < [string length $script]} {incr i} {
         set c [string index $script $i]
         #puts "C:$c STATE:$state I:$i"

         if {"QUOTE" eq $state} {
             if {"\"" eq $c && "\\" ne $lastchar} {
                 set state 0
                 lappend words [list $type $word]
                 set word ""
                 set type ""
             } else {
                 append word $c
             }

         } elseif {"BLOCK" eq $state} {
             if {"\}" eq $c && "\\" ne $lastchar} {
                 incr bracecount -1
                 if {$bracecount <= 0} {
                     set state 0
                     lappend words [list $type $word]
                     set word ""
                     set type ""
                 } else {
                     append word $c
                 }
             } elseif {"\{" eq $c && "\\" ne $lastchar} {
                 incr bracecount
                 append word $c
             } else {
                 append word $c
             }
         } elseif {"\n" eq $c || ";" eq $c} {
             if {[llength $words]} {
                 if {[string length $word]} {
                     lappend words [list $type $word]
                     set type ""
                     set word ""
                 }
                 lappend words [list EOC ""]
             }
         } elseif {[string is space -strict $c]} {
             if {[string length $word]} {
                 lappend words [list $type $word]
                 set word ""
                 set type ""
             }
         } elseif {"\"" eq $c} {
             set state QUOTE
             set type QUOTE
         } elseif {"\{" eq $c} {
             set state BLOCK
             set type BLOCK
             set word ""
             set bracecount 1
         } elseif {"\[" eq $c} {
             incr i
             set words [concat $words [list [list RECURSE [parse $script i]]]]
         } elseif {"\]" eq $c} {
             if {[string length $word]} {
                 lappend words [list $type $word]
             }
             return $words
         } else {
             append word $c
         }
         set lastchar $c
     }        

     if {[string length $word]} {
         lappend words [list "" $word]
     }

     return $words
 }

 if {$argc != 2} {
     puts stderr "syntax is: [info script] output.c input.coop2"
     exit 1
 }

 set fd [open [lindex $argv 1] r]
 set code [read $fd]
 close $fd
 compile [lindex $argv 0] $code