Version 3 of Threaded Code Tcl VM

Updated 2005-01-31 02:48:30 by cl

George Peter Staplin: As an experiment I decided to try to build a prototype for a Tcl VM that uses threaded code. Threaded code is an interesting concept that is fairly well documented on the web. Threaded code has been used in Ken Thompson's original B language, and Charles Moore's Forth.

This prototype converts a series of bytecodes into addresses. The addresses are then used to jump around in the threaded code.

Note: this is not related to POSIX threads, or other concurrent programming techniques.


Tcl_Types.h

 #define TCL_TYPE_STRING 2
 #define TCL_TYPE_INT 4
 #define TCL_TYPE_DOUBLE 8

main.c

 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>

 #include "Tcl_Types.h"

 /* We define an operator table that is used with the symbols for bytecodes to lookup code addresses. 
  */
 struct {
  void *code;
 } ops[3];

 /* These are our bytecodes: */
 enum {
  TCL_ADD,
  TCL_PUSH,
  TCL_RETURN
 };

 /* These must match the .equ in vm.S */
 typedef struct {
  int type; 
  int ref; /* 4 */
  int i; /* 8 */
  char *s; /* 12 */
 } Tcl_Obj;

 /* This is our execution token.  It stores a linear series of addresses. */
 typedef struct {
  caddr_t *start;
  /* Our cursor is the current position after the last address was appended. */
  caddr_t *cursor;
  size_t remaining;
  size_t total;
 } Tcl_Xt;

 extern void Tcl_InitOperators ();
 extern Tcl_Obj *Tcl_Run ();
 extern void Tcl_SetStringObj (Tcl_Obj *obj, char *str);

 /* bytecode to address */
 #define BCTOA(bc) ops[bc].code

 /* append code to xt */
 #define ACTOXT(xt,sym) \
  *(xt->cursor) = sym; \
  xt->cursor += 1

 void *Tcl_Alloc (size_t s) {
  void *r = malloc (s);

  if (NULL == r) {
   perror ("malloc");
   exit (EXIT_FAILURE);
  }
  return r;
 }

 void Tcl_Free (void *p) {
  free (p);
 }

 void *Tcl_Realloc (void *old, size_t s) {
  void *new;

  new = realloc (old, s);

  if (NULL == new) {
   perror ("realloc");
   exit (EXIT_FAILURE);
  }

  return new;
 }

 void Tcl_EnlargeXt (Tcl_Xt *xt) {
  size_t new_size;
  size_t used;
  void *p;

  new_size = (xt->total * 2);

  used = (xt->remaining - xt->total);

  p = Tcl_Realloc (xt->start, new_size);

  xt->remaining = (new_size - used);
  xt->total = new_size;
  xt->cursor = (p + (xt->cursor - xt->start));
  xt->start = p; 
 }

 void Tcl_CompileExampleAdd (Tcl_Obj *objv[], Tcl_Xt *xt) {

  if ((sizeof (void *) * 7) < xt->remaining) {
   Tcl_EnlargeXt (xt);
  }

  ACTOXT (xt, BCTOA (TCL_PUSH));
  ACTOXT (xt, (caddr_t)objv[0]);
  ACTOXT (xt, BCTOA (TCL_PUSH));
  ACTOXT (xt, (caddr_t)objv[1]);
  ACTOXT (xt, BCTOA (TCL_ADD));
  ACTOXT (xt, BCTOA (TCL_RETURN));

  xt->remaining -= (sizeof (void *) * 7);
 }


 Tcl_Obj *Tcl_NewObj (void) {
  Tcl_Obj *obj = Tcl_Alloc (sizeof (Tcl_Obj));

  obj->type = TCL_TYPE_STRING;
  obj->ref = 0;

  return obj;
 }

 Tcl_Xt *Tcl_NewXt (void) {
  Tcl_Xt *xt = Tcl_Alloc (sizeof (Tcl_Xt));

  xt->total = xt->remaining = 2000;
  xt->cursor = xt->start = Tcl_Alloc (xt->remaining);

  return xt;
 }

 void Tcl_SetStringObj (Tcl_Obj *obj, char *str) {
  obj->type = TCL_TYPE_STRING;
  obj->s = str;
 }

 extern void test_convert (Tcl_Obj *obj);

 int main () { 
  Tcl_Obj **objv;
  Tcl_Obj *result;
  Tcl_Xt *xt;

  Tcl_InitOperators (
   &(ops[TCL_ADD].code), 
   &(ops[TCL_PUSH].code),
   &(ops[TCL_RETURN].code));

  printf ("add_op code is %p\n", ops[TCL_ADD].code);
  printf ("push_op code is %p\n", ops[TCL_PUSH].code);
  printf ("return code is %p\n", ops[TCL_RETURN].code);

  objv = Tcl_Alloc (sizeof (Tcl_Obj *) * 2);
  objv[0] = Tcl_NewObj ();
  objv[1] = Tcl_NewObj ();

  Tcl_SetStringObj (objv[0], "255");
  Tcl_SetStringObj (objv[1], "456");

  xt = Tcl_NewXt ();

  Tcl_CompileExampleAdd (objv, xt);

 #if 0
  printf ("type before %d\n", objv[0]->type);
  test_convert (objv[1]);
  printf ("type after %d i %d\n", objv[0]->type, objv[0]->i);

  test_convert (objv[1]);
  return 0;
 #endif

  result = Tcl_Run (xt->start);

  printf ("RESULT is %d\n", result->i);

  printf ("after return from Tcl_Run\n");

  return EXIT_SUCCESS;
 }

registers.h

 /* We may need to change the register allocation in the future, so we use these macros.
  */

 /* OPERAND STACK */
 #define ops esi

 /* TOP OF STACK */
 #define tos edi

 /* VIRTUAL INSTRUCTION POINTER */
 #define vip edx

vm.S

 #include "registers.h"
 #include "Tcl_Types.h"

 /* We cache the top-of-stack (TOS) in a register.
  * This is a technique that can result in better performance.
  */

 .macro NEXT
  movl (%vip),%eax
  jmp *%eax
 .endm

 .macro POP_INTO reg
  movl %tos,\reg
  movl (%ops),%tos
  addl $4,%ops
 .endm

 .macro PUSH reg
  subl $4,%ops
  movl %tos,(%ops)
  movl \reg,%tos
 .endm

 .macro RESTORE_TOS
  movl (%ops),%tos
  addl $4,%ops
 .endm

 .macro SAVE_TOS
  subl $4,%ops
  movl %tos,(%ops)
 .endm

 .macro VM_WORD word
  \word:
  addl $4,%vip
 .endm


 /* our Tcl_Obj structure layout */
 .equ type_offset, 0
 .equ ref_offset, 4
 .equ int_offset, 8
 .equ string_offset, 12

 /**** EXPORTED SYMBOLS ****/
 .global Tcl_InitOperators
 .global Tcl_Run


 /**** SETUP VARIOUS SECTIONS ****/

 .data
  .comm operand_stack,2000
 .text

 .section .rodata
  emit_hex_fmt: .string "HEX 0x%x\n"
  emit_invalid_integer: .string "This isn't an integer string.\n"
  emit_string_fmt: .string "DEBUG STRING %s\n"
  emit_ptr_fmt: .string "ptr %p\n"
  emit_internal_error: .string "internal error\n"
 .text


 /**** UTILITY CODE ****/

 /* convert the integer in the Tcl_Obj stored in %eax
  (saving any registers that could be clobbered) 

  We use:

   %eax result
   %cl low bits for the character
   %esi our offset into the string
   %edi our Tcl_Obj (after we move it from %eax)
   %edx for multiplication
  */

 convert_to_int:
  pushl %edi
  pushl %esi
  pushl %ecx
  pushl %edx

  movl %eax,%edi
  movl string_offset(%eax),%esi

  /* our initial value */
  movl $0,%eax

  repeat:
   /* clear our temporary register's high bits */
   movl $0,%ecx
   movb (%esi),%cl

   cmpb $0,%cl
   je got_zero_byte

   /* %eax * 10 */
   movl $10,%edx
   mull %edx

   cmpb $'0',%cl
   jl not_integer 
   cmpb $'9',%cl
   jg not_integer

   subb $'0',%cl
   addl %ecx,%eax

   /* advance to the next char */
   addl $1,%esi
   jmp repeat

  got_zero_byte:

   /* save the result in our Tcl_Obj's memory */
   movl $TCL_TYPE_INT,type_offset(%edi)
   movl %eax,int_offset(%edi)
   movl %edi,%eax

   /* now restore the registers in reverse order */
   popl %edx
   popl %ecx
   popl %esi
   popl %edi
   ret

  not_integer:
   pushl $emit_invalid_integer
   call printf
   addl $4,%esp
   /* XXX lovely error handling.  I think in the final version we will use a longjmp. */
   call abort


 /**** ENTRY POINTS ****/

 Tcl_InitOperators:
  movl 4(%esp),%eax
  movl $add_op,(%eax)

  movl 8(%esp),%eax
  movl $push_op,(%eax)

  movl 12(%esp),%eax
  movl $exit_Tcl_Run,(%eax)

  ret


 /* Tcl_Run (start) 
  */
 Tcl_Run:
  movl 4(%esp),%vip
  movl $operand_stack,%ops
  /* our stack grows downward */
  addl $2000,%ops

  NEXT

  exit_Tcl_Run:

   /* return our Tcl_Obj result in %eax */
   POP_INTO %eax
   ret


 .global test_convert
 test_convert:
  movl 4(%esp),%eax
  call convert_to_int

  pushl int_offset(%eax)
  pushl $emit_hex_fmt
  call printf
  addl $8,%esp

  ret

 emit_debug:
  pusha
  pushl %eax
  pushl $emit_hex_fmt
  call printf
  addl $8,%esp
  popa
  ret

 /**** OPERATORS ****/

 /* The idea is that we have various VM words that get threaded together.  
  * Each word ends with a NEXT macro that expands to jump to the next address.
  */


 /* add $a $b */
 VM_WORD add_op
  /* convert object $b to int if needed */ 
  POP_INTO %eax
  cmpl $TCL_TYPE_INT,type_offset(%eax)
  je 1f
   call convert_to_int
  1:

  /* save the Tcl_Obj for $b */
  pushl %eax

  POP_INTO %eax
  cmpl $TCL_TYPE_INT,type_offset(%eax)
  je 1f
   call convert_to_int
  1:

  movl int_offset(%eax),%ecx
  popl %eax
  addl int_offset(%eax),%ecx

  /* Now we save the critical registers that the C code will possibly clobber. */
  pushl %ecx
  pushl %tos
  pushl %vip
  pushl %ops

  call Tcl_NewObj

  popl %ops
  popl %vip
  popl %tos
  popl %ecx

  movl %ecx,int_offset(%eax)
  movl $TCL_TYPE_INT,type_offset(%eax)

  PUSH %eax

  NEXT


 VM_WORD push_op
  SAVE_TOS
  /* set TOS to the Tcl_Obj */
  movl (%vip),%tos
  addl $4,%vip

  NEXT

 /*END*/

1. Ken Thompson's B language (the precursor to C): http://cm.bell-labs.com/cm/cs/who/dmr/kbman.html

2. Threaded Code Definition at FOLDOC: http://wombat.doc.ic.ac.uk/foldoc/foldoc.cgi?query=threaded+code

3. My revisions of this code (tcltcvm): http://www.xmission.com/~georgeps/engineering/prototype/


Category Concept | Category Example