George Peter Staplin Nov 2, 2007 - I needed a break from working on NexTk, to avoid going insane. I decided it would be fun to try to generate some simple Tcl_Obj commands at runtime, without invoking gcc. This project was a success -- I'm still sane :) And the commands work too!
/* A Tcl_Obj Command Machine Code Generator By George Peter Staplin gcc -shared codeproc.c -I/usr/local/include /usr/local/lib/libtclstub8.5.a -o codeproc.so gcc -Wall -shared codeproc.c -I/usr/local/include /usr/local/lib/libtclstub8.5.a -o codeproc.so */ #include <tcl.h> #include <string.h> #include <stdlib.h> #include <stdint.h> #include <sys/mman.h> #include <unistd.h> struct code { int localoffset; unsigned char *start; unsigned char *pc; size_t length; }; static struct code *current_code = NULL; #define OBJ_CMD_ARGS ClientData clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[] #define defcmd(func,name) \ Tcl_CreateObjCommand (interp, name, func, (ClientData) NULL, (Tcl_CmdDeleteProc *) NULL) static void dump_eax (struct code *code) { static char *fmt = "dump: 0x%x\n"; uintptr_t fun = (uintptr_t)printf; uintptr_t fun2 = (uintptr_t)abort; unsigned char docall[] = {0xff, 0xd0}; /* 7f: 50 push %eax 80: 8b 44 24 08 mov 0x8(%esp),%eax 84: 50 push %eax 85: ff 75 0c pushl 0xc(%ebp) 88: b8 dd ee ff 00 mov $0xffeedd,%eax 8d: ff d0 call *%eax */ *(code->pc) = 0x50; code->pc++; /*pushl*/ *(code->pc) = 0xb8; code->pc++; /*mov*/ memcpy (code->pc, &fmt, sizeof (char *)); code->pc += sizeof (char *); *(code->pc) = 0x50; code->pc++; /*push*/ *(code->pc) = 0xb8; code->pc++; /*mov*/ memcpy (code->pc, &fun, sizeof fun); code->pc += sizeof fun; memcpy (code->pc, docall, sizeof docall); code->pc += sizeof docall; /*now abort()*/ *(code->pc) = 0xb8; code->pc++; /*mov*/ memcpy (code->pc, &fun2, sizeof fun2); code->pc+= sizeof fun2; memcpy (code->pc, docall, sizeof docall); code->pc += sizeof docall; } static int long_cmd (OBJ_CMD_ARGS) { struct code *code = current_code; int i; for (i = 1; i < objc; ++i) { Tcl_Obj *obj = Tcl_NewIntObj (code->localoffset); Tcl_IncrRefCount (obj); if (NULL == Tcl_ObjSetVar2 (interp, objv[i], NULL, obj, TCL_LEAVE_ERR_MSG)) { Tcl_DecrRefCount (obj); return TCL_ERROR; } Tcl_DecrRefCount (obj); code->localoffset += sizeof (long); } return TCL_OK; } static int pointer_cmd (OBJ_CMD_ARGS) { struct code *code = current_code; int i; for (i = 1; i < objc; ++i) { Tcl_Obj *obj = Tcl_NewIntObj (code->localoffset); Tcl_IncrRefCount (obj); if (NULL == Tcl_ObjSetVar2 (interp, objv[i], NULL, obj, TCL_LEAVE_ERR_MSG)) { Tcl_DecrRefCount (obj); return TCL_ERROR; } Tcl_DecrRefCount (obj); code->localoffset += sizeof (void *); } return TCL_OK; } static int get_long_cmd (OBJ_CMD_ARGS) { struct code *code = current_code; uintptr_t fun = (uintptr_t)Tcl_GetLongFromObj; int objoffset, longoffset; unsigned char leal[] = {0x8d, 0x44, 0x24, /*modify*/ 0x00}; unsigned char movl[] = {0x8b, 0x44, 0x24, /*modify*/ 0x00}; unsigned char pushinterp[] = {0xff, 0x75, 0xc}; if (3 != objc) { Tcl_WrongNumArgs (interp, 1, objv, "obj-local long-local"); return TCL_ERROR; } if (TCL_OK != Tcl_GetIntFromObj (interp, objv[1], &objoffset)) return TCL_ERROR; if (TCL_OK != Tcl_GetIntFromObj (interp, objv[2], &longoffset)) return TCL_ERROR; /* leal long-local,%eax pushl %eax movl obj-local,%eax pushl %eax pushl 12(%ebp) 7b: 8d 44 24 04 lea 0x4(%esp),%eax 7f: 50 push %eax 80: 8b 44 24 08 mov 0x8(%esp),%eax 84: 50 push %eax 85: ff 75 0c pushl 0xc(%ebp) 88: b8 dd ee ff 00 mov $0xffeedd,%eax 8d: ff d0 call *%eax 8f: 83 c4 0c add $0xc,%esp */ leal[3] = longoffset; memcpy (code->pc, leal, sizeof leal); code->pc += sizeof leal; *(code->pc) = 0x50; code->pc++; /*pushl*/ printf ("longoffset %d\n", longoffset); movl[3] = objoffset + 4; memcpy (code->pc, movl, sizeof movl); code->pc += sizeof movl; *(code->pc) = 0x50; code->pc++; /*pushl*/ printf ("objoffset %d\n", objoffset); memcpy (code->pc, pushinterp, sizeof pushinterp); code->pc += sizeof pushinterp; *(code->pc) = 0xb8; code->pc++; /*movl (literal)*/ memcpy (code->pc, &fun, sizeof fun); code->pc += sizeof fun; *(code->pc) = 0xff; code->pc++; /*call fun*/ *(code->pc) = 0xd0; code->pc++; *(code->pc) = 0x83; code->pc++; /*cleanup after the pushes*/ *(code->pc) = 0xc4; code->pc++; *(code->pc) = 12; code->pc++; return TCL_OK; } static void code_free (struct code *code) { munmap (code->start, code->length); free (code); } static int code_init (Tcl_Interp *interp, struct code **code) { struct code *r; size_t s; r = malloc (sizeof *r); if (NULL == r) { const char *err = Tcl_PosixError (interp); Tcl_SetResult (interp, "unable to malloc", TCL_STATIC); Tcl_AddErrorInfo (interp, err); return TCL_ERROR; } s = sysconf (_SC_PAGESIZE); r->pc = r->start = mmap (NULL, s, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); if (((void *) -1) == r->pc) { const char *err = Tcl_PosixError (interp); Tcl_SetResult (interp, "unable to mmap", TCL_STATIC); Tcl_AddErrorInfo (interp, err); free (r); return TCL_ERROR; } r->localoffset = sizeof (void *); r->length = s; *code = r; return TCL_OK; } static void enter_function (struct code *code) { *(code->pc) = 0x55; code->pc++; /* push %ebp */ *(code->pc) = 0x89; code->pc++; *(code->pc) = 0xe5; code->pc++; /* mov %esp,%ebp */ } static void delete_compiled (ClientData cdata) { struct code *code = (void *)cdata; code_free (code); } static int compile_cmd (OBJ_CMD_ARGS) { struct code *code; int (*fun) (OBJ_CMD_ARGS); if (TCL_OK != code_init (interp, &code)) return TCL_ERROR; current_code = code; fun = (Tcl_ObjCmdProc *) code->start; printf ("code->start %p\n", (void *)code->start); enter_function (code); if (TCL_ERROR == Tcl_EvalObjEx (interp, objv[2], TCL_EVAL_DIRECT)) { code_free (code); return TCL_ERROR; } Tcl_CreateObjCommand (interp, Tcl_GetString (objv[1]), fun, (ClientData)code, delete_compiled); return TCL_OK; } static int begin_code_cmd (OBJ_CMD_ARGS) { struct code *code = current_code; /* 83 ec 08 sub $0x8,%esp */ *code->pc = 0x83; code->pc++; *code->pc = 0xec; code->pc++; *code->pc = code->localoffset; code->pc++; return TCL_OK; } static int leave_cmd (OBJ_CMD_ARGS) { struct code *code = current_code; /* c9 leave c3 ret */ *(code->pc) = 0xc9; code->pc++; *(code->pc) = 0xc3; code->pc++; return TCL_OK; } static int result_cmd (OBJ_CMD_ARGS) { struct code *code = current_code; char *s; int r; /* b8 00 00 00 00 mov $0x0,%eax */ if (2 != objc) { Tcl_WrongNumArgs (interp, 1, objv, "result"); return TCL_ERROR; } s = Tcl_GetString (objv[1]); if (!strcmp ("ok", s)) { r = TCL_OK; } else if (!strcmp ("error", s)) { r = TCL_ERROR; } *(code->pc) = 0xb8; code->pc++; memcpy (code->pc, &r, 4); code->pc += 4; return TCL_OK; } static int get_arg_cmd (OBJ_CMD_ARGS) { struct code *code = current_code; int argvoffset, localoffset; unsigned char m[] = { 0x8b, 0x4d, 0x14, 0x83, 0xc1, /*argvoffset*/ 0x00, 0x8b, 0x01, 0x89, 0x44, 0x24, /*localoffset*/ 0x00 }; if (3 != objc) { Tcl_WrongNumArgs (interp, 1, objv, "argv-offset local"); return TCL_ERROR; } if (TCL_OK != Tcl_GetIntFromObj (interp, objv[1], &argvoffset)) return TCL_ERROR; if (TCL_OK != Tcl_GetIntFromObj (interp, objv[2], &localoffset)) return TCL_ERROR; /* 7b: 8b 4d 14 mov 0x14(%ebp),%ecx 7e: 83 c1 04 add $0x4,%ecx 81: 8b 01 mov (%ecx),%eax 83: 89 44 24 04 mov %eax,0x4(%esp) (0x4 is objv[1]) (0x8 is objv[2]) */ m[5] = argvoffset * 4; m[11] = localoffset; memcpy (code->pc, m, sizeof m); code->pc += sizeof m; return TCL_OK; } static int long_plus_cmd (OBJ_CMD_ARGS) { struct code *code = current_code; unsigned char movleax[] = {0x8b, 0x44, 0x24, /*modify*/ 0x00}; unsigned char movlecx[] = {0x8b, 0x4c, 0x24, /*modify*/ 0x00}; unsigned char addl[] = {0x01, 0xc8}; unsigned char movlresult[] = {0x89, 0x44, 0x24, /*modify*/ 0x00}; int a, b, r; /* these are local variable offsets */ /* 7b: 8b 44 24 04 mov 0x4(%esp),%eax 7f: 8b 4c 24 08 mov 0x8(%esp),%ecx 83: 01 c8 add %ecx,%eax 85: 89 44 24 12 mov %eax,0x12(%esp) */ if (4 != objc) { Tcl_WrongNumArgs (interp, 1, objv, "a b result"); return TCL_ERROR; } if (TCL_OK != Tcl_GetIntFromObj (interp, objv[1], &a)) return TCL_ERROR; if (TCL_OK != Tcl_GetIntFromObj (interp, objv[2], &b)) return TCL_ERROR; if (TCL_OK != Tcl_GetIntFromObj (interp, objv[3], &r)) return TCL_ERROR; movleax[3] = a; memcpy (code->pc, movleax, sizeof movleax); code->pc += sizeof movleax; movlecx[3] = b; memcpy (code->pc, movlecx, sizeof movlecx); code->pc += sizeof movlecx; memcpy (code->pc, addl, sizeof addl); code->pc += sizeof addl; movlresult[3] = r; memcpy (code->pc, movlresult, sizeof movlresult); code->pc += sizeof movlresult; return TCL_OK; } static int long_obj_cmd (OBJ_CMD_ARGS) { struct code *code = current_code; uintptr_t fun = (uintptr_t)Tcl_NewLongObj; unsigned char docall[] = {0xff, 0xd0}; unsigned char movlocal[] = {0x8b, 0x44, 0x24, /*modify*/ 0x00}; unsigned char movlresult[] = {0x89, 0x44, 0x24, /*modify*/ 0x00}; int longoffset, objoffset; /* 7f: 50 push %eax 80: 8b 44 24 08 mov 0x8(%esp),%eax 84: 50 push %eax 85: ff 75 0c pushl 0xc(%ebp) 88: b8 dd ee ff 00 mov $0xffeedd,%eax 8d: ff d0 call *%eax */ if (3 != objc) { Tcl_WrongNumArgs (interp, 1, objv, "longoffset objoffset"); return TCL_ERROR; } if (TCL_OK != Tcl_GetIntFromObj (interp, objv[1], &longoffset)) return TCL_ERROR; if (TCL_OK != Tcl_GetIntFromObj (interp, objv[2], &objoffset)) return TCL_ERROR; movlocal[3] = longoffset; memcpy (code->pc, movlocal, sizeof movlocal); code->pc += sizeof movlocal; *(code->pc) = 0x50; code->pc++; /*push*/ *(code->pc) = 0xb8; code->pc++; /*mov*/ memcpy (code->pc, &fun, sizeof fun); code->pc += sizeof fun; memcpy (code->pc, docall, sizeof docall); code->pc += sizeof docall; *(code->pc) = 0x83; code->pc++; /*cleanup after the push*/ *(code->pc) = 0xc4; code->pc++; *(code->pc) = 4; code->pc++; movlresult[3] = objoffset; memcpy (code->pc, movlresult, sizeof movlresult); code->pc += sizeof movlresult; return TCL_OK; } static int set_result_cmd (OBJ_CMD_ARGS) { struct code *code = current_code; uintptr_t fun = (uintptr_t)Tcl_SetObjResult; unsigned char docall[] = {0xff, 0xd0}; unsigned char movlocal[] = {0x8b, 0x44, 0x24, /*modify*/ 0x00}; unsigned char pushinterp[] = {0xff, 0x75, 0xc}; int objoffset; if (2 != objc) { Tcl_WrongNumArgs (interp, 1, objv, "objoffset"); return TCL_ERROR; } if (TCL_OK != Tcl_GetIntFromObj (interp, objv[1], &objoffset)) return TCL_ERROR; movlocal[3] = objoffset; memcpy (code->pc, movlocal, sizeof movlocal); code->pc += sizeof movlocal; *(code->pc) = 0x50; code->pc++; /*push*/ /*push the interp*/ memcpy (code->pc, pushinterp, sizeof pushinterp); code->pc += sizeof pushinterp; *(code->pc) = 0xb8; code->pc++; /*mov*/ memcpy (code->pc, &fun, sizeof fun); code->pc += sizeof fun; memcpy (code->pc, docall, sizeof docall); code->pc += sizeof docall; /*cleanup the stack*/ *(code->pc) = 0x83; code->pc++; /*cleanup after the push*/ *(code->pc) = 0xc4; code->pc++; *(code->pc) = 4; code->pc++; return TCL_OK; } int Codeproc_Init (Tcl_Interp *interp) { if (NULL == Tcl_InitStubs (interp, TCL_VERSION, 0)) return TCL_ERROR; if (TCL_ERROR == Tcl_PkgProvide (interp, "codeproc", "1.0")) return TCL_ERROR; defcmd (compile_cmd, "compile"); defcmd (long_cmd, "long"); defcmd (pointer_cmd, "pointer"); defcmd (get_long_cmd, "get-long"); defcmd (begin_code_cmd, "begin-code"); defcmd (leave_cmd, "leave"); defcmd (result_cmd, "result"); defcmd (get_arg_cmd, "get-arg"); defcmd (long_plus_cmd, "long+"); defcmd (long_obj_cmd, "long-obj"); defcmd (set_result_cmd, "set-result"); return TCL_OK; } /* TEST CODE proc container {} { compile testadd { long a b c begin-code result ok leave } puts "$a $b $c" } load ./codeproc.so proc container {} { compile testadd { pointer aobj bobj robj long a b r begin-code get-arg 1 $aobj get-long $aobj $a get-arg 2 $bobj get-long $bobj $b long+ $a $b $r long-obj $r $robj set-result $robj result ok leave } puts $aobj } container puts RESULT:[testadd 2 3] */