Version 1 of Proper integers implementation

Updated 2004-04-15 13:12:25

This is some bits and pieces of an implementation of the tclBigIntType suggested on Proper integers for Tcl 8.5 or 9.0.

  • There is no guarantee the below will even compile, but I think it is mostly correct.
  • All of it is about implementing the Tcl_Obj type, i.e., it is about storage. There is not yet any code for doing any operations (save possibly some format conversions) on the data.
 Tcl_ObjType tclBigIntType = {
     "bigint",                           /* name */
     FreeBigIntInternalRep,              /* freeIntRepProc */
     DupBigIntInternalRep,               /* dupIntRepProc */
     UpdateStringOfBigInt,               /* updateStringProc */
     SetBigIntFromAny                    /* setFromAnyProc */
 };



 /*
  * The following structure represents a subtype of bigint, which is a
  * particular internal representation for a bigint object plus a set of
  * procedures that provide standard operations on objects of that type.
  */

 typedef struct Tcl_BigIntSubtype {
     char *name;                 /* Name of the subtype, e.g. "SLM-hexadecimal". */
     int version;                /* A struct version number. Just in case 
                                    it will evolve. */
 /* 
  * These are all as in Tcl_ObjType.
  */
     Tcl_FreeInternalRepProc *freeIntRepProc;
                                 /* Called to free any storage for the type's
                                    internal rep. NULL if the internal rep
                                    does not need freeing. */
     Tcl_DupInternalRepProc *dupIntRepProc;
                                 /* Called to create a new object as a copy
                                    of an existing object. */
     Tcl_UpdateStringProc *updateStringProc;
                                 /* Called to update the string rep from the
                                    type's internal representation. */
     Tcl_SetFromAnyProc *setFromAnyProc;
                                 /* Called to convert the object's internal
                                    rep to this type. Frees the internal rep
                                    of the old type. Returns TCL_ERROR on
                                    failure. */
 /* 
    Here should follow pointers to functions that carry out 
    arithmetical operations on big integers of this subtype.
    TO DO: Define these. */
 } Tcl_BigIntSubtype;

 #define GET_SUBTYPE(objPtr) ( \
     (Tcl_BigIntSubtype *)(objPtr) ->internalRep.twoPtrValue.ptr1 \
 )
 #define SUBTYPE_ACTION(field,objPtr) ( \
     GET_SUBTYPE(objPtr) -> field \
 )
 #define BIGINT_VALUE(objPtr) ((objPtr) ->internalRep.twoPtrValue.ptr2)


 /*
  *----------------------------------------------------------------------
  *
  * FreeBigIntInternalRep --
  *
  *      Deallocate the storage associated with a bigint data object's
  *      internal representation.
  *
  * Results:
  *      None.
  *
  * Side effects:
  *      Frees memory. 
  *
  *----------------------------------------------------------------------
  */

 static void
 FreeBigIntInternalRep(objPtr)
     Tcl_Obj *objPtr;            /* Object with internal rep to free. */
 {
     if (GET_SUBTYPE(objPtr) != NULL) {
         SUBTYPE_ACTION(freeIntRepProc,objPtr)(objPtr);
     }
 }


 /*
  *----------------------------------------------------------------------
  *
  * UpdateStringOfBigInt --
  *
  *      Update the string representation for an integer object.
  *      Note: This only does anything when the subtype is non-NULL.
  *      
  *      An existing old string rep is not freed so storage will be 
  *      lost if this has not already been done. 
  *
  * Results:
  *      None.
  *
  * Side effects:
  *      The object's string is set to a valid string that results from
  *      the int-to-string conversion.
  *
  *----------------------------------------------------------------------
  */

 static void
 UpdateStringOfBigInt(objPtr)
     Tcl_Obj *objPtr;            /* Object with string rep to update. */
 {
     if (GET_SUBTYPE(objPtr) != NULL) {
         SUBTYPE_ACTION(updateStringProc,objPtr)(objPtr);
     }
 }


 /*
  *----------------------------------------------------------------------
  *
  * DupBigIntInternalRep --
  *
  *      Initialize the internal representation of a new Tcl_Obj to a
  *      copy of the internal representation of an existing bigint object.
  *
  * Results:
  *      None.
  *
  * Side effects:
  *      copyPtr's internal rep is set to a copy of srcPtr's internal
  *      representation.
  *
  *----------------------------------------------------------------------
  */

 static void
 DupBigIntInternalRep(srcPtr, copyPtr)
     Tcl_Obj *srcPtr;    /* Object with internal rep to copy.  Must
  • have an internal rep of type "bigint". */
     Tcl_Obj *copyPtr;   /* Object with internal rep to set.  Must
  • not currently have an internal rep.*/
 {
     if (GET_SUBTYPE(srcPtr) == NULL) {
         copyPtr->internalRep.twoPtrValue.ptr1 = NULL;
         copyPtr->internalRep.twoPtrValue.ptr2 = NULL;
     } else {
         SUBTYPE_ACTION(dupIntRepProc,srcPtr)(srcPtr,copyPtr);
     }
 }



 /*
  *----------------------------------------------------------------------
  *
  * SetBigIntFromAny --
  *
  *      Attempt to change the type of the Tcl object "objPtr" to bigint. 
  *      This always uses the null bigint subtype, so in practice it 
  *      only verifies that the string representation of the object is 
  *      a valid bigint.
  *
  * Results:
  *      The return value is a standard object Tcl result. If an error 
  *      occurs during conversion and "interp" is not NULL, an error 
  *      message is left in that interpreter's result.
  *
  * Side effects:
  *      If no error occurs and the "objPtr" has an internal 
  *      representation, then that is freed.
  *
  *----------------------------------------------------------------------
  */

 static int
 SetBigIntFromAny(interp, objPtr)
     Tcl_Interp *interp;         /* Used for error reporting if not NULL. */
     register Tcl_Obj *objPtr;   /* The object to convert. */
 {
     char *string;
     register char *p;

     /*
  • Get the string representation. Make it up-to-date if necessary.
      */

     p = string = Tcl_GetString(objPtr);

     /*
  • Now check that "objPtr"s string can be parsed as an int.
  • First skip initial spaces.
      */

     for ( ;  isspace(UCHAR(*p));  p++) { /* INTL: ISO space. */
         /* Empty loop body. */
     }

     /* 
  • Then step past the sign, if there is one.
  • Q: Some languages allow multiple signs. Maybe we should too?
      */

     if (*p == '-' || *p == '+') {
         p++;
     }

     /* 
  • Next check for a non-decimal-number prefix, and branch out
  • into the various cases depending on what is found.
      */

     if (*p != '0') {
         /* 
  • There was no prefix (they all start with 0), so what follows
  • should be a non-empty sequence of decimal digits.
  • Scan through that.
          */

         if (!isdigit(UCHAR(*p))) { /* INTL: digit */
             goto badInteger;
         }
         for ( ;  isdigit(UCHAR(*p));  p++) { /* INTL: digit */
             /* Empty loop body. */
         }

     } else {
         /* 
  • There is a prefix. But of what kind?
          */
         p++;
         switch (*p) {
             case 'x':
             case 'X':
                 /* 
  • A hexadecimal 0x prefix has been found.
  • The rest must be a nonempty sequence of hexadecimal
  • digits.
                  */
                 p++;
                 if (!isxdigit(UCHAR(*p))) {
                     goto badInteger;
                 }
                 p++;
                 for ( ;  isxdigit(UCHAR(*p));  p++) { /* INTL: digit */
                     /* Empty loop body. */
                 }
                 break;

             case 'o':
             case 'O':
                 /* 
  • A 0o prefix has been found. Since there is a movement
  • to make this the only octal prefix in Tcl 9, we may
  • at least allow it as one octal prefix here. For this
  • to be an integer though, the rest must be a nonempty
  • sequence of octal digits.
                  */
                 p++;
                 if (!isdigit(UCHAR(*p)) || (UCHAR(*p) >= '8')) { /* INTL: digit */
                     goto badInteger;
                 }
             case '0':
             case '1':
             case '2':
             case '3':
             case '4':
             case '5':
             case '6':
             case '7':
                 /* 
  • Just 0 is however also acceptable as an octal prefix.
                  */
                 p++;
                 for ( ; isdigit(UCHAR(*p)) && (UCHAR(*p) < '8');  p++) { /* INTL: digit */
                     /* Empty loop body. */
                 }
         }
     }

     /* 
  • A valid sequence of digits has been found. What remains is to
  • check that there is no garbage after it.
      */

     for ( ;  isspace(UCHAR(*p));  p++) { /* INTL: ISO space. */
         /* Empty loop body. */
     }
     if (*p != 0) {
         goto badInteger;
     }

     /*
  • The string is a valid bigint!
  • Free the old internalRep, then set the new type.
      */

     if ((objPtr->typePtr != NULL) && 
         (objPtr->typePtr->freeIntRepProc != NULL)) {
             objPtr->typePtr->freeIntRepProc(objPtr);
     }

     objPtr->typePtr = &tclBigIntType;
     objPtr->internalRep.twoPtrValue.ptr1 = NULL;
     objPtr->internalRep.twoPtrValue.ptr2 = NULL;

     return TCL_OK;


   badInteger:
     /* 
  • If we get here, the string is NOT a valid bigint.
  • Generate an appropriate error message if "interp" is non-null.
      */

     if (interp != NULL) {
         /*
  • Must copy string before resetting the result in case a caller
  • is trying to convert the interpreter's result to an int.
          */

         Tcl_DString ds;
         Tcl_DStringInit(&ds);
         Tcl_DStringAppend(&ds, "expected integer (possibly big) but got \"", -1);
         Tcl_DStringAppend(&ds, string, length);
         Tcl_DStringAppend(&ds, "\" instead", -1);
         Tcl_DStringResult(interp, &ds);
     }

     return TCL_ERROR;
 }


 /* 
  * The following functions and definitions implement a "SLM-hexadecimal" 
  * subtype of bigint. The internal representation of that type is a 
  * straightforward string, but with a slightly different structure from 
  * the normal string representations of integers. The first character is 
  * the sign, and it is one of '+', '0', and '-'. If it is '0' then the 
  * string ends there and the value is 0. If it is '+' or '-' then the 
  * integer is positive or negative respectively, and the remaining 
  * characters are the digits of the integer. The least significant 
  * digit comes first, and then the others in order of ascending 
  * significance. The SLM part of the name stands for "Sign, Least 
  * significant, Most significant" and highlights the order of these 
  * extremes.
  * 
  * The digit characters are 0123456789ABCDEF, as the "hexadecimal" part
  * of the name indicates. The last (most significant) digit must not be 0.
  * 
  * Examples:
  * 
  *   Decimal   SLM-hexadecimal
  *   -------   ---------------
  *        0    0
  *        1    +1
  *        2    +2
  *       10    +A
  *       16    +01
  *      256    +001
  *       -1    -1
  *      -10    -A
  *     1000    +8E3
  */


 /*
  *----------------------------------------------------------------------
  *
  * FreeBigIntSLMHexadecimalInternalRep --
  *
  *      Deallocate the storage associated with a SLM-hexadecimal bigint 
  *      data object's internal representation.
  *
  * Results:
  *      None.
  *
  * Side effects:
  *      The twoPtrValue.ptr2 of the internal representation of 
  *      "objPtr" should not be NULL and we furthermore assume that 
  *      it points to a string. The subtype (twoPtrValue.ptr1) is 
  *      changed to NULL.
  *
  *----------------------------------------------------------------------
  */

 static void
 FreeBigIntSLMHexadecimalInternalRep(objPtr)
     Tcl_Obj *objPtr;            /* Object with internal rep to free. */
 {
     ckfree( (char *) BIGINT_VALUE(objPtr) );
     objPtr->internalRep.twoPtrValue.ptr1 = NULL;
 }


 /*
  *----------------------------------------------------------------------
  *
  * DupBigIntSLMHexadecimalInternalRep --
  *
  *      Initialize the internal representation of a new Tcl_Obj to a
  *      copy of the internal representation of an existing bigint object.
  *
  * Results:
  *      None.
  *
  * Side effects:
  *      copyPtr's internal rep is set to a copy of srcPtr's internal
  *      representation.
  *
  *----------------------------------------------------------------------
  */

 static void
 DupBigIntSLMHexadecimalInternalRep(srcPtr, copyPtr)
     Tcl_Obj *srcPtr;    /* Object with internal rep to copy.  Must
  • have an internal rep of type "bigint",
  • subtype "SLM-hexadecimal". */
     Tcl_Obj *copyPtr;   /* Object with internal rep to set.  Must
  • not currently have an internal rep.*/
 {
     ---
     if (GET_SUBTYPE(srcPtr) == NULL) {
         copyPtr->internalRep.twoPtrValue.ptr1 = NULL;
         copyPtr->internalRep.twoPtrValue.ptr2 = NULL;
     } else {
         SUBTYPE_ACTION(dupIntRepProc,srcPtr)(srcPtr,copyPtr);
     }
 }





 void
 CarryIntoSLM (dsPtr, offset, radix, shift, carry)
   Tcl_DString  *dsPtr;   /* Pointer to the dynamic string structure 
  • into which new data will be carried. */
   int           offset;  /* Offset into dynamic string of first digit. */
   unsigned char radix;   /* The radix used in the dynamic string. */
   long          shift;   /* The factor by which the data already in 
  • the string should be multiplied. */
   long          carry;   /* The carry to add after having multiplied. */
 {
     register char *p =   /* Points to current digit. */
                      Tcl_DStringValue(dsPtr) + offset;
     char newchar;

     while (*p != 0) {
         carry += shift * (*p - '0');
         *p = '0' + carry % radix;
         carry = carry / radix;
         p++;
     }
     while (carry > 0) {
         newchar = '0' + carry % radix;
         Tcl_DStringAppend(dsPtr, &newchar, 1);
         carry = carry / radix;
     }
 }


 /*
  *----------------------------------------------------------------------
  *
  * UpdateStringOfBigInt_SLMHexadecimal --
  *
  *      Update the string representation for a big integer object 
  *      of subtype SLM-hexadecimal.
  *      
  *      An existing old string rep is not freed so storage will be 
  *      lost if this has not already been done. A new string is 
  *      allocated.
  *
  * Results:
  *      None.
  *      
  * Side effects:
  *      The twoPtrValue.ptr2 of the internal representation of 
  *      "objPtr" should not be NULL and we furthermore assume that 
  *      it points to a valid SLM-hexadecimal string.
  *
  *----------------------------------------------------------------------
  */

 static void
 UpdateStringOfBigInt_SLMHexadecimal(objPtr)
     Tcl_Obj *objPtr;      /* Object with string rep to update. */
 {
     Tcl_DString temp;     /* Temporary storage for the decimal digits.
  • They appear here in least-to-most order. */
     char     *hexPtr;     /* Internal object representation pointer. */
     int          len;     /* Length of computed string representation. */


     Tcl_DStringInit(&temp);

     /* 
  • First the digits are converted to decimal. The more significant hex
  • digits are processed first.
      */

     hexPtr = (char *) objPtr->internalRep.twoPtrValue.ptr2;
     {
         register char *p;     /* Points to hex digit being shifted in. */
         p = hexPtr + strlen(hexPtr) - 1;
         while (p > hexPtr) {
             CarryIntoSLM (&temp, 0, 10, 16, 
                 (*p >= 'A') ? (*p - 'A' + 10) : (*p - '0')
             );
             p--;
         }
     }

     /* 
  • Then memory is allocated to hold the string representation and
  • data are copied. It is necessary to reverse the order of digits.
      */

     len = Tcl_DStringLength(&temp);
     if (*hexPtr == '-') {
         len++;
     }
     objPtr->bytes = (char *) ckalloc((unsigned) len + 1);
     objPtr->length = len;

     {
         register char *p, *q;  /* Moving pointers. */
         p = Tcl_DStringValue(&temp);
         q = objPtr->bytes + len;
         *q = '\0';
         q--;
         while (*p != '\0') {
             *q = *p;
             p++;
             q--;
         }
         if (*hexPtr == '-') {
             *q = '-';
         }
     }

     Tcl_DStringFree(&temp);
 }