Version 4 of Proper integers implementation

Updated 2004-04-15 13:18:30

This is some bits and pieces of an implementation of the tclBigIntType suggested on Proper integers for Tcl 8.5 or 9.0.

  • There is no guarantee the below will even compile, but I think it is mostly correct.
  • All of it is about implementing the Tcl_Obj type, i.e., it is about storage. There is not yet any code for doing any operations (save possibly some format conversions) on the data.
 Tcl_ObjType tclBigIntType = {
     "bigint",                           /* name */
     FreeBigIntInternalRep,              /* freeIntRepProc */
     DupBigIntInternalRep,               /* dupIntRepProc */
     UpdateStringOfBigInt,               /* updateStringProc */
     SetBigIntFromAny                    /* setFromAnyProc */
 };



 /*
  * The following structure represents a subtype of bigint, which is a
  * particular internal representation for a bigint object plus a set of
  * procedures that provide standard operations on objects of that type.
  */

 typedef struct Tcl_BigIntSubtype {
     char *name;                 /* Name of the subtype, e.g. "SLM-hexadecimal". */
     int version;                /* A struct version number. Just in case 
                                    it will evolve. */
 /* 
  * These are all as in Tcl_ObjType.
  */
     Tcl_FreeInternalRepProc *freeIntRepProc;
                                 /* Called to free any storage for the type's
                                    internal rep. NULL if the internal rep
                                    does not need freeing. */
     Tcl_DupInternalRepProc *dupIntRepProc;
                                 /* Called to create a new object as a copy
                                    of an existing object. */
     Tcl_UpdateStringProc *updateStringProc;
                                 /* Called to update the string rep from the
                                    type's internal representation. */
     Tcl_SetFromAnyProc *setFromAnyProc;
                                 /* Called to convert the object's internal
                                    rep to this type. Frees the internal rep
                                    of the old type. Returns TCL_ERROR on
                                    failure. */
 /* 
    Here should follow pointers to functions that carry out 
    arithmetical operations on big integers of this subtype.
    TO DO: Define these. */
 } Tcl_BigIntSubtype;

 #define GET_SUBTYPE(objPtr) ( \
     (Tcl_BigIntSubtype *)(objPtr) ->internalRep.twoPtrValue.ptr1 \
 )
 #define SUBTYPE_ACTION(field,objPtr) ( \
     GET_SUBTYPE(objPtr) -> field \
 )
 #define BIGINT_VALUE(objPtr) ((objPtr) ->internalRep.twoPtrValue.ptr2)


 /*
  *----------------------------------------------------------------------
  *
  * FreeBigIntInternalRep --
  *
  *      Deallocate the storage associated with a bigint data object's
  *      internal representation.
  *
  * Results:
  *      None.
  *
  * Side effects:
  *      Frees memory. 
  *
  *----------------------------------------------------------------------
  */

 static void
 FreeBigIntInternalRep(objPtr)
     Tcl_Obj *objPtr;            /* Object with internal rep to free. */
 {
     if (GET_SUBTYPE(objPtr) != NULL) {
         SUBTYPE_ACTION(freeIntRepProc,objPtr)(objPtr);
     }
 }


 /*
  *----------------------------------------------------------------------
  *
  * UpdateStringOfBigInt --
  *
  *      Update the string representation for an integer object.
  *      Note: This only does anything when the subtype is non-NULL.
  *      
  *      An existing old string rep is not freed so storage will be 
  *      lost if this has not already been done. 
  *
  * Results:
  *      None.
  *
  * Side effects:
  *      The object's string is set to a valid string that results from
  *      the int-to-string conversion.
  *
  *----------------------------------------------------------------------
  */

 static void
 UpdateStringOfBigInt(objPtr)
     Tcl_Obj *objPtr;            /* Object with string rep to update. */
 {
     if (GET_SUBTYPE(objPtr) != NULL) {
         SUBTYPE_ACTION(updateStringProc,objPtr)(objPtr);
     }
 }


 /*
  *----------------------------------------------------------------------
  *
  * DupBigIntInternalRep --
  *
  *      Initialize the internal representation of a new Tcl_Obj to a
  *      copy of the internal representation of an existing bigint object.
  *
  * Results:
  *      None.
  *
  * Side effects:
  *      copyPtr's internal rep is set to a copy of srcPtr's internal
  *      representation.
  *
  *----------------------------------------------------------------------
  */

 static void
 DupBigIntInternalRep(srcPtr, copyPtr)
     Tcl_Obj *srcPtr;    /* Object with internal rep to copy.  Must
                            have an internal rep of type "bigint". */
     Tcl_Obj *copyPtr;   /* Object with internal rep to set.  Must
                            not currently have an internal rep.*/
 {
     if (GET_SUBTYPE(srcPtr) == NULL) {
         copyPtr->internalRep.twoPtrValue.ptr1 = NULL;
         copyPtr->internalRep.twoPtrValue.ptr2 = NULL;
     } else {
         SUBTYPE_ACTION(dupIntRepProc,srcPtr)(srcPtr,copyPtr);
     }
 }



 /*
  *----------------------------------------------------------------------
  *
  * SetBigIntFromAny --
  *
  *      Attempt to change the type of the Tcl object "objPtr" to bigint. 
  *      This always uses the null bigint subtype, so in practice it 
  *      only verifies that the string representation of the object is 
  *      a valid bigint.
  *
  * Results:
  *      The return value is a standard object Tcl result. If an error 
  *      occurs during conversion and "interp" is not NULL, an error 
  *      message is left in that interpreter's result.
  *
  * Side effects:
  *      If no error occurs and the "objPtr" has an internal 
  *      representation, then that is freed.
  *
  *----------------------------------------------------------------------
  */

 static int
 SetBigIntFromAny(interp, objPtr)
     Tcl_Interp *interp;         /* Used for error reporting if not NULL. */
     register Tcl_Obj *objPtr;   /* The object to convert. */
 {
     char *string;
     register char *p;

     /*
        Get the string representation. Make it up-to-date if necessary.
      */

     p = string = Tcl_GetString(objPtr);

     /*
        Now check that "objPtr"s string can be parsed as an int. 
        First skip initial spaces.
      */

     for ( ;  isspace(UCHAR(*p));  p++) { /* INTL: ISO space. */
         /* Empty loop body. */
     }

     /* 
        Then step past the sign, if there is one.
        Q: Some languages allow multiple signs. Maybe we should too?
      */

     if (*p == '-' || *p == '+') {
         p++;
     }

     /* 
        Next check for a non-decimal-number prefix, and branch out 
        into the various cases depending on what is found.
      */

     if (*p != '0') {
         /* 
            There was no prefix (they all start with 0), so what follows 
            should be a non-empty sequence of decimal digits. 
            Scan through that.
          */

         if (!isdigit(UCHAR(*p))) { /* INTL: digit */
             goto badInteger;
         }
         for ( ;  isdigit(UCHAR(*p));  p++) { /* INTL: digit */
             /* Empty loop body. */
         }

     } else {
         /* 
            There is a prefix. But of what kind?
          */
         p++;
         switch (*p) {
             case 'x':
             case 'X':
                 /* 
                    A hexadecimal 0x prefix has been found. 
                    The rest must be a nonempty sequence of hexadecimal 
                    digits.
                  */
                 p++;
                 if (!isxdigit(UCHAR(*p))) {
                     goto badInteger;
                 }
                 p++;
                 for ( ;  isxdigit(UCHAR(*p));  p++) { /* INTL: digit */
                     /* Empty loop body. */
                 }
                 break;

             case 'o':
             case 'O':
                 /* 
                    A 0o prefix has been found. Since there is a movement 
                    to make this the only octal prefix in Tcl 9, we may 
                    at least allow it as one octal prefix here. For this 
                    to be an integer though, the rest must be a nonempty 
                    sequence of octal digits.
                  */
                 p++;
                 if (!isdigit(UCHAR(*p)) || (UCHAR(*p) >= '8')) { /* INTL: digit */
                     goto badInteger;
                 }
             case '0':
             case '1':
             case '2':
             case '3':
             case '4':
             case '5':
             case '6':
             case '7':
                 /* 
                    Just 0 is however also acceptable as an octal prefix.
                  */
                 p++;
                 for ( ; isdigit(UCHAR(*p)) && (UCHAR(*p) < '8');  p++) { /* INTL: digit */
                     /* Empty loop body. */
                 }
         }
     }

     /* 
        A valid sequence of digits has been found. What remains is to 
        check that there is no garbage after it.
      */

     for ( ;  isspace(UCHAR(*p));  p++) { /* INTL: ISO space. */
         /* Empty loop body. */
     }
     if (*p != 0) {
         goto badInteger;
     }

     /*
        The string is a valid bigint! 
        Free the old internalRep, then set the new type.
      */

     if ((objPtr->typePtr != NULL) && 
         (objPtr->typePtr->freeIntRepProc != NULL)) {
             objPtr->typePtr->freeIntRepProc(objPtr);
     }

     objPtr->typePtr = &tclBigIntType;
     objPtr->internalRep.twoPtrValue.ptr1 = NULL;
     objPtr->internalRep.twoPtrValue.ptr2 = NULL;

     return TCL_OK;


   badInteger:
     /* 
        If we get here, the string is NOT a valid bigint. 
        Generate an appropriate error message if "interp" is non-null.
      */

     if (interp != NULL) {
         /*
            Must copy string before resetting the result in case a caller
            is trying to convert the interpreter's result to an int.
          */

         Tcl_DString ds;
         Tcl_DStringInit(&ds);
         Tcl_DStringAppend(&ds, "expected integer (possibly big) but got \"", -1);
         Tcl_DStringAppend(&ds, string, length);
         Tcl_DStringAppend(&ds, "\" instead", -1);
         Tcl_DStringResult(interp, &ds);
     }

     return TCL_ERROR;
 }


 /* 
  * The following functions and definitions implement a "SLM-hexadecimal" 
  * subtype of bigint. The internal representation of that type is a 
  * straightforward string, but with a slightly different structure from 
  * the normal string representations of integers. The first character is 
  * the sign, and it is one of '+', '0', and '-'. If it is '0' then the 
  * string ends there and the value is 0. If it is '+' or '-' then the 
  * integer is positive or negative respectively, and the remaining 
  * characters are the digits of the integer. The least significant 
  * digit comes first, and then the others in order of ascending 
  * significance. The SLM part of the name stands for "Sign, Least 
  * significant, Most significant" and highlights the order of these 
  * extremes.
  * 
  * The digit characters are 0123456789ABCDEF, as the "hexadecimal" part
  * of the name indicates. The last (most significant) digit must not be 0.
  * 
  * Examples:
  * 
  *   Decimal   SLM-hexadecimal
  *   -------   ---------------
  *        0    0
  *        1    +1
  *        2    +2
  *       10    +A
  *       16    +01
  *      256    +001
  *       -1    -1
  *      -10    -A
  *     1000    +8E3
  */


 /*
  *----------------------------------------------------------------------
  *
  * FreeBigIntSLMHexadecimalInternalRep --
  *
  *      Deallocate the storage associated with a SLM-hexadecimal bigint 
  *      data object's internal representation.
  *
  * Results:
  *      None.
  *
  * Side effects:
  *      The twoPtrValue.ptr2 of the internal representation of 
  *      "objPtr" should not be NULL and we furthermore assume that 
  *      it points to a string. The subtype (twoPtrValue.ptr1) is 
  *      changed to NULL.
  *
  *----------------------------------------------------------------------
  */

 static void
 FreeBigIntSLMHexadecimalInternalRep(objPtr)
     Tcl_Obj *objPtr;            /* Object with internal rep to free. */
 {
     ckfree( (char *) BIGINT_VALUE(objPtr) );
     objPtr->internalRep.twoPtrValue.ptr1 = NULL;
 }


 /*
  *----------------------------------------------------------------------
  *
  * DupBigIntSLMHexadecimalInternalRep --
  *
  *      Initialize the internal representation of a new Tcl_Obj to a
  *      copy of the internal representation of an existing bigint object.
  *
  * Results:
  *      None.
  *
  * Side effects:
  *      copyPtr's internal rep is set to a copy of srcPtr's internal
  *      representation.
  *
  *----------------------------------------------------------------------
  */

 static void
 DupBigIntSLMHexadecimalInternalRep(srcPtr, copyPtr)
     Tcl_Obj *srcPtr;    /* Object with internal rep to copy.  Must
                            have an internal rep of type "bigint", 
                            subtype "SLM-hexadecimal". */
     Tcl_Obj *copyPtr;   /* Object with internal rep to set.  Must
                            not currently have an internal rep.*/
 {
     ---
     if (GET_SUBTYPE(srcPtr) == NULL) {
         copyPtr->internalRep.twoPtrValue.ptr1 = NULL;
         copyPtr->internalRep.twoPtrValue.ptr2 = NULL;
     } else {
         SUBTYPE_ACTION(dupIntRepProc,srcPtr)(srcPtr,copyPtr);
     }
 }





 void
 CarryIntoSLM (dsPtr, offset, radix, shift, carry)
   Tcl_DString  *dsPtr;   /* Pointer to the dynamic string structure 
                             into which new data will be carried. */
   int           offset;  /* Offset into dynamic string of first digit. */
   unsigned char radix;   /* The radix used in the dynamic string. */
   long          shift;   /* The factor by which the data already in 
                             the string should be multiplied. */
   long          carry;   /* The carry to add after having multiplied. */
 {
     register char *p =   /* Points to current digit. */
                      Tcl_DStringValue(dsPtr) + offset;
     char newchar;

     while (*p != 0) {
         carry += shift * (*p - '0');
         *p = '0' + carry % radix;
         carry = carry / radix;
         p++;
     }
     while (carry > 0) {
         newchar = '0' + carry % radix;
         Tcl_DStringAppend(dsPtr, &newchar, 1);
         carry = carry / radix;
     }
 }


 /*
  *----------------------------------------------------------------------
  *
  * UpdateStringOfBigInt_SLMHexadecimal --
  *
  *      Update the string representation for a big integer object 
  *      of subtype SLM-hexadecimal.
  *      
  *      An existing old string rep is not freed so storage will be 
  *      lost if this has not already been done. A new string is 
  *      allocated.
  *
  * Results:
  *      None.
  *      
  * Side effects:
  *      The twoPtrValue.ptr2 of the internal representation of 
  *      "objPtr" should not be NULL and we furthermore assume that 
  *      it points to a valid SLM-hexadecimal string.
  *
  *----------------------------------------------------------------------
  */

 static void
 UpdateStringOfBigInt_SLMHexadecimal(objPtr)
     Tcl_Obj *objPtr;      /* Object with string rep to update. */
 {
     Tcl_DString temp;     /* Temporary storage for the decimal digits.
                              They appear here in least-to-most order. */
     char     *hexPtr;     /* Internal object representation pointer. */
     int          len;     /* Length of computed string representation. */


     Tcl_DStringInit(&temp);

     /* 
        First the digits are converted to decimal. The more significant hex 
        digits are processed first.
      */

     hexPtr = (char *) objPtr->internalRep.twoPtrValue.ptr2;
     {
         register char *p;     /* Points to hex digit being shifted in. */
         p = hexPtr + strlen(hexPtr) - 1;
         while (p > hexPtr) {
             CarryIntoSLM (&temp, 0, 10, 16, 
                 (*p >= 'A') ? (*p - 'A' + 10) : (*p - '0')
             );
             p--;
         }
     }

     /* 
        Then memory is allocated to hold the string representation and 
        data are copied. It is necessary to reverse the order of digits.
      */

     len = Tcl_DStringLength(&temp);
     if (*hexPtr == '-') {
         len++;
     }
     objPtr->bytes = (char *) ckalloc((unsigned) len + 1);
     objPtr->length = len;

     {
         register char *p, *q;  /* Moving pointers. */
         p = Tcl_DStringValue(&temp);
         q = objPtr->bytes + len;
         *q = '\0';
         q--;
         while (*p != '\0') {
             *q = *p;
             p++;
             q--;
         }
         if (*hexPtr == '-') {
             *q = '-';
         }
     }

     Tcl_DStringFree(&temp);
 }

2004-04-15: Fixed formatting, was needed.