Version 19 of foriter - a loop command

Updated 2006-03-14 16:01:44

Sarnold -- 2005/12/13

Typically, when you do some list processing, you can use the foreach command which is efficient, but a construct like:

 for {set i 0} {$i<[llength $mylist]} {incr i} {...}

is slow because it has to call llength at each iteration. I admit this observation is not new. A construct like:

 for {set i 0;set len [llength $mylist]} {$i<$len} {incr i} {...}

would be faster, but less readable as there is a new variable that does not mean anything in the rest of the program.

MG Can you not just do something like this:

  for {set i 0} "\$i < [llength $myList]" {incr i} {...}

so that the llength is only evaluated once (at the very beginning), and its result is used in future (for each loop)?

In Python, you can get more speed by the range() function (see also Integer range generator):

 for i in range(len(l)):
     # some code

I submit a new command called foriter that mimics Python's functionality:

 foriter loopvar ?start? end ?increment? body

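 loopvar - the name of a variable (existing or not) that holds the counter
 start - the first value of the counter; defaults to 0
 end - the exclusive bound: the loop stops once the counter reaches it
 increment - the step added to the counter after each iteration; defaults to 1
 body - the script evaluated on each iteration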

Then,

 foriter i start end increment body

is equivalent to:

 for {set i $start} {$i<$end} {incr i $increment} $body

Some benchmarks show that it is faster than for in many cases.


LV So using foriter with the original example:

 foriter i 0 [llength $mylist] {...}

and the llength only executes once, right?

Sarnold Exactly! And you may even omit the start argument:

 foriter i [llength $mylist] {...}

KPV Two comments: first, calls to [llength] are extremely fast, probably just as fast as a variable assignment. Second, I bet that most of the time when a loop goes from 0 to [llength...] there will be, very early in the loop body, a call to [lindex...]. Thus, I believe that the construct that is really needed is a variation on foreach so that it has a counter variable.

Sarnold You are right, a foreach alternative is certainly better. But I still find Python's syntax appealing because it adds sugar.

Well, every programmer has been taught the for syntax early in his/her training, but I am *bored* with this syntax because I have to type too much. That is the major reason.


Benchmarks :

    # Benchmark: compare the loop overhead of foriter with the built-in for.
    # Load the compiled foriter command from foriter.dll.
    load foriter.dll
    # Each {nb times} pair gives the number of loop iterations (nb) and how
    # many times [time] repeats the measured script (times).
    foreach {nb times} {100 100 1000 10 10000 5} {
        puts "foriter: $nb increments"
        # Empty body, so only the cost of driving the loop is measured.
        puts [time {foriter i $nb {}} $times]
        puts "for: $nb increments"
        # Same iteration count and empty body, using the built-in for.
        # NOTE(review): this runs at global scope; timings inside a proc
        # may differ for the built-in for - confirm before drawing conclusions.
        puts [time {for {set i 0} {$i<$nb} {incr i} {}} $times]
    }
    puts OK

Results :

    foriter: 100 increments
    168 microseconds per iteration
    for: 100 increments
    225 microseconds per iteration
    foriter: 1000 increments
    1730 microseconds per iteration
    for: 1000 increments
    2115 microseconds per iteration
    foriter: 10000 increments
    16785 microseconds per iteration
    for: 10000 increments
    21354 microseconds per iteration
    OK

Here is an implementation in C:

2006-03-14 - I updated the command to handle the errorInfo message better

    critcl::ccode {
        #include <stdio.h>
    }

    #package provide foriter 1.0

    critcl::ccommand foriter {dummy interp objc objv} {
        /*
         * foriter varname ?start? end ?increment? body
         *
         * Sets the variable named by objv[1] to start (0 when omitted), then
         * evaluates body while the counter has not reached end, adding
         * increment (1 when omitted) to the variable after every pass.
         * The counter is re-read from the variable after each evaluation of
         * body, so changes that body makes to the variable affect the loop.
         *
         * Completion codes:
         *   TCL_OK     the loop ran to its end or body invoked break; the
         *              interpreter result is reset to empty.
         *   TCL_ERROR  wrong argument count, non-integer start/end/increment,
         *              zero increment, a range that runs against the
         *              increment, a failure reading or writing the loop
         *              variable, or an error raised by body.
         *   other      any other code produced by body (e.g. TCL_RETURN) is
         *              handed back to the caller untouched, as the built-in
         *              for command does.
         */
        int result = TCL_OK;
        Tcl_Obj *obj_body = NULL;
        Tcl_Obj *obj_counter = NULL;
        Tcl_Obj *obj_stored = NULL;
        /* the int values of the loop range */
        int start = 0;
        int end;
        int increment = 1;
        /* room for the fixed text plus the longest decimal int */
        char errorInfo[64];

        if (objc<4 || objc>6) {
            Tcl_WrongNumArgs(interp, 1, objv, "varname ?start? end ?increment? body");
            return TCL_ERROR;
        }

        if (objc == 4) {
            /* foriter varname end body : start defaults to 0 */
            result = Tcl_GetIntFromObj(interp, objv[2], &end);
            if (result != TCL_OK) {
                return result;
            }
        } else {
            /* the user provided 'start' as 2nd argument,
               and the 'end' of the range is then the 3rd argument */
            result = Tcl_GetIntFromObj(interp, objv[2], &start);
            if (result != TCL_OK) {
                return result;
            }

            result = Tcl_GetIntFromObj(interp, objv[3], &end);
            if (result != TCL_OK) {
                return result;
            }

            if (objc == 6) {
                result = Tcl_GetIntFromObj(interp, objv[4], &increment);
                if (result != TCL_OK) {
                    return result;
                }
            }
        }
        /* the body is always the last argument */
        obj_body = objv[objc-1];

        /* validation tests of the number range */
        if (increment == 0) {
            /* a zero increment would never reach the end of the range */
            Tcl_SetObjResult(interp, Tcl_NewStringObj(
            "cannot increment by zero", -1));
            return TCL_ERROR;
        }

        /* Compare directly instead of multiplying (end-start) by increment,
           which could overflow for large ranges. */
        if ((increment > 0 && end < start) || (increment < 0 && end > start)) {
            /* the iterating range goes the other way
               than incrementation does */
            Tcl_SetObjResult(interp, Tcl_NewStringObj(
            "invalid range : an endless loop would occur", -1));
            return TCL_ERROR;
        }

        /*
         * Initialize the loop variable.  The start argument can be stored
         * as it is: the variable merely takes another reference, and the
         * object is never modified in place while it is shared.  Holding
         * our own reference around the call guarantees that a freshly
         * created object is released even when the assignment fails.
         */
        obj_counter = (objc == 4) ? Tcl_NewIntObj(0) : objv[2];
        Tcl_IncrRefCount(obj_counter);
        obj_stored = Tcl_ObjSetVar2(interp, objv[1], NULL, obj_counter, TCL_LEAVE_ERR_MSG);
        Tcl_DecrRefCount(obj_counter);
        if (obj_stored == NULL) {
            return TCL_ERROR;
        }

        /* A negative increment counts downwards, so the comparison flips. */
        while ((increment > 0) ? (start < end) : (start > end)) {
            result = Tcl_EvalObjEx(interp, obj_body, 0);
            if ((result != TCL_OK) && (result != TCL_CONTINUE)) {
                if (result == TCL_ERROR) {
                    /* Tcl 8.6 and later only expose the error line through
                       Tcl_GetErrorLine(); older versions only have the field. */
    #if (TCL_MAJOR_VERSION > 8) || ((TCL_MAJOR_VERSION == 8) && (TCL_MINOR_VERSION >= 6))
                    sprintf(errorInfo, "\n    (\"foriter\" body line %d)", Tcl_GetErrorLine(interp));
    #else
                    sprintf(errorInfo, "\n    (\"foriter\" body line %d)", interp->errorLine);
    #endif
                    Tcl_AddErrorInfo(interp, errorInfo);
                }
                /* error, break, return or a custom code : leave the loop */
                break;
            }

            /* re-read the counter : the body may have changed it */
            obj_counter = Tcl_ObjGetVar2(interp, objv[1], NULL, TCL_LEAVE_ERR_MSG);
            if (obj_counter == NULL) {
                return TCL_ERROR;
            }

            result = Tcl_GetIntFromObj(interp, obj_counter, &start);
            if (result != TCL_OK) {
                return result;
            }

            start += increment;

            if (Tcl_IsShared(obj_counter)) {
                /* someone else references the value : never modify it in
                   place, hand a fresh object to the variable instead */
                obj_counter = Tcl_NewIntObj(start);
            } else {
                /* the variable is the only owner : update in place */
                Tcl_SetIntObj(obj_counter, start);
            }

            /* setting the variable (this also fires write traces); our
               temporary reference is dropped on success and on failure,
               so nothing is leaked whichever way the call ends */
            Tcl_IncrRefCount(obj_counter);
            obj_stored = Tcl_ObjSetVar2(interp, objv[1], NULL, obj_counter, TCL_LEAVE_ERR_MSG);
            Tcl_DecrRefCount(obj_counter);
            if (obj_stored == NULL) {
                return TCL_ERROR;
            }
        }
        /* ending of the loop */

        if (result == TCL_BREAK) {
            /* break only terminates the loop, it is not an abnormal exit */
            result = TCL_OK;
        }
        if (result == TCL_OK) {
            Tcl_ResetResult(interp);
        }
        /* TCL_ERROR, TCL_RETURN and custom codes reach the caller as they are */
        return result;
    }

wdb My approach is slower, but it is pure Tcl. The procedure range returns a list of integers or floats that is easy to iterate over:

 # range --
 #   Return a list of numbers (integers or floats).
 #
 #   range end              -> 0, 1, ... up to but excluding end
 #   range start end        -> start, start+1, ... up to but excluding end
 #   range start end step   -> start, start+step, ... up to but excluding end
 #
 #   A negative step counts downwards (the list stops before reaching end).
 #   Raises an error for a wrong number of arguments or a zero step, both of
 #   which would otherwise fail obscurely or never terminate.
 proc range {num args} {
    switch [llength $args] {
        0 {
            set from 0
            set to $num
            set step 1
        }
        1 {
            set from $num
            # take the element itself rather than the one-element list
            set to [lindex $args 0]
            set step 1
        }
        2 {
            set from $num
            lassign $args to step
        }
        default {
            return -code error "wrong # args: should be \"range ?start? end ?step?\""
        }
    }
    if {$step == 0} {
        # a zero step would never reach the end of the range
        return -code error "step must not be zero"
    }
    set result [list]
    if {$step > 0} {
        for {set i $from} {$i < $to} {set i [expr {$i + $step}]} {
            lappend result $i
        }
    } else {
        # counting down: the termination test is mirrored
        for {set i $from} {$i > $to} {set i [expr {$i + $step}]} {
            lappend result $i
        }
    }
    return $result
 }
 % range 5
 0 1 2 3 4
 %range 1 6
 1 2 3 4 5
 %range 1 3 .5
 1 1.5 2.0 2.5
 % foreach i [range 5] {puts -nonewline "($i) "}
 (0) (1) (2) (3) (4) %

Category Command | Category Control Structure | Category Critcl