Version 0 of Microbenchmarking Tcl 8.6

Updated 2011-01-29 22:36:32 by dkf

DKF: I've been experimenting with a benchmarking system that is a bit different from the main Tcl benchmarks. Because it's aimed more squarely at tackling very fast parts of Tcl, it includes a self-tuning system that attempts to factor out once-only compilation times and to ensure that the time to run each benchmark is on the order of a second (by tuning the iteration-count argument passed to the time command).

The benchmarks are:

  • loop1 10000 — A for loop in a proc that counts up to its parameter (10k). Acts as a comparison baseline for the other loops which all feature an extra increment, a call out to some external code, or both.
  • loop2 10000 — Includes a call to a do-nothing procedure.
  • loop3 10000 — Includes a call to a do-nothing TclOO method.
  • loop4 10000 — Includes an increment of a variable imported into the for loop's procedure; baseline for incrementing.
  • loop5 10000 — Increment, inside a called procedure, of a variable imported using the variable command.
  • loop6 10000 — Increment in method of instance variable imported using variable (method declared on instance).
  • loop7 10000 — Increment in method of declared instance variable (method declared on instance).
  • loop8 10000 — Increment in method of declared instance variable (method declared on class).
  • loop9 10000 — Increment in already-created coroutine. (Only on Tcl 8.6)

Code

# cps --
#
#	Estimate how many times per second a script can be evaluated and
#	print the figure on stdout.
#
# Arguments:
#	script	The Tcl script to benchmark. It is evaluated (repeatedly) in
#		the caller's scope via [uplevel 1].
#
# Results:
#	Prints a line of the form "<N> calls per second of: <script>". May
#	first print a warning when the estimated iteration count is below 50.
#
proc cps {script} {
    # Eat the script compilation costs
    uplevel 1 [list time $script]

    # Have a guess at how many iterations to run for around a second
    set s [uplevel 1 [list time $script 5]]
    set usec [lindex $s 0]
    if {$usec > 0} {
        set iters [expr {round(1.1/($usec/1e6))}]
    } else {
        # The calibration run was too quick to register any elapsed time;
        # dividing by it would fail, so start from a large count and let the
        # doubling loop below grow it as needed.
        set iters 1000000
    }
    if {$iters < 50} {
        puts "WARNING: number of iterations low"
    }
    # A script needing more than about 2.2 seconds per run makes the estimate
    # round down to zero. Timing zero iterations reports zero time, and
    # doubling zero never grows, so the loop below would never terminate.
    if {$iters < 1} {
        set iters 1
    }

    # The main timing run
    while 1 {
        set s [uplevel 1 [list time $script $iters]]
        # Only use the run if it was for at least a second, otherwise increase
        # the number of iterations and try again.
        if {[lindex $s 0]*$iters >= 1e6} {
            break
        }
        # Double the iteration count for the next attempt.
        incr iters $iters
    }

    # Produce the results: convert microseconds per iteration into
    # iterations per second.
    set cps [expr {round(1/([lindex $s 0]/1e6))}]
    puts "$cps calls per second of: [string trim $script]"
}

# loop1 --
#
#	Baseline benchmark: a for loop that counts i from 0 up to n with an
#	empty body.
#
# Arguments:
#	n	Number of loop iterations.
#
proc loop1 n {
    for {set i 0} {$i < $n} {incr i} {
    }
}
# doNothing --
#
#	Procedure with no arguments and an empty body; called from loop2.
#
proc doNothing {} {}
# add --
#
#	Links the namespace variable x into the procedure with [variable] and
#	increments it; called from loop5.
#
proc add {} {variable x; incr x}
# loop2 --
#
#	Same counting loop as loop1, but each iteration calls the empty
#	procedure doNothing.
#
# Arguments:
#	n	Number of loop iterations.
#
proc loop2 n {
    for {set i 0} {$i < $n} {incr i} {
        doNothing
    }
}
package require TclOO
# Create a TclOO object named "o" and define the per-object methods that the
# loop3, loop6 and loop7 benchmarks invoke.
oo::object create o
oo::objdefine o {
    # Empty method; invoked by loop3.
    method nothing {} {}
    # Brings x into scope with the variable command on every call, then
    # increments it; invoked by loop6.
    method add1 {} {variable x; incr x}
    # Declare y for the methods defined on this object, so add2 can use it
    # without a variable command in its body.
    variable y
    # Increments the declared variable y; invoked by loop7.
    method add2 {} {incr y}
}
# loop3 --
#
#	Counting loop in which each iteration invokes the empty method
#	"nothing" on the object o.
#
# Arguments:
#	n	Number of loop iterations.
#
proc loop3 n {
    for {set i 0} {$i < $n} {incr i} {
        o nothing
    }
}
# loop4 --
#
#	Counting loop that increments the namespace variable x on every
#	iteration. The variable is linked into the procedure once, before the
#	loop starts.
#
# Arguments:
#	n	Number of loop iterations.
#
proc loop4 n {
    variable x
    for {set i 0} {$i < $n} {incr i} {
        incr x
    }
}
# loop5 --
#
#	Counting loop in which each iteration calls the procedure add, which
#	links and increments the variable x itself.
#
# Arguments:
#	n	Number of loop iterations.
#
proc loop5 n {
    for {set i 0} {$i < $n} {incr i} {
        add
    }
}
# loop6 --
#
#	Counting loop in which each iteration invokes the method add1 on the
#	object o; add1 uses the variable command to reach x before
#	incrementing it.
#
# Arguments:
#	n	Number of loop iterations.
#
proc loop6 n {
    for {set i 0} {$i < $n} {incr i} {
        o add1
    }
}
# loop7 --
#
#	Counting loop in which each iteration invokes the method add2 on the
#	object o; add2 increments the declared variable y.
#
# Arguments:
#	n	Number of loop iterations.
#
proc loop7 n {
    for {set i 0} {$i < $n} {incr i} {
        o add2
    }
}
# Class Foo declares the variable x and a method add3 that increments it;
# "bar" is the instance of Foo used by loop8.
oo::class create Foo { variable x; method add3 {} {incr x} }
Foo create bar
# loop8 --
#
#	Counting loop in which each iteration invokes the class-defined method
#	add3 on the object bar.
#
# Arguments:
#	n	Number of loop iterations.
#
proc loop8 n {
    for {set i 0} {$i < $n} {incr i} {
        bar add3
    }
}

# Report the interpreter version and the platform details, so that the
# figures printed afterwards can be tied to a particular build.
puts "Tcl=[package require Tcl]"
parray tcl_platform

# Time each of the first eight benchmark loops in order, always passing 10000
# as the loop count.
foreach benchmark {loop1 loop2 loop3 loop4 loop5 loop6 loop7 loop8} {
    cps "$benchmark 10000"
}
# Do not leave the loop variable behind in the global scope.
unset benchmark
# The coroutine benchmark is only defined and run when the interpreter
# satisfies the version requirement 8.6.
if {[package vsatisfies [package require Tcl] 8.6]} {
    # Create the coroutine add4 up front. Its body yields 0 once at creation
    # and then loops forever, yielding the incremented value of its local
    # variable x each time it is resumed.
    coroutine add4 apply {{} {yield 0;while 1 {yield [incr x]}}}
    # loop9: counting loop in which each iteration resumes the coroutine add4.
    proc loop9 n {
        for {set i 0} {$i < $n} {incr i} {
            add4
        }
    }
    cps {loop9 10000}
}

Results

All of these results were measured on the same MacBook Pro running Mac OS X Leopard. All builds were optimized compiles and were threaded.

Tcl 8.5.2

Tcl=8.5.2
tcl_platform(byteOrder)   = littleEndian
tcl_platform(machine)     = i386
tcl_platform(os)          = Darwin
tcl_platform(osVersion)   = 9.8.0
tcl_platform(platform)    = unix
tcl_platform(pointerSize) = 4
tcl_platform(threaded)    = 1
tcl_platform(user)        = dkf
tcl_platform(wordSize)    = 4
1612 calls per second of: loop1 10000
229 calls per second of: loop2 10000
154 calls per second of: loop3 10000
1294 calls per second of: loop4 10000
146 calls per second of: loop5 10000
110 calls per second of: loop6 10000
140 calls per second of: loop7 10000
130 calls per second of: loop8 10000

Tcl 8.6b1.2

Tcl=8.6b1.2
tcl_platform(byteOrder)     = littleEndian
tcl_platform(machine)       = i386
tcl_platform(os)            = Darwin
tcl_platform(osVersion)     = 9.8.0
tcl_platform(pathSeparator) = :
tcl_platform(platform)      = unix
tcl_platform(pointerSize)   = 4
tcl_platform(threaded)      = 1
tcl_platform(user)          = dkf
tcl_platform(wordSize)      = 4
1448 calls per second of: loop1 10000
161 calls per second of: loop2 10000
105 calls per second of: loop3 10000
967 calls per second of: loop4 10000
116 calls per second of: loop5 10000
64 calls per second of: loop6 10000
99 calls per second of: loop7 10000
92 calls per second of: loop8 10000
155 calls per second of: loop9 10000