Eratosthenes Sieve

Eratosthenes Sieve - an ancient method for finding primes.

DL I wrote this script to help my 5th grader understand Eratosthenes Sieve. The class handouts as well as the Java applets around the web didn't allow for resizing and reshaping. With small tables (i.e., up to 100) you can't get a feel for the sieve before you run out of primes that knock out multiples. And without being able to reshape the table, you can't see that large primes have simple patterns just like small primes.

Some things to try when running the script:

  • Try reshaping the table to an even number of columns, say 16, and click the first few entries in column 15. Now reshape the table to 17 columns. What happened to the patterns?
  • Try reshaping the table to an odd number of columns, say 17 and then selecting both 8 and 9.
  • Try selecting a number and one of its divisors say 20 and 4
  • Try selecting two numbers that have a common divisor.
  • Try selecting two numbers that are co-prime (have no common divisors).

 # Name:         sieve.tcl
 # Description:  Explore Eratosthenes Sieve
 # Created:      9/28/04
 set author      "Don Libes <[email protected]>"
 set version     1.2
 set versionDate 9/29/04

 package require Tk

 set fieldWidth         4 ;# provide enough space for 4 digit numbers
 tk_setPalette          #d8d8d8     ;# light gray
 set color(compositeBg) #abeaab     ;# light green
 set color(compositeFg) red
 set color(unknownFg)   black
 #set color(unknownBg)  defined dynamically later
 set divisors(all)      {} ;# all divisors selected so far
 set numMax             2  ;# max number displayed onscreen
 set font(max)          15
 set font(min)          5
 set font(face) Courier
 switch $tcl_platform(platform) "unix" {
    set font(size) -12
 } "windows" {
    set font(size) 10
 } "macintosh" {
    set font(size) 10
 }

 proc fontChange {incr} {
    global font

    # don't let it get too small
    if {(abs($font(size)) <= $font(min)) && ($incr == -1)} return

    # or too large
    if {(abs($font(size)) >= $font(max)) && ($incr == 1)} return

    # handle negative font specs by reversing sign of incr
    if {$font(size) < 0} {
        set incr [expr {0 - $incr}]
    }

    incr font(size) $incr

    if {abs($font(size)) == $font(max)} {
        .f.plus  config -state disabled
    } else {
        .f.plus  config -state normal
    }
    if {abs($font(size)) == $font(min)} {
        .f.minus config -state disabled
    } else {
        .f.minus config -state normal
    }
    
    fontSizeUpdate

    # On Windows, if the window is in a zoomed state, changing the
    # font size changes the size of the (gridded) window WITHOUT a
    # <configure> event!  Furthermore, you have to flush the font
    # change (with update) in order to get the new window size.
    update
    configReal
 }

 proc fontSizeUpdate {} {
    .c config -font "$::font(face) $::font(size)"
 }

 proc clear {} {
    for {set i 2} {$i < $::numMax} {incr i} {
        .c tag configure $i -background $::color(unknownBg)
    }

    set ::divisors(all) {}
    configReal
 }

 proc config {} {
    # delay resize handling to reduce jitter

    cursorBusy

    catch {after cancel $configId}
    set configId [after 200 configReal]
 }

 proc configReal {args} {
    scan [wm geometry .] "%dx%d" width height

    # reset everything
    .c delete 1.0 end
    set divs $::divisors(all)
    unset ::divisors
    set ::divisors(all) {}
    for {set i 2} {$i < $::numMax} {incr i} {
        .c tag delete $i
    }

    set i 1
    for {set h 0} {$h < $height} {incr h} {
        numCreate $i
        incr i
        set w [expr {$::fieldWidth + 1}]
        while {1} {
            incr w [expr {$::fieldWidth+1}]
            if {$w > $width} break
            numCreate $i
            incr i
        }
        .c insert end "\n"
    }
    set ::numMax $i

    foreach d $divs {
        tagClick $d
    }

    cursorIdle
 }

 proc numCreate {i} {
    if {$i == 1} {
        # avoid tagging very first character to work around text widget bug
        .c insert end " "
        .c insert end [format "%[expr {$::fieldWidth - 1}]d " $i] $i
    } else {
        .c insert end [format "%${::fieldWidth}d " $i] $i
    }
    .c tag config $i -borderwidth 2
    .c tag bind $i <1>     "tagClick $i;break"
    .c tag bind $i <Enter> "tagEnter $i;break"
    .c tag bind $i <Leave> "tagLeave $i;break"
    set ::divisors($i) {}
 }

 proc tagEnter {n} {
    if {$n == 1} {
        set ::label "1 is neither prime nor composite"
    } else {
        set factors [factors $n]
        if {1 == [llength $factors]} {
            set ::label "$n is prime"
        } else {
            set ::label "$n = [join $factors *]"
        }
    }

    set mult $n
    while {$mult <= $::numMax} {
        .c tag configure $mult -foreground $::color(compositeFg)
        incr mult $n
    }
 }

 proc tagLeave {n} {
    set mult $n
    while {$mult <= $::numMax} {
        .c tag configure $mult -foreground $::color(unknownFg)
        incr mult $n
    }
 }

 # on button click, mark/unmark multiples
 proc tagClick {divisor} {
    set d $divisor

    if {$divisor == 1} return

    if {-1 == [lsearch $::divisors(all) $divisor]} {
        .c tag configure $d -relief ridge
        .c tag configure $d -relief ridge
        # strike out multiples of this divisor
        while {1} {
            incr d $divisor
            if {$d > $::numMax} break
            .c tag configure $d -background $::color(compositeBg)
            lappend ::divisors($d) $divisor
        }
        lappend ::divisors(all) $divisor
    } else {
        .c tag configure $d -relief flat

        # unstrike out multiples
        while {1} {
            incr d $divisor
            if {$d > $::numMax} break
            set i [lsearch $::divisors($d) $divisor]
            set ::divisors($d) [lreplace $::divisors($d) $i $i]
            if {0 == [llength $::divisors($d)]} {
                .c tag configure $d -background $::color(unknownBg)
            }
        }
        set i [lsearch $::divisors(all) $divisor]
        set ::divisors(all) [lreplace $::divisors(all) $i $i]
    }
 }

 proc factors {n} {
    set buf {}
    set limit [expr sqrt($n)]
    for {set d 2} {$d <= $limit} {incr d} {
        while {$n % $d == 0} {
            set n [expr {$n/$d}]
            lappend buf $d
        }
    }
    if {$n != 1} {
        lappend buf $n
    }
    return $buf
 }

 proc about {} {
    set w .about
    if {[winfo exists $w]} {
        wm deiconify $w
        raise $w
        return
    }
    toplevel     $w
    wm title     $w "About Eratosthenes Sieve"
    wm iconname  $w "about sieve"
    wm resizable $w 0 0

    button $w.b -text Dismiss -command [list wm withdraw $w]

    label $w.title -text "Eratosthenes Sieve" -font "Times 16" \
        -borderwidth 10 -fg red
    label $w.version -text "Version $::version, Released $::versionDate"
    label $w.author -text "Written by Don Libes <[email protected]>"
    label $w.using -text "Using Tcl $::tcl_patchLevel,\
                                Tk $::tk_patchLevel"
    grid $w.title
    grid $w.version
    grid $w.author
    grid $w.using
    grid $w.b -sticky ew
 }

 proc cursorIdle {} {
    .c config -cursor arrow
 }

 proc cursorBusy {} {
    .c config -cursor watch
    update
 }

 proc help {} {
    if {[winfo exists .help]} {
        destroy .help
        return
    }

    toplevel .help
    wm title .help "Eratosthenes Sieve Help"
    wm iconname .help "Sieve help"

    scrollbar .help.sb -command {.help.text yview}
    text .help.text -width 80 -height 24 -yscroll {.help.sb set} -wrap word

    button .help.ok -text "OK" -command {destroy .help} -relief raised
    bind .help <Return> {destroy .help;break}
    grid .help.sb -row 0 -column 0 -sticky ns
    grid .help.text -row 0 -column 1 -sticky nsew
    grid .help.ok -row 1 -columnspan 2 -sticky ew  -padx 2 -pady 2

    # let text box only expand
    grid rowconfigure .help 0 -weight 1
    grid columnconfigure .help 1 -weight 1

    .help.text tag configure h1 -foreground blue

    .help.text insert end "Eratosthenes Sieve" h1
    .help.text insert end \n\n
    .help.text insert end "To find all primes:

 Step 1. Click on \"2\".
 Step 2. Click on the next larger number that is not highlighted.
 Step 3. Go back to step 2."
    .help.text insert end \n\n

    .help.text insert end "Fun Things To Try" h1
    .help.text insert end "\n\n"
    .help.text insert end "Change the window size to make patterns more evident or to show more/less numbers.  Try making the window 19 wide and then select 9 and 10.\n\n"

    .help.text insert end "Move the mouse over a number to highlight its multiples and display its prime factorization.  What number has the largest number of prime factors?  What number has the largest number of *different* prime factors? What are the smallest such numbers?\n\n"

    .help.text insert end "Fun Things to Think About" h1
    .help.text insert end "\n\n"
    .help.text insert end "Imagine extending the sieve a long, long way.  Do you think there are infinitely many primes?  Or do you think the sieve will eventually reach a point where all subsequent numbers are divisible by previous numbers?\n\n"

    .help.text insert end Warnings h1
    .help.text insert end \n\n
    .help.text insert end "When the window has been enlarged to show many numbers, some operations may take a long time on slow computers."

    switch {$::tcl_platform(platform)} "windows" {
        .help.text insert end \n
    }
 }

 wm minsize  . 1 1
 wm maxsize  . 999 999
 wm iconname . sieve
 wm title    . "Eratosthenes Sieve"
 wm protocol . WM_DELETE_WINDOW exit

 menu .m -tearoff 0
 .m add cascade -menu .m.file -label "File"
 .m add cascade -menu .m.edit -label "Edit"
 .m add cascade -menu .m.help -label "Help"

 menu .m.file -tearoff 0
 menu .m.edit -tearoff 0
 menu .m.help -tearoff 0

 .m.file add command -label "Exit"         -command exit
 .m.edit add command -label "Clear All"     -command clear
 .m.edit add command -label "Font Increase" -command {fontChange 1}
 .m.edit add command -label "Font Decrease" -command {fontChange -1}
 .m.help add command -label "About"         -command about
 .m.help add command -label "Help"          -command help
 . config -m .m

 frame  .f
 label  .f.l     -textvar label -relief ridge -width 30
 button .f.c     -text "Clear All" -command clear
 label  .f.font  -text "Font:"
 button .f.plus  -text "+" -command {fontChange 1}
 button .f.minus -text "-" -command {fontChange -1}

 grid .f.l     -column 0 -row 0 -sticky ens
 grid .f.c     -column 1 -row 0 -sticky wns
 grid .f.font  -column 2 -row 0 -sticky wns
 grid .f.plus  -column 3 -row 0 -sticky wns
 grid .f.minus -column 4 -row 0 -sticky wns

 grid .f       -column 0 -row 0 -sticky ewns
 text .c -setgrid 1 ;# -wrap word
 fontSizeUpdate
 set color(unknownBg) [.c cget -background]
 cursorIdle
 bind .c <1> break
 bind .c <B1-Motion> break
 grid .c -column 0 -row 1 -sticky ewns
 grid rowconfigure    . 1 -weight 1
 grid columnconfigure . 0 -weight 1

 bind .c <Configure> config

AM (15 march 2005) The language shootout (...) includes a test for a naive sieve algorithm. I decided to give it a go. I was happily surprised that with the code below, Tcl 8.4.1 and a computer that is no match for today's hardware (a Pentium II, 350 MHz and 64 MB RAM) the script only takes some 2 seconds to run with N=4, meaning counting the primes in the range 1 - 160,000. The results may be off by 1, as the script includes the number 1 as a prime. The shootout requires counting in three ranges for some odd but undisclosed reason. Hence it will print three numbers :).

IG The shootout requires counting in three ranges so we can do more work (without machine memory becoming a limiting factor).

Note: if you can make it run faster, please do and document how you did it.

 # nsieve.tcl --
 #    Attempt to make a simple Sieve of Erathostenes
 #    - part of the "language shootout"
 #
 proc countPrimes {n} {
     #
     # Auxiliary parameters
     #
     set nd  [expr {int(pow(2,$n)*10000)}]
     set nd2 [expr {int(pow(2,$n-1)*10000)}]
     set nd4 [expr {int(pow(2,$n-2)*10000)}]

     set maxi [expr {int(sqrt($nd))}]

     #
     # Create a list with $nd 1's - avoid shimmering
     #
     set data [expr {1}]
     set zero [expr {0}]

     set i 4
     while { $i > 0 } {
         set data [concat $data $data $data $data $data \
                          $data $data $data $data $data ]
         incr i -1
     }

     set i $n
     while { $i > 0 } {
         set data [concat $data $data]
         incr i -1
     }

     #
     # Now the sieve itself
     #
     set i 2
     while { $i <= $maxi } {
         set j [expr {$i-1}]
         if { [lindex $data $j] == 1 } {
             incr j $i
             while { $j < $nd } {
                 lset data $j $zero
                 incr j $i
             }
         }
         incr i
     }

     #
     # Count the number of ones
     #
     set i      0
     set count1 0
     set count2 0
     set count3 0
     foreach d $data {
         incr i
         if { $d == 1 } {
             incr count1
             if { $i <= $nd2 } { incr count2 }
             if { $i <= $nd4 } { incr count3 }
         }
     }
     return [list $count1 $count2 $count3]
 }

 #puts [time {countPrimes 2} 20]
 if { [llength $argv] != 1 } {
    puts "Usage: nsieve <number>"
 } else {
    puts [countPrimes [lindex $argv 0]]
 }

NEM: I managed to shave off a fraction of a second by changing the final foreach loop to use lsearch:

 proc countPrimesNEM {n} {
     #
     # Auxiliary parameters
     #
     set nd  [expr {int(pow(2,$n)*10000)}]
     set nd2 [expr {int(pow(2,$n-1)*10000)}]
     set nd4 [expr {int(pow(2,$n-2)*10000)}]

     set maxi [expr {int(sqrt($nd))}]

     #
     # Create a list with $nd 1's - avoid shimmering
     #
     set data [expr {1}]
     set zero [expr {0}]

     set i 4
     while { $i > 0 } {
         set data [concat $data $data $data $data $data \
                          $data $data $data $data $data ]
         incr i -1
     }

     set i $n
     while { $i > 0 } {
         set data [concat $data $data]
         incr i -1
     }

     #
     # Now the sieve itself
     #
     set i 2
     while { $i <= $maxi } {
         set j [expr {$i-1}]
         if { [lindex $data $j] == 1 } {
             incr j $i
             while { $j < $nd } {
                 lset data $j $zero
                 incr j $i
             }
         }
         incr i
     }

     #
     # Count the number of ones
     #
     set indices [lsearch -all -exact $data 1]
     set count1 [llength $indices]
     set count2 0; set count3 0
     foreach i $indices {
         if {$i <= $nd2 } { incr count2 }
         if {$i <= $nd4 } { incr count3 }
     }
     return [list $count1 $count2 $count3]
 }

Those concats look like a place for optimisation -- using string repeat doesn't seem to make any noticeable difference to the timing on my system (800MHz G4 iBook, 640MB RAM). Possibly some use of lsort at the end might speed up the count2/3 loop.

AM With the wonderful help from MS I was able to gain quite a bit of improvement: 1.5 seconds for N=4 instead of 2.2 - a gain of 40 to 50%. Here is the improved code:

 # nsieve.tcl --
 #    Second attempt to make a simple Sieve of Erathostenes
 #    - part of the "language shootout"
 #
 proc countPrimes {n} {
     #
     # Auxiliary parameters
     #
     set nd  [expr {int(pow(2,$n)*10000)}]
     set nd2 [expr {int(pow(2,$n-1)*10000)}]
     set nd4 [expr {int(pow(2,$n-2)*10000)}]

     set maxi [expr {int(sqrt($nd))}]

     #
     # Create a list with $nd 1's - avoid shimmering
     #
     # New (tip from Miguel Sofer)
     set data [split [string repeat 1 $nd] {}]
     lset data 0 0

     #
     # Now the sieve itself
     #
     set i 2
     while { $i <= $maxi } {
         set j $i
         if { [lindex $data $j] } {
             incr j $i
             while { $j < $nd } {
                 lset data $j 0
                 incr j $i
             }
         }
         incr i
     }

     # New (tip from Miguel Sofer)
     set count3 [string length [string map {0 {} { } {}} \
                    [lrange $data 0 [expr {$nd4-1}]]]]
     set count2 [string length [string map {0 {} { } {}} \
                    [lrange $data $nd4 [expr {$nd2-1}]]]]
     set count1 [string length [string map {0 {} { } {}} \
                    [lrange $data $nd2 end]]]
 
     set count2 [expr {$count2+$count3}]
     set count1 [expr {$count1+$count2}]
     return [list $count1 $count2 $count3]
 }

 #puts [time {countPrimes 4} 20]
 if { [llength $argv] != 1 } {
    puts "Usage: nsieve <number>"
 } else {
    puts [countPrimes [lindex $argv 0]]
 }

MS proposes (in order of expected impact; untested => modulo typos)

  • simplified inner and outer loops (fewer bytecodes):
     set i 1
     while { [incr i] <= $maxi } {
         set j $i
         if { [lindex $data $j] } {
             while { [incr j $i] < $nd } {
                 lset data $j 0
             }
         }
     }
  • a still faster counting at the end. Using the (efficient in 8.4) idiom for in-place replacement, we can avoid copying the list several times.
    set total [string length [string map {0 {} { } {}} $data]

    set data [lreplace $data[set $data {}] [expr {$nd2+1}] end]
    set upToNd2 [string length [string map {0 {} { } {}} $data]

    set data [lreplace $data[set $data {}] [expr {$nd4+1}] end]
    set upToNd4 [string length [string map {0 {} { } {}} $data]

    return [list $total $upToNd2 $upToNd4]

DKF: Here's my attempt (uses lrepeat) that's fairly fast:

 set a {}
 proc nsieve n {
    incr n
    global a
    if {[llength $a] < $n} {
       set a [lrepeat $n 0]
    }
    set count 0
    #Bit0=sieved,bit1=nonprime
    for {set i 2} {$i<$n} {incr i} {
       if {([set v [lindex $a $i]]&2)==0} {
          incr count
          lset a $i 1
       }
       if {!$v} {
          for {set j [expr {$i+$i}]} {$j<$n} {incr j $i} {
             lset a $j 3
          }
       }
    }
    puts "Primes up to\t[expr {$n-1}]\t$count"
 }
 proc dotest M {
    nsieve [expr {2**$M*10000}]
    nsieve [expr {2**($M-1)*10000}]
    nsieve [expr {2**($M-2)*10000}]
 }

ferrieux: Below is an incremental variant of Eratosthenes' so that you don't have to specify the limit in advance. Uses only addition and hash lookups/updates.

 set cur 1
 while {1} {
    incr cur
    if {![info exists np($cur)]} {
        puts "PRIME:$cur #PR:[array size primes] #NP:[array size np]"
        set primes($cur) $cur
        set lp [list $cur]
    } else {
        set lp $np($cur)
        unset np($cur)
    }
    foreach p $lp {
        set n  [incr primes($p) $p]
        lappend np($n) $p
    }
 }

DKF: I've put an implementation of the algorithm on the coroutine page (and in that command's manual page), not because it is efficient but because it is a nice demonstration.


AMG: The Sieve of Atkin [L1 ] is a more efficient method for finding prime numbers. It is also much harder to understand (but easier to pronounce!) than the Sieve of Eratosthenes [L2 ].

KPV: There's also Sieve of Sundaram which is a bit more efficient but also a bit harder to understand than the Sieve of Eratosthenes. I used to use it as a programming problem when running interviews at Google.

The Sieve of Eratosthenes is featured near the end of The Dark Tower III: The Wastelands, by Stephen King. "You have to prime the pump to get me going. Only my pump primes backwards."


DKF: An implementation of the sieve algorithm that uses some interesting features of dictionaries:

proc sieve n {
    set primes [list]
    if {$n < 2} {return $primes}
    set nums [dict create]
    for {set i 2} {$i <= $n} {incr i} {
        dict set nums $i ""
    }
    set next 2
    set limit [expr {sqrt($n)}]
    while {$next <= $limit} {
        for {set i $next} {$i <= $n} {incr i $next} {dict unset nums $i}
        lappend primes $next
        dict for {next -} $nums break
    }
    return [concat $primes [dict keys $nums]]
}

Of particular interest is the reasonably efficient way of picking out the next prime number to check, which uses dict for to avoid building extra large intermediate structures.

KPV: Check out Dijkstra's Prime Algorithm for a dynamic programming approach to the Sieve of Erastosthenes. It's not quite as efficient but uses much less auxiliary space.