Version 10 of Binary image compression challenge

Updated 2004-09-03 13:39:17

Richard Suchenwirth 2004-09-02 - Discussion in the Tcl chatroom brought me to propose this challenge for a Tcl contest:

  • Given a set of moderate-sized black&white GIF images,
  • write a pair of encoder/decoder between photo image and a binary string
  • so that the decoding of the encoded image is equal to the original (lossless)
  • and the average length of the encoded strings is minimal

I prefer GIF here because it is lossless in itself, and can be read in by plain Tk. Bitmap images cannot be processed pixel-by-pixel, so they're excluded, although they're binary.

Test images (feel free to add more. The GIF file size is the initial target to beat):

courier12.gif - 1923 bytes:

http://mini.net/files/courier12.gif

times12i.gif - 1947 bytes:

http://mini.net/files/times12i.gif

castle.gif - 11598 bytes

http://www.100dollarhamburger.com/castle.gif


Links to solutions to this challenge:


Little helpers to compare and tally photo images:

 proc photo'eq {im1 im2} {
    #-- returns 1 if both images are exactly equal, else 0
    set h [image height $im1]
    if {[image height $im2] != $h} {return 0}
    set w [image width $im1]
    if {[image width $im2] != $w} {return 0}
    for {set y 0} {$y<$h} {incr y} {
        for {set x 0} {$x<$w} {incr x} {
            if {[$im1 get $x $y] ne [$im2 get $x $y]} {return 0}
        }
    }
    return 1
 }
 proc photo'colors img {
    #-- return a list of {{r g b} n} tallies of pixel colors,
    #-- sorted decreasing by n (number of pixels of that color)
    set h [image height $img]
    set w [image width  $img]
    for {set y 0} {$y<$h} {incr y} {
        for {set x 0} {$x<$w} {incr x} {
            set color [$img get $x $y]
            if {![info exists a($color)]} {set a($color) 0}
            incr a($color)
        }
    }
    foreach {color n} [array get a] {lappend tally [list $color $n]}
    lsort -decreasing -index 1 -integer $tally
 }

JH: Here is my solution, which doesn't compression at all (doesn't beat the GIF format, but is close, the 1923b becomes 2163b). It just represents the data in a small, raw form, but not compressed. It will hopefully provide someone with a starting point to apply compression. MS: castle.gif is 'compressed' to 15764 bytes by this.

 proc binimg'encode img {
     set clrs [photo'colors $img]
     if {[llength $clrs] != 2} { return -code error "not a 2 color image" }
     set clr0 [lindex $clrs 0 0]
     set clr1 [lindex $clrs 1 0]
     set h [image height $img]
     set w [image width  $img]
     set str ""
     for {set y 0} {$y<$h} {incr y} {
         for {set x 0} {$x<$w} {incr x} {
             set color [$img get $x $y]
             append str [string equal $color $clr1]
         }
     }
     foreach {r g b} $clr0 { set color0 [expr {$r<<16 | $g <<8 | $b}]; break }
     foreach {r g b} $clr1 { set color1 [expr {$r<<16 | $g <<8 | $b}]; break }
     # store image as <w><h><clr0><clr1><binimgdata> where w and h are shorts
     set binstr [binary format ssiib* $w $h $color0 $color1 $str]
     return $binstr
 }
 proc binimg'decode data {
     binary scan $data ssiib* w h color0 color1 clrs
     set img [image create photo -width $w -height $h]
     set clr(0) \#[format %.6x $color0]
     set clr(1) \#[format %.6x $color1]
     set i 0
     set data ""
     set line ""
     foreach c [split $clrs {}] {
         lappend line $clr($c)
         if {[incr i] eq $w} {
             set i 0
             lappend data $line
             set line ""
         }
     }
     $img put $data -to 0 0
     return $img
 }

DKF: The simplest cheat way is to use gzip to compress the GIF files themselves! Both images compress to somewhere in the region of 1600-1650 bytes. Can you do better?


kroc: here's my attempt, based on RLE. Only black points are stored, so I cheat a little bit ;^) However my result is 923 bytes for courier12.gif and 904 bytes for times12i.gif and bigger for castle.gif : 15421 bytes.

    package require Tk
    package require Img

    # Decompress filename
    proc rle2gif { fichier } {
        catch "image delete gif"
        image create photo gif
        set fin [open $fichier r]
        fconfigure $fin -encoding binary -translation binary
        set data [read $fin]
        close $fin
        set ln 0
        foreach ligne [split $data \x01] {
            set col 0
            set lres ""
            foreach octet [split $ligne {}] {
                binary scan $octet b8 res
                set pat [string range $res 0 4]
                set occ [string range $res 5 7]
                append lres [pixel $pat $occ]
            }
            foreach p [split $lres {}] {
                incr col
                if $p {
                    gif put black -to $col $ln
                }
            }
            incr ln
        }
        gif write [file rootname $fichier]-res.gif
    }

    # Compress filename
    proc gif2rle { fichier } {
        catch "image delete rle"
        image create photo rle -file $fichier
        set data [ascii rle]
        set CMP_RES ""
        foreach ligne $data {
            set PAT ""
            set occ 1
            while {[string length $ligne]} {
                if {[string first 1 $ligne] == -1} {
                    set ligne ""
                }
                set pat [string range $ligne 0 4]
                set ligne [string range $ligne 5 end]
                if {[string match $pat $PAT] && $occ < 7} {
                    incr occ
                } else {
                    if {[string length $PAT] && $occ > 0 && $occ < 8} {
                        append CMP_RES [octet $PAT $occ]
                    }
                    set PAT $pat
                    set occ 1
                }
            }
            if {[string length $PAT] && $occ > 0 && $occ < 8} {
                append CMP_RES [octet $PAT $occ]
            }
            append CMP_RES \x01
        }
        set fout [open [file rootname $fichier].rle w]
        fconfigure $fout -encoding binary -translation binary
        puts -nonewline $fout $CMP_RES
        close $fout
    }

    proc ascii { image } {
        set res ""
        set largeur [image width $image]
        set data [lindex [string map {; "" \" "" , ""} [$image data -format xpm]] end]
        set PNindex [lsearch $data #000000]
        if {$PNindex} {
            set PN [lindex $data [expr $PNindex -2]]
        } else {
            return
        }
        foreach ligne [split $data \n] {
            set l ""
            if {[string length $ligne] == $largeur} {
                foreach p [split $ligne {}] {
                    if { $p eq $PN } {
                        append l 1
                    } else {
                        append l 0
                    }
                }
                lappend res $l
            }
        }
        return $res
    }

    proc octet { pat occ } {
        switch $occ \
                1 "set N 001" 2 "set N 010" 3 "set N 011" 4 "set N 100" \
                5 "set N 101" 6 "set N  110" 7 "set N  111"
        return [binary format b8 ${pat}${N}]
    }

    proc pixel { pat occ } {
        switch $occ \
                001 "set N 1" 010 "set N 2" 011 "set N 3" 100 "set N 4" \
                101 "set N 5" 110 "set N  6" 111 "set N  7" default "set N 0"
        return [string repeat $pat $N]
    }

Arts and crafts of Tcl-Tk programming