Binary image compression challenge - Pascal's Entry

PS 3Sept2004

My entry:

 #!/bin/sh
 # -*- tcl -*-
 # The next line is executed by /bin/sh, but not tcl \
 exec wish "$0" ${1+"$@"}

 package require Tk
 package require Img
 proc ps_compress_bitstream { bits } {
    
    # Stream encoding:
    # bit-1bitrepeatcount
    # samebit-1bit longer count word
    
    # Turn into bit length bit length ... list:
    set prevbit ""
    set cnt 0
    set repeat_list {}
    foreach bit [split $bits ""] {
        if {$cnt > 0 && $bit ne $prevbit} {
            lappend repeat_list $prevbit $cnt
            set cnt 0
        }
        set prevbit $bit
        incr cnt        
    }
    lappend repeat_list $prevbit $cnt
    
    foreach {bit len} $repeat_list {
        #Do the actual encoding:
        set bin [int_to_bin $len]
        set i 0
        set bits [string range $bin 0 $i]
        set bin [string range $bin [expr {$i+1}] end]
        while { $bits ne "" } {
            set bits $bits[string repeat 0 [expr {$i+1-[string length $bits]}]]
            append out $bit $bits
            incr i
            set bits [string range $bin 0 $i]
            set bin [string range $bin [expr {$i+1}] end]
        }
    }
    return $out
 }
 
 proc ps_decompress_bitstream { compressed } {
    set lastbit ""
    set count ""
    set num 0
    for {set i 0} {$i < [string length $compressed] } {incr i} {
        set bit [string index $compressed $i]
        if { $i>0 && $bit ne $lastbit } {
            append out [string repeat $lastbit [bin_to_int $count]]
            set count ""
            set num 1
        } else {
            incr num
        }
        append count [string range $compressed [expr {$i+1}] [expr {$i+$num}]]
        set i [expr {$i+$num}]
        set lastbit $bit
    }
    append out [string repeat $lastbit [bin_to_int $count]]
 }

 proc photo'eq {im1 im2} {
    #-- returns 1 if both images are exactly equal, else 0
    set h [image height $im1]
    if {[image height $im2] != $h} {return 0}
    set w [image width $im1]
    if {[image width $im2] != $w} {return 0}
    for {set y 0} {$y<$h} {incr y} {
        for {set x 0} {$x<$w} {incr x} {
            if {[$im1 get $x $y] ne [$im2 get $x $y]} {return 0}
        }
    }
    return 1
 }
 proc photo'colors img {
    #-- return a list of {{r g b} n} tallies of pixel colors,
    #-- sorted decreasing by n (number of pixels of that color)
    set h [image height $img]
    set w [image width  $img]
    for {set y 0} {$y<$h} {incr y} {
        for {set x 0} {$x<$w} {incr x} {
            set color [$img get $x $y]
            if {![info exists a($color)]} {set a($color) 0}
            incr a($color)
        }
    }
    foreach {color n} [array get a] {lappend tally [list $color $n]}
    lsort -decreasing -index 1 -integer $tally
 }
 proc binimg'encode img {
     set clrs [photo'colors $img]
     if {[llength $clrs] != 2} { return -code error "not a 2 color image" }
     set clr0 [lindex $clrs 0 0]
     set clr1 [lindex $clrs 1 0]
     set h [image height $img]
     set w [image width  $img]
     set str ""
     for {set y 0} {$y<$h} {incr y} {
         for {set x 0} {$x<$w} {incr x} {
             set color [$img get $x $y]
             append str [string equal $color $clr1]
         }
     }
     foreach {r g b} $clr0 { set color0 [expr {$r<<16 | $g <<8 | $b}]; break }
     foreach {r g b} $clr1 { set color1 [expr {$r<<16 | $g <<8 | $b}]; break }
     # store image as <w><h><clr0><clr1><binimgdata> where w and h are shorts
     set binstr [binary format ssiib* $w $h $color0 $color1 $str]
     return $binstr
 }
 proc binimg'decode data {
     binary scan $data ssiib* w h color0 color1 clrs
     set img [image create photo -width $w -height $h]
     set clr(0) \#[format %.6x $color0]
     set clr(1) \#[format %.6x $color1]
     set i 0
     set data ""
     set line ""
     foreach c [split $clrs {}] {
         lappend line $clr($c)
         if {[incr i] eq $w} {
             set i 0
             lappend data $line
             set line ""
         }
     }
     $img put $data -to 0 0
     return $img
 }
 
 proc int_to_bin { val } {
    binary scan [binary format i $val] b* bits
    return [string trimright $bits 0]    
 }
 
 proc bin_to_int { bits } {
    set bits $bits[string repeat 0 [expr {32-[string length $bits]}]]    
    binary scan [binary format b* $bits] i val
    return $val
 }
 
 
 proc ps_compress { file outfile } {
    set img [image create photo -file $file]        
    binary scan [binimg'encode $img] b* bin_stream    
    image delete $img
    set out [open $outfile w]
    fconfigure $out -translation binary
    set compressed [ps_compress_bitstream $bin_stream]
    puts -nonewline $out [binary format b* $compressed]
    close $out        
 }
 
 proc ps_decompress { file outfile } {
    set in [open $file r]
    fconfigure $in -translation binary
    binary scan [read $in] b* compressed    
    close $in
    set binimg [ps_decompress_bitstream $compressed]
    set img [binimg'decode [binary format b* $binimg]]
    $img write $outfile
    image delete $img    
 }
 
 ps_compress p:/times12i.gif p:/times12i.gif.psz
 ps_decompress p:/times12i.gif.psz p:/times12i-res.gif

 ps_compress p:/courier12.gif p:/courier12.gif.psz
 ps_decompress p:/courier12.gif.psz p:/courier12-res.gif

RS To make this more compliant, I added the two wrapper procs:

 proc ps_encode img  {ps_compress_bitstream [binimg'encode $img]}
 proc ps_decode bits {binimg'decode [ps_decompress_bitstream $bits]}

but I don't see reductions of 50% as seen on the chat:

 % string length [ps_encode image20]
 1700
 % string length [binimg'encode image20]
 2163

where image20 is courier12.gif.

PS Yes - that is because you are measuring the string before it has been binary formated.


PS Attempt number two: now with Huffman(?) encoding for runlengths which occur 5 or more times.

This actually compresses castle.gif! My new size is 11442 bytes, which is 156 bytes less than the original file!

courier12 compresses to 671 bytes, times12i compresses to 718...

 #!/bin/sh
 # -*- tcl -*-
 # The next line is executed by /bin/sh, but not tcl \
 exec wish "$0" ${1+"$@"}

 package require Tk
 package require Img
 
 proc ps_compress_bitstream { data } {
    set rawlength [string length $data]
    binary scan $data b* bits

    # Stream encoding:
    # bit-1bitrepeatcount
    # samebit-1bit longer count word

    # Turn into bit length bit length ... list:
    set prevbit ""
    set cnt 0
    set repeat_list {}
    foreach bit [split $bits ""] {
        if {$cnt > 0 && $bit ne $prevbit} {
            lappend repeat_list $prevbit $cnt
            if { [info exists freq($cnt)] } {
                incr freq($cnt)
            } else {
                set freq($cnt) 1
            }
            set cnt 0
        }
        set prevbit $bit
        incr cnt
    }
    lappend repeat_list $prevbit $cnt

    # Build the frequency table: Anything which occurs > 4 times gets an entry.
    # (4 determined by trial and error... should really be dynamic for each compression)    
    set ftable {}
    foreach count [array names freq] {
        if { $freq($count) > 4 } {            
            lappend ftable [list $count $freq($count)]
        }
    }
    set ftable [lsort -integer -index 1 -decreasing $ftable]
    set flipflop 0
    set index 0
    set codetable {}
    foreach f $ftable {
        set number [lindex $f 0]
        set count [lindex $f 1]
        #puts "Added $index $number ($count) $flipflop"
        lappend codetable $flipflop $number
        set codebook($number) $index

        set flipflop [expr {!$flipflop}]
        incr index
    }
    #puts "$index items in the table"
    set offset $index
    # Terminate the codebook
    lappend codetable $flipflop 0 [expr {!$flipflop}] 0
    
    foreach {bit len} $codetable {
        #Do the encoding of the codebook:
        set bin [int_to_bin $len]
        set i 0
        set bits [string range $bin 0 $i]
        set bin [string range $bin [expr {$i+1}] end]
        while { $bits ne "" } {
            set bits $bits[string repeat 0 [expr {$i+1-[string length $bits]}]]
            append out $bit $bits
            incr i
            set bits [string range $bin 0 $i]
            set bin [string range $bin [expr {$i+1}] end]
        }
    }
    #puts "offset $offset"
    foreach {bit len} $repeat_list {
        #Do the encoding of the image:
        if { [info exists codebook($len)] } {
            # Items in the codebook get their codebook entry
            set bin [int_to_bin $codebook($len)]
        } else {
            # Others get shifted out of the codebook number range:
            set bin [int_to_bin [expr {$len+$offset}]]
        }
        set i 0
        set bits [string range $bin 0 $i]
        set bin [string range $bin [expr {$i+1}] end]
        while { $bits ne "" } {
            set bits $bits[string repeat 0 [expr {$i+1-[string length $bits]}]]
            append out $bit $bits
            incr i
            set bits [string range $bin 0 $i]
            set bin [string range $bin [expr {$i+1}] end]
        }
    }
    set out [binary format b* $out]
    set comprlen [string length $out]
    #puts "$rawlength -> $comprlen, yields [expr {$rawlength-$comprlen}] bytes."
    return $out
 }

 proc ps_decompress_bitstream { compressed } {
    
    binary scan $compressed b* compressed
    
    #First read the codebook. The codebook ends in two zero counts.
    set offset 0
    set lastbit ""
    set count ""
    set num 0
    array set codebook {}
    set codetable {}
    for {set i 0} {$i < [string length $compressed] } {incr i} {
        set bit [string index $compressed $i]
        if { $i>0 && $bit ne $lastbit } {
            set codebook($offset) [bin_to_int $count]
            #puts "$offset [bin_to_int $count]"
            if { $codebook($offset) == 0 } {
                # At the end of the codebook!
                unset codebook($offset)
                
                # Skip past the bit.
                incr i 
                break
            }
            incr offset
            set count ""
            set num 1
        } else {
            incr num
        }
        append count [string range $compressed [expr {$i+1}] [expr {$i+$num}]]
        set i [expr {$i+$num}]
        set lastbit $bit
    }    
    set offset [llength [array names codebook]]
    #puts "Read $offset items from codebook"
    #puts [string range $compressed 0 $i]
    set lastbit ""
    set count ""
    set num 0
    for {incr i} {$i < [string length $compressed] } {incr i} {
        set bit [string index $compressed $i]
        if { $i>0 && $bit ne $lastbit } {
            set cnt [bin_to_int $count]
            if { [info exists codebook($cnt)] } {
                set cnt $codebook($cnt)
            } else {
                set cnt [expr {$cnt-$offset}]
            }
            append out [string repeat $lastbit $cnt]
            set count ""
            set num 1
        } else {
            incr num
        }
        append count [string range $compressed [expr {$i+1}] [expr {$i+$num}]]
        set i [expr {$i+$num}]
        set lastbit $bit
    }
    append out [string repeat $lastbit [bin_to_int $count]]
    return [binary format b* $out]
    
 }

 proc photo'eq {im1 im2} {
    #-- returns 1 if both images are exactly equal, else 0
    set h [image height $im1]
    if {[image height $im2] != $h} {return 0}
    set w [image width $im1]
    if {[image width $im2] != $w} {return 0}
    for {set y 0} {$y<$h} {incr y} {
        for {set x 0} {$x<$w} {incr x} {
            if {[$im1 get $x $y] ne [$im2 get $x $y]} {return 0}
        }
    }
    return 1
 }
 proc photo'colors img {
    #-- return a list of {{r g b} n} tallies of pixel colors,
    #-- sorted decreasing by n (number of pixels of that color)
    set h [image height $img]
    set w [image width  $img]
    for {set y 0} {$y<$h} {incr y} {
        for {set x 0} {$x<$w} {incr x} {
            set color [$img get $x $y]
            if {![info exists a($color)]} {set a($color) 0}
            incr a($color)
        }
    }
    foreach {color n} [array get a] {lappend tally [list $color $n]}
    lsort -decreasing -index 1 -integer $tally
 }
 proc binimg'encode img {
     set clrs [photo'colors $img]
     if {[llength $clrs] != 2} { return -code error "not a 2 color image" }
     set clr0 [lindex $clrs 0 0]
     set clr1 [lindex $clrs 1 0]
     set h [image height $img]
     set w [image width  $img]
     set str ""
     for {set y 0} {$y<$h} {incr y} {
         for {set x 0} {$x<$w} {incr x} {
             set color [$img get $x $y]
             append str [string equal $color $clr1]
         }
     }
     foreach {r g b} $clr0 { set color0 [expr {$r<<16 | $g <<8 | $b}]; break }
     foreach {r g b} $clr1 { set color1 [expr {$r<<16 | $g <<8 | $b}]; break }
     # store image as <w><h><clr0><clr1><binimgdata> where w and h are shorts
     set binstr [binary format ssiib* $w $h $color0 $color1 $str]
     return $binstr
 }
 proc binimg'decode data {
     binary scan $data ssiib* w h color0 color1 clrs
     set img [image create photo -width $w -height $h]
     set clr(0) \#[format %.6x $color0]
     set clr(1) \#[format %.6x $color1]
     set i 0
     set data ""
     set line ""
     foreach c [split $clrs {}] {
         lappend line $clr($c)
         if {[incr i] eq $w} {
             set i 0
             lappend data $line
             set line ""
         }
     }
     $img put $data -to 0 0
     return $img
 }

 proc int_to_bin { val } {
    if { $val == 0 } {
        return 0
    }
    binary scan [binary format i $val] b* bits
    return [string trimright $bits 0]
 }

 proc bin_to_int { bits } {
    set bits $bits[string repeat 0 [expr {32-[string length $bits]}]]
    binary scan [binary format b* $bits] i val
    return $val
 }

 proc ps_compress { file outfile } {
    
    set img [image create photo -file $file]
    set data [binimg'encode $img]
    image delete $img
    
    set out [open $outfile w]
    fconfigure $out -translation binary
    puts -nonewline $out [ps_compress_bitstream $data]
    
    close $out
 }

 proc ps_decompress { file outfile } {
    set in [open $file r]
    fconfigure $in -translation binary
    set compressed [read $in]
    close $in
    
    set binimg [ps_decompress_bitstream $compressed]
    set img [binimg'decode $binimg]
    $img write $outfile
    
    image delete $img
 }

 proc ps_encode img  {ps_compress_bitstream [binimg'encode $img]}
 proc ps_decode bits {binimg'decode [ps_decompress_bitstream $bits]}

 foreach name {courier12 times12i castle} {
    puts "$name:"
    ps_compress $name.gif $name.gif.psz
    ps_decompress $name.gif.psz $name-res.gif
 }
 exit