PS 3Sept2004
My entry:
#!/bin/sh # -*- tcl -*- # The next line is executed by /bin/sh, but not tcl \ exec wish "$0" ${1+"$@"} package require Tk package require Img proc ps_compress_bitstream { bits } { # Stream encoding: # bit-1bitrepeatcount # samebit-1bit longer count word # Turn into bit length bit length ... list: set prevbit "" set cnt 0 set repeat_list {} foreach bit [split $bits ""] { if {$cnt > 0 && $bit ne $prevbit} { lappend repeat_list $prevbit $cnt set cnt 0 } set prevbit $bit incr cnt } lappend repeat_list $prevbit $cnt foreach {bit len} $repeat_list { #Do the actual encoding: set bin [int_to_bin $len] set i 0 set bits [string range $bin 0 $i] set bin [string range $bin [expr {$i+1}] end] while { $bits ne "" } { set bits $bits[string repeat 0 [expr {$i+1-[string length $bits]}]] append out $bit $bits incr i set bits [string range $bin 0 $i] set bin [string range $bin [expr {$i+1}] end] } } return $out } proc ps_decompress_bitstream { compressed } { set lastbit "" set count "" set num 0 for {set i 0} {$i < [string length $compressed] } {incr i} { set bit [string index $compressed $i] if { $i>0 && $bit ne $lastbit } { append out [string repeat $lastbit [bin_to_int $count]] set count "" set num 1 } else { incr num } append count [string range $compressed [expr {$i+1}] [expr {$i+$num}]] set i [expr {$i+$num}] set lastbit $bit } append out [string repeat $lastbit [bin_to_int $count]] } proc photo'eq {im1 im2} { #-- returns 1 if both images are exactly equal, else 0 set h [image height $im1] if {[image height $im2] != $h} {return 0} set w [image width $im1] if {[image width $im2] != $w} {return 0} for {set y 0} {$y<$h} {incr y} { for {set x 0} {$x<$w} {incr x} { if {[$im1 get $x $y] ne [$im2 get $x $y]} {return 0} } } return 1 } proc photo'colors img { #-- return a list of {{r g b} n} tallies of pixel colors, #-- sorted decreasing by n (number of pixels of that color) set h [image height $img] set w [image width $img] for {set y 0} {$y<$h} {incr y} { for {set x 0} {$x<$w} {incr x} { set color [$img get $x $y] if {![info exists a($color)]} {set a($color) 0} incr a($color) } } foreach {color n} [array get a] {lappend tally [list $color $n]} lsort -decreasing -index 1 -integer $tally } proc binimg'encode img { set clrs [photo'colors $img] if {[llength $clrs] != 2} { return -code error "not a 2 color image" } set clr0 [lindex $clrs 0 0] set clr1 [lindex $clrs 1 0] set h [image height $img] set w [image width $img] set str "" for {set y 0} {$y<$h} {incr y} { for {set x 0} {$x<$w} {incr x} { set color [$img get $x $y] append str [string equal $color $clr1] } } foreach {r g b} $clr0 { set color0 [expr {$r<<16 | $g <<8 | $b}]; break } foreach {r g b} $clr1 { set color1 [expr {$r<<16 | $g <<8 | $b}]; break } # store image as <w><h><clr0><clr1><binimgdata> where w and h are shorts set binstr [binary format ssiib* $w $h $color0 $color1 $str] return $binstr } proc binimg'decode data { binary scan $data ssiib* w h color0 color1 clrs set img [image create photo -width $w -height $h] set clr(0) \#[format %.6x $color0] set clr(1) \#[format %.6x $color1] set i 0 set data "" set line "" foreach c [split $clrs {}] { lappend line $clr($c) if {[incr i] eq $w} { set i 0 lappend data $line set line "" } } $img put $data -to 0 0 return $img } proc int_to_bin { val } { binary scan [binary format i $val] b* bits return [string trimright $bits 0] } proc bin_to_int { bits } { set bits $bits[string repeat 0 [expr {32-[string length $bits]}]] binary scan [binary format b* $bits] i val return $val } proc ps_compress { file outfile } { set img [image create photo -file $file] binary scan [binimg'encode $img] b* bin_stream image delete $img set out [open $outfile w] fconfigure $out -translation binary set compressed [ps_compress_bitstream $bin_stream] puts -nonewline $out [binary format b* $compressed] close $out } proc ps_decompress { file outfile } { set in [open $file r] fconfigure $in -translation binary binary scan [read $in] b* compressed close $in set binimg [ps_decompress_bitstream $compressed] set img [binimg'decode [binary format b* $binimg]] $img write $outfile image delete $img } ps_compress p:/times12i.gif p:/times12i.gif.psz ps_decompress p:/times12i.gif.psz p:/times12i-res.gif ps_compress p:/courier12.gif p:/courier12.gif.psz ps_decompress p:/courier12.gif.psz p:/courier12-res.gif
RS To make this more compliant, I added the two wrapper procs:
proc ps_encode img {ps_compress_bitstream [binimg'encode $img]} proc ps_decode bits {binimg'decode [ps_decompress_bitstream $bits]}
but I don't see reductions of 50% as seen on the chat:
% string length [ps_encode image20] 1700 % string length [binimg'encode image20] 2163
where image20 is courier12.gif.
PS Yes - that is because you are measuring the string before it has been binary formated.
PS Attempt number two: now with Huffman(?) encoding for runlengths which occur 5 or more times.
This actually compresses castle.gif! My new size is 11442 bytes, which is 156 bytes less than the original file!
courier12 compresses to 671 bytes, times12i compresses to 718...
#!/bin/sh # -*- tcl -*- # The next line is executed by /bin/sh, but not tcl \ exec wish "$0" ${1+"$@"} package require Tk package require Img proc ps_compress_bitstream { data } { set rawlength [string length $data] binary scan $data b* bits # Stream encoding: # bit-1bitrepeatcount # samebit-1bit longer count word # Turn into bit length bit length ... list: set prevbit "" set cnt 0 set repeat_list {} foreach bit [split $bits ""] { if {$cnt > 0 && $bit ne $prevbit} { lappend repeat_list $prevbit $cnt if { [info exists freq($cnt)] } { incr freq($cnt) } else { set freq($cnt) 1 } set cnt 0 } set prevbit $bit incr cnt } lappend repeat_list $prevbit $cnt # Build the frequency table: Anything which occurs > 4 times gets an entry. # (4 determined by trial and error... should really be dynamic for each compression) set ftable {} foreach count [array names freq] { if { $freq($count) > 4 } { lappend ftable [list $count $freq($count)] } } set ftable [lsort -integer -index 1 -decreasing $ftable] set flipflop 0 set index 0 set codetable {} foreach f $ftable { set number [lindex $f 0] set count [lindex $f 1] #puts "Added $index $number ($count) $flipflop" lappend codetable $flipflop $number set codebook($number) $index set flipflop [expr {!$flipflop}] incr index } #puts "$index items in the table" set offset $index # Terminate the codebook lappend codetable $flipflop 0 [expr {!$flipflop}] 0 foreach {bit len} $codetable { #Do the encoding of the codebook: set bin [int_to_bin $len] set i 0 set bits [string range $bin 0 $i] set bin [string range $bin [expr {$i+1}] end] while { $bits ne "" } { set bits $bits[string repeat 0 [expr {$i+1-[string length $bits]}]] append out $bit $bits incr i set bits [string range $bin 0 $i] set bin [string range $bin [expr {$i+1}] end] } } #puts "offset $offset" foreach {bit len} $repeat_list { #Do the encoding of the image: if { [info exists codebook($len)] } { # Items in the codebook get their codebook entry set bin [int_to_bin $codebook($len)] } else { # Others get shifted out of the codebook number range: set bin [int_to_bin [expr {$len+$offset}]] } set i 0 set bits [string range $bin 0 $i] set bin [string range $bin [expr {$i+1}] end] while { $bits ne "" } { set bits $bits[string repeat 0 [expr {$i+1-[string length $bits]}]] append out $bit $bits incr i set bits [string range $bin 0 $i] set bin [string range $bin [expr {$i+1}] end] } } set out [binary format b* $out] set comprlen [string length $out] #puts "$rawlength -> $comprlen, yields [expr {$rawlength-$comprlen}] bytes." return $out } proc ps_decompress_bitstream { compressed } { binary scan $compressed b* compressed #First read the codebook. The codebook ends in two zero counts. set offset 0 set lastbit "" set count "" set num 0 array set codebook {} set codetable {} for {set i 0} {$i < [string length $compressed] } {incr i} { set bit [string index $compressed $i] if { $i>0 && $bit ne $lastbit } { set codebook($offset) [bin_to_int $count] #puts "$offset [bin_to_int $count]" if { $codebook($offset) == 0 } { # At the end of the codebook! unset codebook($offset) # Skip past the bit. incr i break } incr offset set count "" set num 1 } else { incr num } append count [string range $compressed [expr {$i+1}] [expr {$i+$num}]] set i [expr {$i+$num}] set lastbit $bit } set offset [llength [array names codebook]] #puts "Read $offset items from codebook" #puts [string range $compressed 0 $i] set lastbit "" set count "" set num 0 for {incr i} {$i < [string length $compressed] } {incr i} { set bit [string index $compressed $i] if { $i>0 && $bit ne $lastbit } { set cnt [bin_to_int $count] if { [info exists codebook($cnt)] } { set cnt $codebook($cnt) } else { set cnt [expr {$cnt-$offset}] } append out [string repeat $lastbit $cnt] set count "" set num 1 } else { incr num } append count [string range $compressed [expr {$i+1}] [expr {$i+$num}]] set i [expr {$i+$num}] set lastbit $bit } append out [string repeat $lastbit [bin_to_int $count]] return [binary format b* $out] } proc photo'eq {im1 im2} { #-- returns 1 if both images are exactly equal, else 0 set h [image height $im1] if {[image height $im2] != $h} {return 0} set w [image width $im1] if {[image width $im2] != $w} {return 0} for {set y 0} {$y<$h} {incr y} { for {set x 0} {$x<$w} {incr x} { if {[$im1 get $x $y] ne [$im2 get $x $y]} {return 0} } } return 1 } proc photo'colors img { #-- return a list of {{r g b} n} tallies of pixel colors, #-- sorted decreasing by n (number of pixels of that color) set h [image height $img] set w [image width $img] for {set y 0} {$y<$h} {incr y} { for {set x 0} {$x<$w} {incr x} { set color [$img get $x $y] if {![info exists a($color)]} {set a($color) 0} incr a($color) } } foreach {color n} [array get a] {lappend tally [list $color $n]} lsort -decreasing -index 1 -integer $tally } proc binimg'encode img { set clrs [photo'colors $img] if {[llength $clrs] != 2} { return -code error "not a 2 color image" } set clr0 [lindex $clrs 0 0] set clr1 [lindex $clrs 1 0] set h [image height $img] set w [image width $img] set str "" for {set y 0} {$y<$h} {incr y} { for {set x 0} {$x<$w} {incr x} { set color [$img get $x $y] append str [string equal $color $clr1] } } foreach {r g b} $clr0 { set color0 [expr {$r<<16 | $g <<8 | $b}]; break } foreach {r g b} $clr1 { set color1 [expr {$r<<16 | $g <<8 | $b}]; break } # store image as <w><h><clr0><clr1><binimgdata> where w and h are shorts set binstr [binary format ssiib* $w $h $color0 $color1 $str] return $binstr } proc binimg'decode data { binary scan $data ssiib* w h color0 color1 clrs set img [image create photo -width $w -height $h] set clr(0) \#[format %.6x $color0] set clr(1) \#[format %.6x $color1] set i 0 set data "" set line "" foreach c [split $clrs {}] { lappend line $clr($c) if {[incr i] eq $w} { set i 0 lappend data $line set line "" } } $img put $data -to 0 0 return $img } proc int_to_bin { val } { if { $val == 0 } { return 0 } binary scan [binary format i $val] b* bits return [string trimright $bits 0] } proc bin_to_int { bits } { set bits $bits[string repeat 0 [expr {32-[string length $bits]}]] binary scan [binary format b* $bits] i val return $val } proc ps_compress { file outfile } { set img [image create photo -file $file] set data [binimg'encode $img] image delete $img set out [open $outfile w] fconfigure $out -translation binary puts -nonewline $out [ps_compress_bitstream $data] close $out } proc ps_decompress { file outfile } { set in [open $file r] fconfigure $in -translation binary set compressed [read $in] close $in set binimg [ps_decompress_bitstream $compressed] set img [binimg'decode $binimg] $img write $outfile image delete $img } proc ps_encode img {ps_compress_bitstream [binimg'encode $img]} proc ps_decode bits {binimg'decode [ps_decompress_bitstream $bits]} foreach name {courier12 times12i castle} { puts "$name:" ps_compress $name.gif $name.gif.psz ps_decompress $name.gif.psz $name-res.gif } exit