Version 3 of Reading image type and dimensions

Updated 2011-02-21 21:00:11 by paul

AMG: I collected code from other Wiki pages on reading image dimensions of various formats (GIF, PNG, JPEG, and BMP) and made a single proc that can determine the type and dimensions all at once.

The argument is the name of the channel connected to the image data.

The image is expected to begin at the current channel seek position, and the channel must have "binary" translation.

The returned value is a one- or three-element list. The first element is the type, which is gif, png, jpeg, bmp, or unknown. The second and third elements are the image width and height in pixels; they're omitted if the first element is unknown.

proc imageinfo {chan} {
    set tell [chan tell $chan]
    if {[chan read $chan 16] eq "\211PNG\r\n\32\n\0\0\0\rIHDR"} {
        set type png
        binary scan [chan read $chan 8] II width height
    } elseif {"[chan seek $chan $tell][chan read $chan 6]" in {GIF87a GIF89a}} {
        set type gif
        binary scan [chan read $chan 4] ss width height
    } elseif {"[chan seek $chan $tell][chan read $chan 2]" eq "\377\330"} {
        set type jpeg
        while {![chan eof $chan]} {
            while {[chan read $chan 1] ne "\377"} {}
            while {[set byte [chan read $chan 1]] eq "\377"} {}
            if {$byte in {\300 \301 \302 \303 \305 \306 \307
                          \311 \312 \313 \315 \316 \317}} {
                binary scan [chan read $chan 7] x3SS height width
                break
            } else {
                binary scan [chan read $chan 2] S offset
                chan seek $chan [expr {($offset & 0xffff) - 2}] current
            }
        }
    } elseif {"[chan seek $chan $tell][chan read $chan 2]" eq "BM"} {
        set type bmp
        binary scan [chan read $chan 24] x16ii width height
    }
    if {[info exists type] && [info exists width] && [info exists height]} {
        list $type [expr {$width & 0xffff}] [expr {$height & 0xffff}]
    } else {
        list unknown
    }
}

I designed it for use with the SQLite [$db incrblob] command, which produces a channel providing incremental access to a database blob. It's also suitable for use with ordinary file I/O. Remember to close the channel when you're done with it.

Here's a version that operates directly on the image data:

proc imageinfo {data} {
    if {[string equal -length 16 $data \211PNG\r\n\32\n\0\0\0\rIHDR]} {
        set type png
        binary scan [string range $data 16 23] II width height
    } elseif {[string range $data 0 5] in {GIF87a GIF89a}} {
        set type gif
        binary scan [string range $data 6 9] ss width height
    } elseif {[string equal -length 2 $data \377\330]} {
        set type jpeg
        set pos 2
        while {[regexp -start $pos {\377([^\377])(.{2,7})} $data _ mark args]} {
            if {$mark in {\300 \301 \302 \303 \305 \306 \307
                          \311 \312 \313 \315 \316 \317}} {
                binary scan $args x3SS height width
                break
            } else {
                binary scan $args S offset
                set pos [expr {$pos + ($offset & 0xffff) - 2}]
            }
        }
    } elseif {[string equal -length 2 $data BM]} {
        set type bmp
        binary scan [string range $data 18 25] ii width height
    }
    if {[info exists type] && [info exists width] && [info exists height]} {
        list $type [expr {$width & 0xffff}] [expr {$height & 0xffff}]
    } else {
        list unknown
    }
}

PO 2011/22/19 -

Inspired by the code above, I've extended the image format list and came up with the following.

# Get information about an image file.
# The procedure detects all image formats supplied by the Img extension.
# It returns a 3-element list containing the image type, width and height.
# Image type is the same string as needed for the "-format" option of the Img extension.
# If the file format could not be identified, {"none" -1 -1} is returned.

proc GetImageInfo { fileName } {
    set noImg [list "none" -1 -1]
    set imgInfo $noImg

    if { ! [file isfile $fileName] } {
        return $noImg
    }
    
    if { [file size $fileName] == 0 } {
        return $noImg
    }

    if { [catch {
        set fp [open $fileName "r"]
        fconfigure $fp -translation binary
    } errMsg] } {
        catch { close $fp }
        return $noImg
    }

    set test [read $fp 1024]

    if { [string match "GIF8?a*" $test] } {
        binary scan [string range $test 6 9] ss width height
        set imgInfo [list "gif" $width $height]

    } elseif { [string equal -length 16 $test "\x89PNG\r\n\32\n\0\0\0\rIHDR"] } {
        binary scan [string range $test 16 23] II width height
        set imgInfo [list "png" $width $height]

    } elseif { [string match "\xFF\xD8\xFF*" $test] } {
        binary scan $test x3H2x2a5 marker txt
        if { $marker eq "e0" && $txt eq "JFIF\x00" || \
             $marker eq "e1" && $txt eq "Exif\x00" } {
            seek $fp 0 start
            read $fp 2
            while { ! [eof $fp] } {
                # Search for the next marker, read the marker type byte, and throw out
                # any extra "ff"'s.
                while { [read $fp 1] ne "\xFF" } {}
                while { [set byte [read $fp 1]] eq "\xFF"} {}

                if { $byte in { \xc0 \xc1 \xc2 \xc3 \xc5 \xc6 \xc7
                                \xc9 \xca \xcb \xcd \xce \xcf }} {
                    # This is the SOF marker; read a chunk of data containing the dimensions.
                    binary scan [read $fp 7] x3SS height width
                    break
                } else {
                    # This is not the the SOF marker; read in the offset of the next marker.
                    binary scan [read $fp 2] S offset

                    # The offset includes itself own two bytes so subtract them, then move
                    # ahead to the next marker.
                    seek $fp [expr {($offset & 0xffff) - 2}] current
                }
            }
            set imgInfo [list "jpeg" $width $height]
        }

    } elseif { [string match "II\*\x00*" $test] || [string match "MM\x00\**" $test] } {
        if { [string match "MM\x00\**" $test] } {
            set endian bigendian
        } else {
            set endian smallendian
        }

        set byteFmt "c"
        if { $endian eq "smallendian" } {
            set longFmt  "i"
            set shortFmt "s"
        } else {
            set longFmt  "I"
            set shortFmt "S"
        }
        set typesFmt(1) $byteFmt
        set typesFmt(3) $shortFmt
        set typesFmt(4) $longFmt

        seek $fp 4 start
        set tiff [read $fp 4]
        binary scan $tiff $longFmt offset

        seek $fp $offset start
        set tiff [read $fp 2]
        binary scan $tiff $shortFmt numDirs

        while { $numDirs > 0 } {
            set tiff [read $fp 12]
            binary scan [string range $tiff 0 1] $shortFmt tag
            if { $tag == 256 || $tag == 257 } {
                binary scan [string range $tiff 2 3] $shortFmt type
                if { [info exists typesFmt($type)] } {
                    binary scan [string range $tiff 8 11] $typesFmt($type) val
                    if { $tag == 256 } {
                        set width $val
                    } else {
                        set height $val
                    }
                }
            }
            if { [info exists width] && [info exists height] } {
                break
            }
            incr numDirs -1
        }
        set imgInfo [list "tiff" $width $height]

    } elseif { [string match "P\[356\]\[\x0a\x0d\]*" $test] } {
        if [regexp {P[356]\s*#} $test] {
            seek $fp 0 start
            gets $fp line
            while { [gets $fp line] >= 0 } {
                if { ! [string match "#*" $line] } {
                    break
                }
            }
            scan $line "%d %d" width height
        } else {
            regexp {(P[356])\s*(\d+)\s+(\d+)} $test -> fmt width height
        }
        set imgInfo [list "ppm" $width $height]

    } elseif { [string match "/\* XPM*" $test] } {
        regexp {(\/\* XPM.*\")(\d+)\s+(\d+)} $test -> dummy width height
        set imgInfo [list "xpm" $width $height]

    } elseif { [string match "#define *" $test] } {
        regexp -line {(#define .*)\s+(\d+)\s+(#define .*)\s+(\d+)\s+} $test \
               -> dummy1 width dummy2 height
        set imgInfo [list "xbm" $width $height]

    } elseif { [string match "\x59\xa6\x6a\x95*" $test] } {
        binary scan [string range $test 4 12] II width height
        set imgInfo [list "sun" $width $height]

    } elseif { [string match "\x01\xda*" $test] } {
        binary scan [string range $test 6 9] SS width height
        set imgInfo [list "sgi" $width $height]

    } elseif { [string match "\xda\x01*" $test] } {
        binary scan [string range $test 6 9] ss width height
        set imgInfo [list "sgi" $width $height]

    } elseif { [regexp {^[\x0a].+.+[\x01\x08]} $test] } {
        binary scan [string range $test 4 12] ssss x1 y1 x2 y2
        set width  [expr {$x2 - $x1 + 1}]
        set height [expr {$y2 - $y1 + 1}]
        set imgInfo [list "pcx" $width $height]

    } elseif { [string match "BM*" $test] && ([string range $test 6 9] eq "\x00\x00\x00\x00") } {
        binary scan [string range $test 14 14] c bmpType
        if { $bmpType == 40 || $bmpType == 64 } {
            binary scan [string range $test 18 25] ii width height
        } elseif { $bmpType == 12 } {
            binary scan [string range $test 18 21] ss width height
        }
        set imgInfo [list "bmp" $width $height]

    } elseif { [string match "\x00\x00\x01\x00*" $test] } {
        binary scan [string range $test 4 8] scc numImgs width height
        set imgInfo [list "ico" $width $height]

    } elseif { [string equal -length 3 "UHL" $test] && \
               [string equal -length 3 "DSI" [string range $test 80 83]] } {
        set offset1 [expr {80 + 281}]
        set offset2 [expr {$offset1 + 4}]
        scan [string range $test $offset1 [expr $offset1+3]] "%d" height
        scan [string range $test $offset2 [expr $offset2+3]] "%d" width
        set imgInfo [list "dted" $width $height]

    } elseif { [string match "Magic=RAW*" $test] } {
        regexp {Magic=RAW\s+(Width=)(\d+)\s+(Height=)(\d+)\s+} $test \
               -> dummy1 width dummy2 height
        set imgInfo [list "raw" $width $height]

    } elseif { [regexp {^.+[\x00\x01][\x01-\x03\x09-\x0b].{13}[\x08\x0f\x10\x18\x20]} $test] } {
        binary scan [string range $test 12 16] ss width height
        set imgInfo [list "tga" $width $height]
    }
    close $fp
    if { [lindex $imgInfo 1] <= 0 || [lindex $imgInfo 2] <= 0 } {
        return $noImg
    }
    return $imgInfo
}

Here is a test program to recursively check image files in a directory.

I have tested several thousand image files with success. If you have an image file not recognized correctly, please let me know.

# Test program to recursively check image files in a directory.

set optCheckAgainstImg false
set optFilePattern "*"

if { $argc == 0 } {
    set dir [pwd]
} else {
    set dir [lindex $argv 0]
    if { $argc > 1 } {
        set optFilePattern [lindex $argv 1]
    }
    if { $argc > 2 } {
        set optCheckAgainstImg [lindex $argv 2]
    }
}

proc GetDirList {dirName \
                   {showDirs 1} {showFiles 1} \
                   {showHiddenDirs 1} {showHiddenFiles 1} \
                   {dirPattern *} {filePattern *}} {
    set curDir [pwd]
    set catchVal [catch {cd $dirName}]
    if { $catchVal } {
        return [list]
    }

    set absDirList  [list]
    set relFileList [list]

    if { $showDirs } {
        set relDirList [glob -nocomplain -types d -- {*}$dirPattern]
        foreach dir $relDirList {
            if { [string index $dir 0] eq "~" } {
                set dir [format "./%s" $dir]
            }
            set absName [file join $dirName $dir]
            lappend absDirList $absName
        }
        if { $showHiddenDirs } {
            set relHiddenDirList \
                [glob -nocomplain -types {d hidden} -- {*}$dirPattern]
            foreach dir $relHiddenDirList {
                if { $dir eq "." || $dir eq ".." } {
                    continue
                }
                set absName [file join $dirName $dir]
                lappend absDirList $absName
            }
        }
    }
    if { $showFiles } {
        set relFileList [glob -nocomplain -types f -- {*}$filePattern]
        if { $showHiddenFiles } {
            set relHiddenFileList \
                [glob -nocomplain -types {f hidden} -- {*}$filePattern]
            if { [llength $relHiddenFileList] != 0 } {
                set relFileList [concat $relFileList $relHiddenFileList]
            }
        }
    }
    cd $curDir

    return [list $absDirList $relFileList]
}

proc FindRecursive { srcDir filePattern checkAgainstImg } {
    global tcl_platform errorInfo

    puts "FindRecursive $srcDir $filePattern $checkAgainstImg"
    if { ! [file isdirectory $srcDir] } {
        return -code error "\"$srcDir\" is not a directory"
    }

    set retVal [catch { cd $srcDir } ]
    if { $retVal } {
        return -code error "Could not read directory \"$srcDir\""
    }

    # Note: GetDirList returns directory names as absolute pathes, 
    # file names as relative pathnames.
    set dirCont [GetDirList $srcDir 1 1  1 1  * $filePattern]
    if { $tcl_platform(platform) eq "unix" } {
        set matchCmd "string match"
    } else {
        set matchCmd "string match -nocase"
    }

    set dirList [lsort [lindex $dirCont 0]]
    foreach dir $dirList {
        set dirName [file tail $dir]
        if { [string first "~" $dirName] == 0 } {
            # File starts with tilde.
            set dirName [format "./%s" $dirName]
        }
        FindRecursive $dir $filePattern $checkAgainstImg
    }

    set fileList [lsort [lindex $dirCont 1]]
    foreach fileName $fileList {
        if { [string first "~" $fileName] == 0 } {
            # File starts with tilde.
            set fileName [format "./%s" $fileName]
        }
        set srcNative [file nativename [file join $srcDir $fileName]]

        # Check if file matches the patterns in given in search window.
        # Compare only the pure filename, not the path part.
        set pureFileName [file tail $fileName]
        set ignore 1
        if { [eval $matchCmd {$filePattern $pureFileName}] } {
            set ignore 0
        }
        if { $ignore } {
            continue
        }
        set imgInfo [GetImageInfo $srcNative]
        foreach { fileType fileWidth fileHeight } $imgInfo { break }

        puts -nonewline [format "%s: %s" $srcNative $fileType]
        if { $fileType ne "none" } {
            puts -nonewline " $fileWidth $fileHeight"
            if { $checkAgainstImg } {
                set catchVal [catch {image create photo -file $srcNative -format $fileType} img]
                if { $catchVal } {
                    puts -nonewline " NOIMAGE"
                } else {
                    set imgWidth  [image width  $img]
                    set imgHeight [image height $img]
                    image delete $img
                    if { $imgWidth  == $fileWidth && \
                         $imgHeight == $fileHeight } {
                        puts -nonewline " OK"
                    } else {
                        puts -nonewline " FAIL ($imgWidth x $imgHeight)"
                    }
                }
            }
        }
        puts ""
    }
}

if { $optCheckAgainstImg } {
    package require Img
    FindRecursive $dir $optFilePattern true
} else {
    FindRecursive $dir $optFilePattern false
}
exit 0

See also