Version 1 of tclshapefile

Updated 2012-01-02 16:39:35 by joheid

Tclshapefile is a tcl-only package to read esri shapefiles (well at least points, polylines and polygones). It reads both the geometry (.shp) files and the attribute (.dbf) file. Data are returned as dicts. It uses the specific reader components shpreader and dbfreader.


 # $Id: tclshapefile.tcl,v 1.1 2012/01/02 16:18:03 joheid Exp $
 # 
 # package to read esri shapefiles
 # uses shpreader and dbfreader as components
 # (C) Joachim Heidemeier 2011
 # published under the same license als Tcl
 # 
 # methods
 # readShape
 # readCoords -> delegated
 # readAttributes -> delegated
 # getCoords -> delegated
 # getAttributes -> delegated
 # dbfInfo -> delegatd
 # shpInfo  -> delegated 
 # getRecord
 # readProjection 
 # 

 package require snit
 # specific reader packages
 package require shpreader.tcl
 package require dbfreader.tcl
 #
 # 
 #
 #
 #
 # 
 package provide shapefile 0.5
 snit::type shapefile {
 #
 # components
 # 
 component dbfReader
 delegate option -encoding to dbfReader
 delegate method attributes to dbfReader
 delegate method readAttributes to dbfReader as readData
 delegate method getAttributes to dbfReader as data 
 delegate method dbfInfo to dbfReader as dbfInfo
 #
 component shpReader
 delegate option -bbox to shpReader
 delegate option -shapetype to shpReader
 delegate option -nrecords to shpReader
 delegate method readCoords to shpReader as readData
 delegate method getCoords to shpReader as coords
 delegate method shpInfo to shpReader 
 #
 # options
 # 
 #  
 option -filename -validatemethod CheckFile -configuremethod ShapeFileName
 # 
 # for use as component
 # option -partof 
 # 
 option -projection none
 #
 # variablen
 # 
 variable status ;# interneral status
 set status(shpcheck) 0
 set status(dbfcheck) 0
 set status(projection) 0
 set status(projectionfile) {}    
 #
 # validatemethods
 #
 # checks whether a shp, shx and dbf and proj file is available
 # error if no shp &  dbf
 # 
 method CheckFile {option value}  { 

    set fn [file rootname $value]
    if {!([file exists $fn.shp] && [file exists $fn.shx] && [file exists $fn.dbf])} {
        error "$fn ist kein gültiger shapefile"
    } else {
        set status(shpcheck) 1
        set status(dbfcheck) 1
        }
    if {[file exists $fn.prj]} {
            set status(projection) 1 
        }
 }
 #
 # configuremethods
 # 
 method ShapeFileName {option value} {
        set f  [file rootname $value]
        set options(-filename) $f
        set status(projectionfile) ${f}.prj
        $dbfReader configure -filename ${f}.dbf
        $shpReader configure -filename ${f}.shp
 return
 }
 #
 #
 constructor {args} {
    install dbfReader using dbasefile  %AUTO% -partof $self
    install shpReader using shpfile %AUTO% -partof $self
    $self configurelist $args
 }
 destructor  {
        catch {$dbRreader destroy}
        catch {$shpReader destroy}
 }    
 #
 # read the proj file
 # public methods
 method readProjection {} {
    if {$status(projection)} {
        set option(-projection) [read [open $status(projectionfile) r]]
    }
 }
 #
 # 
 # opens shp, dbf and proj, set options
 # reads all data
 #
 method readShape {} {
        $shpReader readData
        $dbfReader readData
        if {[$shpReader cget -nrecords] != [lindex [$dbfReader dbfInfo] 0]} {
            error {different number of records in shp and dbf File}
        }
        $self readProjection
    }
 method getRow {recnr} {
    set x [dict create]
    if {[string is integer $recnr] && $recnr <= [$shpReader cget -nrecords]} {
        dict append x coords [dict get [$shpReader coords $recnr] $recnr]
        dict append x attributes [dict get [$dbfReader data  $recnr] $recnr]
        } else {error "not existing record requested"}
    return $x
 }
 }
 #
 #
 if {0} {
 shapefile s1 -filename [tk_getOpenFile]
 s1 readShape
 set c [s1 getCoords 1]
 set a [s1 getAttributes 1]
 set x [s1 getRow 1]
 set t [s1 cget -shapetype]
 }

Now the shp-file reader: # $Id: shpreader.tcl,v 1.3 2012/01/02 16:17:40 joheid Exp $

 # (C) Joachim Heidemeier
 # published under the same licence as Tcl
 # pure tcl snit object to read the files with the geometries in shapefiles (.shp files)
 # public methods 
 # readData -- read and decode the file
 # coords   -- outputs the coordinates of one or all geometric objects
 # shpInfo  -- give information on internal details (mostly for inspection purposes)
 # shapetype, bounding box and number of geometric objects are available as (read-only)
 # obtions.
 #
 if {0} {

The information on shapefile structure is taken from the ESRI shapefile documentation (http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf ). In the tables the first columns including ByteOrder/Content are taken from the documentation. A a shapefile consists of a 100 Byte long Header and subsequent records made up from an 8 byte record header and the record content. The record content structure is shapetype dependent.

1. Definition of Shape File Header (100 Bytes long), {II5Iiiqqqqqqqq}

Position Field Value Type ByteOrder FormatSpecifier TclVariable ===================================================================================================== Byte 0 FileCode 9994 Integer Big I info(fc) Byte 4 Unused 0 Integer Big { discarded Byte 8 Unused 0 Integer Big discarded Byte 12 Unused 0 Integer Big I5 discarded Byte 16 Unused 0 Integer Big discarded Byte 20 Unused 0 Integer Big } discarded Byte 24 FileLength FileLength Integer Big I info(FLength),in Bytes Byte 28 Version 1000 Integer Little i discarded Byte 32 ShapeType ShapeType Integer Little i option(-shapetype) Byte 36 BoundingBox Xmin Double Little q { Byte 44 BoundingBox Ymin Double Little q Byte 52 BoundingBox Xmax Double Little q option(-bbox) Byte 60 BoundingBox Ymax Double Little q as dict, key coordname Byte 68* BoundingBox Zmin Double Little q { Byte 76* BoundingBox Zmax Double Little q if required, Byte 84* BoundingBox Mmin Double Little q else discarded Byte 92* BoundingBox Mmax Double Little q }} ===================================================================================================== * Unused, with value 0.0, if not Measured or Z type The value for file length is the total length of the file in 16-bit words (including the fifty 16-bit words that make up the header).

Available shape types

Value ShapeType Content supported =================================================== 0 NullShape Null y 1 Point P-Struct y 3 PolyLine PL-Struct y 5 Polygon PG-Struct y 8 MultiPoint 11 PointZ 13 PolyLineZ 15 PolygonZ 18 MultiPointZ 21 PointM 23 PolyLineM 25 PolygonM 28 MultiPointM 31 MultiPatch ======================================================

2. Record Header fixed (8 bytes long)

Position Field Value Type ByteOrder FormatSpecifier TclVariable ================================================================================================================================ Byte 0 RecordNumber RecordNumber Integer Big I IInfo(RecNR), Key für coords Dict Byte 4 ContentLength ContentLength Integer Big I info(RecNR,Rlength), in Bytes ================================================================================================================================= The content length for a record is the length of the record contents section measured in 16-bit words. Each record, therefore, contributes (4 + content length) 16-bit words toward the total length of the file, as stored at Byte 24 in the file header.

3. Record Content (variable length)

ShapeType(4Bytes) + geometric data according to type returned Tcl Variable coords is a dict with RecordCount entries, key is RecNr value is list of X Y coordinates

3.1 P-Struct

Position Field Value Type Number ByteOrder FormatSpecifier TclVariable ============================================================================================================== Byte 0 ShapeType 1 Integer 1 Little i internal check Byte 4 X X Double 1 Little q coords Byte 12 Y Y Double 1 Little q coords ==============================================================================================================

3.2 Polyline-Struct consists of parts

Position Field Value Type Number ByteOrder Remark TclVariable =============================================================================================================== Byte 0 ShapeType 3 Integer 1 Little internal check Byte 4 BBox BBox Double 4 Little xmin,ymin,xmax,ymax info(bbox,recNr) Byte 36 NumParts NumParts Integer 1 Little info(numParts,recNr) Byte 40 NumPoints NumPoints Integer 1 Little for all parts info(numPoints,recNr) Byte 44 Parts Parts Integer NumParts Little offset to first point of part, 0_indexed Byte X Points Points Point NumPoints Little no part delimiter Note: X = 44 + 4 * NumParts ================================================================================================================

3.3 Polygon-Struct consists of rings, structure analog to PL.

Position Field Value Type Number ByteOrder Remark TclVariable ========================================================================================================== Byte 0 ShapeType 5 Integer 1 Little internal check Byte 4 BBox BBox Double 4 Little xmin,ymin,xmax,ymax Byte 36 NumParts NumParts Integer 1 Little Byte 40 NumPoints NumPoints Integer 1 Little for all rings Byte 44 Parts Parts Integer NumParts Little offset to first point of part, 0_indexed Byte X Points Points Point NumPoints Little no part delimiter Note: X = 44 + 4 * NumParts =============================================================================================================

 }
 # 
 # 
 #
 package require snit
 #
 package provide shpreader 0.5
 snit::type shpfile {
 variable TCL_OK 0
 variable TCL_CONTINUE 3
 #
 #
 # daten as list of lists
 variable coords
 variable info
 variable fh    
 variable status ;# interner Status
 variable shapeTypes ;# gueltige shapes mit Nr / Namenmapping
 set status(shp_ok) 0
 set status(shp_open) 0
 set status(eof) 0    
 # options
 option -filename  -configuremethod ShpFileName
 # ohne extension, die wird in configuremethod weggeworfen
 option -bbox -readonly yes ;# bounding Box wird aus shapefile gelesen
 # normally used as component
 option -partof
 # shapetype
 option -shapetype -default null -readonly yes
 option -nrecords -readonly yes
 #
 # configuremethods
 # 
 method ShpFileName {option value} {
 #
  set options(-filename) "[file rootname $value].shp"
  return
 }
 #  
 constructor {args} {
 #
    set status(read) 0
    array set shapeTypes [list\
                0 Null\
                1 Point\
                8 MultiPoint\
                3 PolyLine\
                5 Polygon\
                21 PointM\
                28 MultiPointM\
                23 PolyLineM\
                25 PolygonM\
                11 PointZ\
                18 MultiPointZ\
                13 PolyLineZ\
                15 PolygonZ\
                31 MultiPatch]
    $self configurelist $args
 }
 #
 # reads and evalutes the header
 # stores relevant data in the info or options variable
 #
 method ReadHeader {} {
 # formatstring for binary scan accoring to table 1
 set formatstring {II5Iiiqqqqqqqq}
 #
 set rec [read $fh 100]
 set status(eof) [eof $fh]
 if {!$status(eof)} {   
    binary scan $rec $formatstring info(fc) tmp info(fLength) vers info(type) xm ym XM YM zm ZM mm MM
 #
    if {$info(fc) != 9994} {
        error {file is not a shp-file}
    } else {
        set info(fLength) [expr {2 * $info(fLength)}]
        set options(-shapetype) $shapeTypes($info(type))
        set options(-bbox) [dict create XMin $xm YMin $ym XMax $XM YMax $YM]
        if {$info(type) > 8}  {
            if {$info(type) < 21} {
                dict append options(-bbox) ZMin $zm
                dict append options(-bbox) ZMax $ZM
            } else {
                dict append options(-bbox) MMin mm
                dict append options(-bbox) MMax MM
            }
        }
        return $TCL_OK
    } 
    } else {
    return $TCL_CONTINUE    
    }
 }
 #
 # open File or raise error
 #
 method OpenShpFile {} {

    if {[catch {open $options(-filename)} res]} {
       return -code error $res
    }
    set fh $res
    set status(shp_open) 1
    fconfigure $fh -translation binary
    return $TCL_OK
 }
 #
 # decode a point record
 # 

proc getPoint {rcont} {

   if {[string length $rcont != 20]} {error "wrong contentlength in getPoint [string length $rcont] instead of 20"}
   binary scan $rcont iq2 t c
   if {$t != 1} {error "wrong shapetype in getPoint $t instead 1"}
   return [dict create p $c]
 }
 #
 # Polyline and Polygone need the same algorithm
 # returns a dict over parts
 #
 proc getPoly {selfns type recnr rcont} {
    set pp 0   
    set offset 0
    set tmp [dict create]
 # get type and check, we assume one type per file
    binary scan $rcont i t
    if {$t != $type} {error "wrong shapetype in getPoly $t instead $type"}
    set offset 4 ;#start of data section
 # get record info
    binary scan $rcont @${offset}q4ii bbox  npart npoint
    set info(bbox,$recnr) $bbox
    set info(numParts,$recnr) $npart
    set info(numPoints,$recnr) $npoint
    set offset 44 ;#start of variable section
 # get part-offsets
    set formatstring "@${offset}i$npart"
    binary scan $rcont $formatstring ofs
    set offset [expr {44 + 4* $npart }]
 # set end of last part        
    lappend ofs  $npoint
 # build formatstring and varlist for scan
    set formatstring "@$offset"
    set varlist [list]
    set keylist [list]
    for {set i 0} {$i < $npart} {incr i} {
        set start [lindex $ofs $i]
        set end  [expr {[lindex $ofs $i+1] -1}]
        set np [expr {$end - $start} +1]
        set nfloat [expr {$np * 2}]
        append formatstring "q$nfloat"
        set pp [expr {$pp + $np}]    
        lappend keylist part${i}
    }
    set varlist $keylist
 #
 # the conversion
 #
    binary scan $rcont $formatstring {*}$varlist
 #
 # the return value
 #
    foreach key $keylist var $varlist {
        dict append tmp $key [set $var]
    }
    if {$pp != $npoint} {error "wrong number of points"}       
    return $tmp   

 }
 #
 # method GetNextRecord
 # reads one record
 # assign it to dict
 #
 method GetNextRecord {} {
 # first the RecordHeader

        set headerFormat II
        set header [read $fh 8]
         set status(eof) [eof $fh]
        if {!$status(eof)} {   
        binary scan $header $headerFormat recNr contentLength
        set contentLengthBytes [expr {$contentLength *2}]
        } else {
    return $TCL_CONTINUE    
    }
        set content [read $fh $contentLengthBytes]
        set status(eof) [eof $fh]
        if {!$status(eof)} {   
        switch -exact -- $info(type) {
            0 {dict append coords $recNr {}}
            1 {
                dict append coords "$recNr" [getPoint $content]
            }
            3 {
                dict append coords "$recNr" [getPoly $selfns $info(type) $recNr $content]
            }
            5 {
                dict append coords "$recNr" [getPoly $selfns $info(type) $recNr $content]
            }
            default {error "unsupported shape type in GetNextRecord"}
        }
        set info(recnr) $recNr
        return $TCL_OK
    } else {
    return $TCL_CONTINUE    
    }
 }
 #
 # public methods
 # read the file
 #
 method readData {} {
    if {!$status(read)} {
        $self OpenShpFile
        $self ReadHeader
        set retval  [$self GetNextRecord]
        while {$retval == $TCL_OK} {
        set retval  [$self GetNextRecord]
        }
         set options(-nrecords) $info(recnr)   
        set status(read) 1
        if {[catch {close $fh} msg]} {
                unset fh
                return -code error -errorcode $errorCode $msh
        }
        return $TCL_OK
    } else {
            error {file already read}
  }
 }
 #
 # retrieve the coords
 # of one record or all
 # argument
 # 
 #

 method coords {{recnr all}} {
    if {"$recnr" eq "all"} {
        return $coords
    } elseif {[string is integer $recnr] && $recnr <= $info(recnr)} {
        return [dict filter $coords key $recnr]
    } else {error "Record $recnr not available"}
 }
 #
 # return internal infos from info array
 # what is optional pattern 
 #
 method shpInfo {{what all}} {
  if {"$what" eq "all"} {
    return [array get info]
  } else {
    set s [array get info $what]
    if {[llenth $s]} {
        return $s
     } else {
        return "info $what not available" 
        }
    }
  }
 }