tclshapefile

Difference between version 6 and 7 - Previous - Next
Tclshapefile is a tcl-only package to read esri shapefiles (well at least points, polylines and polygones). It reads both the geometry (.shp) files and the attribute (.dbf) file. Data are returned as [dict]s.
It uses the specific reader components shpreader and [dbfreader]. 
----
======
 # $Id: tclshapefile.tcl,v 1.1 2012/01/02 16:18:03 joheid Exp $
 # 
 # package to read esri shapefiles
 # uses shpreader and dbfreader as components
 # (C) Joachim Heidemeier 2011
 # published under the same license als Tcl
 # 
 # methods
 # readShape
 # readCoords -> delegated
 # readAttributes -> delegated
 # getCoords -> delegated
 # getAttributes -> delegated
 # dbfInfo -> delegatd
 # shpInfo  -> delegated 
 # getRecord
 # readProjection 
 # 

 package require snit
 # specific reader packages
 package require shpreader.tcl
 package require dbfreader.tcl
 #
 # 
 #
 #
 #
 # 
 package provide shapefile 0.5
 snit::type shapefile {
 #
 # components
 # 
 component dbfReader
 delegate option -encoding to dbfReader
 delegate method attributes to dbfReader
 delegate method readAttributes to dbfReader as readData
 delegate method getAttributes to dbfReader as data 
 delegate method dbfInfo to dbfReader as dbfInfo
 #
 component shpReader
 delegate option -bbox to shpReader
 delegate option -shapetype to shpReader
 delegate option -nrecords to shpReader
 delegate method readCoords to shpReader as readData
 delegate method getCoords to shpReader as coords
 delegate method shpInfo to shpReader 
 #
 # options
 # 
 #  
 option -filename -validatemethod CheckFile -configuremethod ShapeFileName
 # 
 # for use as component
 # option -partof 
 # 
 option -projection none
 #
 # variablen
 # 
 variable status ;# interneral status
 set status(shpcheck) 0
 set status(dbfcheck) 0
 set status(projection) 0
 set status(projectionfile) {}    
 #
 # validatemethods
 #
 # checks whether a shp, shx and dbf and proj file is available
 # error if no shp &  dbf
 # 
 method CheckFile {option value}  { 

    set fn [file rootname $value]
    if {!([file exists $fn.shp] && [file exists $fn.shx] && [file exists $fn.dbf])} {
        error "$fn ist kein gültiger shapefile"
    } else {
        set status(shpcheck) 1
        set status(dbfcheck) 1
        }
    if {[file exists $fn.prj]} {
            set status(projection) 1 
        }
 }
 #
 # configuremethods
 # 
 method ShapeFileName {option value} {
        set f  [file rootname $value]
        set options(-filename) $f
        set status(projectionfile) ${f}.prj
        $dbfReader configure -filename ${f}.dbf
        $shpReader configure -filename ${f}.shp
 return
 }
 #
 #
 constructor {args} {
    install dbfReader using dbasefile  %AUTO% -partof $self
    install shpReader using shpfile %AUTO% -partof $self
    $self configurelist $args
 }
 destructor  {
        catch {$dbRreader destroy}
        catch {$shpReader destroy}
 }    
 #
 # read the proj file
 # public methods
 method readProjection {} {
    if {$status(projection)} {
        set option(-projection) [read [open $status(projectionfile) r]]
    }
 }
 #
 # 
 # opens shp, dbf and proj, set options
 # reads all data
 #
 method readShape {} {
        $shpReader readData
        $dbfReader readData
        if {[$shpReader cget -nrecords] != [lindex [$dbfReader dbfInfo] 0]} {
            error {different number of records in shp and dbf File}
        }
        $self readProjection
    }
 method getRow {recnr} {
    set x [dict create]
    if {[string is integer $recnr] && $recnr <= [$shpReader cget -nrecords]} {
        dict append x coords [dict get [$shpReader coords $recnr] $recnr]
        dict append x attributes [dict get [$dbfReader data  $recnr] $recnr]
        } else {error "not existing record requested"}
    return $x
 }
 }
 #
 #
 if {0} {
 shapefile s1 -filename [tk_getOpenFile]
 s1 readShape
 set c [s1 getCoords 1]
 set a [s1 getAttributes 1]
 set x [s1 getRow 1]
 set t [s1 cget -shapetype]
 }
======
----
Now the shp-file reader:
======
 # $Id: shpreader.tcl,v 1.3 2012/01/02 16:17:40 joheid Exp $
 # (C) Joachim Heidemeier
 # published under the same licence as Tcl
 # pure tcl snit object to read the files with the geometries in shapefiles (.shp files)
 # public methods 
 # readData -- read and decode the file
 # coords   -- outputs the coordinates of one or all geometric objects
 # shpInfo  -- give information on internal details (mostly for inspection purposes)
 # shapetype, bounding box and number of geometric objects are available as (read-only)
 # obtions.
 #
 if {0} {
 The information on shapefile structure is taken from the ESRI shapefile documentation (http://www.esri.com/library/whitepapers/pdfs/shapefile.pdf).
 In the tables the first columns including ByteOrder/Content are taken from the documentation.
 A a shapefile consists of a 100 Byte long Header and subsequent records made up from an 8 byte record header and the record content.
 The record content structure is shapetype dependent. 

 1. Definition of Shape File Header (100 Bytes long), {II5Iiiqqqqqqqq}

 Position    Field       Value       Type    ByteOrder         FormatSpecifier  TclVariable
 =====================================================================================================
 Byte 0      FileCode    9994        Integer Big                        I       info(fc)
 Byte 4      Unused       0          Integer Big                {               discarded                        
 Byte 8      Unused      0           Integer Big                                discarded
 Byte 12     Unused      0           Integer Big                        I5      discarded
 Byte 16     Unused      0           Integer Big                                discarded
 Byte 20     Unused      0           Integer Big                        }       discarded
 Byte 24     FileLength FileLength   Integer Big                        I       info(FLength),in Bytes
 Byte 28     Version     1000        Integer Little                i            discarded
 Byte 32     ShapeType ShapeType     Integer Little                i            option(-shapetype)
 Byte 36     BoundingBox Xmin        Double Little                q             {
 Byte 44     BoundingBox Ymin        Double Little                q                
 Byte 52     BoundingBox Xmax        Double Little                q             option(-bbox)
 Byte 60     BoundingBox Ymax        Double Little                q             as dict, key coordname
 Byte 68*    BoundingBox Zmin        Double Little                q              {
 Byte 76*    BoundingBox Zmax        Double Little                q              if required,
 Byte 84*    BoundingBox Mmin        Double Little                q              else discarded
 Byte 92*    BoundingBox Mmax        Double Little                q              }}
 =====================================================================================================
 * Unused, with value 0.0, if not Measured or Z type
 The value for file length is the total length of the file in 16-bit words (including the fifty
 16-bit words that make up the header).

 Available shape types

 Value       ShapeType      Content     supported
 ===================================================
 0           NullShape       Null          y
 1           Point           P-Struct      y
 3           PolyLine        PL-Struct     y
 5           Polygon         PG-Struct     y
 8           MultiPoint
 11          PointZ
 13          PolyLineZ
 15          PolygonZ
 18          MultiPointZ
 21          PointM
 23          PolyLineM
 25          PolygonM
 28          MultiPointM
 31          MultiPatch
 ======================================================

 2. Record Header fixed (8 bytes long)

 Position    Field       Value           Type    ByteOrder        FormatSpecifier                TclVariable
 ================================================================================================================================
 Byte 0  RecordNumber    RecordNumber    Integer     Big                        I                IInfo(RecNR), Key für coords Dict
 Byte 4  ContentLength   ContentLength   Integer     Big                        I                info(RecNR,Rlength), in Bytes
 =================================================================================================================================
 The content length for a record is the length of the record contents section measured in
 16-bit words. Each record, therefore, contributes (4 + content length) 16-bit words
 toward the total length of the file, as stored at Byte 24 in the file header. 

 3. Record Content (variable length)

 ShapeType(4Bytes) + geometric data according to type
 returned Tcl Variable coords is a dict with RecordCount entries, key is RecNr value is list of X Y coordinates 

 3.1 Point-Struct

 Position    Field       Value   Type    Number  ByteOrder        FormatSpecifier                TclVariable
 ==============================================================================================================
 Byte 0      ShapeType   1       Integer     1   Little                        i                internal check
 Byte 4      X           X       Double      1   Little                        q                coords 
 Byte 12     Y           Y       Double      1   Little                        q                coords
 ============================================================================================================== 



 3.2 Polyline-Struct
 consists of parts

 Position    Field       Value       Type       Number   ByteOrder   Remark                TclVariable
 ===============================================================================================================
 Byte 0      ShapeType   3           Integer     1       Little                                internal check
 Byte 4      BBox        BBox        Double      4       Little      xmin,ymin,xmax,ymax  info(bbox,recNr)
 Byte 36     NumParts    NumParts    Integer     1       Little                           info(numParts,recNr)
 Byte 40     NumPoints   NumPoints   Integer     1       Little      for all parts        info(numPoints,recNr) 
 Byte 44     Parts       Parts       Integer NumParts    Little      offset to first point of part, 0_indexed
 Byte X      Points      Points      Point   NumPoints   Little      no part delimiter
 Note: X = 44 + 4 * NumParts
 ================================================================================================================

 3.3 Polygon-Struct
 consists of rings, structure analogue to PL.

 Position    Field       Value       Type       Number   ByteOrder   Remark                TclVariable
 ==========================================================================================================
 Byte 0      ShapeType   5           Integer     1       Little                                internal check
 Byte 4      BBox        BBox        Double      4       Little      xmin,ymin,xmax,ymax
 Byte 36     NumParts    NumParts    Integer     1       Little
 Byte 40     NumPoints   NumPoints   Integer     1       Little      for all rings
 Byte 44     Parts       Parts       Integer NumParts    Little      offset to first point of part, 0_indexed
 Byte X      Points      Points      Point   NumPoints   Little      no part delimiter
 Note: X = 44 + 4 * NumParts
 =============================================================================================================
 }
 # 
 # 
 #
 package require snit
 #
 package provide shpreader 0.5
 snit::type shpfile {
 variable TCL_OK 0
 variable TCL_CONTINUE 3
 #
 #
 # daten as list of lists
 variable coords
 variable info
 variable fh    
 variable status ;# interner Status
 variable shapeTypes ;# gueltige shapes mit Nr / Namenmapping
 set status(shp_ok) 0
 set status(shp_open) 0
 set status(eof) 0    
 # options
 option -filename  -configuremethod ShpFileName
 # ohne extension, die wird in configuremethod weggeworfen
 option -bbox -readonly yes ;# bounding Box wird aus shapefile gelesen
 # normally used as component
 option -partof
 # shapetype
 option -shapetype -default null -readonly yes
 option -nrecords -readonly yes
 #
 # configuremethods
 # 
 method ShpFileName {option value} {
 #
  set options(-filename) "[file rootname $value].shp"
  return
 }
 #  
 constructor {args} {
 #
    set status(read) 0
    array set shapeTypes [list\
                0 Null\
                1 Point\
                8 MultiPoint\
                3 PolyLine\
                5 Polygon\
                21 PointM\
                28 MultiPointM\
                23 PolyLineM\
                25 PolygonM\
                11 PointZ\
                18 MultiPointZ\
                13 PolyLineZ\
                15 PolygonZ\
                31 MultiPatch]
    $self configurelist $args
 }
 #
 # reads and evalutes the header
 # stores relevant data in the info or options variable
 #
 method ReadHeader {} {
 # formatstring for binary scan accoring to table 1
 set formatstring {II5Iiiqqqqqqqq}
 #
 set rec [read $fh 100]
 set status(eof) [eof $fh]
 if {!$status(eof)} {   
    binary scan $rec $formatstring info(fc) tmp info(fLength) vers info(type) xm ym XM YM zm ZM mm MM
 #
    if {$info(fc) != 9994} {
        error {file is not a shp-file}
    } else {
        set info(fLength) [expr {2 * $info(fLength)}]
        set options(-shapetype) $shapeTypes($info(type))
        set options(-bbox) [dict create XMin $xm YMin $ym XMax $XM YMax $YM]
        if {$info(type) > 8}  {
            if {$info(type) < 21} {
                dict append options(-bbox) ZMin $zm
                dict append options(-bbox) ZMax $ZM
            } else {
                dict append options(-bbox) MMin mm
                dict append options(-bbox) MMax MM
            }
        }
        return $TCL_OK
    } 
    } else {
    return $TCL_CONTINUE    
    }
 }
 #
 # open File or raise error
 #
 method OpenShpFile {} {
    
    if {[catch {open $options(-filename)} res]} {
       return -code error $res
    }
    set fh $res
    set status(shp_open) 1
    fconfigure $fh -translation binary
    return $TCL_OK
 }
 #
 # decode a point record
 # 
 proc getPoint {rcont} {
   if {[string length $rcont != 20]} {error "wrong contentlength in getPoint [string length $rcont] instead of 20"}
   binary scan $rcont iq2 t c
   if {$t != 1} {error "wrong shapetype in getPoint $t instead 1"}
   return [dict create p $c]
 }
 #
 # Polyline and Polygone need the same algorithm
 # returns a dict over parts
 #
 proc getPoly {selfns type recnr rcont} {
    set pp 0   
    set offset 0
    set tmp [dict create]
 # get type and check, we assume one type per file
    binary scan $rcont i t
    if {$t != $type} {error "wrong shapetype in getPoly $t instead $type"}
    set offset 4 ;#start of data section
 # get record info
    binary scan $rcont @${offset}q4ii bbox  npart npoint
    set info(bbox,$recnr) $bbox
    set info(numParts,$recnr) $npart
    set info(numPoints,$recnr) $npoint
    set offset 44 ;#start of variable section
 # get part-offsets
    set formatstring "@${offset}i$npart"
    binary scan $rcont $formatstring ofs
    set offset [expr {44 + 4* $npart }]
 # set end of last part        
    lappend ofs  $npoint
 # build formatstring and varlist for scan
    set formatstring "@$offset"
    set varlist [list]
    set keylist [list]
    for {set i 0} {$i < $npart} {incr i} {
        set start [lindex $ofs $i]
        set end  [expr {[lindex $ofs $i+1] -1}]
        set np [expr {$end - $start} +1]
        set nfloat [expr {$np * 2}]
        append formatstring "q$nfloat"
        set pp [expr {$pp + $np}]    
        lappend keylist part${i}
    }
    set varlist $keylist
 #
 # the conversion
 #
    binary scan $rcont $formatstring {*}$varlist
 #
 # the return value
 #
    foreach key $keylist var $varlist {
        dict append tmp $key [set $var]
    }
    if {$pp != $npoint} {error "wrong number of points"}       
    return $tmp   
   
 }
 #
 # method GetNextRecord
 # reads one record
 # assign it to dict
 #
 method GetNextRecord {} {
 # first the RecordHeader

        set headerFormat II
        set header [read $fh 8]
         set status(eof) [eof $fh]
        if {!$status(eof)} {   
        binary scan $header $headerFormat recNr contentLength
        set contentLengthBytes [expr {$contentLength *2}]
        } else {
    return $TCL_CONTINUE    
    }
        set content [read $fh $contentLengthBytes]
        set status(eof) [eof $fh]
        if {!$status(eof)} {   
        switch -exact -- $info(type) {
            0 {dict append coords $recNr {}}
            1 {
                dict append coords "$recNr" [getPoint $content]
            }
            3 {
                dict append coords "$recNr" [getPoly $selfns $info(type) $recNr $content]
            }
            5 {
                dict append coords "$recNr" [getPoly $selfns $info(type) $recNr $content]
            }
            default {error "unsupported shape type in GetNextRecord"}
        }
        set info(recnr) $recNr
        return $TCL_OK
    } else {
    return $TCL_CONTINUE    
    }
 }
 #
 # public methods
 # read the file
 #
 method readData {} {
    if {!$status(read)} {
        $self OpenShpFile
        $self ReadHeader
        set retval  [$self GetNextRecord]
        while {$retval == $TCL_OK} {
        set retval  [$self GetNextRecord]
        }
         set options(-nrecords) $info(recnr)   
        set status(read) 1
        if {[catch {close $fh} msg]} {
                unset fh
                return -code error -errorcode $errorCode $msh
        }
        return $TCL_OK
    } else {
            error {file already read}
  }
 }
 #
 # retrieve the coords
 # of one record or all
 # argument
 # 
 #

 method coords {{recnr all}} {
    if {"$recnr" eq "all"} {
        return $coords
    } elseif {[string is integer $recnr] && $recnr <= $info(recnr)} {
        return [dict filter $coords key $recnr]
    } else {error "Record $recnr not available"}
 }
 #
 # return internal infos from info array
 # what is optional pattern 
 #
 method shpInfo {{what all}} {
  if {"$what" eq "all"} {
    return [array get info]
  } else {
    set s [array get info $what]
    if {[llenth $s]} {
        return $s
     } else {
        return "info $what not available" 
        }
    }
  }
 }
======<<categories>> Package | Geography