Version 2 of CSV Sql

Updated 2011-01-04 15:49:01 by JSB

I needed a way to quickly query CSV files without writing them to a DB. I started with subSQL and worked it up to this:

#
# Simple SQL select like code overlay for CSV files.
#
# Scott Beasley Jan 3 2011
# Started with Richard Suchenwirth subSQL code.
#

package require csv

proc csv_Select {select_statement {show_col_info 0}} {
   set select_statement [string trimleft $select_statement]
   set ret [regsub -all -nocase -- "^select" $select_statement "" select_statement]

   if {$ret != "1"} {
      error "does not seem to be a select statement"
   }
   
   set select_statement [string trimleft $select_statement]
   set fields {}
   set got_from 0
   set got_where 0      
   foreach field [split $select_statement ", "] {
      if {$field == {}} {
         continue         
      }
      
      set select_statement [string trimleft $select_statement "$field"]
      if {![string compare -nocase $field {from}]} {
         set got_from 1
         break        
      }
      
      lappend fields $field
      set select_statement [string trimleft $select_statement ", "]
   }

   if {$got_from == 0} {
      error "Select is missing the from"
   }

   set select_statement [string trimleft $select_statement]
   set table [string trim [lindex [split [string trimleft $select_statement] " \n\t"] 0]]
   set select_statement [string trimleft $select_statement "$table"]
   set select_statement [string trimleft $select_statement]

   set ret [regsub -all -nocase -- "^where" $select_statement "" select_statement]
   set args [string trim [string range $select_statement 0 end]]
   if {$ret != {1} && $args == {}} {
      set got_where 0
   } else {
      set got_where 1      
   }

   upvar $table db
   regsub -all -nocase -- " LIKE " $args "=" args
   set hits {}; set hitsl {}; set ress {}
   if {$got_where == 1} {
      foreach a $args {
         if {[string toupper $a] == {OR}} continue
         if {[string toupper $a] == {AND}} {
            lappend hitsl $hits; set hits {}
         } elseif {![regexp "=" $a]} {
            error "WHERE clause must be column=value"
         }

         regsub -all "_" $a "?" a
         regsub -all "%" $a "*" a
         regsub "=" $a "," index
         foreach i [array names db $index] {
            append hits " $db($i)"
         }
      }

      lappend hitsl $hits
      set hits [eval land $hitsl]
   } else {
      set hits $db(records)
   }

   if {$fields=="*"} {set fields $db(fields)}
   foreach hit $hits {
      set res {}
      foreach field $fields {
         if {![catch {set db($hit,$field)} t]} {
            if {$show_col_info} {
               lappend res "$field=$t"
            } else {
               lappend res $t
            }               
         }
      }
      
      if {$res != {}} {
         lappend ress $res
      }
   }
   
   return $ress
}

#------------------------------------------list unique, remove duplicates
proc luniq {list} {
   set t [lsort $list]
   set previous [set res [lindex $t 0]]
   foreach i [lrange $t 1 end] {
      if {$i != $previous} {
         lappend res $i
         set previous $i
      }
   }
   return $res
}

#------------------------------------------------- list non-unique
proc lnuniq {list} {
   set t [lsort $list]
   set res {}
   set previous "some dummy value which is not expected to occur"
   foreach i $t {
      if {$i != $previous} {
         set previous $i
      } else {
         lappend res $i
      }
   }
   return $res
}

#------------------------------------------------------- list AND
proc land {arg args} {
   set t [luniq $arg]
   foreach i $args {
      set t [lnuniq [concat $t [luniq $i]]]
   }
   return $t
}

#----------------------------------------------------------------------
proc load_data {csvfile tablename {col_names {}}} {
   upvar $tablename db
   set fd_in [open $csvfile "r"]
   if {$col_names == {}} {
      set cols [split [gets $fd_in] ","]
   } else {
      # See if the user sent in the column names in a list.
      set cols $col_names
   }
   set db(fields) $cols
   
   while {![eof $fd_in]} {
      set line [gets $fd_in]
      if {[string trim $line] == {}} {
         break   
      }

      set data_row {}
      set col_ndx 0
      set n [expr {[llength $db(records)] + 1}]
      lappend db(records) $n
      foreach value [::csv::split $line ","] {
          set type [lindex $cols $col_ndx]
         set db($n,$type) $value
         lappend db($type,$value) $n
         incr col_ndx
      }
   }

   close $fd_in
}

#----------------------------------------------------------------------
set quake(fields) {}
set quake(records) {}

# load up the test data from: 
# http://earthquake.usgs.gov/earthquakes/catalogs/eqs1day-M1.txt
# or http://earthquake.usgs.gov/earthquakes/catalogs/eqs7day-M1.txt
#load_data "./eqs1day-M1.txt" quake
load_data "./eqs7day-M1.txt" quake
puts [csv_Select "select * from quake where Version=1"]
puts "------------------------------------------"
gets stdin
puts [csv_Select "select * from quake where Lat=36.5692" 1]
puts "------------------------------------------"
gets stdin
puts [csv_Select "select * from quake"]
puts "------------------------------------------"
gets stdin
puts [csv_Select "select * from quake where Version=1 and Src=ak"]