I needed a way to quickly query CSV files without writing them to a DB. I started with subSQL and worked it up to this:
# # Simple SQL select like code overlay for CSV files. # # Scott Beasley Jan 3 2011 # Started with Richard Suchenwirth's subSQL code. # package require csv proc csv_Select {select_statement {show_col_info 0}} { set select_statement [string trimleft $select_statement] set ret [regsub -all -nocase -- "^select" $select_statement "" select_statement] if {$ret != "1"} { error "does not seem to be a select statement" } set select_statement [string trimleft $select_statement] set fields {} set got_from 0 set got_where 0 foreach field [split $select_statement ", "] { if {$field == {}} { continue } set select_statement [string trimleft $select_statement "$field"] if {![string compare -nocase $field {from}]} { set got_from 1 break } lappend fields $field set select_statement [string trimleft $select_statement ", "] } if {$got_from == 0} { error "Select is missing the from" } set select_statement [string trimleft $select_statement] set table [string trim [lindex [split [string trimleft $select_statement] " \n\t"] 0]] set select_statement [string trimleft $select_statement "$table"] set select_statement [string trimleft $select_statement] set ret [regsub -all -nocase -- "^where" $select_statement "" select_statement] set args [string trim [string range $select_statement 0 end]] if {$ret != {1} && $args == {}} { set got_where 0 } else { set got_where 1 } upvar $table db regsub -all -nocase -- " LIKE " $args "=" args set hits {}; set hitsl {}; set ress {} if {$got_where == 1} { foreach a $args { if {[string toupper $a] == {OR}} continue if {[string toupper $a] == {AND}} { lappend hitsl $hits; set hits {} } elseif {![regexp "=" $a]} { error "WHERE clause must be column=value" } regsub -all "_" $a "?" a regsub -all "%" $a "*" a regsub "=" $a "," index foreach i [array names db $index] { append hits " $db($i)" } } lappend hitsl $hits set hits [eval land $hitsl] } else { set hits $db(records) } if {$fields=="*"} {set fields $db(fields)} foreach hit $hits { set res {} foreach field $fields { if {![catch {set db($hit,$field)} t]} { if {$show_col_info} { lappend res "$field=$t" } else { lappend res $t } } } if {$res != {}} { lappend ress $res } } return $ress } #------------------------------------------list unique, remove duplicates proc luniq {list} { set t [lsort $list] set previous [set res [lindex $t 0]] foreach i [lrange $t 1 end] { if {$i != $previous} { lappend res $i set previous $i } } return $res } #------------------------------------------------- list non-unique proc lnuniq {list} { set t [lsort $list] set res {} set previous "some dummy value which is not expected to occur" foreach i $t { if {$i != $previous} { set previous $i } else { lappend res $i } } return $res } #------------------------------------------------------- list AND proc land {arg args} { set t [luniq $arg] foreach i $args { set t [lnuniq [concat $t [luniq $i]]] } return $t } #---------------------------------------------------------------------- proc load_data {csvfile tablename {term {,}} {col_names {}}} { upvar $tablename db set fd_in [open $csvfile "r"] if {$col_names == {}} { set cols [split [gets $fd_in] $term] } else { # See if the user sent in the column names in a list. set cols $col_names } set db(fields) $cols while {![eof $fd_in]} { set line [gets $fd_in] if {[string trim $line] == {}} { break } set data_row {} set col_ndx 0 set n [expr {[llength $db(records)] + 1}] lappend db(records) $n foreach value [::csv::split $line $term] { set type [lindex $cols $col_ndx] set db($n,$type) $value lappend db($type,$value) $n incr col_ndx } } close $fd_in } #---------------------------------------------------------------------- set quake(fields) {} set quake(records) {} # load up the test data from: # http://earthquake.usgs.gov/earthquakes/catalogs/eqs1day-M1.txt # or http://earthquake.usgs.gov/earthquakes/catalogs/eqs7day-M1.txt #load_data "./eqs1day-M1.txt" quake load_data "./eqs7day-M1.txt" quake puts [csv_Select "select * from quake where Version=1"] puts "------------------------------------------" gets stdin puts [csv_Select "select Lon, Version,Datetime from quake where Lat=36.5692" 1] puts "------------------------------------------" gets stdin puts [csv_Select "select * from quake"] puts "------------------------------------------" gets stdin puts [csv_Select "select * from quake where Version=1 and Src=ak"]