Version 9 of Basic GeoIP with Tcl

Updated 2004-09-24 14:19:49

Kroc - 24 Sep 2004 - This is basic GeoIP (see http://www.maxmind.com for details) that return country code for a given IP adress.

    ################################################################################
    #
    # Basic GeoIP for tcl.
    #
    # Copyright © 2004 - David Zolli - http://www.kroc.tk
    #
    # This script is under NOL : http://wiki.tcl.tk/nol
    #
    # Version 1.0 - 24 Sep 2004
    #
    ################################################################################

    # This needs Maxmind CVS database available here :
    # http://www.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip
    proc GeoIP  { IP {cvsfile GeoIPCountryWhois.csv} } {
        foreach "a b c d" [split $IP .] {
            set V [expr 300+$a][expr 300+$b][expr 300+$c][expr 300+$d]
        }
        set fin [open $cvsfile r]
        while {![eof $fin]} {
            foreach "1 2 3 4 5 6" [split [gets $fin] ,] {
                foreach "a b c d" [split [lindex $1 0] .] {
                    set min [expr 300+$a][expr 300+$b][expr 300+$c][expr 300+$d]
                }
                foreach "a b c d" [split [lindex $2 0] .] {
                    set max [expr 300+$a][expr 300+$b][expr 300+$c][expr 300+$d]
                }
                if { $V >= $min && $V <= $max } {
                    close $fin
                    return "[lindex $5 0] ([lindex $6 0])"
                }
            }
        }
        close $fin
        return "?? (unknow)"
    }

rmax - 24 Sep 2004. This variant uses a modified file with fixed line length and makes a binary search over it. A conversion proc for the csv file is also included.

 proc convert {} {
    set fd [open GeoIPCountryWhois.csv]
    set data [split [read -nonewline $fd] \n]
    close $fd
    foreach line [string map {\" ""} $data] {
        foreach {a b c d e f} [split $line ,] break
        puts [format "%ld % %s" [expr {$d - $c + 1}] $c $d $e ]

    }
 }

 proc main {ip} {
    foreach {a b c d} [split $ip .] break
    set find [expr {$a<<24 | $b<<16 | $c<<8 | $d}]
    set fd [open bar.ssv]
    set size [file size bar.ssv]
    set lines [expr {$size/21}]
    set a 0; set b $lines; set found 0
    while {!$found && $a != $b} {
        set point [expr {($b+$a)/2}]
        seek $fd [expr {$point * 21}]
        foreach {start end country} [gets $fd] break
        if "$find >= 0x$start" {
            if "$find <= 0x$end" {
                puts $ip $country
                set found 1
            } else {
                set a $point
            }
        } else {
            set b $point
        }
    }
 }

 foreach ip $argv {
    puts [time {main $ip}]
 }

SS 24Sep2004. This version tries to do a binary search directly on the original CSV format. The parsing is a bit tricky, it's not unlikely it may contain errors, but appears to work from some simple test. Some cleanup should be possible, and some part seems to be written more in C than in Tcl...

 # Copyright (C) 2004 Salvatore Sanfilippo
 # Under the same license as Tcl/Tk 8.4

 namespace eval geoip {}
 set ::geoip::filename "GeoIPCountryWhois.csv"
 set ::geoip::fd -1

 proc ::geoip::geoip {ipstr} {
     # Open the file only the first time.
     if {$::geoip::fd == -1} {
         set ::geoip::fd [open $::geoip::filename]
         file stat $::geoip::filename statbuf
         set ::geoip::len $statbuf(size)
     }
     # Convert the IP address into a number
     foreach {a b c d} [split $ipstr .] break
     set ip [expr {(wide($a)<<24)+($b<<16)+($c<<8)+$d}]
     # Binary search
     set start 0
     set end [expr {$::geoip::len -1}]
     while 1 {
         #puts "RANGE: $start - $end"
         set half [expr {int($start+(($end-$start)/2))}]
         set buf {}
         # Seek the start/end of the line
         if {($half-120) < 0} {
             append buf [string repeat "\n" [expr {120-$half}]]
             seek $::geoip::fd 0
         } else {
             seek $::geoip::fd [expr {$half-120}]
         }
         append buf [read $::geoip::fd 241]
         #puts "BUF: $buf"
         if {[string length $buf] < 121} {
             set linestart [expr {[string length $buf]-1}]
         } else {
             set linestart 120
         }
         set lineend $linestart
         # Go forward if we seek a newline as first character. We want
         # to be in the middle of the line.
         while {[string index $buf $linestart] eq "\n"} {
             incr linestart -1
             incr lineend -1
         }
         # Seek the line start/end
         while {[string index $buf $linestart] ne "\n" &&
                [string index $buf $linestart] ne {}} {
                incr linestart -1
         }
         while {[string index $buf $lineend] ne "\n" &&
                [string index $buf $lineend] ne {}} {
                incr lineend 1
         }
         # Get the line
         set line [string range $buf $linestart $lineend]
         foreach {_ _ rangestart rangeend code country} [split $line ,] break
         foreach var {rangestart rangeend code country} {
             set $var [string range [string trim [set $var] "\r\n "] 1 end-1]
         }
         #puts "LINE: $line"
         #puts "FOUND: ($rangestart,$rangeend) - $ip"
         # Trivial binary search
         if {$ip >= $rangestart && $ip <= $rangeend} {
             return [list $code $country]
         }
         if {$ip > $rangestart} {
             set start [expr {$half+1}]
         } else {
             set end [expr {$half-1}]
         }
         if {abs($start-$end)<5} {
             return {}
         }
     }
 }

 puts [time {geoip::geoip [lindex $argv 0]} 100]
 puts [geoip::geoip [lindex $argv 0]]