Version 8 of Basic GeoIP with Tcl

Updated 2004-09-24 14:17:30

Kroc - 24 Sep 2004 - This is basic GeoIP (see http://www.maxmind.com for details) that return country code for a given IP adress.

    ################################################################################
    #
    # Basic GeoIP for tcl.
    #
    # Copyright © 2004 - David Zolli - http://www.kroc.tk
    #
    # This script is under NOL : http://wiki.tcl.tk/nol
    #
    # Version 1.0 - 24 Sep 2004
    #
    ################################################################################

    # This needs Maxmind CVS database available here :
    # http://www.maxmind.com/download/geoip/database/GeoIPCountryCSV.zip
    proc GeoIP  { IP {cvsfile GeoIPCountryWhois.csv} } {
        foreach "a b c d" [split $IP .] {
            set V [expr 300+$a][expr 300+$b][expr 300+$c][expr 300+$d]
        }
        set fin [open $cvsfile r]
        while {![eof $fin]} {
            foreach "1 2 3 4 5 6" [split [gets $fin] ,] {
                foreach "a b c d" [split [lindex $1 0] .] {
                    set min [expr 300+$a][expr 300+$b][expr 300+$c][expr 300+$d]
                }
                foreach "a b c d" [split [lindex $2 0] .] {
                    set max [expr 300+$a][expr 300+$b][expr 300+$c][expr 300+$d]
                }
                if { $V >= $min && $V <= $max } {
                    close $fin
                    return "[lindex $5 0] ([lindex $6 0])"
                }
            }
        }
        close $fin
        return "?? (unknow)"
    }

rmax - 24 Sep 2004. This variant uses a modified file with fixed line length and makes a binary search over it. A conversion proc for the csv file is also included.

 proc convert {} {
    set fd [open GeoIPCountryWhois.csv]
    set data [split [read -nonewline $fd] \n]
    close $fd
    foreach line [string map {\" ""} $data] {
        foreach {a b c d e f} [split $line ,] break
        puts [format "%ld % %s" [expr {$d - $c + 1}] $c $d $e ]

    }
 }

 proc main {ip} {
    foreach {a b c d} [split $ip .] break
    set find [expr {$a<<24 | $b<<16 | $c<<8 | $d}]
    set fd [open bar.ssv]
    set size [file size bar.ssv]
    set lines [expr {$size/21}]
    set a 0; set b $lines; set found 0
    while {!$found && $a != $b} {
        set point [expr {($b+$a)/2}]
        seek $fd [expr {$point * 21}]
        foreach {start end country} [gets $fd] break
        if "$find >= 0x$start" {
            if "$find <= 0x$end" {
                puts $ip $country
                set found 1
            } else {
                set a $point
            }
        } else {
            set b $point
        }
    }
 }

 foreach ip $argv {
    puts [time {main $ip}]
 }

SS 24Sep2004. This version try to operate a binary search directly on the original CSV format. The parsing is a bit tricky, it's not unlikely it may contain error, but appears to work from some simple test. Some cleanup should be possible.

 # Copyright (C) 2004 Salvatore Sanfilippo
 # Under the same license as Tcl/Tk 8.4

 namespace eval geoip {}
 set ::geoip::filename "GeoIPCountryWhois.csv"
 set ::geoip::fd -1

 proc ::geoip::geoip {ipstr} {
     # Open the file only the first time.
     if {$::geoip::fd == -1} {
         set ::geoip::fd [open $::geoip::filename]
         file stat $::geoip::filename statbuf
         set ::geoip::len $statbuf(size)
     }
     # Convert the IP address into a number
     foreach {a b c d} [split $ipstr .] break
     set ip [expr {(wide($a)<<24)+($b<<16)+($c<<8)+$d}]
     # Binary search
     set start 0
     set end [expr {$::geoip::len -1}]
     while 1 {
         #puts "RANGE: $start - $end"
         set half [expr {int($start+(($end-$start)/2))}]
         set buf {}
         # Seek the start/end of the line
         if {($half-120) < 0} {
             append buf [string repeat "\n" [expr {120-$half}]]
             seek $::geoip::fd 0
         } else {
             seek $::geoip::fd [expr {$half-120}]
         }
         append buf [read $::geoip::fd 241]
         #puts "BUF: $buf"
         if {[string length $buf] < 121} {
             set linestart [expr {[string length $buf]-1}]
         } else {
             set linestart 120
         }
         set lineend $linestart
         # Go forward if we seek a newline as first character. We want
         # to be in the middle of the line.
         while {[string index $buf $linestart] eq "\n"} {
             incr linestart -1
             incr lineend -1
         }
         # Seek the line start/end
         while {[string index $buf $linestart] ne "\n" &&
                [string index $buf $linestart] ne {}} {
                incr linestart -1
         }
         while {[string index $buf $lineend] ne "\n" &&
                [string index $buf $lineend] ne {}} {
                incr lineend 1
         }
         # Get the line
         set line [string range $buf $linestart $lineend]
         foreach {_ _ rangestart rangeend code country} [split $line ,] break
         foreach var {rangestart rangeend code country} {
             set $var [string range [string trim [set $var] "\r\n "] 1 end-1]
         }
         #puts "LINE: $line"
         #puts "FOUND: ($rangestart,$rangeend) - $ip"
         # Trivial binary search
         if {$ip >= $rangestart && $ip <= $rangeend} {
             return [list $code $country]
         }
         if {$ip > $rangestart} {
             set start [expr {$half+1}]
         } else {
             set end [expr {$half-1}]
         }
         if {abs($start-$end)<5} {
             return {}
         }
     }
 }

 puts [time {geoip::geoip [lindex $argv 0]} 100]
 puts [geoip::geoip [lindex $argv 0]]