A while ago my brother and I started playing with the IP to country code tables provided by ARIN and others.
The problem is this:
Given IP 123.4.5.6 and a list of ranges like this:
0.0.0.0 to 4.255.255.255 us 5.0.0.1 to 5.10.255.255 nl 5.15.0.0 to 20.1.4.128 de ... 255.0.0.0 to 255.255.255.255 xx
find the range, if any, that corresponds to the given IP.
If you sort the list, you can do a binary search and you can improve upon that by providing an index.
You can get a copy of the full list (compiled mid 2002) here [L1 ] that the code below needs.
This code illustrates, I think it does anyway, that the way you do something in C is likely not the way to do it in Tcl. Two separate lists that hold the IP ranges and corresponding country codes would likely be much, much faster. As it is, the Tcl (8.4.2 on linux) code is about 430 times slower that the C implementation.
13May2003 PS.
This is the code in C:
#include <stdio.h> #define REC_OFFSET 8 #define REC_SIZE 18 struct record_t { unsigned int low; unsigned int high; char country[3]; }; unsigned int IpToUInt(const char *ipStr) { unsigned int d1=0, d2=0, d3=0, d4=0; sscanf(ipStr, "%u.%u.%u.%u", &d1, &d2, &d3, &d4); return ( ((d1 & 0xFF) << 24) | \ ((d2 & 0xFF) << 16) | \ ((d3 & 0xFF) << 8) | \ (d4 & 0xFF) ); } unsigned int HexToUInt(const char *hexIp) { unsigned int ip=0; sscanf(hexIp, "%x", &ip); return ip; } int GetRecord(struct record_t *r, int num, FILE *infile) { char data[9]; data[8]=0; // terminate data. //seek to the selected record: if ( fseek(infile, REC_OFFSET + num * REC_SIZE, SEEK_SET)==-1) return 0; //read&parse low IP if( !fread(&data, 8, 1, infile) ) { return 0; } r->low = HexToUInt(data); //read&parse high IP if( !fread(&data, 8, 1, infile) ) { return 0; } r->high = HexToUInt(data); //read country if( !fread(r->country, 2, 1, infile) ) { return 0; } r->country[2]=0; //terminate string return 1; } const char *lookup(unsigned int ip, int records, struct record_t *r, int *index, int *hindex) { int steps=0; int found=0; unsigned int myMin=0, myMax=0, avg; myMin=index[ip >> 24]; myMax=hindex[ip >> 24]; while( (myMax - myMin) > 0 ) { steps++; avg = myMax - ((myMax - myMin) /2); if( r[avg].low <= ip && r[avg].high >= ip ) { found=1; break; } else if (r[avg].low <= ip) { myMin = avg; } else { myMax = avg; } if (steps > 32) { //Infiniteloop break; } } if (found) { return r[avg].country; } else { //check one above: if ( avg < records ) if( r[avg+1].low <= ip && r[avg+1].high >= ip ) return r[avg+1].country; //check one below: if (avg > 0) if( r[avg-1].low <= ip && r[avg-1].high >= ip ) return r[avg-1].country; return 0; } } int main (int argc, char *argv[]) { unsigned int ip; char *country; FILE *infile=0; int i; char data[9]; struct record_t *r; int index[256], hindex[256], indexptr; int records=0; if (argc < 2) { printf("Usage: lookup <dotted-decimal-ip> <...>\n"); return 2; } if( (infile=fopen("opentarget.dat", "r"))==NULL ) { printf("Could not open database\n"); return 2; } data[8]=0; // terminate data. if ( fseek(infile, 0, SEEK_SET)==-1) return 0; if( !fread(&data, 8, 1, infile) ) { printf("File read error while initialising\n"); return 2; } records = HexToUInt(data); if ( (r = (struct record_t *)malloc(sizeof(struct record_t)*records)) == NULL) { printf("Insufficient memory\n"); return 2; } for (i=0; i<256; i++) index[i]=hindex[i]=0; index[0]=0; indexptr=0; i=0; while (i < records) { //read&parse low IP if( !fread(&data, 8, 1, infile) ) { printf("Error reading database record %d\n", i); return 2; } r[i].low = HexToUInt(data); //read&parse high IP if( !fread(&data, 8, 1, infile) ) { printf("Error reading database record %d\n", i); return 2; } r[i].high = HexToUInt(data); //read country if( !fread(r[i].country, 2, 1, infile) ) { printf("Error reading database record %d\n", i); return 2; } r[i].country[2]=0; //terminate string //add to index? if (indexptr < (r[i].low >> 24)) { indexptr=r[i].low >> 24; index[indexptr]=i; } i++; } fclose(infile); for (i=1; i<256; i++) { if(index[i]==0) index[i]=index[i-1]; } hindex[255]=records; for (i=254; i>=0; i--) { if (index[i+1]>index[i]) { hindex[i]=index[i+1]; } else { hindex[i]=hindex[i+1]; } } //for (i=0; i<256; i++) printf("%04x", index[i]); //for (i=0; i<256; i++) printf("%04x", hindex[i]); //return 0; int j; for (i=1; i<argc; i++) { // uncomment the next loop to do a speed test: //for (j=0; j<1000000; j++) { ip=IpToUInt(argv[i]); country=lookup(ip, records, r, index, hindex); //} if ( country!=NULL ) { printf("%s %s\n", argv[i], country); } else { printf("%s NOMATCH\n", argv[i]); } } return 0; }
The same code in Tcl looks like this:
namespace eval ::opentarget { set max 0 set theArray(0) foo set dbLoaded 0 set theList [list] proc calcIp {ip} { set lIp [split $ip .] while {[llength $lIp] < 4} { lappend lIp 0 } set res [expr ((((([lindex $lIp 0].0 * 256.0) + [lindex $lIp 1].0) * 256.0) + [lindex $lIp 2].0) * 256.0) + [lindex $lIp 3].0] return [lindex [split $res .] 0] } proc calcIpS {ip} { #calc IP, but drop the LSB set lIp [split $ip .] while {[llength $lIp] < 4} { lappend lIp 0 } set res [expr (((( [lindex $lIp 0]) * 256) + [lindex $lIp 1]) * 256) + [lindex $lIp 2]] return $res } proc calcIpHex {hex} { return [format %u 0x$hex] } proc openFile {fileName} { regsub -all {\\} $fileName {/} fileName variable max set if [open $fileName r] set lenHex [read $if 8] set len [format %i 0x$lenHex] set max [expr $len - 1] return $if } proc readDatabase {fileName} { regsub -all {\\} $fileName {/} fileName set key -1 variable theArray variable theList variable max variable dbLoaded set if [open $fileName r] set max [expr 1 * 0x[read $if 8]] while {![eof $if] && [string length [set entry [read $if 18]]] == 18} { lappend line [string range $entry 0 7] lappend line [string range $entry 8 15] lappend line [string range $entry 16 17] incr key set theArray($key) $line unset line if { ![string equal [string range $entry 6 7] "00"] } { #puts "$key starts at [string range $entry 6 7]" } if { ![string equal [string range $entry 14 15] "ff"] } { #puts "$key ends at [string range $entry 14 15]" } lappend line [calcIpHex [string range $entry 0 5]] lappend line [calcIpHex [string range $entry 8 13]] lappend line [string range $entry 16 17] lappend theList $line unset line } set max $key close $if set dbLoaded 1 } proc lookup {args} { variable theArray variable dbLoaded if {[llength $args] == 1 && !$dbLoaded} { error "Database not loaded" } elseif {[llength $args] == 1 && $dbLoaded} { lookupArray [lindex $args 0] } else { lookupFile [lindex $args 0] [lindex $args 1] } } proc lookupArray {ip} { variable theArray variable max variable dbLoaded if {!$dbLoaded} {error "database not loaded"} set nip [calcIp $ip] set myMin 0 set myMax $max set steps 0 set avg 0 while {[expr $myMax - $myMin] > 0 && $steps < 32} { incr steps if {$avg == 1} { set avg 0 } else { set avg [expr $myMax - (($myMax - $myMin) /2)] } set current $theArray($avg) set low [calcIpHex [lindex $current 0]] set hi [calcIpHex [lindex $current 1]] if {[expr "$low.0 <= $nip.0"] && [expr "$hi.0 >= $nip.0"]} { return [lindex $current 2] } elseif {[expr "$low.0 <= $nip.0"]} { set myMin $avg } else { set myMax $avg } } return "" } proc lookupArrayS {ip} { variable theList variable max variable dbLoaded if {!$dbLoaded} {error "database not loaded"} set nip [calcIpS $ip] set myMin 0 set myMax $max set steps 0 set avg 0 while {[expr $myMax - $myMin] > 0 && $steps < 32} { incr steps if {$avg == 1} { set avg 0 } else { set avg [expr $myMax - (($myMax - $myMin) /2)] } set current [lindex $theList $avg] set low [lindex $current 0] set hi [lindex $current 1] if {[expr $low <= $nip] && [expr $hi >= $nip]} { return [lindex $current 2] } elseif {[expr $low <= $nip]} { set myMin $avg } else { set myMax $avg } } return "" } proc lookupFile {fileName ip} { variable max set if [openFile $fileName] set found 0 set nip [calcIp $ip] set myMin 0 set myMax $max set steps 0 while {[expr $myMax - $myMin] > 0 && $steps < 32} { incr steps if {$myMax == 1} { set avg 0 } else { set avg [expr $myMax - (($myMax - $myMin) /2)] } seek $if [expr 8 + ($avg * 18)] start set low [calcIpHex [read $if 8]] set hi [calcIpHex [read $if 8]] set country [read $if 2] if {[expr $low.0 <= $nip.0] && [expr $hi.0 >= $nip.0]} { set found 1 set return $country break } elseif {[expr $low.0 <= $nip.0]} { set myMin $avg } else { set myMax $avg } } close $if if {!$found} { return "" } else { return $return } } } opentarget::readDatabase opentarget.dat puts [time { set foo [::opentarget::lookupArray $args] } 1000]