A while ago my brother and I started playing with the IP to country code tables provided by ARIN and others.

The problem is this:

Given IP ''123.4.5.6'' and a list of ranges like this:

 0.0.0.0 to 4.255.255.255 us
 5.0.0.1 to 5.10.255.255 nl
 5.15.0.0 to 20.1.4.128 de
 ...
 255.0.0.0 to 255.255.255.255 xx

find the range, if any, that corresponds to the given IP.

If you sort the list, you can do a ''binary search'' and you can improve upon that by providing an index.

You can get a copy of the full list (compiled mid 2002) here [http://pascal.scheffers.net/opentarget.dat.gz] that the code below needs.

This code illustrates, I think it does anyway, that the way you do something in C is likely not the way to do it in Tcl. In Tcl, two separate lists that hold the IP ranges and corresponding country codes would likely be much, much faster. As it is, the Tcl (8.4.2 on linux) code is about 430 times slower than the C implementation.

13May2003 [PS].

This is the code in C:

    #include <stdio.h>

    #define REC_OFFSET 8
    #define REC_SIZE   18

    struct record_t {
	unsigned int low;
	unsigned int high;
	char country[3];
    };

    unsigned int IpToUInt(const char *ipStr)
	{
	    unsigned int d1=0, d2=0, d3=0, d4=0;

	    sscanf(ipStr, "%u.%u.%u.%u", &d1, &d2, &d3, &d4);

	    return ( ((d1 & 0xFF) << 24) | \
		     ((d2 & 0xFF) << 16) | \
		     ((d3 & 0xFF) << 8)  | \
		     (d4 & 0xFF) );

	}

    unsigned int HexToUInt(const char *hexIp)
	{
	    unsigned int ip=0;
	    sscanf(hexIp, "%x", &ip);
	    return ip;
	}

    int GetRecord(struct record_t *r, int num, FILE *infile)
	{
	    char data[9];
	    data[8]=0; // terminate data.

	    //seek to the selected record:
	    if ( fseek(infile, REC_OFFSET + num * REC_SIZE, SEEK_SET)==-1) return 0;

	    //read&parse low IP
	    if( !fread(&data, 8, 1, infile) ) {
		return 0;
	    }  
	    r->low = HexToUInt(data);

	    //read&parse high IP
	    if( !fread(&data, 8, 1, infile) ) {
		return 0;
	    }  
	    r->high = HexToUInt(data);
  
	    //read country
	    if( !fread(r->country, 2, 1, infile) ) {
		return 0;
	    }  

	    r->country[2]=0; //terminate string

	    return 1; 

	}

    const char *lookup(unsigned int ip, int records, struct record_t *r, int *index, int *hindex)
	{
	    int steps=0;
	    int found=0;

	    unsigned int myMin=0, myMax=0, avg;  
  
	    myMin=index[ip >> 24];
	    myMax=hindex[ip >> 24];
	    while( (myMax - myMin) > 0 ) {
		steps++;

		avg = myMax - ((myMax - myMin) /2);

		if( r[avg].low <= ip && r[avg].high >= ip ) {
		    found=1;
		    break;
		} else if (r[avg].low <= ip) {
		    myMin = avg;
		} else {
		    myMax = avg;
		}
		if (steps > 32) {
		    //Infiniteloop
		    break;
		}
	    }
	    if (found) {
		return r[avg].country;
	    } else {

		//check one above:
		if ( avg < records ) 
		    if( r[avg+1].low <= ip && r[avg+1].high >= ip ) 
			return r[avg+1].country;

		//check one below:
		if (avg > 0) 
		    if( r[avg-1].low <= ip && r[avg-1].high >= ip ) 
			return r[avg-1].country;

		return 0;
	    }

	}


    int main (int argc, char *argv[])
	{
	    unsigned int ip;
	    char *country;
	    FILE *infile=0;
	    int i;

	    char data[9];
	    struct record_t *r;
	    int index[256], hindex[256], indexptr;
	    int records=0;

	    if (argc < 2) {
		printf("Usage: lookup <dotted-decimal-ip> <...>\n");
		return 2;
	    }

  
	    if( (infile=fopen("opentarget.dat", "r"))==NULL ) {
		printf("Could not open database\n");    
		return 2;
	    }

	    data[8]=0; // terminate data.

	    if ( fseek(infile, 0, SEEK_SET)==-1) return 0;  
	    if( !fread(&data, 8, 1, infile) ) {
		printf("File read error while initialising\n");
		return 2;
	    }  
	    records = HexToUInt(data);

	    if ( (r = (struct record_t *)malloc(sizeof(struct record_t)*records)) == NULL) {
		printf("Insufficient memory\n");
		return 2;    
	    }

	    for (i=0; i<256; i++) index[i]=hindex[i]=0;
	    index[0]=0;
	    indexptr=0;

	    i=0;
	    while (i < records) {
		//read&parse low IP
		if( !fread(&data, 8, 1, infile) ) {
		    printf("Error reading database record %d\n", i);
		    return 2;
		}  
		r[i].low = HexToUInt(data);

		//read&parse high IP
		if( !fread(&data, 8, 1, infile) ) {
		    printf("Error reading database record %d\n", i);
		    return 2;
		}  
		r[i].high = HexToUInt(data);
  
		//read country
		if( !fread(r[i].country, 2, 1, infile) ) {
		    printf("Error reading database record %d\n", i);
		    return 2;
		}  

		r[i].country[2]=0; //terminate string  

		//add to index?
		if (indexptr < (r[i].low >> 24)) {
		    indexptr=r[i].low >> 24;
		    index[indexptr]=i;	
		}
		i++;
	    }
	    fclose(infile);

	    for (i=1; i<256; i++) {
		if(index[i]==0) index[i]=index[i-1];
	    }
	    hindex[255]=records;
	    for (i=254; i>=0; i--) {
		if (index[i+1]>index[i]) {
		    hindex[i]=index[i+1];
		} else {
		    hindex[i]=hindex[i+1];
		}
	    }

	    //for (i=0; i<256; i++) printf("%04x", index[i]);
	    //for (i=0; i<256; i++) printf("%04x", hindex[i]);
	    //return 0;
	    int j;
	    for (i=1; i<argc; i++) {
                // uncomment the next loop to do a speed test:
		//for (j=0; j<1000000; j++) {
		    ip=IpToUInt(argv[i]);
		    country=lookup(ip, records, r, index, hindex);   
		//}
		if ( country!=NULL ) {
		    printf("%s %s\n", argv[i], country);
		} else {
		    printf("%s NOMATCH\n", argv[i]);
		}
	    }  
	    return 0;
 
	}

The same code in Tcl looks like this:

 namespace eval ::opentarget {

    set max 0
    set theArray(0) foo
    set dbLoaded 0
    set theList [list]

    proc calcIp {ip} {
	set lIp [split $ip .]
	while {[llength $lIp] < 4} {
	    lappend lIp 0
	}
	set res [expr ((((([lindex $lIp 0].0 * 256.0) + [lindex $lIp 1].0) * 256.0) + [lindex $lIp 2].0) * 256.0) + [lindex $lIp 3].0]
	return [lindex [split $res .] 0]
    }

    proc calcIpS {ip} {
	#calc IP, but drop the LSB
	set lIp [split $ip .]
	while {[llength $lIp] < 4} {
	    lappend lIp 0
	}
	set res [expr (((( [lindex $lIp 0]) * 256) + [lindex $lIp 1]) * 256) + [lindex $lIp 2]]
	return $res
    }

    proc calcIpHex {hex} {
	return [format %u 0x$hex]
    }

    proc openFile {fileName} {
	regsub -all {\\} $fileName {/} fileName
	variable max
	set if [open $fileName r]
	set lenHex [read $if 8]
	set len [format %i 0x$lenHex]
	set max [expr $len - 1]
	return $if
    }

    proc readDatabase {fileName} {
	regsub -all {\\} $fileName {/} fileName
	set key -1
	variable theArray
	variable theList
	variable max
	variable dbLoaded
	set if [open $fileName r]
	set max [expr 1 * 0x[read $if 8]]
	while {![eof $if] && [string length [set entry [read $if 18]]] == 18} {
	    lappend line [string range $entry 0 7]
	    lappend line [string range $entry 8 15]
	    lappend line [string range $entry 16 17]
	    incr key
	    set theArray($key) $line
	    unset line
	    if { ![string equal [string range $entry 6 7] "00"] } {
		#puts "$key starts at [string range $entry 6 7]"
	    }
	    if { ![string equal [string range $entry 14 15] "ff"] } {
		#puts "$key ends at [string range $entry 14 15]"
	    }
	    lappend line [calcIpHex [string range $entry 0 5]]
	    lappend line [calcIpHex [string range $entry 8 13]]
	    lappend line [string range $entry 16 17]
	    lappend theList $line
	    unset line

	}
	
	set max $key
	close $if
	set dbLoaded 1
    }

    proc lookup {args} {
	variable theArray
	variable dbLoaded
	if {[llength $args] == 1 && !$dbLoaded} {
	    error "Database not loaded"
	} elseif {[llength $args] == 1 && $dbLoaded} {
	    lookupArray [lindex $args 0]
	} else {
	    lookupFile [lindex $args 0] [lindex $args 1]
	}
    }

    proc lookupArray {ip} {
	
	variable theArray
	variable max
	variable dbLoaded
	if {!$dbLoaded} {error "database not loaded"}

	set nip [calcIp $ip]
	set myMin 0
	set myMax $max
	set steps 0
	set avg 0
	while {[expr $myMax - $myMin] > 0 && $steps < 32} {
	    incr steps
	    if {$avg == 1} {
		set avg 0
	    } else {
		set avg [expr $myMax - (($myMax - $myMin) /2)]
	    }
	    set current $theArray($avg)
	    set low [calcIpHex [lindex $current 0]]
	    set hi [calcIpHex  [lindex $current 1]]
	    
	    if {[expr "$low.0 <= $nip.0"] && [expr "$hi.0 >= $nip.0"]} {
		return [lindex $current 2]
	    } elseif {[expr "$low.0 <= $nip.0"]} {
		set myMin $avg
	    } else {
		set myMax $avg
	    }
	}
	
	return ""
    }

    proc lookupArrayS {ip} {
	
	variable theList
	variable max
	variable dbLoaded
	if {!$dbLoaded} {error "database not loaded"}

	set nip [calcIpS $ip]
	set myMin 0
	set myMax $max
	set steps 0
	set avg 0
	while {[expr $myMax - $myMin] > 0 && $steps < 32} {
	    incr steps
	    if {$avg == 1} {
		set avg 0
	    } else {
		set avg [expr $myMax - (($myMax - $myMin) /2)]
	    }
	    set current [lindex $theList $avg]
	    set low [lindex $current 0]
	    set hi [lindex $current 1]
	    
	    if {[expr $low <= $nip] && [expr $hi >= $nip]} {
		return [lindex $current 2]
	    } elseif {[expr $low <= $nip]} {
		set myMin $avg
	    } else {
		set myMax $avg
	    }
	}
	
	return ""
    }


    proc lookupFile {fileName ip} {
	variable max
	set if [openFile $fileName]
	set found 0
	set nip [calcIp $ip]
	set myMin 0
	set myMax $max
	set steps 0
	while {[expr $myMax - $myMin] > 0 && $steps < 32} {
	    incr steps
	    if {$myMax == 1} {
		set avg 0
	    } else {
		set avg [expr $myMax - (($myMax - $myMin) /2)]
	    }
	    seek $if [expr 8 + ($avg * 18)] start
	    set low [calcIpHex [read $if 8]]
	    set hi [calcIpHex [read $if 8]]
	    set country [read $if 2]
	    
	    if {[expr $low.0 <= $nip.0] && [expr $hi.0 >= $nip.0]} {
		set found 1
		set return $country
		break
	    } elseif {[expr $low.0 <= $nip.0]} {
		set myMin $avg
	    } else {
		set myMax $avg
	    }
	}
	close $if
	if {!$found} {
	    return ""
	} else {
	    return $return
	}
	
    }

 }

 opentarget::readDatabase opentarget.dat
 puts [time {
 set foo [::opentarget::lookupArray $args]
 } 1000]

----
[RS] has this version:
	proc ip2s ip {eval format %02x%02x%02x%02x [split $ip .]}
	set fp [open [file dirname [info script]]/opentarget.dat]
	read $fp 8 ;# skip record count
	set data {}
	while 1 {
	    set from [read $fp 8]
	    set to [read $fp 8]
	    set country [read $fp 2]
	    if [eof $fp] break
	    lappend data s$from s$to $country
	}
	close $fp
	foreach arg $argv {
	    set ips s[ip2s $arg]
	    foreach {from to country} $data {
		if {$from<=$ips && $to>=$ips} {puts $arg:$country; break}
	    }
	}
which may not be optimally fast, but responds fast enough when called from command line.
----

[MS] did some performance tuning on the original algorithm presented here, modifying the list-based lookupArrayS to avoid runtime parsing (lookupArrayS1) and by a slight change in the loop algorithm (lookupArrayS2).

This produced a 17x speedup over the original (lookupArray) - so that this algorithm is now about 25 times slower than C.

     proc lookupArrayS1 {ip} {

	variable theList
	variable max
	variable dbLoaded
	if {!$dbLoaded} {error "database not loaded"}

	set nip [calcIpS $ip]
	set myMin 0
	set myMax $max
	set steps 0
	set avg 0
	while {$myMax > $myMin && $steps < 32} {
	    incr steps
	    if {$avg == 1} {
		set avg 0
	    } else {
		set avg [expr {$myMax - (($myMax - $myMin) /2)}]
	    }
	    set current [lindex $theList $avg]
	    set low [lindex $current 0]
	    set hi [lindex $current 1]

	    if {$low <= $nip && $hi >= $nip} {
		return [lindex $current 2]
	    } elseif {$low <= $nip} {
		set myMin $avg
	    } else {
		set myMax $avg
	    }
	}
    }

    proc lookupArrayS2 {ip} {
	
        variable theList
        variable max
        variable dbLoaded
        if {!$dbLoaded} {error "database not loaded"}
	
        set nip [calcIpS $ip]
        set myMin 0
        set myMax $max
		
        while 1 {      
            set avg [expr {$myMax - (($myMax - $myMin) /2)}]
            set current [lindex $theList $avg]
            set low [lindex $current 0]
            set hi [lindex $current 1]

            if {$low <= $nip} {
                if {$hi >= $nip} {
                    return [lindex $current 2]
                } elseif {$avg < $myMax}  {
                    set myMin [expr {$avg + 1}]
                } else {
		    # $nip not in any range in our db
		    return ""
		}
	    } elseif {$myMin < $myMax} {
                set myMax [expr {$avg - 1}]
	    } else {
		# ($myMin == $myMax) and it doesn't fit
		#  => $nip is in a hole in our database
		return ""
	    }
        }
    }


 % time {opentarget::lookupArray 128.1.0.1} 1000
 2788 microseconds per iteration
 % time {opentarget::lookupArrayS 128.1.0.1} 1000
 2044 microseconds per iteration
 % time {opentarget::lookupArrayS1 128.1.0.1} 1000
 199 microseconds per iteration
 % time {opentarget::lookupArrayS2 128.1.0.1} 1000
 161 microseconds per iteration
 % 
 % expr 430*161/2788.
 24.831420373