Version 1 of xxHash

Updated 2017-05-23 16:52:11 by dbohdan

xxHash is a non-cryptographic hashing algorithm (i.e., an alternative to CRC but not SHA256) developed by Yann Collet.

Tcl implementation

Warning: this isn't fast.

# A pure xxHash32 implementation.
# Copyright (c) 2017 dbohdan
# License: MIT
namespace eval ::xxhash {
    variable version 0.1.0
}

proc ::xxhash::assert-equal-int {actual expected} {
    if {$actual != $expected} {
        error "expected 0x[format %08x $expected],\
               but got 0x[format %08x $actual]"
    }
}

proc ::xxhash::rol {x n} {
    set x [expr {$x & 0xffffffff}]
    return [expr {(($x << $n) | ($x >> (32 - $n))) & 0xffffffff}]
}

proc ::xxhash::xxhash32 {data seed} {
    set prime1 0x9e3779b1
    set prime2 0x85ebca77
    set prime3 0xc2b2ae3d
    set prime4 0x27d4eb2f
    set prime5 0x165667b1

    set ptr 0
    set len [string length $data]
    if {$len >= 16} {
        set limit [expr {$len - 16}]
        set v1 [expr {$seed + $prime1 + $prime2}]
        set v2 [expr {$seed + $prime2}]
        set v3 $seed
        set v4 [expr {$seed - $prime1}]

        while 1 {
            binary scan $data "@$ptr iu" x
            incr ptr 4
            incr v1 [expr {$x * $prime2}]
            set v1 [expr {[rol $v1 13] * $prime1}]

            binary scan $data "@$ptr iu" x
            incr ptr 4
            incr v2 [expr {$x * $prime2}]
            set v2 [expr {[rol $v2 13] * $prime1}]

            binary scan $data "@$ptr iu" x
            incr ptr 4
            incr v3 [expr {$x * $prime2}]
            set v3 [expr {[rol $v3 13] * $prime1}]

            binary scan $data "@$ptr iu" x
            incr ptr 4
            incr v4 [expr {$x * $prime2}]
            set v4 [expr {[rol $v4 13] * $prime1}]

            if {$ptr > $limit} break
        }

        set  hash  [expr {
            ([rol $v1 1] + [rol $v2 7] + [rol $v3 12] + [rol $v4 18])
            & 0xffffffff
        }]
    } else {
        set hash [expr {$seed + $prime5}]
    }

    incr hash $len

    set limit [expr {$len - 4}]
    while {$ptr <= $limit} {
        binary scan $data "@$ptr iu" x
        set hash [expr {$hash + $x * $prime3}]
        set hash [expr {[rol $hash 17] * $prime4}]
        incr ptr 4
    }

    while {$ptr < $len} {
        binary scan $data "@$ptr cu" x
        set hash [expr {$hash + $x * $prime5}]
        set hash [expr {[rol $hash 11] * $prime1}]
        incr ptr 1
    }

    set hash [expr {$hash & 0xffffffff}]
    set hash [expr {(($hash ^ ($hash >> 15)) * $prime2) & 0xffffffff}]
    set hash [expr {(($hash ^ ($hash >> 13)) * $prime3) & 0xffffffff}]
    set hash [expr {($hash ^ ($hash >> 16)) & 0xffffffff}]

    return $hash
}

proc ::xxhash::test {} {
    assert-equal-int [rol 0 0] 0
    assert-equal-int [rol 0xffffffff 5]  0xffffffff
    assert-equal-int [rol 0xcd0000ab 8]  0x0000abcd
    assert-equal-int [rol 0xcd0000ab 16] 0x00abcd00
    assert-equal-int [rol 0xcd0000ab 24] 0xabcd0000
    assert-equal-int [rol 0xcd0000ab 32] 0xcd0000ab
    assert-equal-int [xxhash32 abc 0] 0x32d153ff
    assert-equal-int [xxhash32 abc 0x12345678] 0x11364062
    assert-equal-int [xxhash32 {Hello, World! This is a test.} 0] 0x9ea357ea
}

if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} {
    ::xxhash::test
}

See also