Version 1 of ULID

Updated 2017-06-08 09:23:59 by dbohdan

ULID, the Universally Unique Lexicographically Sortable Identifier, is an alternative to UUID. The specification and the reference implementation in JavaScript are available from https://github.com/alizain/ulid .

Why use ULID

The README explains the motivation:


UUID can be suboptimal for many uses-cases because:

  • It isn't the most character efficient way of encoding 128 bits of randomness
  • UUID v1/v2 is impractical in many environments, as it requires access to a unique, stable MAC address
  • UUID v3/v5 requires a unique seed and produces randomly distributed IDs, which can cause fragmentation in many data structures
  • UUID v4 provides no other information than randomness which can cause fragmentation in many data structures

Instead, herein is proposed ULID:

  • 128-bit compatibility with UUID
  • 1.21e+24 unique ULIDs per millisecond
  • Lexicographically sortable!
  • Canonically encoded as a 26 character string, as opposed to the 36 character UUID
  • Uses Crockford's base32 for better efficiency and readability (5 bits per character)
  • Case insensitive
  • No special characters (URL safe)

Implementation

The follow module has been tested in Tcl 8.5-8.6 and Jim Tcl 0.73.

Download with wiki-reaper: wiki-reaper 48827 0 > ulid-0.1.0.tm

Code

# A ULID generator with optional Critcl acceleration.
# The default RNG is [::tcl::mathfunc::rand] in pure Tcl and the libc rand()
# when using Critcl. You can replace the default RNG by setting
# ::ulid::defaultRng to a script that returns a number between 0 and 31.
# Copyright (c) 2017 dbohdan.
# License: MIT.
namespace eval ::ulid {
    variable version 0.1.0
    variable base32 [split 0123456789ABCDEFGHJKMNPQRSTVWXYZ {}]
    if {![info exists useCritcl]} {
        variable useCritcl 0
        if {![catch {
            package require critcl 3
        }]} {
            set useCritcl [::critcl::compiling]
        }        
    }
}

# $t is the time in milliseconds.
proc ::ulid::encode-time {t len} {
    if {($t < 0) || ($t > 0xffffffffffff)} {
        error "expected unsigned integer representable in 48 bits but got\
               \"$t\""
    }

    set result {}
    for {set i 0} {$i < $len} {incr i} {
        set m [expr {$t % 32}]
        set result [lindex $::ulid::base32 $m]$result
        set t [expr {($t - $m) / 32}]
    }

    return $result
}

proc ::ulid::gen-random {rng len} {
    set result {}
    for {set i 0} {$i < $len} {incr i} {
        append result [lindex $::ulid::base32 [{*}$rng]]
    }
    return $result
}

proc ::ulid::rng {} {
    return [expr {int(32 * rand())}]
}

if {$::ulid::useCritcl} {
    ::critcl::ccode "#define ULID_BASE32 \"[join $::ulid::base32 {}]\""
    ::critcl::cinit {
        srand(time(NULL));
    } {}
    critcl::cproc ulid::rng-accel {} int {
        return rand() % 32;
    }
    critcl::ccommand ulid::ulid-accel {cdata interp objc objv} {
        Tcl_Obj* result;
        char s[27];
        int i, m;
        Tcl_WideInt t;

        if (objc != 3) {
            Tcl_WrongNumArgs(interp, 1, objv, "t rng");
            return TCL_ERROR;
        }
        if ((Tcl_GetWideIntFromObj(interp, objv[1], &t) != TCL_OK) ||
            (t < 0) || (t > 0xffffffffffff)) {
            Tcl_SetObjResult(interp,
                             Tcl_NewStringObj("expected unsigned integer "
                                              "representable in 48 bits", -1));
            return TCL_ERROR;
        }

        for (i = 9; i >= 0; i--) {
            m = t % 32;
            s[i] = ULID_BASE32[m];
            t = (t - m) / 32;
        }
        for (i = 10; i < 26; i++) {
            int random;
            int rc = Tcl_EvalObjEx(interp, objv[2], 0);
            if (rc != TCL_OK) {
                return rc;
            }
            rc = Tcl_GetIntFromObj(interp, Tcl_GetObjResult(interp), &random);
            if (rc != TCL_OK) {
                return rc;
            }
            if ((random < 0) || (random > 31)) {
                Tcl_SetObjResult(interp,
                                 Tcl_NewStringObj("expected random integer "
                                                  "between 0 and 31", -1));
                return TCL_ERROR;
            }
            s[i] = ULID_BASE32[random % 32];
        }
        s[26] = '\0';

        Tcl_SetObjResult(interp, Tcl_NewStringObj(s, -1));
        return TCL_OK;
    }
    ulid::ulid-accel 0 ::ulid::rng
}

proc ::ulid::ulid {{t {}} {rng {}}} {
    if {$t eq {}} {
        set t [expr {[clock milliseconds]}]
    }
    if {$rng eq {}} {
        if {[info exists ::ulid::defaultRng]} {
            set rng $::ulid::defaultRng
        } elseif {$::ulid::useCritcl} {
            set rng ::ulid::rng-accel
        } else {
            set rng ::ulid::rng
        }
    }
    if {$::ulid::useCritcl} {
        return [ulid-accel $t $rng]
    } else {
        return [encode-time $t 10][gen-random $rng 16]
    }
}

proc ::ulid::test {} {
    if {$::ulid::useCritcl} {
        for {set i 0} {$i < [clock seconds]} {incr i 100000} {
            set accel [string range [::ulid::ulid $i] 0 9]
            set ::ulid::useCritcl 0
            set pure  [string range [::ulid::ulid $i] 0 9]
            set ::ulid::useCritcl 1
            if {$accel ne $pure} {
                error "Critcl ULID time \"$accel\" doesn't equal\
                       pure Tcl ULID time \"$pure\" for t $i"
            }
        }

        set ::ulid::useCritcl 0
        catch {::ulid::ulid nope}
        catch {::ulid::ulid 0 error}
        set ::ulid::useCritcl 1

        puts -nonewline "with Critcl:\n    "
    }
    
    catch {::ulid::ulid nope}
    catch {::ulid::ulid 0 error}
    puts [time ::ulid::ulid 10000]
    
    if {$::ulid::useCritcl} {
        puts -nonewline "without Critcl:\n    "
        set ::ulid::useCritcl 0
        puts [time ::ulid::ulid 10000]
    }
}

if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} {
    if {$argv eq {--test}} {
        ::ulid::test
    } else {
        puts [::ulid::ulid]
    }
}

Performance test

$ tclsh ulid.tcl  --test
with Critcl:
    9.0863 microseconds per iteration
without Critcl:
    38.7804 microseconds per iteration