Version 8 of Zstandard

Updated 2017-05-20 09:21:37 by dbohdan

Zstandard, or zstd, is a fast lossless compression algorithm whose reference implementation is a C library (libzstd). It is notable for compressing data at least twice as fast as zlib while achieving comparable compression ratios.

Using Zstandard from Tcl

The following code provides libzstd bindings for Tcl through Critcl.

Download it with wiki-reaper: wiki-reaper 48788 0 > zstd-0.1.1.tm

# Zstandard bindings/demo for Tcl.
# Copyright (c) 2017 dbohdan.
# License: MIT.
# If you installed libzstd.so.1 to /usr/local/lib/ on *nix, you may need to
# run Tcl with /usr/local/lib/ in $LD_LIBRARY_PATH. E.g.,
# $ LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib tclsh zstd.tcl
# in the POSIX shell or
# > env "LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/lib" tclsh zstd.tcl
# in fish.
package require critcl 3

# Package state for ::zstd.
#   bindingsVersion - version of these Tcl bindings.
#   version         - starts out empty; the Critcl init code below assigns
#                     ::zstd::version when the compiled code is loaded.
namespace eval ::zstd {}
set ::zstd::bindingsVersion 0.1.1
set ::zstd::version {}
critcl::ccode {
    /* ZSTD_findDecompressedSize() is declared in the experimental part of
     * zstd.h, which is only visible when this macro is defined before the
     * header is included.  Without it the call relies on an implicit
     * declaration, which recent C compilers reject. */
    #define ZSTD_STATIC_LINKING_ONLY
    #include <limits.h>
    #include <stdlib.h>
    #include <zstd.h>
}
critcl::clibraries -lzstd

# Library initialisation: publish the libzstd version as the dotted string
# ::zstd::version.
critcl::cinit {
    /* The arithmetic below treats the number as
     * major * 10000 + minor * 100 + patch. */
    int packed = ZSTD_versionNumber();
    int major = packed / 10000;
    int minor = (packed / 100) % 100;
    int patch = packed % 100;
    Tcl_Obj *dotted = Tcl_ObjPrintf("%d.%d.%d", major, minor, patch);

    /* The return value is ignored, as before. */
    Tcl_CreateNamespace(interp, "::zstd", NULL, NULL);
    Tcl_SetVar2Ex(interp, "::zstd::version", NULL, dotted, 0);
} {}

# zstd::compress data ?level?
#
# Compresses the byte array "data" with ZSTD_compress() and returns the
# compressed frame as a byte array.  "level" defaults to 1 and must be an
# integer between 1 and ZSTD_maxCLevel().
#
# Errors: wrong number of arguments, invalid level, allocation failure,
# a libzstd error, or a result too large to store in a Tcl byte array.
critcl::ccommand zstd::compress {cdata interp objc objv} {
    int level = 1;
    int max_level = ZSTD_maxCLevel();
    void *source_buf;
    void *dest_buf;
    int source_len;
    size_t dest_size;
    size_t compressed_size;

    if (objc != 2 && objc != 3) {
        /* Pass 1, not objc: only the command name should precede the
         * usage text.  Passing objc echoes every argument back. */
        Tcl_WrongNumArgs(interp, 1, objv, "data ?level?");
        return TCL_ERROR;
    }
    if (objc == 3) {
        if (Tcl_GetIntFromObj(interp, objv[2], &level) != TCL_OK
                || level < 1 || level > max_level) {
            /* Replaces whatever message Tcl_GetIntFromObj left behind with
             * one that also states the valid range. */
            Tcl_SetObjResult(interp,
                             Tcl_ObjPrintf("level must be integer between "
                                           "1 and %d", max_level));
            return TCL_ERROR;
        }
    }

    source_buf = (void *)Tcl_GetByteArrayFromObj(objv[1], &source_len);

    /* Worst-case output size for this input length. */
    dest_size = ZSTD_compressBound(source_len);
    dest_buf = malloc(dest_size);
    if (dest_buf == NULL) {
        Tcl_SetObjResult(interp,
                         Tcl_NewStringObj("can't allocate memory to compress "
                                          "data", -1));
        return TCL_ERROR;
    }

    compressed_size = ZSTD_compress(dest_buf, dest_size, source_buf,
                                    source_len, level);
    if (ZSTD_isError(compressed_size)) {
        Tcl_SetObjResult(interp,
                         Tcl_ObjPrintf("zstd encoding error: %s",
                                       ZSTD_getErrorName(compressed_size)));
        /* The scratch buffer must be released on the error path too. */
        free(dest_buf);
        return TCL_ERROR;
    }
    if (compressed_size > (size_t)INT_MAX) {
        /* The length is narrowed to int for Tcl_NewByteArrayObj below. */
        free(dest_buf);
        Tcl_SetObjResult(interp,
                         Tcl_NewStringObj("compressed data is too large for "
                                          "a Tcl byte array", -1));
        return TCL_ERROR;
    }

    /* Tcl_NewByteArrayObj copies the bytes, so the buffer can be freed. */
    Tcl_SetObjResult(interp,
                     Tcl_NewByteArrayObj(dest_buf, (int)compressed_size));
    free(dest_buf);

    return TCL_OK;
}

# zstd::decompress data
#
# Decompresses the byte array "data" with ZSTD_decompress() and returns the
# original bytes as a byte array.  The decompressed size is obtained up front
# with ZSTD_findDecompressedSize(); input for which that size is unknown is
# rejected because streaming decompression is not implemented here.
#
# Errors: wrong number of arguments, input that is not valid zstd data,
# unknown original size, a declared size too large for a Tcl byte array,
# allocation failure, or a libzstd decoding error.
critcl::ccommand zstd::decompress {cdata interp objc objv} {
/* Fallback definitions for zstd.h versions or configurations that do not
 * provide these sentinels; guarded so a header that does define them wins. */
#ifndef ZSTD_CONTENTSIZE_UNKNOWN
#define ZSTD_CONTENTSIZE_UNKNOWN (0ULL - 1)
#endif
#ifndef ZSTD_CONTENTSIZE_ERROR
#define ZSTD_CONTENTSIZE_ERROR   (0ULL - 2)
#endif
    void *source_buf;
    void *dest_buf;
    int source_len;
    unsigned long long dest_size;
    size_t decompressed_size;

    if (objc != 2) {
        /* Pass 1, not objc: only the command name should precede the
         * usage text.  Passing objc echoes every argument back. */
        Tcl_WrongNumArgs(interp, 1, objv, "data");
        return TCL_ERROR;
    }

    source_buf = (void *)Tcl_GetByteArrayFromObj(objv[1], &source_len);
    dest_size = ZSTD_findDecompressedSize(source_buf, source_len);
    if (dest_size == ZSTD_CONTENTSIZE_ERROR) {
        Tcl_SetObjResult(interp,
                         Tcl_NewStringObj("data wasn't compressed by zstd",
                                          -1));
        return TCL_ERROR;
    } else if (dest_size == ZSTD_CONTENTSIZE_UNKNOWN) {
        Tcl_SetObjResult(interp,
                         Tcl_NewStringObj("original data size unknown; "
                                          "can't decompress due to no support "
                                          "for streaming decompression", -1));
        return TCL_ERROR;
    }
    if (dest_size > (unsigned long long)INT_MAX) {
        /* The size comes from the (untrusted) input and is later narrowed
         * to int for Tcl_NewByteArrayObj; refuse values that do not fit. */
        Tcl_SetObjResult(interp,
                         Tcl_NewStringObj("decompressed data is too large "
                                          "for a Tcl byte array", -1));
        return TCL_ERROR;
    }

    /* malloc(0) is allowed to return NULL, which would make an empty
     * payload look like an allocation failure; always request >= 1 byte. */
    dest_buf = malloc(dest_size > 0 ? (size_t)dest_size : 1);
    if (dest_buf == NULL) {
        Tcl_SetObjResult(interp,
                         Tcl_NewStringObj("can't allocate memory to decompress "
                                          "data", -1));
        return TCL_ERROR;
    }

    decompressed_size = ZSTD_decompress(dest_buf, (size_t)dest_size,
                                        source_buf, source_len);
    if (decompressed_size != dest_size) {
        Tcl_SetObjResult(interp,
                         Tcl_ObjPrintf("zstd decoding error: %s",
                                       ZSTD_getErrorName(decompressed_size)));
        /* The scratch buffer must be released on the error path too. */
        free(dest_buf);
        return TCL_ERROR;
    }

    /* Tcl_NewByteArrayObj copies the bytes, so the buffer can be freed. */
    Tcl_SetObjResult(interp,
                     Tcl_NewByteArrayObj(dest_buf, (int)decompressed_size));
    free(dest_buf);

    return TCL_OK;
}

# Demo and self-test: runs only when this file is executed as the main script
# (the script's file name matches that of argv0), not when it is loaded as a
# package.
if {[info exists argv0] && [file tail [info script]] eq [file tail $argv0]} {
    # Evaluate the auto-load entry registered for zstd::compress.
    # NOTE(review): presumably this makes Critcl build and load the C code
    # right away so that ::zstd::version is set before it is printed --
    # confirm against the Critcl documentation.
    {*}$auto_index(zstd::compress)
    puts "zstd version ${zstd::version}"

    # Minimal round trip.
    puts [zstd::decompress [zstd::compress hello!]]

    # Read this script's own source, in binary mode, as test data.
    set ch [open [info script] rb]
    set x [read $ch]
    close $ch

    # Round-trip integrity check; needs the md5 package.
    if {![catch {package require md5}]} {
        set checksum1 [::md5::md5 -hex $x]
        set y [zstd::compress $x]
        set checksum2 [::md5::md5 -hex [zstd::decompress $y]]
        if {$checksum1 ne $checksum2} {
            error "decompressed data differs from original"
        }
    } else {
        puts {no package "md5" found -- skipping test}
    }

    # Benchmark: compress 10000 copies of the script at several levels.
    set z [string repeat $x 10000]
    puts [string length $z]
    # One untimed call before the timed runs.
    zstd::compress $z
    foreach level {1 2 3 5 10 19} {
        # "time" reports the average over 10 runs; the body also leaves the
        # compressed size of the last run in $size.
        set timing [time {
            set size [string length [zstd::compress $z $level]]
        } 10]
        puts [format {level=%1$2d   size=%3$d   %2$s} $level $timing $size]
    }
}