Version 5 of Alternative JSON

Updated 2017-10-16 02:03:22 by AMG

AMG: [jsonEncode] and [jsonDecode] convert between a tagged data format and JSON.

Types

The available type tags are:

array
List of elements.
object
Key-value dictionary of elements.
string
Arbitrary string value.
number
Integer and real numeric value.
literal
false, null, or true.
raw
Preformatted JSON text.

Array and object type composition is supported in two ways. If the type is array or object, each element is represented as a two-element list, being the type and the value. This gets quite cumbersome when the values all have uniform type, so the outer type may be a two-element list, the second of which is the type used for all values. More deeply nested data structures can be defined either by further nesting the second type element or by flattening the list.

Examples

% jsonEncode {string hello}
"hello"
% jsonEncode {number 42}
42
% jsonEncode {literal null}
null
% jsonEncode {array {{string hello} {number 42} {literal null}}}
["hello",42,null]
% jsonEncode {object {foo {string hello} bar {number 42} quux {literal null}}}
{"foo":"hello","bar":42,"quux":null}
% jsonEncode {array {{array {{number 1} {number 2}}} {array {{number 3} {number 4}}}}}
[[1,2],[3,4]]
% jsonEncode {{array {array number}} {{1 2} {3 4}}}
[[1,2],[3,4]]
% jsonEncode {{array array number} {{1 2} {3 4}}}
[[1,2],[3,4]]
% jsonEncode {{array array string} {{1 2} {3 4}}}
[["1","2"],["3","4"]]
% jsonEncode {{object object string} {name {first Andy last Goth} address {web http://tcl.tk/}}}
{"name":{"first":"Andy","last":"Goth"},"address":{"web":"http://tcl.tk/"}}
% jsonDecode {"hello"}
string hello
% jsonDecode 42
number 42
% jsonDecode null
literal null
% jsonDecode {["hello",42,null]}
array {{string hello} {number 42} {literal null}}
% jsonDecode {{"foo":"hello","bar":42,"quux":null}}
object {foo {string hello} bar {number 42} quux {literal null}}
% jsonDecode {[[1,2],[3,4]]}
array {{array {{number 1} {number 2}}} {array {{number 3} {number 4}}}}
% jsonDecode {{"name":{"first":"Andy","last":"Goth"},"address":{"web":"http://tcl.tk/"}}}
object {name {object {first {string Andy} last {string Goth}}} address {object {web {string http://tcl.tk/}}}}

Implementation

[jsonEncode]

# jsonEncode --
# Encodes data in the JSON format per https://tools.ietf.org/html/rfc7159.
# Source: http://wiki.tcl.tk/49031
proc jsonEncode {data} {
    # Extract type and value from data argument.
    if {[llength $data] != 2} {
        error "invalid JSON data: must be a two-element list consisting of\
                type and value"
    }
    lassign $data type value

    # Extract top and subtype from type element.
    set toptype [lindex $type 0]
    if {[llength $type] >= 2} {
        if {[llength $type] == 2} {
            set subtype [lindex $type 1]
        } else {
            set subtype [lrange $type 1 end]
        }
        if {$toptype ni {array object}} {
            set toptype {}
        }
    }

    # Perform type-specific JSON encoding.
    switch $toptype {
    array {
        # Recursively encode each array element.  If a subtype was specified, it
        # is shared between all elements.  Otherwise, each element is itself a
        # two-element list consisting of type and value.
        set first 1
        append result \[
        foreach element $value {
            if {$first} {
                set first 0
            } else {
                append result ,
            }
            if {[info exists subtype]} {
                append result [jsonEncode [list $subtype $element]]
            } else {
                append result [jsonEncode $element]
            }
        }
        append result \]
        return $result
    } object {
        # Recursively encode each object key and element.  Keys are always
        # strings.  If a subtype was specified, it is shared between all
        # elements.  Otherwise, each element is itself a two-element list
        # consisting of type and underlying data value.
        set first 1
        append result \{
        foreach {key element} $value {
            if {$first} {
                set first 0
            } else {
                append result ,
            }
            append result [jsonEncode [list string $key]] :
            if {[info exists subtype]} {
                append result [jsonEncode [list $subtype $element]]
            } else {
                append result [jsonEncode $element]
            }
        }
        append result \}
        return $result
    } string {
        # Encode the minimal set of required escape sequences.
        return \"[string map {
            \x00 \\u0000    \x01 \\u0001    \x02 \\u0002    \x03 \\u0003
            \x04 \\u0004    \x05 \\u0005    \x06 \\u0006    \x07 \\u0007
            \x08 \\u0008    \x09 \\u0009    \x0a \\u000a    \x0b \\u000b
            \x0c \\u000c    \x0d \\u000d    \x0e \\u000e    \x0f \\u000f
            \x10 \\u0010    \x11 \\u0011    \x12 \\u0012    \x13 \\u0013
            \x14 \\u0014    \x15 \\u0015    \x16 \\u0016    \x17 \\u0017
            \x18 \\u0018    \x19 \\u0019    \x1a \\u001a    \x1b \\u001b
            \x1c \\u001c    \x1d \\u001d    \x1e \\u001e    \x1f \\u001f
            \\   \\\\       \"   \\\"
        } $value]\"
    } number {
        # Attempt to normalize the number to comply with the JSON standard.
        regsub {^\+(?=[\d.])} $value {} result        ;# Strip leading plus.
        regsub {^(-?)0+(?=\d)} $result {\1} result    ;# Strip leading 0's.
        regsub {(^|-)\.(?=\d)} $result {\10.} result  ;# Prefix leading dot.
        regsub {(\d)\.(?=\D|$)} $result {\1.0} result ;# Suffix trailing dot.
        if {![regexp {^-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][-+]?\d+)?$} $result]} {
            error "invalid JSON number \"$value\":\
                    see https://tools.ietf.org/html/rfc7159#section-6"
        }
        return $result
    } literal {
        # The only valid literals are false, null, and true.
        if {$value ni {false null true}} {
            error "invalid JSON literal \"$value\":\
                    must be false, null, or true"
        }
        return $value
    } raw {
        # Raw data.  The caller must only supply correctly formatted JSON.
        return $value
    } default {
        # Invalid type.
        error "invalid JSON type \"$type\": must be array, object, string,
                number, literal, raw, or {array|object ?...? subtype}, where
                subtype is recursively any valid JSON type"
    }}
}

[jsonDecode]

# jsonDecode --
# Decodes data from the JSON format per https://tools.ietf.org/html/rfc7159.
# The optional indexVar argument is the name of a variable that holds the index
# at which decoding begins (defaults to 0 if the variable doesn't exist) and
# will hold the index immediately following the end of the decoded element.  If
# indexVar is not specified, the entire JSON input is decoded, and it is an
# error for it to be followed by any non-whitespace characters.
# Source: http://wiki.tcl.tk/49031
proc jsonDecode {json {indexVar {}}} {
    # Link to the caller's index variable.
    if {$indexVar ne {}} {
        upvar 1 $indexVar index
    }

    # By default, start decoding at the start of the input.
    if {![info exists index]} {
        set index 0
    }

    # Skip leading whitespace.  Return empty at end of input.
    if {![regexp -indices -start $index {[^\t\n\r ]} $json range]} {
        return
    }
    set index [lindex $range 0]

    # The first character 
    switch [string index $json $index] {
    \" {
        # JSON strings start with double quote.
        set type string

        # The value is the text between matching double quotes.
        if {![regexp -indices -start $index {\A\"((?:[^"]|\\.)*)\"}\
                $json range sub]} {
            return -code error "invalid JSON string at index $index:\
                    must end with close quote"
        }
        set value [string range $json {*}$sub]

        # Process all backslash substitutions in the value.
        set start 0
        while {[regexp -indices -start $start {\\u[[:xdigit:]]{4}|\\[^u]}\
                $value sub]} {
            set char [string index $value [expr {[lindex $sub 0] + 1}]]
            switch $char {
                u {set char [subst [string range $value {*}$sub]]}
                b {set char \b} f {set char \f} n {set char \n}
                r {set char \r} t {set char \t}
            }
            set value [string replace $value {*}$sub $char]
            set start [expr {[lindex $sub 0] + 1}]
        }
    } \{ - \[ {
        # JSON objects/arrays start with open brace/bracket.
        if {[string index $json $index] eq "\{"} {
            set type object
            set endRe {\A[\t\n\r ]*\}}
            set charName brace
        } else {
            set type array
            set endRe {\A[\t\n\r ]*\]}
            set charName bracket
        }
        set value {}
        incr index

        # Loop until close brace/bracket is encountered.
        while {![regexp -indices -start $index $endRe $json range]} {
            # Each element other than the first is preceded by comma.
            if {[llength $value]} {
                if {![regexp -indices -start $index\
                        {\A[\t\n\r ]*,} $json range]} {
                    return -code error "invalid JSON $type at index $index:\
                            element not followed by comma or close $charName"
                }
                set index [expr {[lindex $range 1] + 1}]
            }

            # For objects, get key and confirm it is followed by colon.
            if {$type eq "object"} {
                set key [jsonDecode $json index]
                if {![llength $key]} {
                    return -code error "invalid JSON object at index $index:\
                            must end with close brace"
                } elseif {[lindex $key 0] ne "string"} {
                    return -code error "invalid JSON object at index $index:\
                            key type is \"[lindex $key 0]\", must be string"
                } elseif {![regexp -indices -start $index {\A[\t\n\r ]*:}\
                        $json range]} {
                    return -code error "invalid JSON object at index $index:\
                            key not followed by colon"
                }
                set index [expr {[lindex $range 1] + 1}]
                lappend value [lindex $key 1]
            }

            # Get element value.
            lappend value [jsonDecode $json index]
        }
    } t - f - n {
        # JSON literals are true, false, or null.
        set type literal
        if {![regexp -indices -start $index {(?:true|false|null)\M}\
                $json range]} {
            return -code error "invalid JSON literal at index $index"
        }
        set value [string range $json {*}$range]
    } - - + - 0 - 1 - 2 - 3 - 4 - 5 - 6 - 7 - 8 - 9 - . {
        # JSON numbers are integers or real numbers.
        set type number
        if {![regexp -indices -start $index --\
                {-?(?:0|[1-9]\d*)(?:\.\d+)?(?:[eE][-+]?\d+)?\M} $json range]} {
            return -code error "invalid JSON number at index $index"
        }
        set value [string range $json {*}$range]
    } default {
        # JSON allows only the above-listed types.
        return -code error "invalid JSON data at index $index"
    }}

    # Continue decoding after the last character matched above.
    set index [expr {[lindex $range 1] + 1}]

    # When performing a full decode, ensure only whitespace appears at end.
    if {$indexVar eq {} && [regexp -start $index {[^\t\n\r\ ]} $json]} {
        return -code error "junk at end of JSON"
    }

    # Return the type and value.
    list $type $value
}

Testing

package require tcltest
foreach {name jsonEncode jsonDecode description tcl json} {
    1.1 1 1 "empty string"
        {string {}}
        {""}
    1.2 1 1 "nonempty string"
        {string hello}
        {"hello"}
    1.3 1 0 "string with quoted characters"
        {string \"a\nb\\c\"}
        {"\"a\u000ab\\c\""}
    1.4 1 1 "string with canonical quoted characters"
        "string \{\"a\nb\\c\"\}"
        {"\"a\u000ab\\c\""}
    2.1 1 1 integer
        {number 42}
        42
    2.2 1 1 "negative integer"
        {number -42}
        -42
    2.3 1 0 "positive integer"
        {number +42}
        42
    2.4 1 0 "leading zeroes"
        {number 000}
        0
    2.5 1 1 zero
        {number 0}
        0
    2.6 1 1 "negative zero"
        {number -0}
        -0
    2.7 1 0 "negative zero with leading zeroes"
        {number -000}
        -0
    2.8 1 1 "real number"
        {number 1.23}
        1.23
    2.9 1 1 "negative real number"
        {number -1.23}
        -1.23
    2.10 1 1 "negative real number with exponent"
        {number -1e5}
        -1e5
    2.11 1 1 "real number with capital exponent"
        {number 1E5}
        1E5
    2.12 1 1 "real number with fraction and exponent"
        {number 1.23e4}
        1.23e4
    2.13 1 0 "positive real number with fraction and positive exponent"
        {number +1.23e+4}
        1.23e+4
    2.14 1 1 "real number with fraction and negative exponent"
        {number 1.23e-4}
        1.23e-4
    2.15 1 0 "real number with dot and no fraction"
        {number 1.}
        1.0
    2.16 1 0 "real number with dot and no integer"
        {number .1}
        0.1
    2.17 1 0 "real number with dot, no fraction, and exponent"
        {number 1.E5}
        1.0E5
    2.18 1 0 "real number with dot, no integer, and exponent"
        {number .1E-5}
        0.1E-5
    2.19 1 0 "real number with leading zeroes"
        {number 00123.45}
        123.45
    2.20 1 0 "small real number with leading zeroes"
        {number 00000.45}
        0.45
    2.21 1 0 "zero real number with leading zeroes and exponent"
        {number 00000e9}
        0e9
    3.1 1 1 "literal false"
        {literal false}
        false
    3.2 1 1 "literal null"
        {literal null}
        null
    3.3 1 1 "literal true"
        {literal true}
        true
    4.1 1 1 "array with variable type"
        {array {{string hello} {number 42} {literal null}}}
        {["hello",42,null]}
    4.2 1 1 "array with constant but unshared type"
        {array {{array {{number 1} {number 2}}} {array {{number 3} {number 4}}}}}
        {[[1,2],[3,4]]}
    4.3 1 0 "array with shared type, nested syntax"
        {{array {array number}} {{1 2} {3 4}}}
        {[[1,2],[3,4]]}
    4.4 1 0 "array with shared type, flattened syntax"
        {{array array number} {{1 2} {3 4}}}
        {[[1,2],[3,4]]}
    4.5 1 0 "array of strings"
        {{array array string} {{1 2} {3 4}}}
        {[["1","2"],["3","4"]]}
    4.6 1 1 "empty array"
        {array {}}
        {[]}
    4.7 1 0 "empty array with unnecessary shared type"
        {{array string} {}}
        {[]}
    5.1 1 1 "object with variable type"
        {object {foo {string hello} bar {number 42} quux {literal null}}}
        {{"foo":"hello","bar":42,"quux":null}}
    5.2 1 1 "object with constant but unshared type"
        {object {name {object {first {string Andy} last {string Goth}}} address {object {web {string http://tcl.tk/}}}}}
        {{"name":{"first":"Andy","last":"Goth"},"address":{"web":"http://tcl.tk/"}}}
    5.3 1 0 "object with shared type, flattened syntax"
        {{object object string} {name {first Andy last Goth} address {web http://tcl.tk/}}}
        {{"name":{"first":"Andy","last":"Goth"},"address":{"web":"http://tcl.tk/"}}}
    5.4 1 1 "empty object"
        {object {}}
        {{}}
    5.5 1 0 "empty object with unnecessary shared type"
        {{object string} {}}
        {{}}
    6.1 1 0 "empty raw"
        {raw {}}
        {}
    6.2 1 0 "nonempty raw"
        {raw {"foobar"}}
        {"foobar"}
} {
    if {$jsonEncode} {
        tcltest::test jsonEncode-$name $description\
                -body [list jsonEncode $tcl] -result $json
    }
    if {$jsonDecode} {
        tcltest::test jsonDecode-$name $description\
                -body [list jsonDecode $json] -result $tcl
    }
}
tcltest::cleanupTests

TODO

  • Type compression options in [jsonDecode]
  • Pretty printing options in [jsonEncode]
  • Tests showing whitespace tolerance in [jsonDecode]
  • Tests showing error detection
  • Submit to tcllib, or too much overlap with huddle?