Version 5 of twtxt

Updated 2020-07-30 13:27:15 by dbohdan

twtxt is a minimalist news feed file format and protocol for sharing status updates. You can compare it to RSS, Finger, and a decentralized Twitter.

A twtxt feed is a text file that looks like this. Each line consists of a timestamp, a tab character, and the status text.

2016-02-04T13:30:00+01:00        You can really go crazy here! ┐(゚∀゚)┌
2016-02-03T23:05:00+01:00        @<example http://example.org/twtxt.txt> welcome to twtxt!
2016-02-01T11:00:00+01:00        This is just another example.
2015-12-12T12:00:00+01:00        Fiat lux!

twtxt feeds often include emoji, which can cause problems for standard builds of Tcl 8.

Parser library

# A twtxt parser library for Tcl 8.5+ with partial support for Jim Tcl.
# (The parser working in Jim Tcl depends on whether strptime() in libc
# understands time zones.  In musl libc it does not.)
# Copyright (c) 2020 D. Bohdan.
# License: MIT.

namespace eval twtxt {
    # Version of this library.
    variable version {0.1.0}

    # [clock] format for the date-and-time part of a timestamp; a time
    # zone field is appended to it by the code that scans timestamps.
    variable baseTimeFormat {%Y-%m-%dT%H:%M:%S}
}

# Load the "try" package when the interpreter does not already provide a
# try command.
if {[llength [info commands try]] == 0} {
    package require try
}

# Scan a twtxt timestamp with [clock scan].
#
# Arguments:
#   time  timestamp text such as 2016-02-04T13:30:00+01:00; a fractional
#         seconds part is accepted and discarded.
# Returns a two-element list: the [clock scan] result followed by "tz" when
# the timestamp was scanned with a %z time zone field, or "local" when it
# was scanned without one.
# Raises an error when neither format can be scanned.
proc twtxt::scan-time time {
    variable baseTimeFormat

    # Drop fractions of a second; the formats tried below have no field
    # for them.
    regsub {(\d+-\d+-\d+T\d+:\d+:\d+)\.\d+} $time {\1} time

    # Try the format with a time zone field first, then the bare format.
    foreach {zoneField kind} {%z tz {} local} {
        set fmt $baseTimeFormat$zoneField
        if {![catch {clock scan $time -format $fmt} outcome]} {
            return [list $outcome $kind]
        }
    }

    # Both attempts failed; outcome holds the message from the last one.
    error [list can't scan time $time due to error $outcome]
}

# Parse the text of a twtxt feed.
#
# Arguments:
#   feed  the complete contents of the feed as one string.
# Returns a dictionary with three keys:
#   metadata  dictionary built from the leading "# key = value" lines;
#   statuses  flat list alternating scanned time and status text;
#   bad       flat list alternating each rejected line and its error message.
proc twtxt::parse-feed feed {
    set metadata {}
    set statuses {}
    set bad {}

    # Stays true until the first line that is neither blank nor metadata.
    set inHeader 1
    foreach line [split [string trimright $feed \n] \n] {
        # Blank lines are skipped everywhere and do not end the header.
        if {[regexp {^\s*$} $line]} continue

        # Metadata is outside the spec but occurs in real feeds.
        if {$inHeader
            && [regexp {^#\s*?([^\s]+?)\s*?=\s*(.*)$} $line _ key value]} {
            dict set metadata $key [regsub {\s+$} $value {}]
            continue
        }
        set inHeader 0

        # Ordinary comments are outside the spec as well; skip them.
        if {[string index $line 0] eq "#"} continue

        # Everything before the first tab is the timestamp and the rest is
        # the status text.  Any failure in here sends the line to "bad".
        if {[catch {
            set tab [string first \t $line]
            set stamp [string range $line 0 $tab-1]
            set body [string range $line $tab+1 end]
            lindex [scan-time $stamp] 0
        } seconds]} {
            # On failure the catch variable holds the error message.
            lappend bad $line $seconds
        } else {
            lappend statuses $seconds $body
        }
    }

    return [dict create metadata $metadata statuses $statuses bad $bad]
}

# Run the built-in test cases for parse-feed and scan-time.
#
# Each failing case is reported on stderr.  When all cases have run, the
# stats dictionary (keys total, passed, failed) is printed to stderr.
proc twtxt::run-tests {} {
    set stats [dict create total 0 passed 0 failed 0]

    # Run one test case.
    #
    # Arguments:
    #   name      label used in the failure report.
    #   script    code to run; it is run with catch, so an error message
    #             becomes the result that is compared.
    #   arrow     comparison mode: -> for exact string equality, ->* for a
    #             glob match with string match, ->$ for a regexp match.
    #   expected  value or pattern the result is compared against.
    # Any other arrow raises an error.
    proc test {name script arrow expected} {
        # Update the counters kept in the calling procedure.
        upvar stats stats

        dict incr stats total

        catch $script result

        set matched [switch -- $arrow {
            ->   { expr {$result eq $expected} }
            ->*  { string match $expected $result }
            ->$  { regexp -- $expected $result }
            default {
                return -code error \
                       -errorcode {JIMLIB TEST BAD-ARROW} \
                       [list unknown arrow: $arrow]
            }
        }]

        if {!$matched} {
            # Build the whole report first so it is written in one call.
            set error {}
            append error "\n>>>>> $name failed: [list $script]\n"
            append error "      got: [list $result]\n"
            append error " expected: [list $expected]"
            # The comparison mode is only shown when it is not plain equality.
            if {$arrow ne {->}} {
                append error "\n    match: $arrow"
            }

            dict incr stats failed

            puts stderr $error
            return
        }

        dict incr stats passed
    }

    # Fractional seconds and a Z suffix.
    test basic-1 {
        parse-feed "2020-07-21T05:51:29.000000Z\tHello, world!"
    } -> {metadata {} statuses {1595310689 {Hello, world!}} bad {}}

    # Numeric time zone offset and a trailing newline.
    test basic-2 {
        parse-feed "2020-07-21T05:51:29+0100\tHello, world!\n"
    } -> {metadata {} statuses {1595307089 {Hello, world!}} bad {}}

    # Leading blank lines and a comment between two statuses.
    test basic-3 {
        parse-feed "\n\n1970-01-01T00:00:00Z\tHello...\n# Comment!\
                    \n1970-01-01T00:00:01-0000\t...world!\n"
    } -> {metadata {} statuses {0 Hello... 1 ...world!} bad {}}

    # Lines without a tab are reported in bad along with an error message.
    test basic-4 {
        parse-feed "hell\nno"
    } ->$ {metadata \{\} statuses \{\} bad \{hell \{.*\} no \{.*\}\}}

    # The script below runs inside the test procedure, so it uses upvar 1
    # to reach t and baseTimeFormat in this procedure.  The timestamp has
    # no time zone field, and the key = value comment comes after a status.
    variable baseTimeFormat
    set t [clock seconds]
    test basic-5 {
        upvar 1 t t baseTimeFormat baseTimeFormat
        parse-feed "[clock format $t -format $baseTimeFormat]\tasdf\n#\
                    not = meta\nfail"
    } ->* "metadata {} statuses {$t asdf} bad {*}"

    # A feed made only of whitespace yields nothing.
    test basic-6 {
        parse-feed "\n\n\n   \n\t\n"
    } -> {metadata {} statuses {} bad {}}

    # An empty timestamp is an error; the message is matched as a glob.
    test scan-time-1 {
        scan-time {}
    } ->* {can't scan time {} due to error*}

    test scan-time-2 {
        scan-time 1970-01-01T00:00:00Z
    } -> {0 tz}

    # No time zone field, so only the leading digits are checked.
    test scan-time-3 {
        scan-time 2015-01-01T00:00:00
    } ->$ {14\d+ local}

    # Metadata lines with and without whitespace around the key.
    test metadata-1 {
        parse-feed "#key = value\n#\tfoo=bar 123"
    } -> {metadata {key value foo {bar 123}} statuses {} bad {}}

    # Whitespace around a metadata value is removed.
    test metadata-2 {
        parse-feed "# url =  https://example.com/feed.txt \t \n"
    } -> {metadata {url https://example.com/feed.txt} statuses {} bad {}}

    puts stderr $stats
}

# Run the tests only when argv0 exists and names the same file as the
# script being evaluated, i.e. when this file is the main script.
if {[info exists argv0]} {
    if {[file tail $argv0] eq [file tail [info script]]} {
        twtxt::run-tests
    }
}

# Provide the version declared in the twtxt namespace, so that the package
# version and twtxt::version cannot disagree.
package provide twtxt $twtxt::version

Competition