'''https://twtxt.readthedocs.io/en/latest/user/twtxtfile.html%|%twtxt%|%''' is a minimalist news feed file format and protocol for sharing status updates.  You can compare it to [RSS], https://en.wikipedia.org/wiki/Finger_protocol%|%Finger%|%, and a decentralized [Twitter].

A twtxt '''feed''' is a text file that looks like this.  Each line consists of a timestamp, a tab character, and the text of the status.

======none
2016-02-04T13:30:00+01:00	You can really go crazy here! ┐(゚∀゚)┌
2016-02-03T23:05:00+01:00	@<example http://example.org/twtxt.txt> welcome to twtxt!
2016-02-01T11:00:00+01:00	This is just another example.
2015-12-12T12:00:00+01:00	Fiat lux!
======

twtxt feeds often include [emoji], which can cause problems for standard builds of Tcl 8.


** Parser library **

======
# A twtxt parser library for Tcl 8.5+ with partial support for Jim Tcl.
# (The parser working in Jim Tcl depends on whether strptime() in libc
# understands time zones.  In musl libc it does not.)
# Copyright (c) 2020 D. Bohdan.
# License: MIT.

namespace eval twtxt {
    # The [clock] format shared by every timestamp, without a time zone.
    variable baseTimeFormat %Y-%m-%dT%H:%M:%S

    # The version of the library.  [package provide] at the end of the file
    # announces this value, so it only needs to be updated here.
    variable version 0.1.0
}

# Load the "try" package when the interpreter has no [try] command.
if {[info commands try] ne {try}} {
    package require try
}


# Scan a twtxt timestamp.
#
# Arguments:
#   time - the timestamp text, e.g., 2020-07-21T05:51:29.000000Z.
#
# Returns a list of two elements.  The first is the result of [clock scan].
# The second is "tz" when the timestamp was scanned with a %z time zone
# suffix and "local" when it was scanned without one.
#
# Raises an error when the timestamp can be scanned neither way.  The error
# message is a list that includes the timestamp and the last scan error.
proc twtxt::scan-time time {
    variable baseTimeFormat

    # Remove fractions of a second.
    regsub {(\d+-\d+-\d+T\d+:\d+:\d+)\.\d+} $time {\1} time

    # Try the format with a time zone first, then the one without.
    foreach {suffix label} {%z tz {} local} {
        try {
            return [list [clock scan $time \
                    -format $baseTimeFormat$suffix] $label]
        } on error e {}
    }

    error [list can't scan time $time due to error $e]
}


# Parse the contents of a twtxt feed.
#
# Arguments:
#   feed - the text of the feed with lines separated by "\n".
#
# Returns a dictionary with three keys:
#   metadata - a dictionary built from the leading "# key = value" lines;
#   statuses - a flat list that alternates between the scanned time of
#              a status and its text, in the order they appear in the feed;
#   bad      - a flat list that alternates between a line that could not be
#              parsed and the corresponding error message.
#
# Blank lines and comment lines are skipped.  A bad line never raises
# an error; it is reported under the "bad" key instead.
proc twtxt::parse-feed feed {
    set lines [split [string trimright $feed \n] \n]

    set bad {}
    set metadata {}
    set statuses {}

    # True until the first line that is neither blank nor metadata.
    set lead true

    foreach line $lines {
        if {[regexp {^\s*$} $line]} continue

        # Metadata is not part of the spec but is found in the wild.
        if {$lead} {
            if {[regexp {^#\s*?([^\s]+?)\s*?=\s*(.*)$} $line _ key value]} {
                regsub {\s+$} $value {} value
                dict set metadata $key $value
                continue
            } else {
                set lead 0
            }
        }

        # Comments are also not part of the spec.
        if {[regexp ^# $line]} continue

        try {
            # Without a tab in the line, $i is -1.  The time is then
            # the empty string, so [scan-time] fails and the line is
            # recorded as bad.
            set i [string first \t $line]
            set time [string range $line 0 $i-1]
            set text [string range $line $i+1 end]

            lappend statuses [lindex [scan-time $time] 0] $text
        } on error e {
            lappend bad $line $e
        }
    }

    return [dict create metadata $metadata statuses $statuses bad $bad]
}


# Run the test suite of the library.  A report is printed to stderr for
# every failed test, and the totals are printed to stderr at the end.
proc twtxt::run-tests {} {
    set stats [dict create total 0 passed 0 failed 0]

    # Run one test.
    #
    # Arguments:
    #   name     - the name of the test for the failure report.
    #   script   - the script to evaluate.  Its result or its error message
    #              is what gets compared with the expected value.
    #   arrow    - how to compare: "->" for string equality, "->*" for
    #              [string match], "->$" for [regexp].
    #   expected - the expected value or pattern.
    proc test {name script arrow expected} {
        # The counters live in the caller, [run-tests].
        upvar stats stats

        dict incr stats total

        catch $script result

        set matched [switch -- $arrow {
            -> {
                expr {$result eq $expected}
            }
            ->* {
                string match $expected $result
            }
            ->$ {
                regexp -- $expected $result
            }
            default {
                return -code error \
                       -errorcode {JIMLIB TEST BAD-ARROW} \
                       [list unknown arrow: $arrow]
            }
        }]

        if {!$matched} {
            set error {}
            append error "\n>>>>> $name failed: [list $script]\n"
            append error " got: [list $result]\n"
            append error " expected: [list $expected]"
            if {$arrow ne {->}} {
                append error "\n match: $arrow"
            }

            dict incr stats failed

            puts stderr $error
            return
        }

        dict incr stats passed
    }


    test basic-1 {
        parse-feed "2020-07-21T05:51:29.000000Z\tHello, world!"
    } -> {metadata {} statuses {1595310689 {Hello, world!}} bad {}}

    test basic-2 {
        parse-feed "2020-07-21T05:51:29+0100\tHello, world!\n"
    } -> {metadata {} statuses {1595307089 {Hello, world!}} bad {}}

    test basic-3 {
        parse-feed "\n\n1970-01-01T00:00:00Z\tHello...\n# Comment!\
                    \n1970-01-01T00:00:01-0000\t...world!\n"
    } -> {metadata {} statuses {0 Hello... 1 ...world!} bad {}}

    test basic-4 {
        parse-feed "hell\nno"
    } ->$ {metadata \{\} statuses \{\} bad \{hell \{.*\} no \{.*\}\}}

    variable baseTimeFormat
    set t [clock seconds]
    test basic-5 {
        upvar 1 t t baseTimeFormat baseTimeFormat
        parse-feed "[clock format $t -format $baseTimeFormat]\tasdf\n#\
                    not = meta\nfail"
    } ->* "metadata {} statuses {$t asdf} bad {*}"

    test basic-6 {
        parse-feed "\n\n\n \n\t\n"
    } -> {metadata {} statuses {} bad {}}


    test scan-time-1 {
        scan-time {}
    } ->* {can't scan time {} due to error*}

    test scan-time-2 {
        scan-time 1970-01-01T00:00:00Z
    } -> {0 tz}

    test scan-time-3 {
        scan-time 2015-01-01T00:00:00
    } ->$ {14\d+ local}


    test metadata-1 {
        parse-feed "#key = value\n#\tfoo=bar 123"
    } -> {metadata {key value foo {bar 123}} statuses {} bad {}}

    test metadata-2 {
        parse-feed "# url = https://example.com/feed.txt \t \n"
    } -> {metadata {url https://example.com/feed.txt} statuses {} bad {}}


    puts stderr $stats
}


# If this is the main script...
if {[info exists argv0]
    && ([file tail [info script]] eq [file tail $argv0])} {
    twtxt::run-tests
}

# Provide the same version the namespace declares, so that version
# requirements like [package require twtxt 0.1] can be satisfied.
package provide twtxt $::twtxt::version
======


** Competition **

   * https://en.wikipedia.org/wiki/ActivityPub%|%ActivityPub%|%
   * https://www.jsonfeed.org/%|%JSON Feed%|%
   * https://en.wikipedia.org/wiki/OStatus%|%OStatus%|%
   * [RSS] and Atom

<<categories>> Data Serialization Format | Internet | Jim Package | Package