Version 4 of recfile

Updated 2018-03-06 09:11:32 by dbohdan

Recfile is the file format used by GNU Recutils. It can be seen as a "vertical" counterpart to CSV.

Sample data

# Default type
ThisIsARecordOfTheDefaultType: true

%rec: Empty

# No records of this type.

%rec: Test

Name: John Smith
Email: john.smith@foomail.com
Email: john@smith.name

LongLine: This is a quite long value \
comprising a single unique logical line \
split in several physical lines.

Foo: bar1
+ bar2
+  bar3

Parser

The following code should be able to parse every recfile. However, it ignores record descriptors other than the record type (%rec).

#!/usr/bin/env tclsh
namespace eval ::rec {
    variable version 0.1.1
}

# ::rec::parse --
#
#       Parse the text of a recfile.
#
# Arguments:
#       data    The complete contents of a recfile as a string.
#
# Results:
#       A dictionary where each key is the record "type" and each value is a
#       list of records. An empty string is used to indicate the default
#       record type. Each record is in turn a dictionary where the key is the
#       field name and the value is a list of one or more values for the
#       field (a field that is repeated in a record has several values).
#       Record descriptors other than %rec are recognized but ignored.
#
# Errors:
#       Raises "wrong line format: ..." for a line that is neither blank, a
#       comment, a field, nor a "+" continuation of a preceding field.
proc ::rec::parse data {
    # Pass 1: drop comment lines and glue backslash-continued physical lines
    # into logical lines. A physical line that follows a trailing backslash
    # always belongs to the value being continued, even if it happens to
    # start with "#" or to look like "name: text".
    set logicalLines {}
    set pending {}
    set joining 0
    foreach line [split $data \n] {
        if {$joining} {
            # Remove the backslash that ended the previous physical line.
            set line [string range $pending 0 end-1]$line
            set joining 0
        } elseif {[string match #* $line]} {
            continue
        }
        if {[string match {*\\} $line]} {
            set pending $line
            set joining 1
        } else {
            lappend logicalLines $line
        }
    }
    if {$joining} {
        # The data ended right after a backslash: keep the line as it is.
        lappend logicalLines $pending
    }

    # Pass 2: interpret the logical lines.
    set type {} ;# The record type.
    set result {}
    set currentRecord {}
    # What a "+" line would continue: "field", "descriptor", or {} when there
    # is nothing to continue (start of data or start of a new record).
    set lastKind {}
    set lastField {}
    foreach line $logicalLines {
        if {$line eq {}} {
            # A blank line ends the current record, if there is one.
            if {$currentRecord ne {}} {
                dict lappend result $type $currentRecord
                set currentRecord {}
            }
            set lastKind {}
            continue
        }
        if {[regexp {^([a-zA-Z%][a-zA-Z0-9_]*):[ \t]?(.*)$} $line _ \
                field value]} {
            if {[string match %* $field]} {
                # The line is a record descriptor field.
                set lastKind descriptor
                if {$field eq {%rec}} {
                    set type $value
                    # Make sure types without any records still show up.
                    if {![dict exists $result $type]} {
                        dict set result $type {}
                    }
                }
                # Unrecognized record descriptors are ignored.
            } else {
                # The line is an ordinary field.
                dict lappend currentRecord $field $value
                set lastKind field
                set lastField $field
            }
            continue
        }
        if {$lastKind ne {} && [regexp {^\+ ?(.*)$} $line _ afterNewline]} {
            if {$lastKind eq {field}} {
                # Extend only the most recent value of the field so that
                # earlier values of a repeated field are preserved and the
                # field's value remains a proper list.
                set values [dict get $currentRecord $lastField]
                lset values end [lindex $values end]\n$afterNewline
                dict set currentRecord $lastField $values
            }
            # A continuation of an ignored descriptor is ignored as well.
            continue
        }
        error "wrong line format: \"$line\""
    }
    # Do not lose the last record when the data lacks a final blank line.
    if {$currentRecord ne {}} {
        dict lappend result $type $currentRecord
    }
    return $result
}

# ::rec::main --
#
#       Command-line entry point: read the file named by the first element of
#       argv, parse it with ::rec::parse and print the resulting dictionary
#       to stdout.
#
# Arguments:
#       argv0   The name the script was invoked as (used in the usage message).
#       argv    The list of command-line arguments; the first one is the path
#               of the recfile to parse.
#
# Errors:
#       Raises an error if no file name is given, if the file cannot be opened
#       or read, or if ::rec::parse rejects its contents.
proc ::rec::main {argv0 argv} {
    if {[llength $argv] < 1} {
        error "usage: $argv0 filename"
    }
    set ch [open [lindex $argv 0]]
    # Close the channel whether or not reading succeeds, then re-raise any
    # read error with its original options.
    set failed [catch {read $ch} data options]
    close $ch
    if {$failed} {
        return -options $options $data
    }
    puts [::rec::parse $data]
}

# Run ::rec::main only when this file is executed directly rather than
# sourced: argv0 must exist and name the same file (by tail) as this script.
if {[info exists argv0]} {
    if {[file tail $argv0] eq [file tail [info script]]} {
        ::rec::main $argv0 $argv
    }
}

See also