Recfile is the file format used by GNU Recutils. It can be seen as a "vertical" counterpart to CSV.
# Default type
ThisIsARecordOfTheDefaultType: true

%rec: Empty
# No records of this type.

%rec: Test

Name: John Smith
Email: john.smith@example.com
Email: jsmith@example.org
LongLine: This is a quite long value \
comprising a single unique logical line \
split in several physical lines.
Foo: bar1
+ bar2
+ bar3
The following code should be able to parse every recfile. However, it ignores record descriptors other than the record type (%rec).
#!/usr/bin/env tclsh
# A small parser for recfiles, the text format used by GNU Recutils.

package require Tcl 8.5 ;# [dict] is needed.

namespace eval ::rec {
    variable version 0.1.1
}

# ::rec::CommitField --
#
#   Private helper for ::rec::parse.  Stores the caller's pending logical
#   field (if any) and clears the pending flag.  A field whose name starts
#   with "%" is a record descriptor: "%rec" switches the current record type
#   (registering it in the result so that empty types are still reported),
#   any other descriptor is ignored.  Every other field is appended to the
#   list of values kept under its name in the current record.
#
#   Works directly on the caller's variables: pending, fieldName, fieldValue,
#   type, currentRecord and result.
proc ::rec::CommitField {} {
    upvar 1 pending pending fieldName fieldName fieldValue fieldValue \
            type type currentRecord currentRecord result result

    if {!$pending} {
        return
    }
    set pending 0

    if {[string match %* $fieldName]} {
        switch -exact -- $fieldName {
            %rec {
                set type $fieldValue
                if {![dict exists $result $type]} {
                    dict set result $type {}
                }
            }
            default {
                # Ignore unrecognized record descriptors.
            }
        }
    } else {
        # Always append a whole value as ONE list element, so that repeated
        # fields and values containing spaces or newlines stay intact.
        dict lappend currentRecord $fieldName $fieldValue
    }
}

# ::rec::parse --
#
#   Parse the text of a recfile.
#
# Arguments:
#   data    The complete contents of a recfile, as a string.  Both LF and
#           CRLF line endings are accepted.
#
# Results:
#   A dictionary where each key is the record "type" and each value is a
#   list of records.  An empty string is used to indicate the default record
#   type.  Each record is in turn a dictionary where the key is the field
#   name and the value is a list of one or more values for the field.
#
#   Raises an error ("wrong line format: ...") for a line that is neither a
#   comment, a blank line, a field, nor a continuation of a field.
#
# Notes:
#   * A value ending in a backslash is joined with the next physical line,
#     whatever that line looks like (it may resemble a field or a comment).
#     A backslash dangling at the very end of the input is kept as is.
#   * A line starting with "+" appends a newline plus the rest of the line
#     to the value of the field that precedes it.
#   * Record descriptors other than %rec are ignored, including their
#     continuation lines.
proc ::rec::parse data {
    set type {}            ;# The record type; {} is the default type.
    set result {}
    set currentRecord {}
    set pending 0          ;# True while a logical field awaits completion.
    set fieldName {}
    set fieldValue {}

    foreach line [split $data \n] {
        # Tolerate CRLF line endings.
        set line [string trimright $line \r]

        # A trailing backslash glues the next physical line to the value.
        # This must be checked before anything else, because the next line
        # is plain value text even if it looks like a field or a comment.
        if {$pending && [string match {*\\} $fieldValue]} {
            set fieldValue [string range $fieldValue 0 end-1]$line
            continue
        }

        # Skip comments.
        if {[string match #* $line]} {
            continue
        }

        # A blank line terminates the current record.
        if {$line eq {}} {
            ::rec::CommitField
            if {$currentRecord ne {}} {
                dict lappend result $type $currentRecord
                set currentRecord {}
            }
            continue
        }

        if {[regexp {^([a-zA-Z%][a-zA-Z0-9_]*):[ \t]?(.*)$} $line _ \
                field value]} {
            # The line starts a new field; the previous one is complete.
            ::rec::CommitField
            set pending 1
            set fieldName $field
            set fieldValue $value
        } elseif {$pending && [regexp {^\+ ?(.*)$} $line _ afterNewline]} {
            # "+" continuation: the value goes on after a newline.
            append fieldValue \n $afterNewline
        } else {
            error "wrong line format: \"$line\""
        }
    }

    # The input may end without a trailing blank line: store what is left.
    ::rec::CommitField
    if {$currentRecord ne {}} {
        dict lappend result $type $currentRecord
    }

    return $result
}

# ::rec::main --
#
#   Command line entry point: parse the recfile named by the first element
#   of argv and print the resulting dictionary on stdout.
#
# Arguments:
#   argv0   Name of the script, used in the usage message.
#   argv    Command line arguments; the first one is the file to parse.
proc ::rec::main {argv0 argv} {
    if {[llength $argv] < 1} {
        puts stderr "usage: $argv0 filename"
        exit 1
    }
    set ch [open [lindex $argv 0]]
    # Make sure the channel is closed even when reading fails.
    set failed [catch {read $ch} data options]
    close $ch
    if {$failed} {
        return -options $options $data
    }
    puts [::rec::parse $data]
}

# If this is the main script...
if {[info exists argv0] && ([file tail [info script]] eq [file tail $argv0])} {
    ::rec::main $argv0 $argv
}