[KBK] (2004-11-29) -- The [clock] command has always asserted in the documentation of its free-form parser that it is capable of parsing ISO8601 date/time strings. ''The documentation lies.'' It is capable of parsing only a couple of the many possible formats that an ISO8601 point-in-time specification can take. In Tcl 8.5, moreover, we are deprecating free-form scan in [clock] in favor of having the caller specify the expected format. The ''-format'' option to the [clock] command, however, is ovrspecific - it may be that we know that an input string is expected to "be in ISO8601 format," while still not knowing the precise format (for instance, whether it is year-month-day of month or year-week-day of week; whether the year has two digits or four; and so on). It seems, therefore, that we need an intermediate layer that can examine a string that is known to be in ISO8601 format and figure out a ''-format'' string to pass to [clock]. This process is not overly difficult, although getting decent performance out of it is perhaps a trifle tricky. The code on this page is an initial attempt at an ISO8601 parser, presented so that the community can comment on it. Among the open questions: * Does the code implement the right functionality? * Does the interface look reasonable? * What is the proper medium to include this? ([KBK] is more inclined to put it in [tcllib] than in the Tcl Core proper.) ---- package provide iso8601 0.1 package require Tcl 8.5 namespace eval iso8601 { namespace export parse_date parse_time # Enumerate the patterns that we recognize for an ISO8601 date as both # the regexp patterns that match them and the [clock] patterns that scan # them. variable DatePatterns { {\d\d\d\d-\d\d-\d\d} {%Y-%m-%d} {\d\d\d\d\d\d\d\d} {%Y%m%d} {\d\d\d\d-\d\d\d} {%Y-%j} {\d\d\d\d\d\d\d} {%Y%j} {\d\d-\d\d-\d\d} {%y-%m-%d} {\d\d\d\d\d\d} {%y%m%d} {\d\d-\d\d\d} {%y-%j} {\d\d\d\d\d} {%y%j} {--\d\d-\d\d} {--%m-%d} {--\d\d\d\d} {--%m%d} {--\d\d\d} {--%j} {---\d\d} {---%d} {\d\d\d\d-W\d\d-\d} {%G-W%V-%u} {\d\d\d\dW\d\d\d} {%GW%V%u} {\d\d-W\d\d-\d} {%g-W%V-%u} {\d\dW\d\d\d} {%gW%V%u} {-W\d\d-\d} {-W%V-%u} {-W\d\d\d} {-W%V%u} {-W-\d} {%u} } # MatchTime -- (constructed procedure) # # Match an ISO8601 date/time string and indicate how it matched. # # Parameters: # string -- String to match. # fieldArray -- Name of an array in caller's scope that will receive # parsed fields of the time. # # Results: # Returns 1 if the time was scanned successfully, 0 otherwise. # # Side effects: # Initializes the field array. The keys that are significant: # - Any date pattern in 'DatePatterns' indicates that the # corresponding value, if non-empty, contains a date string # in the given format. # - The patterns T, Hcolon, and Mcolon indicate a literal # T preceding the time, a colon following the hour, or # a colon following the minute. # - %H, %M, %S, and %Z indicate the presence of the # corresponding parts of the time. proc init {} { variable DatePatterns set cmd {regexp -expanded -nocase -- {PATTERN} $timeString ->} set re \(?:\(?: set sep {} foreach {regex interpretation} $DatePatterns { append re $sep \( $regex \) append cmd " " [list field($interpretation)] set sep | } append re \) {(T|[[:space:]]+)} \)? append cmd { field(T)} append re {(\d\d)(?:(:?)(\d\d)(?:(:?)(\d\d)))} append cmd { field(%H) field(Hcolon) } \ {field(%M) field(Mcolon) field(%S)} append re {[[:space:]]*(Z|[-+]\d\d\d\d)?} append cmd { field(%Z)} set cmd [string map [list {{PATTERN}} [list $re]] \ $cmd] proc MatchTime { timeString fieldArray } " upvar 1 \$fieldArray field $cmd " } init rename init {} } # iso8601::parse_date -- # # Parse an ISO8601 date/time string in an unknown variant. # # Parameters: # string -- String to parse # args -- Arguments as for [clock scan]; may include any of # the '-base', '-gmt', '-locale' or '-timezone options. # # Results: # Returns the given date in seconds from the Posix epoch. proc iso8601::parse_date { string args } { variable DatePatterns foreach { regex interpretation } $DatePatterns { if { [regexp "^$regex\$" $string] } { return [eval [linsert $args 0 \ clock scan $string -format $interpretation]] } } return -code error "not an iso8601 date string" } # iso8601::parse_time -- # # Parse a point-in-time in ISO8601 format # # Parameters: # string -- String to parse # args -- Arguments as for [clock scan]; may include any of # the '-base', '-gmt', '-locale' or '-timezone options. # # Results: # Returns the given time in seconds from the Posix epoch. proc iso8601::parse_time { timeString args } { variable DatePatterns MatchTime $timeString field set pattern {} foreach {regex interpretation} $DatePatterns { if { $field($interpretation) ne {} } { append pattern $interpretation } } append pattern $field(T) if { $field(%H) ne {} } { append pattern %H $field(Hcolon) if { $field(%M) ne {} } { append pattern %M $field(Mcolon) if { $field(%S) ne {} } { append pattern %S } } } if { $field(%Z) ne {} } { append pattern %Z } return [eval [linsert $args 0 clock scan $timeString -format $pattern]] } # Usage examples if { [info exists ::argv0] && ( $::argv0 eq [info script] ) } { puts "iso8601::parse_date" puts [iso8601::parse_date 1970-01-02 -timezone :UTC] puts [iso8601::parse_date 1970-W01-5 -timezone :UTC] puts [time {iso8601::parse_date 1970-01-02 -timezone :UTC} 1000] puts [time {iso8601::parse_date 1970-W01-5 -timezone :UTC} 1000] puts "iso8601::parse_time" puts [clock format [iso8601::parse_time 2004-W33-2T18:52:24Z] \ -format {%X %x %z} -locale system] puts [clock format [iso8601::parse_time 18:52:24Z] \ -format {%X %x %z} -locale system] puts [time {iso8601::parse_time 2004-W33-2T18:52:24Z} 1000] puts [time {iso8601::parse_time 18:52:24Z} 1000] } ----