A simple parser for vCard, a file format standard for electronic business cards [L1].
package provide vCard 2.1

# Grammar of the vCard format handled by this package:
#
# CR = <ASCII CR, carriage return> ;(15, 13.)
# LF = <ASCII LF, linefeed> ;(12, 10.)
# CRLF = CR LF
# SPACE = <ASCII SP, space> ;(40, 32.)
# HTAB = <ASCII HT, horizontal-tab> ;(11, 9.)
# All literal property names are valid as upper, lower, or mixed case.
# ws = 1*(SPACE | HTAB) ; "whitespace," one or more spaces or tabs
# wsls = 1*(SPACE | HTAB | CRLF) ; whitespace with line separators
# word = <any printable 7bit us-ascii except []=:., >
# groups = groups "." word | word
# these may be "folded":
# value = 7bit | quoted-printable | base64
# 7bit = <7bit us-ascii printable chars, excluding CR LF>
# 8bit = <MIME RFC 1521 8-bit text>
# quoted-printable = <MIME RFC 1521 quoted-printable text>
# base64 = <MIME RFC 1521 base64 text> ; the end of the text is marked with two CRLF
#          sequences this results in one blank line before the start of the next property
# these may be "folded":
# name = "LOGO" | "PHOTO" | "LABEL" | "FN" | "TITLE" | "SOUND" | "VERSION" | "TEL"
#      | "EMAIL" | "TZ" | "GEO" | "NOTE" | "URL" | "BDAY" | "ROLE" | "REV" | "UID"
#      | "KEY" | "MAILER" | "X-" word
# knowntype = "DOM" | "INTL" | "POSTAL" | "PARCEL" | "HOME" | "WORK" | "PREF" | "VOICE" | "FAX"
#      | "MSG" | "CELL" | "PAGER" | "BBS" | "MODEM" | "CAR" | "ISDN" | "VIDEO" | "AOL"
#      | "APPLELINK" | "ATTMAIL" | "CIS" | "EWORLD" | "INTERNET" | "IBMMAIL" | "MCIMAIL"
#      | "POWERSHARE" | "PRODIGY" | "TLX" | "X400" | "GIF" | "CGM" | "WMF" | "BMP" | "MET"
#      | "PMB" | "DIB" | "PICT" | "TIFF" | "PDF" | "PS" | "JPEG" | "QTIME" | "MPEG"
#      | "MPEG2" | "AVI" | "WAVE" | "AIFF" | "PCM" | "X509" | "PGP"
# ptypeval = knowntype | "X-" word
# pvalueval = "INLINE" | "URL" | "CONTENT-ID" | "CID" | "X-" word
# pencodingval = "7BIT" | "8BIT" | "QUOTED-PRINTABLE" | "BASE64" | "X-" word
# charsetval = <a character set string as defined in Section 7.1 of RFC 1521>
# langval = <a language string as defined in RFC 1766>
# param = "TYPE" [ws] "=" [ws] ptypeval | "VALUE" [ws] "=" [ws] pvalueval
#      | "ENCODING" [ws] "=" [ws] pencodingval | "CHARSET" [ws] "=" [ws] charsetval
#      | "LANGUAGE" [ws] "=" [ws] langval | "X-" word [ws] "=" [ws] word | knowntype
# paramlist = paramlist [ws] ";" [ws] param | param
# params = ";" [ws] paramlist
# nonsemi = <any non-control ASCII except ";">
# strnosemi = *(*nonsemi ("\;" | "\" CRLF)) *nonsemi ; To include a semicolon in this string,
#             it must be escaped with a "\" character.
# addressparts = 0*6(strnosemi ";") strnosemi ; PO Box, Extended Addr, Street, Locality, Region, Postal Code, Country Name
# orgparts = *(strnosemi ";") strnosemi ; First is Organization Name, remainder are Organization Units.
# nameparts = 0*4(strnosemi ";") strnosemi ; Family, Given, Middle, Prefix, Suffix.
#; these may be "folded"
# item = [groups "."] name [params] ":" value CRLF | [groups "."] "ADR" [params] ":" addressparts CRLF
#      | [groups "."] "ORG" [params] ":" orgparts CRLF | [groups "."] "N" [params] ":" nameparts CRLF
#      | [groups "."] "AGENT" [params] ":" vcard CRLF
# items = items *CRLF item | item
# vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
# vcard_file = [wsls] vcard [wsls]

namespace eval vCard {
    # Vocabulary tables taken from the grammar above. They are declared for
    # reference; the parse procedure below does not consult them.
    variable encodings {BASE64 QUOTED-PRINTABLE 8BIT}
    variable params {ENCODING CHARSET LANGUAGE VALUE TYPE}
    variable values {INLINE URL CONTENT-ID}
    variable properties {FN}

    # FN - Formatted Name - specifies the formatted name string for vCard object.
    # N - Name - This property specifies a structured representation of the name of the person,
    #     place or thing - consists of the components of the name specified as positional fields
    #     separated by the Field Delimiter character (ASCII decimal 59). The property value is a
    #     concatenation of the Family Name (first field), Given Name (second field), Additional
    #     Names (third field), Name Prefix (fourth field), and Name Suffix (fifth field) strings.
    # PHOTO - Photograph - This property specifies an image or photograph of an individual
    variable ptype {GIF CGM WMF BMP MET PMB DIB PICT TIFF PS PDF JPEG MPEG MPEG2 AVI QTIME}

    # BDAY - Birthdate - date of birth of the individual associated with the vCard. The value
    #     for this property is a calendar date in a complete representation consistent with ISO 8601.
    # ADR - Delivery Address - components that are based on the X.500 Post Office Box attribute,
    #     the X.520 Street Address geographical attribute, the X.520 Locality Name geographical
    #     attribute, the X.520 State or Province Name geographical attribute, the X.520 Postal Code
    #     attribute, and the X.520 Country Name geographical attribute.
    variable adrParam {TYPE {DOM INTL POSTAL PARCEL HOME WORK}}

    # LABEL - Delivery Label - This property is based on the semantics of the X.520 Postal Address
    #     attribute. This specification has added semantics to those defined by the X.500 Series
    #     standard for differentiating Home, Work, Parcel, Postal, Domestic, and International
    #     delivery label types. OPT
    # TEL - Telephone Number -
    variable teltype {PREF WORK HOME VOICE FAX MSG CELL PAGER BBS MODEM CAR ISDN VIDEO}

    # EMAIL -
    variable emailType {INTERNET}

    # TZ - Time Zone - ISO 8601
    # AGENT - Agent - This property is equivalent to nesting another vCard with the specified vCard.
    # ORG - Organization Name and Organizational Unit - a concatenation of the Organization Name
    #     (first field), Organizational Unit (second field) strings. Additional positional fields,
    #     if specified, contain additional Organizational Units.
    # NOTE - Comment -
    # REV - Last Revision - calendar date and time of day of the last update to the vCard object ISO 8601
    # SOUND - sound -
    variable soundType {WAVE PCM AIFF}

    # URL - -
    # UID - Unique Identifier - persistent, globally unique identifier associated with the object
    # VERSION - - vcard spec supported 2.1
    # KEY - Public Key -
    variable keyType {X509 PGP}

    # parse --
    #
    #   Parse the text of one or more vCards.
    #
    # Arguments:
    #   text    vCard text; lines may end in LF or CRLF and may be folded
    #           (continuation lines start with a space or tab).
    #
    # Results:
    #   A list with one dict per card, in input order. Each dict may contain:
    #     address  dict with keys box ext street locality region postcode country (from ADR)
    #     org      dict with keys name and units, units being a list (from ORG)
    #     name     dict with keys family given middle prefix suffix (from N)
    #     <NAME>   raw value of any other recognised property, keyed by the
    #              property name exactly as written in the input
    #   Group prefixes and property parameters are accepted but not stored.
    #   A property that occurs more than once keeps only its last value.
    #   Unrecognised properties are ignored. Values are not decoded
    #   (no quoted-printable or base64 handling).
    #
    # Errors:
    #   Raises "Can't handle agents" when an AGENT property is encountered.
    proc parse {text} {
        # Lambda that splits a structured value on unescaped semicolons and
        # turns the escaped form (backslash followed by semicolon) into a
        # plain semicolon, as required for "strnosemi" in the grammar.
        # Kept local so that no extra command is exported from the namespace.
        set splitFields {value {
            set fields {}
            set field ""
            set len [string length $value]
            for {set i 0} {$i < $len} {incr i} {
                set c [string index $value $i]
                if {$c eq "\\" && [string index $value [expr {$i + 1}]] eq ";"} {
                    append field ";"
                    incr i
                } elseif {$c eq ";"} {
                    lappend fields $field
                    set field ""
                } else {
                    append field $c
                }
            }
            lappend fields $field
            return $fields
        }}

        # Normalise CRLF first so that unfolding does not leave a stray CR
        # in the middle of a value, then unfold continuation lines.
        set text [string map [list \r\n \n] $text]
        regsub -all {\n[ \t]+} $text { } text

        set result {}
        set entry [dict create]
        set inCard 0
        foreach line [split $text \n] {
            set line [string trim $line]
            if {$line eq ""} continue

            # Everything before the first colon is "[groups.]name[;params]".
            set value [join [lassign [split $line :] begin] :]
            # Drop the parameters first, then the group prefix: the property
            # name is the LAST dot-separated component, not the first.
            set qualified [lindex [split $begin {;}] 0]
            set name [string trim [lindex [split $qualified .] end]]

            #puts stderr "parse: $name - $value"
            switch -glob -- [string toupper $name] {
                BEGIN {
                    set entry [dict create]
                    set inCard 1
                }
                END {
                    # Ignore an END that closes nothing instead of failing or
                    # emitting the previous card a second time. Properties
                    # seen without a BEGIN still form a card, as before.
                    if {$inCard || [dict size $entry] > 0} {
                        lappend result $entry
                    }
                    set entry [dict create]
                    set inCard 0
                }
                ADR {
                    lassign [apply $splitFields $value] box ext street locality region postcode country
                    foreach v {box ext street locality region postcode country} {
                        dict set entry address $v [set $v]
                    }
                }
                ORG {
                    set units [lassign [apply $splitFields $value] orgname]
                    dict set entry org name $orgname
                    dict set entry org units $units
                }
                N {
                    lassign [apply $splitFields $value] family given middle prefix suffix
                    foreach v {family given middle prefix suffix} {
                        dict set entry name $v [set $v]
                    }
                }
                AGENT {
                    error "Can't handle agents"
                }
                LOGO - PHOTO - LABEL - FN - TITLE - SOUND - VERSION - TEL -
                EMAIL - TZ - GEO - NOTE - URL - BDAY - ROLE - REV - UID -
                KEY - MAILER - X-* {
                    dict set entry $name $value
                }
                default {
                }
            }
        }
        return $result
    }

    namespace export -clear *
    namespace ensemble create -subcommands {}
}
UKo 2009-11-15: Small change so that collections of vCards can be parsed, too. The parse subcommand now always returns a list of entries, even if the input contains only one vCard.