# [Brian Theado] - A paper with the same title as this page can be found at
# [http://www.cs.sfu.ca/~cameron/REX.html]. The Appendix of the paper includes
# sample implementations in Perl, Javascript and Flex/Lex. The Appendix also
# includes an interactive demo (apparently using the Javascript
# implementation). Here is a translation of the Perl code into Tcl. The
# translation mostly involved escaping the square brackets and enclosing some
# variable names in {}.

# REX/Perl 1.0
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser
# University, November, 1998.
# Copyright (c) 1998, Robert D. Cameron.
# The following code may be freely used and distributed provided that
# this copyright and citation notice remains intact and that modifications
# or additions are clearly identified.
#
# 06Apr03 Brian Theado - Direct translation from Perl to Tcl
#
# Modifications to the 06Apr03 translation: one command per line restored,
# explanatory comments added, and an explicit "--" option terminator added to
# the regexp call in ShallowParse. The regular expressions are unchanged.

# ---------------------------------------------------------------------------
# Regular-expression fragments. Each variable holds a piece of regex source
# text that later variables splice in by substitution, so the definition
# order below matters. Patterns that end in an optional ">?" (or wrap their
# tail in "(...)?") still match markup that is cut off before its closing
# delimiter.
# ---------------------------------------------------------------------------

# A run of character data: everything up to the next "<".
set TextSE "\[^<]+"

# Comment tail (what follows "<!--"): text through "--", then an optional ">".
set UntilHyphen "\[^-]*-"
set Until2Hyphens "${UntilHyphen}(?:\[^-]$UntilHyphen)*-"
set CommentCE "${Until2Hyphens}>?"

# CDATA tail (what follows "<!\[CDATA\["): text through the closing "]]>".
set UntilRSBs "\[^\\]]*](?:\[^\\]]+])*]+"
set CDATA_CE "${UntilRSBs}(?:\[^\\]>]$UntilRSBs)*>"

# One or more whitespace characters (space, newline, tab, carriage return).
set S "\[ \\n\\t\\r]+"

# Names: start with an ASCII letter, "_", ":" or any non-ASCII character;
# continue with those plus digits, "." and "-".
set NameStrt "\[A-Za-z_:]|\[^\\x00-\\x7F]"
set NameChar "\[A-Za-z0-9_:.-]|\[^\\x00-\\x7F]"
set Name "(?:$NameStrt)(?:$NameChar)*"

# A double- or single-quoted literal.
set QuoteSE "\"\[^\"]*\"|'\[^']*'"

# DOCTYPE pieces: whitespace and a name followed by any number of names or
# quoted literals; the remainder of a markup declaration up to ">"; and the
# items allowed inside the "\[ ... ]" subset.
set DT_IdentSE "$S${Name}(?:${S}(?:${Name}|$QuoteSE))*"
set MarkupDeclCE "(?:\[^\\]\"'><]+|$QuoteSE)*>"

# Exactly one whitespace character.
set S1 "\[\\n\\r\\t ]"

# Processing-instruction tail: either "?>" straight away, or one whitespace
# character followed by content running through "?>".
set UntilQMs "\[^?]*\\?+"
set PI_Tail "\\?>|$S1${UntilQMs}(?:\[^>?]$UntilQMs)*>"

# One item of a DOCTYPE subset: a comment, a markup declaration, a processing
# instruction, a "%name;" reference, or whitespace.
set DT_ItemSE "<(?:!(?:--${Until2Hyphens}>|\[^-]$MarkupDeclCE)|\\?${Name}(?:$PI_Tail))|%$Name;|$S"
set DocTypeCE "${DT_IdentSE}(?:$S)?(?:\\\[(?:$DT_ItemSE)*](?:$S)?)?>?"

# What may follow "<!": a comment, a CDATA section, or a DOCTYPE declaration.
set DeclCE "--(?:$CommentCE)?|\\\[CDATA\\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?"

# What may follow "<?" and "</" respectively.
set PI_CE "${Name}(?:$PI_Tail)?"
set EndTagCE "${Name}(?:$S)?>?"

# A quoted attribute value that contains no "<".
set AttValSE "\"\[^<\"]*\"|'\[^<']*'"

# What may follow "<" in a start or empty-element tag: a name, any number of
# name=value attributes, an optional "/" and an optional ">".
set ElemTagCE "${Name}(?:$S${Name}(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?"

# Any markup item: "<" followed by one of the tails above (or by nothing).
set MarkupSPE "<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)"

# The complete shallow-parsing expression: a text run or a markup item.
set XML_SPE "$TextSE|$MarkupSPE"

# ShallowParse --
#
#   Split a string of XML into its top-level lexical items without building
#   a tree or checking well-formedness.
#
# Arguments:
#   xml   The text to tokenize.
#
# Results:
#   A list of substrings of $xml in document order. Each element is either a
#   run of text containing no "<" or a single markup item beginning with "<".
#   Every position in the input is matched by one of the two alternatives,
#   so joining the elements with the empty string gives back $xml. An empty
#   input yields an empty list.
proc ShallowParse {xml} {
    global XML_SPE
    # "--" ends option parsing so the pattern can never be taken for a switch.
    return [regexp -inline -all -- $XML_SPE $xml]
}

# Example use:
#
#   % set xml {<html><head><title>XML Shallow Parsing with Regular Expressions</title></head></html>}
#   % ShallowParse $xml
#   <html> <head> <title> {XML Shallow Parsing with Regular Expressions} </title> </head> </html>

# %|[Category XML] | [Category Parsing]|%