Version 0 of Textile to HTML

Updated 2009-11-16 01:48:42 by jdp

jdp 2009-11-15 01:42:49 UTC Here's some code for converting Textile [L1 ]. At the moment it's reasonably reliable (I think there are a few cases which cause grief, but none come to mind), but table support is nonexistent. It doesn't generate a full document, just the part that would go between the <body> tags. Enjoy.

proc parse {filename} {



        set datain [open $filename]

        set doctext [read $datain]

        close $datain

        

        regsub -all -- {\n{2,}} $doctext "\uFFFF" doctext

        set text [split $doctext \uFFFF]

        

        set out [list]

        set premode 0

        

        foreach line $text {

        

                if {[regexp {<pre>.*</pre>} $line]} {

                        if {[catch {lappend out [subst $line]} e i]} {

                                warn $e

                                log $i

                                log $line

                        }

                        continue

                } elseif {[regexp {<pre>} $line]} {

                        if {[catch {lappend out [subst $line]} e i]} {

                                warn $e

                                log $i

                                log $line

                        }

                        set premode 1

                        continue

                } elseif {[regexp {</pre>} $line]} {

                        if {[catch {lappend out [subst $line]} e i]} {

                                warn $e

                                log $i

                                log $line

                        }

                        set premode 0

                        continue

                } elseif {$premode} {

                        if {[catch {lappend out [subst $line]} e i]} {

                        warn $e

                        log $i

                        log $line

                }

                        continue

                }

        

                if {[regexp {^h([1-6])(.*?)\. (.*?)$} $line match level style text]} {

                        set line "<h$level[style $style]>$text</h$level>"

                } elseif {[regexp {^bq(.*?)\. (.*?)$} $line match style text]} {

                        set line "<blockquote[style $style]>$text</blockquote>"

                } elseif {[regexp {^fn(\d)(.*?)\. (.*?)$} $line match number style text]} {

                        set line "<p id=\"fn$number\"[style $style]><sup>$number</sup> $text</p>"

                } elseif {[regexp {^#.*? .*$} $line match style]} {

                        set line "<ol>\n$match\n</ol>"

                } elseif {[regexp {^\* .*$} $line match]} {

                        set line "<ul>\n$match\n</ul>"

                #} elseif {[regexp {^(\|.*?)+\|} $line match]} {

                #        set line "<table>\n$match\n</table>"

                } elseif {[regexp {^p(.*?)\. (.*?)$} $line match style text]} {

                        set line "<p[style $style]>$text</p>"

                } else {

                        set line "<p>$line</p>"

                }

                

                regsub -all -line -- {^#(.*?) (.*?)(\n)} $line {<li[style "\1"]>\2</li>\3} line

                regsub -all -line -- {^\*(.*?) (.*?)(\n)} $line {<li[style "\1"]>\2</li>\3} line

                

                regsub -all -- {"([^\"]*?)(?:\((.*?)\))??":([[:graph:]]+)([[:space:];#%&\*\{\}\\<>\?\+]|$)} $line {<a href="\3" title="\2">\1</a>\4} line

                

                regsub -all -- {\!([[:graph:]]+)(?:\((.*?)\))?\!:([[:graph:]]+)([[:space:]]|$)} $line {<a href="\3"><img src="\1" alt="\2" /></a>\4} line

                regsub -all -- {\!([[:graph:]]+)(?:\((.*?)\))?\!} $line {<img src="\1" alt="\2" />} line

                

                regsub -all -- {([^()])\m__([[({].*?[})\]])??(\S.*?)__\M([^()])} $line {\1<i[style "\2"]>\3</i>\4} line

                regsub -all -- {([^()])\m_([[({].*?[})\]])??(\S.*?)_\M([^()])} $line {\1<em[style "\2"]>\3</em>\4} line

                

                regsub -all -- {\*\*\m([[({].*?[})\]])??(\S.*?)\M\*\*} $line {<b[style "\1"]>\2</b>} line

                regsub -all -- {\*\m([[({].*?[})\]])??(\S.*?)\M\*} $line {<strong[style "\1"]>\2</strong>} line

                

                regsub -all -- {\?\?\m([[({].*?[})\]])??(\S.*?)\M\?\?} $line {<cite[style "\1"]>\2</cite>} line

                

                regsub -all -- {-\m([[({].*?[})\]])??(\S.*?)\M-} $line {<del[style "\1"]>\2</del>} line

                regsub -all -- {\+\m([[({].*?[})\]])??(\S.*?)\M\+} $line {<ins[style "\1"]>\2</ins>} line



                regsub -all -- {\^\m([[({].*?[})\]])??(\S.*?)\M\^} $line {<sup[style "\1"]>\2</sup>} line

                regsub -all -- {~\m([[({].*?[})\]])??(\S.*?)\M~} $line {<sub[style "\1"]>\2</sub>} line

                

                regsub -all -- {%\m([[({].*?[})\]])??(\S.*?)\M%} $line {<span[style "\1"]>\2</span>} line

                

                regsub -all -- {[[:graph:]]+?\((.+?)\)} $line {<acronym title="\2">\1</acronym>} line

                regsub -all -- {([[:graph:]])\[(\d+?)\]} $line {\1<sup><a href="#fn\2">\2</a></sup>} line

                regsub -all -- {([[:alnum:]_])\n} $line {\1<br />\n} line

                

                if {[catch {lappend out [subst $line]} e i]} {

                        warn $e

                        log $i

                        log $line

                }

        }

        

        return [join $out \n\n]

}



proc style {attrs} {

        set result [list]

        set stylelist [list]

        if {[regexp {\(#([[:alnum:]_]+?)\)} $attrs match id]} {

                lappend result " id=\"$id\""

        }

        if {[regexp {\(([[:alnum:]_]+?)\)} $attrs match class]} {

                lappend result " class=\"$class\""

        }

        if {[regexp {\(([[:alnum:]_]+?)#([[:alnum:]_]+?)\)} $attrs match class id]} {

                lappend result "class=\"$class\""

                lappend result " id=\"$id\""

        }

        if {[regexp {\{(.+?)\}} $attrs match style]} {

                lappend stylelist $style

        }

        if {[regexp {\[([[:alpha:]]+?)\]} $attrs match lang]} {

                lappend result " lang=\"$lang\""

        }

        

        if {[regexp {^<>} $attrs]} {

                lappend stylelist "text-align:justify"

        } elseif {[regexp {^>} $attrs]} {

                lappend stylelist "text-align:right"

        } elseif {[regexp {^<} $attrs]} {

                lappend stylelist "text-align:left"

        } elseif {[regexp {^=} $attrs]} {

                lappend stylelist "text-align:center"

        } elseif {[regexp {^(\(*?)(\)*?)(?![[:alnum:]_])} $attrs match left right]} {

                if {[string length $left] > 0} {

                        lappend stylelist "padding-left:[string length $left]em"

                }

                if {[string length $right] > 0} {

                        lappend stylelist "padding-right:[string length $right]em"

                }

        }

        if {[llength $stylelist] > 0} {

                lappend result " style=\"[join $stylelist ;]\""

        }

        

        return [join $result ""]

}