Text2PDF

Keith Vetter 2007-10-16 : I've been playing with Trampoline! recently and wanted a similar tool for converting ASCII text into PDF. So here's a Tcl package that takes some text and returns it converted into PDF format.

The package exports just two functions: Text2PDF which does the conversion, and Configure which lets you specify a bunch of formatting options like page size, landscape mode, font and tab size.

Also included here is code for a command line utility using this package for converting text files into pdf files.

The code started as a port of a C utility linked from the PDF page [L1 ]. Then, after seeing an ActiveState Python Cookbook entry [L2 ] and the C code it was based on [L3 ], I decided to add a few more features.

Caveat: PDF is a binary format (there are lots of file offsets) so when saving the output be sure to configure the channel to binary format.

To Do (volunteers anyone?)

  • 2 column output
  • handle text widget formatting
  • wrapping text
  • other encodings (currently uses WinAnsiEncoding)
  • unicode handling

 ##+##########################################################################
 #
 # text2pdf -- package to convert text into pdf
 # by Keith Vetter
 # based on code by P. G. Womack, Diss, Norfolk, UK.
 #
 # Package to create PDF output from supplied text.
 # Two routines are available:
 #   Text2PDF::Configure ?optionName value...? -- lets you tweak some constants
 #   Text2PDF::Text2PDF text                   -- returns text converted to PDF
 #
 # NB. PDF is a BINARY format so you must configure files to binary translation
 #
 # Missing features:
 #  2 column output
 #  better font handling
 #  non-ASCII character handling
 #
 
 package provide text2pdf 0.2
 package require textutil                        ;# For tab handling
 
 namespace eval ::Text2PDF {
    # P is the package-wide state array.  The needInit flag tells
    # Configure and Text2PDF that the defaults have not been loaded yet.
    variable P
    array set P {needInit 1}
 
    # Public entry points of the package.
    namespace export Configure Text2PDF
 }
 # ::Text2PDF::_Reset --
 #
 #   Clears the per-document state held in P: the accumulated PDF text,
 #   the page count, the next object ID, the vertical position, and every
 #   recorded cross-reference offset and page object ID.  Formatting
 #   options stored in P are left alone.
 #
 proc ::Text2PDF::_Reset {} {
    variable P
 
    foreach {key initial} {pdf "" nPages 0 objID 1 ypos 0} {
        set P($key) $initial
    }
    foreach pattern {xrefs,* page,*} {
        array unset P $pattern
    }
 }
 # ::Text2PDF::Configure --
 #
 #   Sets formatting options for subsequent conversions.
 #
 # Arguments:
 #   args    Option/value pairs.  With no arguments (or on first use) all
 #           options are reset to their defaults.  Recognised options:
 #             -width, -height   page size in points, portrait orientation
 #             -margin           page margin in points
 #             -font             base font name
 #             -fontsize         font size in points
 #             -leadsize         line spacing in points
 #             -tabsize          tab stop width in columns
 #             -a4, -a3, -letter boolean; when true selects that paper size
 #             -landscape        boolean; true swaps width and height
 #             -portrait         boolean; inverse of -landscape
 #
 # Results:
 #   Updates the package state array P.  Raises an error for an unknown
 #   option or for an option without a value; in that case no setting
 #   is changed.
 #
 proc ::Text2PDF::Configure {args} {
    variable P
 
    set options {-width -height -margin -font -fontsize -leadsize -tabsize
        -a4 -a3 -letter -landscape -portrait}
 
    # Validate everything up front so a bad call leaves the settings
    # untouched.  -exact matters: the default glob matching of lsearch
    # would let an argument such as "*" through.
    set nArgs [llength $args]
    for {set i 0} {$i < $nArgs} {incr i 2} {
        set arg [lindex $args $i]
        if {[lsearch -exact $options $arg] == -1} {
            error "unknown option \"$arg\""
        }
        if {$i + 1 >= $nArgs} {
            error "value for \"$arg\" missing"
        }
    }
 
    # Paper sizes in PostScript points (1/72 inch), portrait orientation:
    #   A3         842 x 1190
    #   A4         595 x 842
    #   US letter  612 x 792   (8.5 x 11 in, 216 x 279 mm)
    if {![info exists P(needInit)] || $nArgs == 0 || $P(needInit)} {
        unset -nocomplain P
        ::Text2PDF::_Reset
 
        set P(needInit) 1
 
        set P(width) 612
        set P(height) 792
        set P(margin) 30
        set P(font) Courier
        set P(fontSize) 10
        set P(leadSize) 10
        set P(tabSize) 8
        set P(landscape) 0
    }
 
    # P(width)/P(height) hold the effective page size, i.e. already
    # swapped when landscape is on.  Undo that swap while the options are
    # applied so repeated calls do not flip the page back and forth.
    if {$P(landscape)} {
        foreach {P(width) P(height)} [list $P(height) $P(width)] break
    }
 
    foreach {arg val} $args {
        switch -exact -- $arg {
            -width     { set P(width) $val }
            -height    { set P(height) $val }
            -margin    { set P(margin) $val }
            -font      { set P(font) $val }
            -fontsize  { set P(fontSize) $val }
            -leadsize  { set P(leadSize) $val }
            -tabsize   { set P(tabSize) $val }
            -a4        { if {$val} { set P(width) 595; set P(height) 842 } }
            -a3        { if {$val} { set P(width) 842; set P(height) 1190 } }
            -letter    { if {$val} { set P(width) 612; set P(height) 792 } }
            -landscape { set P(landscape) $val }
            -portrait  { set P(landscape) [expr {! $val}] }
        }
    }
 
    if {$P(landscape)} {
        foreach {P(width) P(height)} [list $P(height) $P(width)] break
    }
    set P(needInit) 0
 }
 # ::Text2PDF::Text2PDF --
 #
 #   Converts text into a complete PDF document.
 #
 # Arguments:
 #   txt     The text to convert; lines are separated by newlines.
 #
 # Results:
 #   Returns the PDF document as a string.  The per-document state in P
 #   is cleared again before returning.
 #
 proc ::Text2PDF::Text2PDF {txt} {
    variable P
 
    if {$P(needInit)} { ::Text2PDF::Configure }
 
    # File header.  The page tree's object ID is reserved before any page
    # is written because every page object refers back to it.
    ::Text2PDF::_MyPuts "%PDF-1.0\n"
    set P(pageTreeID) [::Text2PDF::_NextObjectID]
    ::Text2PDF::_DoText $txt
 
    # Font object.
    set fontID [::Text2PDF::_NextObjectID]
    ::Text2PDF::_StartObject $fontID
    ::Text2PDF::_MyPuts "<</Type/Font/Subtype/Type1/BaseFont/$P(font)/Encoding/WinAnsiEncoding>>\nendobj\n"
 
    # Page tree object: one reference per page written by _DoText.
    set kids ""
    for {set pageNo 0} {$pageNo < $P(nPages)} {incr pageNo} {
        append kids "$P(page,$pageNo) 0 R\n"
    }
    ::Text2PDF::_StartObject $P(pageTreeID)
    ::Text2PDF::_MyPuts "<</Type /Pages /Count $P(nPages)\n/Kids\[\n$kids]\n"
    ::Text2PDF::_MyPuts "/Resources<</ProcSet\[/PDF/Text]/Font<</F0 $fontID 0 R>> >>\n"
    ::Text2PDF::_MyPuts "/MediaBox \[ 0 0 $P(width) $P(height) ]\n>>\nendobj\n"
 
    # Document catalog.
    set catalogID [::Text2PDF::_NextObjectID]
    ::Text2PDF::_StartObject $catalogID
    ::Text2PDF::_MyPuts "<</Type/Catalog/Pages $P(pageTreeID) 0 R>>\nendobj\n"
 
    # Cross-reference table: entry 0 is the free-list head, followed by
    # the recorded offset of every object that was started.
    set startXRef [::Text2PDF::_GetPosition]
    set table "xref\n0 $P(objID)\n0000000000 65535 f \n"
    for {set objNo 1} {$objNo < $P(objID)} {incr objNo} {
        append table [format "%010ld 00000 n \n" $P(xrefs,$objNo)]
    }
    ::Text2PDF::_MyPuts $table
 
    # Trailer.
    ::Text2PDF::_MyPuts "trailer\n<<\n/Size $P(objID)\n/Root $catalogID 0 R\n>>\n"
    ::Text2PDF::_MyPuts "startxref\n$startXRef\n%%EOF\n"
 
    set pdf $P(pdf)
    ::Text2PDF::_Reset                          ;# Clear out memory
    return $pdf
 }
 
 # ::Text2PDF::_MyPuts --
 #
 #   Appends str to the PDF text being accumulated in P(pdf).
 #
 proc ::Text2PDF::_MyPuts {str} {
    variable P
    append P(pdf) $str
 }
 # ::Text2PDF::_GetPosition --
 #
 #   Returns the length of the PDF text accumulated so far, which is used
 #   as the offset of whatever is written next.
 #
 proc ::Text2PDF::_GetPosition {} {
    variable P
    return [string length $P(pdf)]
 }
 
 # ::Text2PDF::_StorePage --
 #
 #   Records id as the object ID of the next page and bumps the page count.
 #
 proc ::Text2PDF::_StorePage {id} {
    variable P
 
    set slot $P(nPages)
    set P(page,$slot) $id
    incr P(nPages)
 }
 
 # ::Text2PDF::_StartObject --
 #
 #   Remembers the current output offset as the cross-reference entry for
 #   object id, then writes the object's opening line.
 #
 proc ::Text2PDF::_StartObject {id} {
    variable P
 
    set P(xrefs,$id) [::Text2PDF::_GetPosition]
    ::Text2PDF::_MyPuts "$id 0 obj\n"
 }
 # ::Text2PDF::_NextObjectID --
 #
 #   Returns the next unused object ID and advances the counter.
 #
 proc ::Text2PDF::_NextObjectID {} {
    variable P
 
    set id $P(objID)
    incr P(objID)
    return $id
 }
 
 # ::Text2PDF::_StartPage --
 #
 #   Opens the content stream for a new page: allocates object IDs for the
 #   stream and for its length, writes the stream header, and emits the
 #   text-object preamble (font, start position, leading).  P(ypos) is set
 #   to the top of the printable area.
 #
 proc ::Text2PDF::_StartPage {} {
    variable P
 
    set P(streamID) [::Text2PDF::_NextObjectID]
    set P(streamLenID) [::Text2PDF::_NextObjectID]
 
    # The stream length is written later as a separate object, so the
    # dictionary only refers to it here.
    ::Text2PDF::_StartObject $P(streamID)
    ::Text2PDF::_MyPuts "<< /Length $P(streamLenID) 0 R >>\nstream\n"
    set P(streamStart) [::Text2PDF::_GetPosition]
 
    set P(ypos) [expr {$P(height) - $P(margin)}]
    ::Text2PDF::_MyPuts "BT\n/F0 $P(fontSize) Tf\n$P(margin) $P(ypos) Td\n$P(leadSize) TL\n"
 }
 # ::Text2PDF::_EndPage --
 #
 #   Closes the content stream opened by _StartPage, writes the object
 #   holding the stream's length, and writes the page object that points
 #   at the stream and at the page tree.
 #
 proc ::Text2PDF::_EndPage {} {
    variable P
 
    set pageObj [::Text2PDF::_NextObjectID]
    ::Text2PDF::_StorePage $pageObj
 
    # Measure the stream after the closing ET so it is included.
    ::Text2PDF::_MyPuts "ET\n"
    set nBytes [expr {[::Text2PDF::_GetPosition] - $P(streamStart)}]
    ::Text2PDF::_MyPuts "endstream\nendobj\n"
 
    ::Text2PDF::_StartObject $P(streamLenID)
    ::Text2PDF::_MyPuts "$nBytes\nendobj\n"
 
    ::Text2PDF::_StartObject $pageObj
    ::Text2PDF::_MyPuts "<</Type/Page/Parent $P(pageTreeID) 0 R/Contents $P(streamID) 0 R>>\nendobj\n"
 }
 
 # ::Text2PDF::_DoText --
 #
 #   Writes txt as one or more PDF pages.
 #
 # Arguments:
 #   txt     The text to render; lines are separated by newlines.  A form
 #           feed at the start of a line forces a page break; any text
 #           after it is printed at the top of the new page.
 #
 # Results:
 #   Appends the page objects to the PDF output via _StartPage, _EndPage
 #   and _MyPuts.  At least one page is always produced.
 #
 proc ::Text2PDF::_DoText {txt} {
    variable P
 
    ::Text2PDF::_StartPage
 
    foreach line [split $txt \n] {
        # Handle leading form feeds before anything else: each one starts
        # a new page.  Doing this ahead of the page-full check avoids an
        # extra blank page when the form feed arrives on a full page.
        set sawFormFeed 0
        while {[string index $line 0] eq "\f"} {
            ::Text2PDF::_EndPage
            ::Text2PDF::_StartPage
            set line [string range $line 1 end]
            set sawFormFeed 1
        }
        # A bare form-feed line prints nothing and uses no vertical space.
        if {$sawFormFeed && $line eq ""} continue
 
        if {$P(ypos) < $P(margin)} {
            ::Text2PDF::_EndPage
            ::Text2PDF::_StartPage
        }
 
        set line [::textutil::untabify $line $P(tabSize)]
        if {$line eq ""} {
            ::Text2PDF::_MyPuts "T*\n"
        } else {
            # Backslash and parentheses are special inside a PDF string.
            regsub -all {([\\()])} $line {\\\1} line
            ::Text2PDF::_MyPuts "($line)'\n"
        }
        set P(ypos) [expr {$P(ypos) - $P(leadSize)}]
    }
    ::Text2PDF::_EndPage
 }

 package require text2pdf
 
 ################################################################
 #
 # Create a simple command line utility for the text2pdf package
 #
 # Usage --
 #
 #   Prints the command line help on stderr, including the package's
 #   current settings as the defaults, then exits.
 #
 proc Usage {} {
    # Reach into the package's private state array to report defaults.
    upvar ::Text2PDF::P cfg
 
    set helpLines [list \
        "usage: text2pdf ?options? textfile pdffile" \
        "  options:" \
        "\t-x ##\tpage width (default $cfg(width))" \
        "\t-y ##\tpage height (default $cfg(height))" \
        "\t-m ##\tmargin (default $cfg(margin))" \
        "\t-f <font>\tfont (default $cfg(font))" \
        "\t-s ##\tfont size (default $cfg(fontSize))" \
        "\t-v ##\tline spacing (default $cfg(leadSize))" \
        "\t-t ##\ttab size (default $cfg(tabSize))" \
        "\t-A4\tuse A4 paper (default letter)" \
        "\t-A3\tuse A3 paper (default letter)" \
        "\t-L\tlandscape mode"]
 
    foreach helpLine $helpLines {
        puts stderr $helpLine
    }
    exit
 }
 # ParseArgs --
 #
 #   Parses the leading options in the global argv, translating them into
 #   ::Text2PDF::Configure options.  Single-letter options accept their
 #   value either attached (-x612) or as the following argument (-x 612).
 #   Parsing stops at "--" or at the first argument that does not start
 #   with a dash.
 #
 # Results:
 #   Removes the parsed options from the global argv and adjusts argc, so
 #   only the positional arguments remain.  Calls ::Text2PDF::Configure
 #   when at least one option was given.  Calls Usage for -h, -?, --help
 #   and for unknown options.
 #
 proc ParseArgs {} {
    global argc argv
 
    set pdfArgs {}
    for {set a 0} {$a < $argc} {incr a} {
        set arg [lindex $argv $a]
        switch -regexp -- $arg {
            ^-x$ { lappend pdfArgs -width [lindex $argv [incr a]] }
            ^-x  { lappend pdfArgs -width [string range $arg 2 end] }
            ^-y$ { lappend pdfArgs -height [lindex $argv [incr a]] }
            ^-y  { lappend pdfArgs -height [string range $arg 2 end] }
            ^-m$ { lappend pdfArgs -margin [lindex $argv [incr a]] }
            ^-m  { lappend pdfArgs -margin [string range $arg 2 end] }
            ^-f$ { lappend pdfArgs -font [lindex $argv [incr a]] }
            ^-f  { lappend pdfArgs -font [string range $arg 2 end] }
            ^-s$ { lappend pdfArgs -fontsize [lindex $argv [incr a]] }
            ^-s  { lappend pdfArgs -fontsize [string range $arg 2 end] }
            ^-v$ { lappend pdfArgs -leadsize [lindex $argv [incr a]] }
            ^-v  { lappend pdfArgs -leadsize [string range $arg 2 end] }
            ^-t$ { lappend pdfArgs -tabsize [lindex $argv [incr a]] }
            ^-t  { lappend pdfArgs -tabsize [string range $arg 2 end] }
            ^-A4$ { lappend pdfArgs -a4 1 }
            ^-A3$ { lappend pdfArgs -a3 1 }
            ^-L$  { lappend pdfArgs -landscape 1 }
 
            ^-h$ -
            ^-\?$ -
            ^--help$ Usage
 
            ^--$ { incr a; break }
            ^- {   puts stderr "unknown option: \"$arg\"" ; Usage}
            default { break }
        }
    }
    # The question mark in the help pattern above is escaped on purpose:
    # unescaped it is a regexp quantifier, which made the pattern match an
    # empty argument or a lone dash instead of the literal -? option.
 
    set argc [expr {$argc - $a}]
    set argv [lrange $argv $a end]
 
    if {$pdfArgs ne {}} {
        eval ::Text2PDF::Configure $pdfArgs
    }
 }
 
 
 # Load the package defaults first so Usage can report them, then let the
 # command line override them.
 ::Text2PDF::Configure
 ParseArgs
 if {$argc != 2} { Usage }
 set iname [lindex $argv 0]
 set oname [lindex $argv 1]
 
 # Read the whole input file and convert it in memory.
 set fin [open $iname r]
 set txt [read $fin]
 close $fin
 set pdf [::Text2PDF::Text2PDF $txt]
 
 # PDF contains byte offsets, so the output channel must not translate
 # line endings or encodings.
 set fout [open $oname w]
 fconfigure $fout -translation binary
 puts -nonewline $fout $pdf
 close $fout
 
 puts "Converted $iname to $oname"
 exit