Version 3 of EpubCreator

Updated 2014-07-14 17:56:49 by clif

##+##########################################################################
#
# epubCreator.tsh -- command line tool to create an epub version 3.0
# file from one or more text or xhtml files, an optional cover image and
# a list of 0 or more images
# by Keith Vetter 2014-03-14
# Mods Clif Flynt, 2014-04-01
#   Support for multiple text/html files (multiple chapters)
#   Support for additional .css file
#   Support for filename.epub different from "book title.epub"
#   Support for toc.ncx as well as nav.html
#   Expanded command line processing
#
# usage: epubCreator
#   -title Title -author 'last, first' -data file1.txt file2.html file3.xhtml...
#   -cover Cover.jpg -images additional Images -css Style.css -html (1/0)
#   -name BookName
#
#   -title   Title for book
#   -author  Name of author as last, first
#   -data    List of data files to include in the text
#   -cover   An image file for the cover
#   -images  Additional images that might be referenced by text
#   -css     An optional css file if you want special formatting
#   -html    1 or 0 to define whether the input data is already in html format
#   -name    The name for the .epub file, if different from title
#

# tcllib packages: uuid for the book identifier and the temp directory
# name, fileutil for locating the system temp directory.
package require uuid
package require fileutil

# File extension -> media type, used for the cover and the extra images.
# The empty extension maps to the empty string so that "no cover" works.
array set EXT {
    ""    ""
    .png  image/png
    .gif  image/gif
    .jpg  image/jpeg
    .jpeg image/jpeg
    .svg  image/svg+xml
}

# parseList --
#   Walk a "-option value ?value ...?" word list and store the values in
#   the caller's array named by stateVarName.  The first value after an
#   option is stored with set, any further values with lappend.  Words
#   seen before the first option are ignored.
#
#   list          words to parse (typically $argv)
#   stateVarName  name of an array in the caller's scope; an option is
#                 only considered known if the array already has an
#                 element of that name
#   throwError    when true (default) an unknown option raises an error;
#                 otherwise a message is collected and parsing continues
#
#   Returns the list of collected messages (empty when all options known).
proc parseList {list stateVarName {throwError 1}} {
    upvar $stateVarName stateArray

    set problems ""
    foreach word $list {
        set isOption [expr {([string index $word 0] eq "-") &&
                            ([llength $word] == 1)}]
        if {!$isOption} {
            # A value: attach it to the most recent option, if any.
            if {[info exists op]} {
                $op stateArray($key) $word
                set op lappend
            }
            continue
        }

        set key [string range $word 1 end]
        if {![info exists stateArray($key)]} {
            if {$throwError} {
                error "No default for ${stateVarName}($key).\nValid fields are [lsort [array names stateArray]]"
            }
            lappend problems "No default for ${stateVarName}($key)"
        }
        # The next value replaces the default rather than extending it.
        set op set
    }
    return $problems
}

# Init --
#   Fill the global array E with the names and paths used by the other
#   procs and create the temporary directory tree for the epub contents.
#
#   rawname  list of chapter source files (the -data option)
#   title    book title
#   author   author name
#   cover    cover image file, or "" for none
#   images   list of additional image files
#
#   Reads E(name) and E(css), which the caller must have set beforehand
#   (the option defaults at the bottom of the script do this).
#   Raises an error when the cover image extension has no entry in ::EXT.
proc Init {rawname title author cover images} {
    global E
    set guid [::uuid::uuid generate]

    set E(rawname) $rawname
    # rawname is a list of files; derive the default output name and
    # directory from the first one instead of from the whole list.
    set firstfile [lindex $rawname 0]
    set E(basename) [file tail [file rootname $firstfile]]
    set E(dirname) [file normalize [file dirname $firstfile]]
    set E(output,tempdir) [file join [::fileutil::tempdir] "epub_$guid"]
    if {$E(name) eq ""} {
        set E(output,final) [file join $E(dirname) "$E(basename).epub"]
    } else {
        set E(output,final) [file join $E(dirname) "$E(name).epub"]
    }

    set E(epub) EPUB
    set E(epub,tempdir) [file join $E(output,tempdir) $E(epub)]
    set E(html,name) "$E(basename).xhtml"
    set E(html,tempname) [file join $E(epub,tempdir) "$E(html,name)"]
    set E(ncx,tempname) [file join $E(epub,tempdir) "toc.ncx"]
    set E(opf,name) [file join $E(epub) package.opf]
    set E(opf,tempname) [file join $E(output,tempdir) $E(opf,name)]
    set E(mimetype) mimetype
    set E(mimetype,tempname) [file join $E(output,tempdir) $E(mimetype)]
    set E(meta-inf) META-INF
    set E(meta-inf,tempdir) [file join $E(output,tempdir) $E(meta-inf)]
    set E(meta-inf,tempname) [file join $E(meta-inf,tempdir) container.xml]
    set E(nav,name) nav.xhtml
    set E(nav,tempname) [file join $E(epub,tempdir) $E(nav,name)]

    set E(cover,source) $cover
    set E(cover,name) [file tail $cover]
    # Look the media type up case-insensitively (Cover.JPG) and fail with
    # a readable message rather than a raw "no such element in array".
    set coverext [string tolower [file extension $cover]]
    if {![info exists ::EXT($coverext)]} {
        error "unsupported cover image type \"$coverext\" ($cover)"
    }
    set E(cover,format) $::EXT($coverext)
    set E(images) $images
    set E(date) [clock format [clock seconds] -gmt 1 -format "%Y-%m-%dT%TZ"]

    file mkdir $E(output,tempdir)
    file mkdir $E(meta-inf,tempdir)
    file mkdir $E(epub,tempdir)

    set E(guid) "ebook:$guid"
    set E(title) $title
    set E(author) $author

    # CONTENT_OPF always reads E(opf,stylesheet), so it must exist even
    # when no css file was requested.
    set E(opf,stylesheet) ""
    if {$E(css) ne ""} {
        set E(css,name) $E(css)
        set E(css,tempname) [file join $E(epub,tempdir) stylesheet.css]
        set E(opf,stylesheet) {<item href="stylesheet.css" id="css" media-type="text/css"/>}
    }
}

# MakeEpubFiles --
#   Write every file of the epub into the temporary tree: one xhtml file
#   per chapter, mimetype, container.xml, package.opf, nav.xhtml, toc.ncx
#   and, when requested, stylesheet.css.
#
#   When a cover image was given, a cover.xhtml page is generated in the
#   current directory and prepended to E(rawname) so it becomes the first
#   chapter.  E(html) decides whether chapter files are copied verbatim
#   (true) or passed through TextToHtml (false).
proc MakeEpubFiles {} {
    global E
    set i 0
    # Start all accumulators empty so a book without chapters still
    # produces well-defined (empty) navigation and manifest sections.
    set navs ""
    set ncxs ""
    set E(opf,html_items) ""
    set E(opf,ref_items) ""

    if {$E(cover,source) ne ""} {
        set E(rawname) [list cover.xhtml {*}$E(rawname)]
        set of [open cover.xhtml w]
        puts $of [BodyToHtml "<img src=\"[file tail $E(cover,source)]\"></img>"]
        close $of
    }

    foreach rawname $E(rawname) {
        incr i
        set basename [file tail [file rootname $rawname]]
        # The NAV/NCX entry templates read E(html,name) and the local i.
        set E(html,name) "$basename.xhtml"
        set E(html,tempname) [file join $E(epub,tempdir) "$E(html,name)"]
        append E(opf,html_items) [subst {
    <item id="id_html_$i" href="$E(html,name)" media-type="application/xhtml+xml"/>}]
        append E(opf,ref_items) [subst {
        <itemref idref="id_html_$i"/>}]
        append navs [subst $::NAV_XHTML1]
        append ncxs [subst $::CONTENT_NCX1]
        if {$E(html)} {
            # WriteAllData copies the file contents when given a file name.
            WriteAllData $E(html,tempname) $rawname
        } else {
            WriteAllData $E(html,tempname) [TextToHtml $rawname]
        }
    }

    WriteAllData $E(mimetype,tempname) "application/epub+zip"
    WriteAllData $E(meta-inf,tempname) [subst $::CONTAINER_XML]
    WriteAllData $E(opf,tempname) [MakeOPF]
    WriteAllData $E(nav,tempname) "[subst $::NAV_XHTML0]\n$navs\n$::NAV_XHTML2"
    WriteAllData $E(ncx,tempname) "[subst $::CONTENT_NCX0]\n$ncxs\n$::CONTENT_NCX2"
    if {[info exists E(css,tempname)]} {
        WriteAllData $E(css,tempname) $E(css)
    }
}

# TextToHtml --
#   Read the file rawname and return it as an xhtml document.
#
#   Content that already contains "<html" is returned unchanged.
#   Otherwise the XML special characters are escaped, every empty line
#   becomes a paragraph break and the result is wrapped in HTML_TEMPLATE
#   (which reads E(title) and the local variable data).
proc TextToHtml {rawname} {
    global E
    set fin [open $rawname r]
    set data [read $fin]
    close $fin

    if {[string first "<html" $data] == -1} {
        set data [string map {& &amp; < &lt; > &gt; \x22 &quot; ' &apos;} $data]
        regsub -all -line {^$} $data {</p><p>} data
        set data [subst $::HTML_TEMPLATE]
    }
    return $data
}

# BodyToHtml --
#   Wrap an xhtml fragment in HTML_TEMPLATE and return the full document.
#
#   data  markup to place in the body; the parameter must keep this name
#         because the template refers to it as $data (and to E(title)).
proc BodyToHtml {data} {
    upvar #0 E E
    return [subst $::HTML_TEMPLATE]
}

# MakeOPF --
#   Copy the cover and the additional images into the epub directory and
#   return the text of the package (.opf) document.
#
#   The manifest lines for the additional images are built before the
#   template is expanded, because CONTENT_OPF refers to them through the
#   local variable image_items.  When there is no cover, every line of
#   the expanded template that mentions id_cover is blanked out.
#   Raises an error for an image whose extension has no entry in ::EXT.
proc MakeOPF {} {
    global E

    set image_items ""
    set i 0
    foreach iname $E(images) {
        # -force: an image listed twice (or also used as the cover) must
        # not abort the run with "file already exists".
        file copy -force -- $iname $E(epub,tempdir)
        set tailname [file tail $iname]
        set ext [string tolower [file extension $iname]]
        if {![info exists ::EXT($ext)]} {
            error "unsupported image type \"$ext\" ($iname)"
        }
        set media $::EXT($ext)
        append image_items "<item href=\"$tailname\" id=\"id_image_$i\" media-type=\"$media\"/>\n"
        incr i
    }

    set opf [subst $::CONTENT_OPF]
    if {$E(cover,source) eq ""} {
        regsub -all -line {^.*id_cover.*$} $opf "" opf
    } else {
        file copy -force -- $E(cover,source) $E(epub,tempdir)
    }
    return $opf
}

# ZipEpub --
#   Pack the temporary tree into E(output,final) with the external zip
#   program.
#
#   The mimetype entry is added first and stored uncompressed, then the
#   META-INF and EPUB directories are added.  The working directory is
#   restored afterwards, also when zip fails, so that paths relative to
#   the original directory keep working for the caller.
proc ZipEpub {} {
    global E
    set startdir [pwd]
    cd $E(output,tempdir)

    set code [catch {
        # Get rid of any previous epub else zip will append to the
        # existing file, leaving behind elements you thought had been removed
        catch {file delete $E(output,final)}
        exec zip -X0 $E(output,final) $E(mimetype)
        exec zip -rX $E(output,final) $E(meta-inf)/ $E(epub)/
    } result options]

    cd $startdir
    return -options $options $result
}

# WriteAllData --
#   Write data to the file fname, replacing any previous contents.
#
#   If data happens to be the name of an existing file, the contents of
#   that file are written instead of the name itself; callers rely on
#   this to copy chapter and css files.
proc WriteAllData {fname data} {
    if {[file exists $data]} {
        set fin [open $data]
        set data [read $fin]
        close $fin
    }
    set fout [open $fname w]
    puts -nonewline $fout $data
    close $fout
}

# Cleanup --
#   Remove the temporary tree and the cover.xhtml page that
#   MakeEpubFiles generates in the current directory.
proc Cleanup {} {
    global E
    file delete -force -- $E(output,tempdir)
    file delete cover.xhtml
}

# Skeleton of a chapter page; expanded with subst, reads E(title) and a
# variable named data from the calling scope.
set HTML_TEMPLATE {<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"
      xmlns:epub="http://www.idpf.org/2007/ops">
  <head>
    <title>$E(title)</title>
    <link href="stylesheet.css" type="text/css" rel="stylesheet"/>
  </head>
  <body>
    <p>
    $data
    </p>
  </body>
</html>
}

# Template for META-INF/container.xml.  MakeEpubFiles expands it with
# subst; full-path is filled from E(opf,name), the location of the
# package document relative to the root of the archive.
set CONTAINER_XML {<?xml version="1.0"?> <container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">

  <rootfiles>
    <rootfile media-type="application/oebps-package+xml"
             full-path="$E(opf,name)" />
  </rootfiles>

</container> }

# NCX format
# per: http://www.gbenthien.net/Kindle%20and%20EPUB/ncx.html

# Head of toc.ncx, up to and including the opening navMap tag.
# NOTE(review): docTitle uses E(name), which is empty unless -name was
# given on the command line; confirm whether E(title) was intended.
set CONTENT_NCX0 {<?xml version="1.0" encoding="UTF-8"?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="en">
  <head>
    <meta name="dtb:uid" content="$::E(guid)"/>
    <meta name="dtb:depth" content="1"/>
    <meta name="dtb:totalPageCount" content="0"/>
    <meta name="dtb:maxPageNumber" content="0"/>
  </head>
  <docTitle><text>$::E(name)</text></docTitle>
  <docAuthor><text>$::E(author)</text></docAuthor>
  <navMap>
}

# One toc.ncx entry per chapter; expanded inside the chapter loop, where
# i is the chapter number and E(html,name) the chapter file.
set CONTENT_NCX1 {
    <navPoint id="navpoint-$i" playOrder="$i">
      <navLabel><text>Chapter $i</text></navLabel>
      <content src="$::E(html,name)"/>
    </navPoint>
}

# Tail of toc.ncx.
set CONTENT_NCX2 {
  </navMap>
</ncx>
}

# Package document.  Expanded by MakeOPF, which must have the local
# variable image_items set; the id_cover lines are removed there when
# the book has no cover.
set CONTENT_OPF {<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uuid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
    <dc:title>$E(title)</dc:title>
    <dc:creator>$E(author)</dc:creator>
    <dc:identifier id="uuid">$E(guid)</dc:identifier>
    <dc:language>en</dc:language>
    <meta property="dcterms:modified">$E(date)</meta>
    <meta name="cover" content="id_cover"/>
  </metadata>
  <manifest>
    $::E(opf,stylesheet)
    <item id="id_cover" href="$E(cover,name)" media-type="$E(cover,format)"/>
    $::E(opf,html_items)
    <item id="toc" href="toc.ncx" media-type="application/x-dtbncx+xml" />
    $image_items
    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
  </manifest>
  <spine toc="toc">
    $::E(opf,ref_items)
  </spine>
</package>
}

# Head of nav.xhtml, up to and including the opening list tag.
# NOTE(review): the title uses E(name), which is empty unless -name was
# given on the command line; confirm whether E(title) was intended.
set NAV_XHTML0 {<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"
      xmlns:epub="http://www.idpf.org/2007/ops">
  <head>
  <title>$::E(name)</title>
  </head>
  <body>
  <nav epub:type="toc" id="toc">
  <ol>
}

# One nav.xhtml entry per chapter; expanded inside the chapter loop.
set NAV_XHTML1 {
    <li><a href="$E(html,name)">Chapter $i</a></li>
}

# Tail of nav.xhtml.
set NAV_XHTML2 {
    </ol>
  </nav>
</body>
</html>
}

# ---------------------------------------------------------------------
# Main script: parse the command line, build the epub tree in a
# temporary directory, zip it and clean up.
# ---------------------------------------------------------------------
puts "\nepubCreator v0.3\nby Keith Vetter\n"
if {[llength $argv] < 3} {
    puts stderr "usage: epubCreator -title <title> -author <author> -data <data file1> <data file2> ... -cover ?<cover image>? -images ?<other images> ...?"
    puts stderr "for example:"
    puts stderr "  epubCreator -title \"Pride and Prejudice\" -author \"Austen, Jane\" -data p_and_p.xhtml chapter1.html -cover cover.jpg"
    return
}

# Old positional argument handling, superseded by parseList below:
# set images [lassign $argv title author fname cover]

# Option defaults.  parseList only accepts options that have an entry
# here, and Init reads E(name) and E(css) directly.
array set E {
  title {}
  author {}
  data {}
  cover {}
  images {}
  css {}
  html {0}
  name {}
}

parseList $argv E 1

Init $E(data) $E(title) $E(author) $E(cover) $E(images)

MakeEpubFiles
# parray E
# puts "TMP: ..$E(output,tempdir).."
ZipEpub
Cleanup
puts "created $E(output,final)"
return