Version 2 of EpubCreator

Updated 2014-03-15 00:49:12 by escargo

Keith Vetter 2014-03-14 : I've been working with eBooks for over a decade now, so I thought I'd share a simple command line tool I use to create an epub from a single text or xhtml file.

You need to specify the epub's title, author and content, which can be either an xhtml file or raw text (which will get converted into xhtml). You can also specify a cover image. If the xhtml source has images, you can include them also.

The command line usage is:

  epubCreator "Pride and Prejudice" "Jane Austen" p_and_p.xhtml cover.jpg img1.jpg img2.jpg

An epub file is essentially a zip file with some metadata files and one or more xhtml files with the book's content.

##+##########################################################################
#
# epubCreator.tsh -- command line tool to create an epub version 3.0
# file from a single text or xhtml file, an optional cover image and
# a list of 0 or more images
# by Keith Vetter 2014-03-14
#
# usage: epubCreator Title Author BookContent ?CoverImage? OtherImages...
#
package require uuid
package require fileutil

# EXT maps a file extension (with its leading dot) to the media type that
# is written into the package manifest.  The empty extension maps to an
# empty media type, so the cover lookup also works when no cover was given.
array set EXT {
    ""     ""
    .png   image/png
    .gif   image/gif
    .jpg   image/jpeg
    .jpeg  image/jpeg
    .svg   image/svg+xml
}

################################################################

# Init -- fills the global array E with every name and path the other
# procedures use, and creates the scratch directory tree for the book.
#
#   rawname  path of the text or xhtml source file
#   title    book title   (stored XML-escaped, see below)
#   author   book author  (stored XML-escaped, see below)
#   cover    path of the cover image, or "" for no cover
#   images   list of paths of additional images
#
# E(title), E(author) and E(cover,name) are only ever substituted into the
# XML templates, so they are stored with &, <, > and the double quote
# already escaped; pass the plain, unescaped text on the command line.
#
# Raises an error, before anything is created on disk, when the cover
# image has an extension that is not listed in ::EXT.
proc Init {rawname title author cover images} {
    global E

    # Look the cover type up case-insensitively (cover.JPG is as good as
    # cover.jpg) and fail with a readable message instead of Tcl's
    # "no such element in array" when the type is unknown.
    set coverExt [string tolower [file extension $cover]]
    if {![info exists ::EXT($coverExt)]} {
        error "unsupported cover image type \"[file extension $cover]\"\
               (known types: [lsort [array names ::EXT .*]])"
    }

    # \x22 is the double quote; the list parser turns it into the character.
    set xmlEscapes {& &amp; < &lt; > &gt; \x22 &quot;}

    set guid [::uuid::uuid generate]

    set E(rawname) $rawname
    set E(basename) [file tail [file rootname $E(rawname)]]
    set E(dirname) [file normalize [file dirname $E(rawname)]]
    set E(output,tempdir) [file join [::fileutil::tempdir] "epub_$guid"]
    set E(output,final) [file join $E(dirname) "$E(basename).epub"]

    set E(epub) EPUB
    set E(epub,tempdir) [file join $E(output,tempdir) $E(epub)]
    set E(html,name) "$E(basename).xhtml"
    set E(html,tempname) [file join $E(epub,tempdir) "$E(html,name)"]
    set E(opf,name)  [file join $E(epub) package.opf]
    set E(opf,tempname)  [file join $E(output,tempdir) $E(opf,name)]
    set E(mimetype) mimetype
    set E(mimetype,tempname) [file join $E(output,tempdir) $E(mimetype)]
    set E(meta-inf) META-INF
    set E(meta-inf,tempdir) [file join $E(output,tempdir) $E(meta-inf)]
    set E(meta-inf,tempname) [file join $E(meta-inf,tempdir) container.xml]
    set E(nav,name) nav.xhtml
    set E(nav,tempname) [file join $E(epub,tempdir) $E(nav,name)]
    set E(cover,source) $cover
    set E(cover,name) [string map $xmlEscapes [file tail $cover]]
    set E(cover,format) $::EXT($coverExt)
    set E(images) $images
    set E(date) [clock format [clock seconds] -gmt 1 -format "%Y-%m-%dT%TZ"]

    file mkdir $E(output,tempdir)
    file mkdir $E(meta-inf,tempdir)
    file mkdir $E(epub,tempdir)

    set E(guid) "ebook:$guid"
    set E(title) [string map $xmlEscapes $title]
    set E(author) [string map $xmlEscapes $author]
}
# MakeEpubFiles -- generates every file of the book inside the scratch
# tree: the content document, the mimetype marker, container.xml, the
# package document and the navigation document, in that order.
#
# The XML templates are expanded here with subst, so the $E(...) references
# inside them resolve against the global array E linked below.
proc MakeEpubFiles {} {
    global E

    set content [TextToHtml]
    WriteAllData $E(html,tempname) $content

    WriteAllData $E(mimetype,tempname) {application/epub+zip}

    set container [subst $::CONTAINER_XML]
    WriteAllData $E(meta-inf,tempname) $container

    # MakeOPF also copies the cover and the other images into place.
    set package [MakeOPF]
    WriteAllData $E(opf,tempname) $package

    set navigation [subst $::NAV_XHTML]
    WriteAllData $E(nav,tempname) $navigation
}
# TextToHtml -- returns the book content as xhtml.
#
# Reads the file named by E(rawname).  Content that already contains an
# "<html" tag is returned untouched.  Anything else is treated as plain
# text: the XML special characters are escaped, every empty line becomes a
# paragraph break, and the result is wrapped in ::HTML_TEMPLATE.
proc TextToHtml {} {
    global E

    set chan [open $E(rawname) r]
    set data [read $chan]
    close $chan

    if {[string first "<html" $data] >= 0} {
        return $data
    }

    # The variable has to be called "data": the template refers to $data.
    set data [string map {& &amp; < &lt; > &gt; \x22 &quot; ' &apos;} $data]
    set data [regsub -all -line {^$} $data {</p><p>}]
    return [subst $::HTML_TEMPLATE]
}
# MakeOPF -- returns the text of the package (OPF) document and copies the
# cover and the additional images into the EPUB scratch directory.
#
# The document is ::CONTENT_OPF expanded with subst.  Two marker lines in
# that template are then processed:
#   * without a cover, every line mentioning "id_cover" is blanked out;
#   * the line containing "other images go here" is replaced by one
#     manifest <item> per entry of E(images).
#
# Raises an error when an additional image has an extension for which
# ::EXT holds no media type, because the manifest entry would be invalid.
proc MakeOPF {} {
    global E
    set opf [subst $::CONTENT_OPF]
    if {$E(cover,source) eq ""} {
        regsub -all -line {^.*id_cover.*$} $opf "" opf
    } else {
        file copy -- $E(cover,source) $E(epub,tempdir)
    }

    # \x22 is the double quote; hrefs live inside double-quoted attributes.
    set xmlEscapes {& &amp; < &lt; > &gt; \x22 &quot;}

    # Copy any additional images and build their manifest entries.
    set image_items ""
    set i 0
    foreach iname $E(images) {
        # Case-insensitive lookup so that IMG1.JPG works like img1.jpg.
        set ext [string tolower [file extension $iname]]
        if {![info exists ::EXT($ext)] || $::EXT($ext) eq ""} {
            error "unsupported image type \"[file extension $iname]\" for \"$iname\""
        }
        file copy -- $iname $E(epub,tempdir)
        set href [string map $xmlEscapes [file tail $iname]]
        append image_items \
            "<item href=\"$href\" id=\"id_image_$i\" media-type=\"$::EXT($ext)\"/>\n"
        incr i
    }

    # Splice by position rather than with regsub: the replacement text comes
    # from file names, and regsub would interpret any "&" or "\" in it.
    if {$image_items ne ""
        && [regexp -indices -line {^.*other images go here.*$} $opf span]} {
        set opf [string replace $opf [lindex $span 0] [lindex $span 1] $image_items]
    }
    return $opf
}
# ZipEpub -- packs the scratch tree into the final .epub file with the
# external "zip" program and returns zip's combined output.
#
# Differences from a plain "zip -r":
#   * An existing output file is removed first.  zip would otherwise
#     update that archive in place and keep entries left over from an
#     earlier run.
#   * The mimetype file is added on its own, first and uncompressed (-0)
#     and without extra fields (-X), as the EPUB container format asks
#     for; everything else follows, compressed.
#   * The working directory is restored afterwards, also on failure, so
#     the process is not left sitting inside the directory that Cleanup
#     is about to delete.
proc ZipEpub {} {
    global E

    file delete -- $E(output,final)

    set startDir [pwd]
    cd $E(output,tempdir)
    set code [catch {
        set out [exec zip -X0 $E(output,final) $E(mimetype)]
        append out \n [exec zip -rX9 $E(output,final) $E(meta-inf)/ $E(epub)/]
    } result options]
    cd $startDir

    if {$code} {
        return -options $options $result
    }
    return $result
}
# WriteAllData -- creates (or truncates) the file fname and writes data to
# it without appending a trailing newline.
proc WriteAllData {fname data} {
    set chan [open $fname w]
    puts -nonewline $chan $data
    close $chan
}
# Cleanup -- removes the scratch directory named by E(output,tempdir)
# together with everything inside it.
proc Cleanup {} {
    set scratch $::E(output,tempdir)
    file delete -force -- $scratch
}
################################################################
# HTML_TEMPLATE -- xhtml skeleton that TextToHtml wraps around plain-text
# input.  It is expanded with subst in a scope where the array E and the
# variable "data" (the escaped text) are visible.  Paragraph breaks arrive
# inside $data as "</p><p>", which is why $data sits inside a <p> element.
set HTML_TEMPLATE {<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"
      xmlns:epub="http://www.idpf.org/2007/ops">
  <head>
    <title>$E(title)</title>
  </head>
  <body>
    <p>
    $data
    </p>
  </body>
</html>
}

# CONTAINER_XML -- text of the container.xml file written into the META-INF
# directory.  Expanded with subst; $E(opf,name) supplies the path of the
# package document relative to the root of the archive.
set CONTAINER_XML {<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile media-type="application/oebps-package+xml"
             full-path="$E(opf,name)" />
  </rootfiles>
</container>
}

# CONTENT_OPF -- template of the package document, expanded with subst by
# MakeOPF.  Two pieces of text act as markers for MakeOPF and must each
# stay on a line of their own:
#   * "id_cover": every line containing it is blanked out when the book
#     has no cover image;
#   * "other images go here": that line is replaced by the manifest items
#     of the additional images.
set CONTENT_OPF {<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uuid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
    <dc:title>$E(title)</dc:title>
    <dc:creator>$E(author)</dc:creator>
    <dc:identifier id="uuid">$E(guid)</dc:identifier>
    <dc:language>en</dc:language>
    <meta property="dcterms:modified">$E(date)</meta>
    <meta name="cover" content="id_cover"/>
  </metadata>
  <manifest>
    <item id="id_cover" href="$E(cover,name)" media-type="$E(cover,format)" properties="cover-image"/>
    <item id="id_html" href="$E(html,name)" media-type="application/xhtml+xml"/>
    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
    <!-- other images go here -->
  </manifest>
  <spine>
    <itemref idref="id_html"/>
  </spine>
</package>
}

# NAV_XHTML -- template of the navigation document (nav.xhtml), expanded
# with subst by MakeEpubFiles.  It holds a table of contents with a single
# entry that points at the content document.  The <head> carries a <title>
# because an xhtml document without one does not validate.
set NAV_XHTML {<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"
      xmlns:epub="http://www.idpf.org/2007/ops">
  <head>
    <title>$E(title)</title>
  </head>
  <body>
    <nav epub:type="toc" id="toc">
      <ol>
        <li><a href="$E(html,name)">The Content</a></li>
      </ol>
    </nav>
  </body>
</html>
}

################################################################

# Main script: check the command line, set everything up, build the book.
puts "\nepubCreator v0.3\nby Keith Vetter\n"
if {[llength $argv] < 3} {
    puts stderr "usage: epubCreator <title> <author> <data file> ?<cover image>? ?<other images>...?"
    puts stderr "for example:"
    puts stderr "  epubCreator \"Pride and Prejudice\" \"Jane Austen\" p_and_p.xhtml cover.jpg chapter1.jpg"
    return
}
# cover is "" when only three arguments were given; images is what is left.
set images [lassign $argv title author fname cover]

Init $fname $title $author $cover $images

# Init has created the scratch directory, so from here on it has to be
# removed again whether or not building the book succeeds.  Any error is
# re-raised afterwards with its original message and stack trace.
set failed [catch {
    MakeEpubFiles
    ZipEpub
} message options]
Cleanup
if {$failed} {
    return -options $options $message
}
puts "created $E(output,final)"
return

ak - 2014-03-14 23:42:26

Tcllib contains a package "zipfile::encode (doc)" that can obviate the need for 'exec zip'. It requires Trf and zlibtcl though. Note that while Tcl 8.6 provides zip functions in-core, the Tcllib package currently makes no use of that.