Version 7 of EpubCreator

Updated 2015-11-17 10:57:49 by juef

Keith Vetter 2014-03-14 : I've been working with eBooks for over a decade now, so I thought I'd share a simple command line tool I use to create an epub from a single text or xhtml file.

You need to specify the epub's title, author and content, which can be either an xhtml file or raw text (which will get converted into xhtml). You can also specify a cover image. If the xhtml source has images, you can include them also.

The command line usage is:

  epubCreator "Pride and Prejudice" "Jane Austen" p_and_p.xhtml cover.jpg img1.jpg img2.jpg

An epub file is essentially a zip file with some metadata files and one or more xhtml files with the book's content.

##+##########################################################################
#
# epubCreator.tcl -- command line tool to create an epub version 3.0
# file from a single text or xhtml file, an optional cover image and
# a list of 0 or more images
# by Keith Vetter 2014-03-14
#
# usage: epubCreator Title Author BookContent ?CoverImage? OtherImages...
#
package require uuid
package require fileutil

# Maps an image file extension to the media type written into the package
# manifest.  The "" entry yields an empty media type when no cover image
# (and therefore no extension) is supplied.
array set EXT {"" "" .png image/png .gif image/gif .jpg image/jpeg
    .jpeg image/jpeg .svg image/svg+xml}

################################################################

# Init -- fill the global array E with every name and path needed to build
# the epub, and create the temporary directory tree.
#
#   rawname  source text/xhtml file holding the book's content
#   title    book title (stored XML-escaped, it is only used in XML templates)
#   author   book author (stored XML-escaped)
#   cover    cover image file, or "" for no cover
#   images   list of additional image files
#
# Raises an error if the cover image has an extension missing from ::EXT.
proc Init {rawname title author cover images} {
    global E
    set guid [::uuid::uuid generate]
    set xmlEscapes {& &amp; < &lt; > &gt;}

    set E(rawname) $rawname
    set E(basename) [file tail [file rootname $E(rawname)]]
    set E(dirname) [file normalize [file dirname $E(rawname)]]
    set E(output,tempdir) [file join [::fileutil::tempdir] "epub_$guid"]
    set E(output,final) [file join $E(dirname) "$E(basename).epub"]

    set E(epub) EPUB
    set E(epub,tempdir) [file join $E(output,tempdir) $E(epub)]
    set E(html,name) "$E(basename).xhtml"
    set E(html,tempname) [file join $E(epub,tempdir) "$E(html,name)"]
    set E(opf,name)  [file join $E(epub) package.opf]
    set E(opf,tempname)  [file join $E(output,tempdir) $E(opf,name)]
    set E(mimetype) mimetype
    set E(mimetype,tempname) [file join $E(output,tempdir) $E(mimetype)]
    set E(meta-inf) META-INF
    set E(meta-inf,tempdir) [file join $E(output,tempdir) $E(meta-inf)]
    set E(meta-inf,tempname) [file join $E(meta-inf,tempdir) container.xml]
    set E(nav,name) nav.xhtml
    set E(nav,tempname) [file join $E(epub,tempdir) $E(nav,name)]
    set E(cover,source) $cover
    set E(cover,name) [file tail $cover]

    # Look the extension up case-insensitively (cover.JPG is common) and
    # report unsupported types clearly instead of failing on a missing
    # array element.
    set coverExt [string tolower [file extension $cover]]
    if {![info exists ::EXT($coverExt)]} {
        error "unsupported cover image type \"[file extension $cover]\":\
               expected one of [lsort [array names ::EXT .*]]"
    }
    set E(cover,format) $::EXT($coverExt)
    set E(images) $images
    set E(date) [clock format [clock seconds] -gmt 1 -format "%Y-%m-%dT%TZ"]

    file mkdir $E(output,tempdir)
    file mkdir $E(meta-inf,tempdir)
    file mkdir $E(epub,tempdir)

    set E(guid) "ebook:$guid"
    # Title and author are substituted verbatim into XML templates, so
    # characters such as & and < must be escaped to keep the output valid.
    set E(title) [string map $xmlEscapes $title]
    set E(author) [string map $xmlEscapes $author]
}
# MakeEpubFiles -- write the book content, the mimetype marker, the
# container descriptor, the package file and the navigation document into
# the temporary tree prepared by Init.
proc MakeEpubFiles {} {
    global E

    set bookXhtml [TextToHtml]
    WriteAllData $E(html,tempname) $bookXhtml

    WriteAllData $E(mimetype,tempname) "application/epub+zip"

    set containerXml [subst $::CONTAINER_XML]
    WriteAllData $E(meta-inf,tempname) $containerXml

    set packageXml [MakeOPF]
    WriteAllData $E(opf,tempname) $packageXml

    set navXhtml [subst $::NAV_XHTML]
    WriteAllData $E(nav,tempname) $navXhtml
}
# TextToHtml -- return the content of the source file as xhtml.
# Input that already contains an "<html" tag is returned untouched; anything
# else is treated as plain text: markup characters are escaped, empty lines
# become paragraph breaks and the result is wrapped in ::HTML_TEMPLATE.
proc TextToHtml {} {
    global E

    set chan [open $E(rawname) r]
    set data [read $chan]
    close $chan

    if {[string first "<html" $data] >= 0} {
        return $data
    }

    # The template refers to this text as $data, so the name must be kept.
    set data [string map {& &amp; < &lt; > &gt; \x22 &quot; ' &apos;} $data]
    regsub -all -line {^$} $data {</p><p>} data
    return [subst $::HTML_TEMPLATE]
}
# MakeOPF -- return the text of the package (.opf) file and copy the cover
# and any additional images into the EPUB directory.
#
# Without a cover image every line mentioning id_cover is blanked out of the
# template.  Each additional image gets a manifest <item> with the id
# id_image_N (N counted from 0); those items replace the template's
# "other images go here" placeholder line.
#
# Raises an error if an image has an extension missing from ::EXT.
proc MakeOPF {} {
    global E
    set opf [subst $::CONTENT_OPF]
    if {$E(cover,source) eq ""} {
        regsub -all -line {^.*id_cover.*$} $opf "" opf
    } else {
        file copy $E(cover,source) $E(epub,tempdir)
    }

    # Copy any additional images and build their manifest entries
    set image_items ""
    set i 0
    foreach iname $E(images) {
        # Case-insensitive lookup so that e.g. photo.JPG is accepted.
        set ext [string tolower [file extension $iname]]
        if {![info exists ::EXT($ext)]} {
            error "unsupported image type \"[file extension $iname]\" for\
                   \"$iname\": expected one of [lsort [array names ::EXT .*]]"
        }
        file copy $iname $E(epub,tempdir)

        # The file name ends up inside an XML attribute value.
        set href [string map {& &amp; < &lt; > &gt; \x22 &quot;} \
                      [file tail $iname]]
        append image_items \
            "<item href=\"$href\" id=\"id_image_$i\" media-type=\"$::EXT($ext)\"/>\n"
        incr i
    }
    if {$image_items ne ""} {
        # regsub treats & and \ in the replacement specially; quote them so
        # that the items are inserted literally.
        set literal [string map {\\ \\\\ & \\&} $image_items]
        regsub -line {^.*other images go here.*$} $opf $literal opf
    }
    return $opf
}
# ZipEpub -- pack the temporary tree into the final .epub with the external
# zip program; mimetype is listed first so that it is the first archive
# entry.  Returns zip's output; raises an error if zip fails.
proc ZipEpub {} {
    global E

    # zip appends to an existing archive, which would leave stale entries
    # from a previous run inside the book.
    file delete -- $E(output,final)

    # zip must run inside the temporary tree so that archive paths are
    # relative; go back afterwards so the caller's working directory is not
    # changed as a side effect (and the temp dir can be deleted later).
    set startDir [pwd]
    cd $E(output,tempdir)
    catch {
        exec zip -rX $E(output,final) $E(mimetype) $E(meta-inf)/ $E(epub)/
    } result options
    cd $startDir
    return -options $options $result
}
# WriteAllData -- create or overwrite the file fname so that it contains
# exactly data (no trailing newline is added).
proc WriteAllData {fname data} {
    set chan [open $fname w]
    puts -nonewline $chan $data
    close $chan
}
# Cleanup -- remove the temporary directory tree and everything in it.
proc Cleanup {} {
    set tempTree $::E(output,tempdir)
    file delete -force -- $tempTree
}
################################################################
# The templates below are expanded with [subst], so every $name inside them
# is resolved in the scope of the proc that performs the substitution.

# Wrapper for plain-text input: TextToHtml substitutes the escaped text for
# $data and the book title for $E(title).
set HTML_TEMPLATE {<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"
      xmlns:epub="http://www.idpf.org/2007/ops">
  <head>
    <title>$E(title)</title>
  </head>
  <body>
    <p>
    $data
    </p>
  </body>
</html>
}

# META-INF/container.xml: points at the package file, $E(opf,name).
set CONTAINER_XML {<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile media-type="application/oebps-package+xml"
             full-path="$E(opf,name)" />
  </rootfiles>
</container>
}

# Package file.  MakeOPF blanks every line containing "id_cover" when there
# is no cover image and replaces the "other images go here" line with the
# manifest items of the additional images.
set CONTENT_OPF {<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uuid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
    <dc:title>$E(title)</dc:title>
    <dc:creator>$E(author)</dc:creator>
    <dc:identifier id="uuid">$E(guid)</dc:identifier>
    <dc:language>en</dc:language>
    <meta property="dcterms:modified">$E(date)</meta>
    <meta name="cover" content="id_cover"/>
  </metadata>
  <manifest>
    <item id="id_cover" href="$E(cover,name)" media-type="$E(cover,format)" properties="cover-image"/>
    <item id="id_html" href="$E(html,name)" media-type="application/xhtml+xml"/>
    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
    <!-- other images go here -->
  </manifest>
  <spine>
    <itemref idref="id_html"/>
  </spine>
</package>
}

# Navigation document with a single table-of-contents entry that links to
# the book's xhtml file.
set NAV_XHTML {<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"
      xmlns:epub="http://www.idpf.org/2007/ops">
  <head>
  </head>
  <body>
    <nav epub:type="toc" id="toc">
      <ol>
        <li><a href="$E(html,name)">The Content</a></li>
      </ol>
    </nav>
  </body>
</html>
}

################################################################

# Command line driver: epubCreator Title Author BookContent ?Cover? ?Images...?
puts "\nepubCreator v0.3\nby Keith Vetter\n"
if {[llength $argv] < 3} {
    puts stderr "usage: epubCreator <title> <author> <data file> ?<cover image>? ?<other images>...?"
    puts stderr "for example:"
    puts stderr "  epubCreator \"Pride and Prejudice\" \"Jane Austen\" p_and_p.xhtml cover.jpg chapter1.jpg"
    return
}
# The first four arguments are named; whatever is left is the image list.
set images [lassign $argv title author fname cover]

Init $fname $title $author $cover $images

# Always remove the temporary tree, even when generating or zipping fails,
# then re-raise the original error with its stack trace intact.
set failed [catch {
    MakeEpubFiles
    ZipEpub
} result options]
Cleanup
if {$failed} {
    return -options $options $result
}
puts "created $E(output,final)"
return

ak - 2014-03-14 23:42:26

Tcllib contains the package zipfile::encode (see its documentation), which can obviate the need for 'exec zip'. It requires Trf and zlibtcl, though. Note that while Tcl 8.6 provides zip functions in-core, the Tcllib package currently makes no use of them.


clif flynt - 2014-07-14

I modified and extended Keith's code a bit. After some tweaking, it now passes the epubcheck validator and accepts multiple files, along with a couple of other tweaks.

Check the comments for the new, expanded command line.

############################################################################
#
# epubCreator.tcl -- command line tool to create an epub version 3.0
# file from a single text or xhtml file, an optional cover image and
# a list of 0 or more images
# by Keith Vetter 2014-03-14
# Mods Clif Flynt, 2014-04-01
#  Support for multiple text/html files (multiple chapters)
#  Support for additional .css file
#  Support for filename.epub different from "book title.epub"
#  Support for toc.ncx as well as nav.xhtml
#  Expanded command line processing
#
# usage: epubCreator \
# -title Title -author 'last, first' -data file1.txt file2.html file3.xhtml...\
# -cover Cover.jpg -images additional Images -css Style.css -html (1/0) \
# -name BookName
#
#  -title       Title for book
#  -author      Name of author as last, first
#  -data        List of data files to include in the text
#  -cover       An image file for the cover
#  -images      Additional images that might be referenced by the text
#  -css         An optional css file if you want special formatting
#  -html        1 or 0 to define whether the input data is already in html format
#  -name        The name for the .epub file, if different from title
#

package require uuid
package require fileutil

# Maps an image file extension to the media type written into the package
# manifest.  The "" entry yields an empty media type when no cover image
# (and therefore no extension) is supplied.
array set EXT {"" "" .png image/png .gif image/gif .jpg image/jpeg
    .jpeg image/jpeg .svg image/svg+xml}



# parseList -- copy "-option value ?value ...?" arguments into an array.
#
#   list          the argument words, e.g. $argv
#   stateVarName  name of an array in the caller; its existing elements
#                 define the valid option names
#   throwError    when true an unknown option raises an error, otherwise a
#                 message is collected and parsing continues
#
# A word is an option when it starts with "-" and is a single list element.
# The first value after an option replaces the array element, further values
# are appended to it as list elements.  Values that precede the first option
# are ignored.  Returns the list of collected messages.
proc parseList {list stateVarName {throwError 1}} {
    upvar $stateVarName stateArray

    set errs ""
    set store ""    ;# command used for the next value; empty until an option is seen

    foreach arg $list {
        set isOption [expr {[string index $arg 0] eq "-" && [llength $arg] == 1}]
        if {!$isOption} {
            if {$store ne ""} {
                $store stateArray($index) $arg
                set store lappend
            }
            continue
        }

        set index [string range $arg 1 end]
        if {![info exists stateArray($index)]} {
            if {$throwError} {
                error "No default for ${stateVarName}($index).\nValid fields are [lsort [array names stateArray]]"
            }
            lappend errs "No default for ${stateVarName}($index)"
        }
        set store set
    }
    return $errs
}

# Init -- fill the global array E with every name and path needed to build
# the epub, and create the temporary directory tree.  The option values
# E(name) and E(css) must already be present in E.
#
#   rawname  list of source text/xhtml files (the -data option)
#   title    book title (stored XML-escaped, it is only used in XML templates)
#   author   book author (stored XML-escaped)
#   cover    cover image file, or "" for no cover
#   images   list of additional image files
#
# Raises an error if the cover image has an extension missing from ::EXT.
proc Init {rawname title author cover images} {
    global E
    set guid [::uuid::uuid generate]
    set xmlEscapes {& &amp; < &lt; > &gt;}

    set E(rawname) $rawname

    # With several data files the default output name and directory come
    # from the first one; taking them from the whole list string would mix
    # pieces of different file names.
    if {![catch {llength $rawname} fileCount] && $fileCount > 1} {
        set firstFile [lindex $rawname 0]
    } else {
        set firstFile $rawname
    }
    set E(basename) [file tail [file rootname $firstFile]]
    set E(dirname) [file normalize [file dirname $firstFile]]
    set E(output,tempdir) [file join [::fileutil::tempdir] "epub_$guid"]
    if {$E(name) eq ""} {
      set E(output,final) [file join $E(dirname) "$E(basename).epub"]
    } else {
      set E(output,final) [file join $E(dirname) "$E(name).epub"]
    }

    set E(epub) EPUB
    set E(epub,tempdir) [file join $E(output,tempdir) $E(epub)]
    set E(html,name) "$E(basename).xhtml"
    set E(html,tempname) [file join $E(epub,tempdir) "$E(html,name)"]
    set E(ncx,tempname) [file join $E(epub,tempdir) "toc.ncx"]
    set E(opf,name)  [file join $E(epub) package.opf]
    set E(opf,tempname)  [file join $E(output,tempdir) $E(opf,name)]
    set E(mimetype) mimetype
    set E(mimetype,tempname) [file join $E(output,tempdir) $E(mimetype)]
    set E(meta-inf) META-INF
    set E(meta-inf,tempdir) [file join $E(output,tempdir) $E(meta-inf)]
    set E(meta-inf,tempname) [file join $E(meta-inf,tempdir) container.xml]
    set E(nav,name) nav.xhtml
    set E(nav,tempname) [file join $E(epub,tempdir) $E(nav,name)]
    set E(cover,source) $cover
    set E(cover,name) [file tail $cover]

    # Look the extension up case-insensitively (cover.JPG is common) and
    # report unsupported types clearly instead of failing on a missing
    # array element.
    set coverExt [string tolower [file extension $cover]]
    if {![info exists ::EXT($coverExt)]} {
        error "unsupported cover image type \"[file extension $cover]\":\
               expected one of [lsort [array names ::EXT .*]]"
    }
    set E(cover,format) $::EXT($coverExt)
    set E(images) $images
    set E(date) [clock format [clock seconds] -gmt 1 -format "%Y-%m-%dT%TZ"]

    file mkdir $E(output,tempdir)
    file mkdir $E(meta-inf,tempdir)
    file mkdir $E(epub,tempdir)

    set E(guid) "ebook:$guid"
    # Title and author are substituted verbatim into XML templates, so
    # characters such as & and < must be escaped to keep the output valid.
    set E(title) [string map $xmlEscapes $title]
    set E(author) [string map $xmlEscapes $author]

    # The package template always reads these elements, so they must exist
    # even when there is no style sheet or no content file.
    set E(opf,stylesheet) ""
    set E(opf,html_items) ""
    set E(opf,ref_items) ""

    if {$E(css) ne ""} {
      set E(css,name) $E(css)
      set E(css,tempname) [file join $E(epub,tempdir) stylesheet.css]
      set E(opf,stylesheet) {<item href="stylesheet.css" id="css" media-type="text/css"/>}
    }
}
# MakeEpubFiles -- write every file of the epub into the temporary tree.
#
# When a cover image was given, a cover.xhtml page showing it is written to
# the current directory and put in front of the list of content files.
# Each content file becomes one xhtml file and contributes one manifest
# item, one spine reference, one nav entry and one ncx navPoint, numbered
# from 1.  With E(html) true the file is copied as is, otherwise it goes
# through TextToHtml.
proc MakeEpubFiles {} {
    global E

    if {$E(cover,source) ne ""} {
      set E(rawname) [linsert $E(rawname) 0 cover.xhtml]
      set coverChan [open cover.xhtml w]
      puts $coverChan [BodyToHtml "<img src=\"[file tail $E(cover,source)]\"></img>"]
      close $coverChan
    }

    # The per-file templates refer to the counter as $i.
    set i 0
    foreach source $E(rawname) {
      incr i
      set E(html,name) "[file tail [file rootname $source]].xhtml"
      set E(html,tempname) [file join $E(epub,tempdir) $E(html,name)]

      append E(opf,html_items) \
          "\n    <item id=\"id_html_$i\" href=\"$E(html,name)\" media-type=\"application/xhtml+xml\"/>"
      append E(opf,ref_items) \
          "\n        <itemref idref=\"id_html_$i\"/>"
      append navs [subst $::NAV_XHTML1]
      append ncxs [subst $::CONTENT_NCX1]

      # WriteAllData reads the file itself when handed an existing file name.
      if {$E(html)} {
        set content $source
      } else {
        set content [TextToHtml $source]
      }
      WriteAllData $E(html,tempname) $content
    }

    WriteAllData $E(mimetype,tempname) "application/epub+zip"

    set containerXml [subst $::CONTAINER_XML]
    WriteAllData $E(meta-inf,tempname) $containerXml

    set packageXml [MakeOPF]
    WriteAllData $E(opf,tempname) $packageXml

    WriteAllData $E(nav,tempname) "[subst $::NAV_XHTML0]\n$navs\n$::NAV_XHTML2"
    WriteAllData $E(ncx,tempname) "[subst $::CONTENT_NCX0]\n$ncxs\n$::CONTENT_NCX2"

    if {[info exists E(css,tempname)]} {
      WriteAllData $E(css,tempname) $E(css)
    }
}
# TextToHtml -- return the content of the file rawname as xhtml.
# Input that already contains an "<html" tag is returned untouched; anything
# else is treated as plain text: markup characters are escaped, empty lines
# become paragraph breaks and the result is wrapped in ::HTML_TEMPLATE.
proc TextToHtml {rawname} {
    global E

    set chan [open $rawname r]
    set data [read $chan]
    close $chan

    if {[string first "<html" $data] >= 0} {
        return $data
    }

    # The template refers to this text as $data, so the name must be kept.
    set data [string map {& &amp; < &lt; > &gt; \x22 &quot; ' &apos;} $data]
    regsub -all -line {^$} $data {</p><p>} data
    return [subst $::HTML_TEMPLATE]
}

# BodyToHtml -- wrap an xhtml fragment in ::HTML_TEMPLATE and return the
# complete page.  The template picks the fragment up as $data and the book
# title as $E(title).
proc BodyToHtml {data} {
    global E
    return [subst $::HTML_TEMPLATE]
}

# NOTE(review): this definition of MakeOPF has no effect.  A second
# "proc MakeOPF" further down in this script replaces it before anything
# calls it.  Its body matches BodyToHtml, but no variable named data exists
# in this proc, so the template substitution would fail if it ever ran.
# Looks like a copy/paste leftover -- candidate for removal.
proc MakeOPF {} {
    global E

    set data "[subst $::HTML_TEMPLATE]" 
    return $data
}

# MakeOPF -- return the text of the package (.opf) file and copy the cover
# and any additional images into the EPUB directory.
#
# Each additional image gets a manifest <item> with the id id_image_N
# (N counted from 0).  The items are built BEFORE the template is expanded,
# because ::CONTENT_OPF picks them up through $image_items.  Without a cover
# image every line mentioning id_cover is blanked out of the result.
#
# Raises an error if an image has an extension missing from ::EXT.
proc MakeOPF {} {
    global E

    # Copy any additional images and build their manifest entries
    set items {}
    set i 0
    foreach iname $E(images) {
        # Case-insensitive lookup so that e.g. photo.JPG is accepted.
        set ext [string tolower [file extension $iname]]
        if {![info exists ::EXT($ext)]} {
            error "unsupported image type \"[file extension $iname]\" for\
                   \"$iname\": expected one of [lsort [array names ::EXT .*]]"
        }
        file copy $iname $E(epub,tempdir)

        # The file name ends up inside an XML attribute value.
        set href [string map {& &amp; < &lt; > &gt; \x22 &quot;} \
                      [file tail $iname]]
        lappend items \
            "<item href=\"$href\" id=\"id_image_$i\" media-type=\"$::EXT($ext)\"/>"
        incr i
    }
    # Continuation lines are indented like the placeholder in the template.
    set image_items [join $items "\n    "]

    set opf [subst $::CONTENT_OPF]
    if {$E(cover,source) eq ""} {
        regsub -all -line {^.*id_cover.*$} $opf "" opf
    } else {
        file copy $E(cover,source) $E(epub,tempdir)
    }
    return $opf
}
# ZipEpub -- pack the temporary tree into the final .epub with the external
# zip program; mimetype is listed first so that it is the first archive
# entry.  Returns zip's output; raises an error if zip fails.
proc ZipEpub {} {
    global E

    # Get rid of any previous epub else zip will append to the
    # existing file, leaving behind elements you thought had been removed
    catch {file delete $E(output,final)}

    # zip must run inside the temporary tree so that archive paths are
    # relative.  Go back afterwards: Cleanup deletes cover.xhtml relative to
    # the current directory, and the temporary tree cannot always be removed
    # while the process is still inside it.
    set startDir [pwd]
    cd $E(output,tempdir)
    catch {
        exec zip -rX $E(output,final) $E(mimetype) $E(meta-inf)/ $E(epub)/
    } result options
    cd $startDir
    return -options $options $result
}
# WriteAllData -- create or overwrite the file fname.
# When data names an existing file, that file's content is written;
# otherwise data itself is written.  No trailing newline is added.
proc WriteAllData {fname data} {
    if {[file exists $data]} {
      set source [open $data]
      set data [read $source]
      close $source
    }

    set target [open $fname w]
    puts -nonewline $target $data
    close $target
}
# Cleanup -- remove the temporary directory tree, then the cover.xhtml page
# (looked up relative to the current directory).
proc Cleanup {} {
    set tempTree $::E(output,tempdir)
    file delete -force -- $tempTree
    file delete cover.xhtml
}
# The templates below are expanded with [subst], so every $name inside them
# is resolved in the scope of the proc that performs the substitution
# ($::name always refers to a global).

# Page wrapper used by TextToHtml and BodyToHtml: $data is the page body,
# $E(title) the book title.  The page always links to stylesheet.css.
set HTML_TEMPLATE {<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"
      xmlns:epub="http://www.idpf.org/2007/ops">
  <head>
    <title>$E(title)</title>
    <link href="stylesheet.css" type="text/css" rel="stylesheet"/>
  </head>
  <body>
    <p>
    $data
    </p>
  </body>
</html>
}

# META-INF/container.xml: points at the package file, $E(opf,name).
set CONTAINER_XML {<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile media-type="application/oebps-package+xml"
             full-path="$E(opf,name)" />
  </rootfiles>
</container>
}

# NCX format
# per: http://www.gbenthien.net/Kindle%20and%20EPUB/ncx.html
#
# toc.ncx is assembled from three pieces: CONTENT_NCX0 (header),
# CONTENT_NCX1 (expanded once per content file, $i being the file's
# number) and CONTENT_NCX2 (footer).

set CONTENT_NCX0 {<?xml version="1.0" encoding="UTF-8"?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" xml:lang="en">
<head>
<meta name="dtb:uid" content="$::E(guid)"/>
<meta name="dtb:depth" content="1"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>


<docTitle><text>$::E(name)</text></docTitle>
<docAuthor><text>$::E(author)</text></docAuthor>



<navMap>

}
set CONTENT_NCX1 {
<navPoint id="navpoint-$i" playOrder="$i">
<navLabel><text>Chapter $i</text></navLabel>
<content src="$::E(html,name)"/>
</navPoint>
}

set CONTENT_NCX2 {
</navMap>
</ncx>
}
# Package file.  $::E(opf,stylesheet), $::E(opf,html_items) and
# $::E(opf,ref_items) are global array elements; $image_items is a plain
# variable and is therefore read from the proc that expands the template.
set CONTENT_OPF {<?xml version="1.0" encoding="UTF-8"?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0" unique-identifier="uuid">
  <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
    <dc:title>$E(title)</dc:title>
    <dc:creator>$E(author)</dc:creator>
    <dc:identifier id="uuid">$E(guid)</dc:identifier>
    <dc:language>en</dc:language>
    <meta property="dcterms:modified">$E(date)</meta>
    <meta name="cover" content="id_cover"/>
  </metadata>
  <manifest>
    $::E(opf,stylesheet)
    <item id="id_cover" href="$E(cover,name)" media-type="$E(cover,format)"/>
    $::E(opf,html_items)
    <item id="toc" href="toc.ncx" media-type="application/x-dtbncx+xml" />
    $image_items
    <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
  </manifest>
  <spine toc="toc">
    $::E(opf,ref_items)
  </spine>
</package>
}

# nav.xhtml is assembled like toc.ncx: NAV_XHTML0 (header), NAV_XHTML1
# (one list entry per content file, numbered with $i) and NAV_XHTML2 (footer).
set NAV_XHTML0 {<?xml version="1.0" encoding="UTF-8"?>
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"
      xmlns:epub="http://www.idpf.org/2007/ops">
  <head>
  <title>$::E(name)</title>
  </head>
  <body>
  <nav epub:type="toc" id="toc">
  <ol>
}
set NAV_XHTML1 {      <li><a href="$E(html,name)">Chapter $i</a></li>
}
set NAV_XHTML2 {
    </ol>
  </nav>
</body>
</html>
}


# Command line driver; see the header comment for the option list.
puts "\nepubCreator v0.3\nby Keith Vetter\n"
if {[llength $argv] < 3} {
    puts stderr "usage: epubCreator -title <title> -author <author> -data <data file1> ?<data file2> ...? ?-cover <cover image>? ?-images <other images> ...? ?-css <stylesheet>? ?-html 1|0? ?-name <book name>?"
    puts stderr "for example:"
    puts stderr "  epubCreator -title \"Pride and Prejudice\" -author \"Austen, Jane\" -data p_and_p.xhtml -cover cover.jpg -images chapter1.jpg"
    return
}

# Option defaults; the element names double as the list of valid options.
array set E {
  title {}
  author {}
  data {}
  cover {}
  images {}
  css {}
  html {0}
  name {}
}
parseList $argv E 1

# Without any content there is nothing to put into the book.
if {$E(data) eq "" && $E(cover) eq ""} {
    puts stderr "epubCreator: nothing to do -- supply at least one file with -data"
    return
}

Init $E(data) $E(title) $E(author) $E(cover) $E(images)

# Always remove the temporary files, even when generating or zipping fails,
# then re-raise the original error with its stack trace intact.
set failed [catch {
    MakeEpubFiles
    ZipEpub
} result options]
Cleanup
if {$failed} {
    return -options $options $result
}
puts "created $E(output,final)"
return