[Keith Vetter] 2014-03-14 : I've been working with eBooks for over a
decade now, so I thought I'd share a simple command line tool
I use to create an epub from a single text or xhtml file.
You need to specify the epub's title, author and content, which can be
either an xhtml file or raw text (which will get converted into
xhtml). You can also specify a cover image. If the xhtml source has
images, you can include them also.
The command line usage is:
epubCreator "Pride and Prejudice" "Jane Austen" p_and_p.xhtml cover.jpg img1.jpg img2.jpg
An [http://www.idpf.org/epub/30/spec/epub30-publications.html%|%epub file%|%]
is essentially a zip file with some metadata files and one or more xhtml
files with the book's content.
----
'''[ak] - 2014-03-14 23:42:26'''
[Tcllib] contains a package
[https://core.tcl-lang.org/tcllib/doc/trunk/embedded/md/tcllib/files/modules/zip/encode.md%|%"zipfile::encode (doc)"%|%]
that can obviate the need for 'exec zip'. It requires [Trf] and [zlibtcl] though. Note that while Tcl 8.6 provides zip functions in-core, the [Tcllib] package currently makes no use of that.
[KPV] [https://core.tcl-lang.org/tcllib/doc/trunk/embedded/md/tcllib/files/modules/zip/encode.md%|%"zipfile::encode (doc)"%|%] won't work because
of epub's weird requirement that the first file has to be uncompressed.
----
'''[clif flynt] - 2014-07-14 '''
I modified and extended Keith's code a bit. After some tweaking, I've got it passing the epubcheck validator,
accepting multiple files and a couple other tweaks.
Check the comments for the new, expanded command line.
----
[KPV] 2018-08-31 -- Inspired by [Clif Flynt]'s changes, I added a bunch more features, including automatically creating a cover image and a TOC. But it's because of [ao3ToEpub] that I finally got around to updating this page.
----
======
##+##########################################################################
#
# epubCreator.tsh -- command line tool to create an epub version 3.0 file
# from text or xhtml files, an optional cover image, style sheets and images.
#
# The EPUB Contents Document 3.0.1 spec is at
# http://www.idpf.org/epub/301/spec/epub-contentdocs.html
# A good description of how an epub (version 2.0) file is organized is at
# http://gbenthien.net/Kindle%20and%20EPUB/epub.php
#
# by Keith Vetter 2014-03-14
# Clif Flynt, 2014-04-01
# Support for multiple text/html files (multiple chapters)
# Support for additional .css file
# Support for filename.epub different from "book title.epub"
# Support for toc.ncx as well as nav.xhtml
# http://www.idpf.org/epub/301/spec/epub-contentdocs.html#sec-xhtml-nav
# [NCX is part of Epub 2.0 but inserted for backwards compatibility]
# Expanded command line processing
# Keith Vetter 2015-12-03
# extract title, author, stylesheets and images from html data files
# insert a TOC after cover image
# create cover image if none given, requires ImageMagick or Tk
# cleaned up few bugs
# support multiple CSS files
#
package require fileutil
package require base64
package require textutil
# Script version string.
set version "0.5"

# E -- global option table shared by the whole script.
#   data                    starts out empty
#   title author cover images css html output zip
#                           start out as the literal string "*"; IsHtmlData
#                           treats html == "*" as "not specified"
#   toc                     1; MakeOPF drops the TOC from the spine when false
#   verbose                 1
#   tk                      0; CanMakeCoverImage never tries Tk when this is 0
#                           and forces Tk when it is greater than 1
# (No comments inside the braces: they would become list elements.)
array set E {
    data    {}
    title   {*}
    author  {*}
    cover   {*}
    images  {*}
    css     {*}
    html    {*}
    output  {*}
    toc     1
    verbose 1
    tk      0
    zip     {*}
}
set usage {usage:
epubCreator -data file1.txt file2.xhtml file3.xhtml...
epubCreator
-data file1.txt file2.xhtml file3.xhtml...
-title 'Book Title'
-author 'last, first'
-cover Cover.jpg
-images
} data set data "
$data
" set data [MakeHtmlPage $data $E(title)] } else { set data [FixHtml $data] if {! [HasHtmlHeader $data]} { INFO "adding header" set data [MakeHtmlPage $data $E(title)] } } return $data } proc IsHtmlData {data} { if {$::E(html) ne "*"} { return $::E(html) } if {[string first " -1} { return 1 } if {[string first " -1} { return 1 }
return 0
}
# HasHtmlHeader -- meant to return 1 when a "string first" test succeeds, else 0.
# NOTE(review): the condition line below looks damaged in this copy: the search
# string opens with a double quote that is never closed, and the comparison that
# should precede " -1" is missing -- presumably text starting with a less-than
# sign was stripped.  The exact search string cannot be determined from here;
# restore it from a pristine copy of the script before relying on this proc.
proc HasHtmlHeader {data} {
if {[string first " -1} { return 1 }
return 0
}
proc FixHtml {data} {
    # Found some pages had "<br>" without closing slash, which is not
    # well-formed XHTML.  Rewrite every such tag as the self-closing "<br/>".
    #
    #   data    -- markup text to repair
    #   returns -- the text with each unclosed br tag replaced
    #
    # The tag is matched regardless of letter case and with optional white
    # space before the ">", so "<BR>" and "<br >" are repaired as well.
    # Tags that already carry the slash ("<br/>", "<br />") do not match
    # and are left untouched.
    return [regsub -all -nocase {<br\s*>} $data {<br/>}]
}
proc MakeHtmlPage {body title} {
    # Expand the global HTML_TEMPLATE in this proc's scope and return the
    # result.  The template can therefore refer to $body, $title and the
    # elements of the global array E; commands and backslash sequences in
    # the template are substituted too.
    global E
    return [subst $::HTML_TEMPLATE]
}
proc Plural {num word} {
    # Returns "<num> <word>", with an "s" appended to the word unless the
    # count is exactly 1 (so 0 and negative counts are pluralised).
    set suffix [expr {$num != 1 ? "s" : ""}]
    return "$num $word$suffix"
}
proc MakeCoverPage {} {
    # Writes the cover page, produced by MakeHtmlPage with an empty body and
    # the book title, to cover.xhtml inside the directory E(epub,tempdir).
    # Returns the bare file name (not the full path) of the page.
    global E
    set pageName "cover.xhtml"
    set pagePath [file join $E(epub,tempdir) $pageName]
    set chan [open $pagePath w]
    puts $chan [MakeHtmlPage "" $E(title)]
    close $chan
    return $pageName
}
proc MakeOPF {} {
global E
set opf [subst $::PACKAGE_OPF]
if {$E(cover,source) eq ""} {
INFO "removing cover page from opf"
regsub -all -line {^.*id_cover.*$} $opf "" opf
}
if {! $E(toc)} {
INFO "removing TOC from spine"
regsub -all -line {^.*]+?title=['"](.*?)["']} $data . navlabel
}
INFO "chapter $chapter title: => $navlabel"
return $navlabel
}
##+##########################################################################
#
# guid -- like uuid::uuid generate, but that function displays a warning on OSX
#
proc guid { } {
    # Builds an upper-case identifier made of five dash-separated hex fields:
    #   1. the current clock seconds
    #   2. up to four hex digits of the current clock clicks
    #   3. up to four hex digits derived from [info cmdcount] and the call counter
    #   4. characters 3..6 of the hex dump of a random number's string form
    #   5. the first twelve hex characters dumped from a random number, the
    #      call counter and a cached user/host/machine/os string
    # Two globals carry state between calls: ::GuiD__SeEd__VaR (call counter,
    # incremented once per call) and ::GuiD__MaChInFo__VaR (machine string).

    # Create the persistent state on first use.
    if {![info exists ::GuiD__SeEd__VaR]} {
        set ::GuiD__SeEd__VaR 0
    }
    if {![info exists ::GuiD__MaChInFo__VaR]} {
        set ::GuiD__MaChInFo__VaR \
            $::tcl_platform(user)[info hostname]$::tcl_platform(machine)$::tcl_platform(os)
    }
    set seq $::GuiD__SeEd__VaR

    # "h*" dumps each byte as two hex digits, low nibble first.
    set nodeText [expr {rand()}]$seq$::GuiD__MaChInFo__VaR
    binary scan $nodeText h* nodeHex
    set counter [string range "[info cmdcount]$seq$seq" 0 8]
    binary scan [expr {rand()}] h* randHex

    set fields [list [format %2.2x [clock seconds]]]
    # Pick though clock clicks for a good sequence.
    lappend fields [string range [format %2.2x [clock clicks]] 0 3]
    lappend fields [string range [format %2.2x $counter] 0 3]
    lappend fields [string range $randHex 3 6]
    lappend fields [string range $nodeHex 0 11]

    incr ::GuiD__SeEd__VaR
    return [string toupper [join $fields -]]
}
################################################################
#
# Makes a cover image
#
namespace eval ::BlankCover {
variable blank_cover_tile {
/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0a
HBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/2wBDAQkJCQwLDBgNDRgyIRwhMjIyMjIy
MjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjL/wAARCABAAEADASIA
AhEBAxEB/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQA
AAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3
ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWm
p6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/8QAHwEA
AwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoL/8QAtREAAgECBAQDBAcFBAQAAQJ3AAECAxEEBSEx
BhJBUQdhcRMiMoEIFEKRobHBCSMzUvAVYnLRChYkNOEl8RcYGRomJygpKjU2Nzg5OkNERUZHSElK
U1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6goOEhYaHiImKkpOUlZaXmJmaoqOkpaanqKmqsrO0tba3
uLm6wsPExcbHyMnK0tPU1dbX2Nna4uPk5ebn6Onq8vP09fb3+Pn6/9oADAMBAAIRAxEAPwDi2xgE
jIHON3OP8/yo6r1XbnGM8etKQQ27IJU9D1xik8zcMP8Ad7kt+h44rzj0BTv9eOON3akGMlc5PoeK
aApG4EA9cEZ/Wg7QoJU9PmwQB/OmIdnBzvy3Rffnpn1o24YqPlOfb0pOn1bg8DP/ANel3Zx13diT
xkdv8/0oATJLbDj5ufmX6/T0zQAQmBlk6cnHHcH/AD+VKwyGz97sev8AkUg+QsR0/un9KAFHylct
n+Erx1z0/nSjKhMgjjGfX0P+fWmA4UFhtYdh049qdtYkjI+ZuRigBeA/zZLZ7df85pM7SoG4svqP
wpvDBApP17A9f8/hSjnqoI/3u/8A+ugALMRgSAEtyCMHPT+Qo+Zh/ex7f05oIThiCgHfOMUrgKMg
5IyeB/n1/WgAUEA7Ccngg8YpCzFQyx/N6/h+ff0pMlS4xtwMkfT3pzKQSoIBAycnvQAhAIITvwRg
YH+eaXcQD949+RyPr+vNNIV9qkDdkAkj/GlbeDsz1/CgAwCADkZbqeo4PT9KcRkMVLYP94f4U0sF
3Y2gd8c/h/npRhQ+VyT1BxmgA6EfJyD/AHcZ4/8A1UctlQHJwQRnH+e9OAJ+8/fjAz+tMVTjrweD
u7n/ADmgB4Zy2SMlh03Y/KkAxtGWAxjhenp19qAwDbhkY7fl2pGI2NhFI/u46D/P86AH9FA3de56
nnimHdtXOQD684P50oXPzdGJ7etKV5+9nBzwhoGxozxhsdOo9KRwME4KnHTIOfWnKwO4A8f71Ivz
YGMhuQB9D3oEA2hf4lOecfKBz7UAbWYZbvzmjJCllOG9ex+lAGGUIARjHTjFMAO0gk7d3B4PI55F
BbnCk5xzlqXjnKk7eq+n/wBagEbfRlPYc4/H8aQH/9k=}
proc CanMakeCoverImage {} {
    # Reports (1/0) whether a cover image can be generated, either with
    # ImageMagick (both "convert" and "montage" must be found) or, when
    # E(tk) is non-zero, with the Tk and Img packages.

    # Hidden way to force Tk: poisoning the auto_execok cache makes the
    # "convert" lookup below come back empty.
    if {$::E(tk) > 1} {
        set ::auto_execs(convert) ""
    }

    if {!([auto_execok convert] eq "" || [auto_execok montage] eq "")} {
        return 1
    }
    if {$::E(tk) == 0} {
        return 0
    }

    foreach pkg {Tk Img} {
        if {[catch {package require $pkg} emsg]} {
            WARN "cannot load $pkg: $emsg"
            return 0
        }
        # Keep the root window out of sight once the package is loaded.
        wm withdraw .
    }
    return 1
}
proc MakeCoverImage {title author output_image} {
    # Creates a cover image showing the title and author.
    #
    #   title, author -- text drawn onto the cover
    #   output_image  -- path of the image file to create
    #
    # ImageMagick is used only when BOTH of its tools are available:
    # MakeBlankCover runs "montage" as well as "convert", and
    # CanMakeCoverImage applies the same two-tool test.  Checking "convert"
    # alone would pick the ImageMagick path on systems that have some
    # "convert" program but no "montage", and then fail instead of falling
    # back to Tk.
    if {[auto_execok convert] ne "" && [auto_execok montage] ne ""} {
        INFO "creating cover image using ImageMagick"
        MakeBlankCover $output_image
        WriteOntoBlankCover $title $author $output_image
    } else {
        MakeCoverImage_Tk $title $author $output_image
    }
}
proc MakeBlankCover {output_image} {
    # Produces the blank cover background with ImageMagick:
    #   1. decode the embedded tile and write it to output_image (binary mode),
    #   2. "montage" 96 copies of that file in an 8x12 grid, overwriting it,
    #   3. "convert" draws a 10 pixel black rectangle onto the result.
    set chan [open $output_image wb]
    puts -nonewline $chan [::base64::decode $::BlankCover::blank_cover_tile]
    close $chan

    # Tile our blank_cover_tile
    INFO " montage -mode concatenate -tile 8x12 \$img*96 \$img"
    set tiles [lrepeat [expr {8 * 12}] $output_image]
    exec montage -mode concatenate -tile 8x12 {*}$tiles $output_image

    # Add black border around page
    INFO [sjoin " convert \$img -fill none -stroke black -strokewidth 10 " \
              "-draw {rectangle 20 20 492 748} \$img"]
    exec convert $output_image -fill none -stroke black -strokewidth 10 \
        -draw {rectangle 20 20 492 748} $output_image
}
proc WriteOntoBlankCover {title author output_image} {
    # Draws "<title> / by / <author>" onto an existing cover image with
    # ImageMagick's "convert", overwriting the image in place.
    #
    #   title, author -- text to draw; each is re-wrapped by textutil::adjust
    #                    to lines of at most 18 characters
    #   output_image  -- path of the image to annotate
    set font [WhichImageMagickFont]
    INFO " using ImageMagick font '$font'"

    # Keep the font option as a real two-element list.  Building it as the
    # string "-font $font" and expanding that would split a font name that
    # contains white space (or other list-special characters) into several
    # command-line arguments.
    set fontArgs {}
    if {$font ne ""} { set fontArgs [list -font $font] }

    set title [::textutil::adjust $title -length 18 -strictlength true]
    set author [::textutil::adjust $author -length 18 -strictlength true]
    set txt "$title\n\nby\n$author"

    set cmd [list convert $output_image -fill black -stroke black {*}$fontArgs]
    lappend cmd -pointsize 64 -gravity north -annotate +0+100 $txt $output_image
    INFO [sjoin " convert \$img -fill black -stroke black [join $fontArgs] -pointsize 64 " \
              "-gravity north -annotate +0+100 \$title \$img"]
    exec {*}$cmd
}
proc WhichImageMagickFont {} {
    # ImageMagick doesn't seem to have consistent font names across systems
    # so we list all available fonts and search for a Times Roman font.
    #
    # Returns, in order of preference:
    #   - the first listed font whose name, with dashes removed, is exactly
    #     Times, TimesRoman or TimesNewRoman;
    #   - else the first Times font whose name does not end (case-insensitively)
    #     in "italic", "I", "oblique" or "O";
    #   - else the first Times font of any kind;
    #   - else the empty string.
    set pipe [open "|convert -list font" r]
    set listing [read $pipe]
    catch {close $pipe} ;# convert exits with non-zero status

    set anyTimes {}
    set uprightTimes {}
    foreach {whole name} [regexp -inline -all -line {^.*Font: (.*Times.*)$} $listing] {
        set squeezed [string map {- ""} $name]
        if {$squeezed in {Times TimesRoman TimesNewRoman}} {
            return $name
        }
        lappend anyTimes $name

        set slanted 0
        foreach pattern {*italic *I *oblique *O} {
            if {[string match -nocase $pattern $name]} {
                set slanted 1
                break
            }
        }
        if {!$slanted} {
            lappend uprightTimes $name
        }
    }

    if {$uprightTimes ne {}} {
        return [lindex $uprightTimes 0]
    }
    return [lindex $anyTimes 0]
}
proc MakeCoverImage_Tk {title author output_image} {
    # Creates the cover image with Tk: tiles the embedded background onto a
    # 512x768 photo, shows it on a canvas with a black frame and the
    # title/author text, captures the canvas into a photo and writes that
    # photo to output_image in jpeg format.
    #
    #   title, author -- text to draw; each is re-wrapped by textutil::adjust
    #                    to lines of at most 18 characters
    #   output_image  -- path of the image file to create
    #
    # The Img package is loaded here with "package require" rather than
    # probed with "package version Img": that spelling only works as an
    # abbreviation of "package versions", which lists the versions known to
    # the package index, not whether the package has been loaded.
    if {[catch {package require Img}]} { ERROR "requires Img package" }
    INFO "creating cover image using Tk"

    # Drop images left over from an earlier run.
    foreach img [image names] {
        if {[string match "::cover::*" $img]} { image delete $img }
    }

    image create photo ::cover::tile -data [::base64::decode $::BlankCover::blank_cover_tile]
    image create photo ::cover::blank_cover -width 512 -height 768
    # Copying into a region larger than the source repeats (tiles) the source.
    ::cover::blank_cover copy ::cover::tile -to 0 0 512 768

    set font {Times 40 bold}
    set title [::textutil::adjust $title -length 18 -strictlength true]
    set author [::textutil::adjust $author -length 18 -strictlength true]
    set txt "$title\n\nby\n$author"

    # Put the canvas in a mapped toplevel that is positioned far off screen.
    destroy .c
    wm deiconify .
    wm geom . -10000-10000
    pack [canvas .c -width 512 -height 768 -bd 0 -highlightthickness 0]
    .c create image 0 0 -anchor nw -image ::cover::blank_cover
    .c create rect 20 20 492 748 -fill {} -outline black -width 10

    # Each text line becomes its own canvas item, 50 pixels below the
    # previous one, instead of a single multi-line text item.
    set y 50
    foreach line [split [string trim $txt] \n] {
        .c create text 256 $y -font $font -tag b -anchor n -justify center -text $line
        incr y 50
    }

    # Now copy canvas into an image and save it
    # NOTE(review): "-data .c" presumably relies on the Img package's
    # window-capture support -- confirm.
    raise .
    update
    image create photo ::cover::cover -data .c
    ::cover::cover write $output_image -format jpeg

    # Hide the window again and release everything that was created.
    wm withdraw .
    destroy .c
    foreach img [image names] {
        if {[string match "::cover::*" $img]} { image delete $img }
    }
}
}
proc sjoin {args} {
    # Concatenates all arguments with nothing between them; used to split
    # long string literals across several source lines.
    set joined ""
    foreach piece $args {
        append joined $piece
    }
    return $joined
}
################################################################
#
# Various XHTML templates
# HTML_TEMPLATE -- convert text into xhtml, also used by cover page
# CONTAINER_XML -- for META-INF/container.xml
# PACKAGE_OPF -- for the EPUB/package.opf file
# NAV_XHTML# -- for the nav.xhtml navigation document
# CONTENT_NCX# -- for the EPub version 2.0 toc.ncx navigation document
#
set HTML_TEMPLATE {