ao3ToEpub

Keith Vetter 2018-08-31 - A script that uses the ao3 package to download fan-fiction stories from Archive of Our Own and turn them into epubs.

see also:

  • EpubCreator -- tool to create an epub from html pages
  • ao3 -- interface to get data about Archive of Our Own fan-fiction stories

##+##########################################################################
#
# ao3ToEpub -- uses the AO3 library to create an epub for an Archive of Our Own story
# Requires ao3.tsh and epubCreator.tsh; both can be found on https://wiki.tcl-lang.org
#
# by Keith Vetter 2018-08-29
#

package require fileutil

# Load the AO3 library from the current working directory, or stop with a
# pointer to where it can be downloaded.
if {[file exists ao3.tsh]} {
    source ao3.tsh
} else {
    error "missing ao3.tsh--download from https://wiki.tcl-lang.org/ao3"
}

proc IntoEpub {id} {
    # Builds an epub for the Archive of Our Own story identified by id.
    #
    # The story is fetched through the AO3 library, each chapter is written
    # to its own html file in a scratch directory, and epubCreator is then
    # run on those files. The epub is written under ~/FBooks and is named
    # after the story title.
    #
    # id -- story id, passed unchanged to ::AO3::New
    #
    # Raises an error if epubCreator cannot be found or if running it fails;
    # in that case the scratch directory is left in place.

    puts "Building epub for story $id from Archive of Our Own"
    set tmpdir [file join [::fileutil::tempdir] "ao3_epubCreator_$id"]
    # Start from an empty scratch directory so stale chapter files from an
    # earlier run cannot leak into this one.
    file delete -force $tmpdir
    file mkdir $tmpdir

    set ao3 [::AO3::New $id 0 cache]
    puts "[$ao3 url]\n"
    $ao3 save [file join $tmpdir raw.html]

    set title [$ao3 title]
    set author [$ao3 author]
    set files [ChaptersIntoFiles $ao3 $tmpdir]
    $ao3 cleanup
    # File name: spaces become underscores, then every remaining non-word
    # character is dropped.
    set output [file join ~/FBooks [regsub -all {\W} [string map {" " _} $title] ""].epub]

    # Build the command as a genuine Tcl list. Assembling it as a string with
    # hand-written quotes breaks, or silently alters the argument, as soon as
    # a title or author contains a double quote or a backslash.
    # The display string is built alongside it, one option per line.
    set cmd [findEpubCreator]
    set shown $cmd
    foreach {opt value} [list -verbose 0 -output $output -title $title -author $author] {
        lappend cmd $opt $value
        append shown " \\\n  $opt [list $value]"
    }
    lappend cmd -data
    append shown " \\\n  -data"
    foreach fname $files {
        lappend cmd $fname
        append shown " \\\n    [list $fname]"
    }
    puts "\nrunning: $shown"
    puts [exec {*}$cmd]

    # Safety check: only remove the scratch directory if it really lives
    # under the system temp directory.
    if {[string first [::fileutil::tempdir] $tmpdir] > -1} {
        file delete -force $tmpdir
    }
}
proc findEpubCreator {} {
    # Locates the epubCreator.tsh script and returns the command needed to
    # run it, as reported by auto_execok.
    #
    # Candidates are tried in order: the bare script name, then ./ and ../
    # relative to the current directory. Raises an error when none of them
    # resolves to something executable.

    foreach where [list "" . ..] {
        set candidate [file join $where epubCreator.tsh]
        set found [auto_execok $candidate]
        if {$found eq ""} continue
        return $found
    }
    error "missing epubCreator.tsh--download from https://wiki.tcl-lang.org/epubCreator"
}
proc ChaptersIntoFiles {ao3 tmpdir} {
    # Writes every chapter of the story to its own html file in tmpdir and
    # returns the list of file names in chapter order.
    #
    # ao3    -- story object command created by ::AO3::New
    # tmpdir -- existing directory that receives chapter_N.html files
    #
    # Each chapter's html is passed through TweakHtml before it is written.

    set total [$ao3 chapter . count]
    # A reported count of zero is still treated as one chapter
    # (presumably a single-chapter story -- confirm against the AO3 library).
    if {$total == 0} {
        set total 1
    }

    set written [list]
    set n 1
    while {$n <= $total} {
        set target [file join $tmpdir "chapter_${n}.html"]
        puts "chapter $n -> $target"

        set body [TweakHtml [$ao3 chapter $n html]]
        set chan [open $target "w"]
        puts -nonewline $chan $body
        close $chan

        lappend written $target
        incr n
    }
    return $written
}
proc TweakHtml {html} {
    # Fix html to be epub3 legal and nicer looking.
    #
    # html -- chapter markup
    #
    # Returns the markup with all anchor tags stripped (the link text is
    # kept) and any "Chapter Text" h3 heading removed.

    # Remove opening and closing <a> tags only. The tag name must be followed
    # by whitespace, a slash or the closing bracket, so other tags that merely
    # start with "a" (abbr, article, aside, address, audio) are left alone.
    regsub -all -nocase {</?a(?:[\s/][^>]*)?>} $html "" html
    # remove <h3>Chapter Text</h3>
    regsub -all {<h3[^>]*>Chapter Text</h3>} $html "" html
    return $html
}


################################################################

# Entry point: the first command line argument is the story id to convert;
# with no arguments, print a usage line instead.
if {$argv ne {}} {
    set id [lindex $argv 0]
    IntoEpub $id
} else {
    puts "usage: [file tail $argv0] story_id"
}

return