mirror of
https://github.com/tfussell/xlnt.git
synced 2024-03-22 13:11:17 +08:00
228 lines
8.0 KiB
Ruby
228 lines
8.0 KiB
Ruby
|
require 'fileutils'
|
||
|
require 'rexml/parsers/pullparser'
|
||
|
|
||
|
# Namespace for the DocBook conversion helpers defined in this file.
module DocBook

  # Converts a DocBook XML document into an .epub archive.
  #
  # The heavy lifting is delegated to external programs: xmllint collapses
  # entities and XIncludes, XSLT_PROCESSOR applies the stylesheet, and ZIPPER
  # builds the archive. CHECKER can be used afterwards to validate the result.
  class Epub
    CHECKER = "epubcheck"
    STYLESHEET = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "docbook.xsl"))
    CALLOUT_PATH = File.join('images', 'callouts')
    CALLOUT_FULL_PATH = File.expand_path(File.join(File.dirname(__FILE__), '..', '..', '..', CALLOUT_PATH))
    CALLOUT_LIMIT = 15
    CALLOUT_EXT = ".png"
    XSLT_PROCESSOR = "xsltproc"
    OUTPUT_DIR = ".epubtmp#{Time.now.to_f.to_s}"
    MIMETYPE = "application/epub+zip"
    META_DIR = "META-INF"
    OEBPS_DIR = "OEBPS"
    ZIPPER = "zip"

    attr_reader :output_dir

    # docbook_file::        path of the DocBook source; must exist.
    # output_dir::          scratch directory the intermediate files are written to.
    # css_file::            optional stylesheet copied into the book.
    # customization_layer:: optional XSLT used instead of STYLESHEET.
    # embedded_fonts::      font files copied into the book (nil is treated as none).
    #
    # Raises ArgumentError when +docbook_file+ does not exist.
    def initialize(docbook_file, output_dir=OUTPUT_DIR, css_file=nil, customization_layer=nil, embedded_fonts=[])
      @docbook_file = docbook_file
      @output_dir = output_dir
      @meta_dir = File.join(@output_dir, META_DIR)
      @oebps_dir = File.join(@output_dir, OEBPS_DIR)
      @css_file = css_file ? File.expand_path(css_file) : css_file
      # Array() lets callers pass nil (or a single path) without crashing later.
      @embedded_fonts = Array(embedded_fonts)
      @to_delete = []
      @stylesheet = customization_layer ? File.expand_path(customization_layer) : STYLESHEET

      unless File.exist?(@docbook_file)
        raise ArgumentError, "File #{@docbook_file} does not exist"
      end
    end

    # Renders the document and bundles it into +output_file+.
    #
    # Intermediate files are removed even when rendering or bundling raises.
    # Raises RuntimeError when one of the external tools fails.
    def render_to_file(output_file, verbose=false)
      render_to_epub(output_file, verbose)
      bundle_epub(output_file, verbose)
      nil
    ensure
      cleanup_files(@to_delete)
    end

    # Runs CHECKER on +file+.
    #
    # Returns false when the checker exits successfully; otherwise returns the
    # checker's combined stdout/stderr text (which is truthy, even when empty).
    def self.invalid?(file)
      begin
        # argv form: the file name never passes through a shell.
        output = IO.popen([CHECKER, file.to_s, { :err => [:child, :out] }]) { |io| io.read }
        return false if $?.success?
      rescue SystemCallError => e
        # The checker could not be started at all; report that as the failure.
        output = e.message
      end

      STDERR.puts output if $DEBUG
      output
    end

    private

    # Applies the stylesheet to the collapsed document inside @output_dir and
    # schedules everything the processor produced for later cleanup.
    def render_to_epub(output_file, verbose)
      @collapsed_docbook_file = collapse_docbook

      # The processor is run from inside the output directory, so it must exist.
      FileUtils.mkdir_p(@output_dir)

      params = [
        ['chunk.quietly', verbose ? '0' : '1'],
        ['callout.graphics.path', "#{CALLOUT_PATH}/"],
        ['callout.graphics.number.limit', CALLOUT_LIMIT.to_s],
        ['callout.graphics.extension', CALLOUT_EXT],
        ['base.dir', "#{OEBPS_DIR}/"],
      ]
      unless @embedded_fonts.empty?
        font_names = @embedded_fonts.map { |f| File.basename(f) }.join(',')
        params << ['epub.embedded.fonts', font_names]
      end
      params << ['epub.metainf.dir', "#{META_DIR}/"]
      params << ['epub.oebps.dir', "#{OEBPS_DIR}/"]
      params << ['html.stylesheet', File.basename(@css_file)] if @css_file

      command = [XSLT_PROCESSOR]
      params.each { |name, value| command.push('--stringparam', name, value) }
      command.push(@stylesheet.to_s, @collapsed_docbook_file.to_s)

      db2epub_cmd = describe_command(command, @output_dir)
      STDERR.puts db2epub_cmd if $DEBUG
      # argv form + :chdir avoids the shell, so spaces or quotes in paths,
      # CSS names or font names cannot break the command line.
      success = system(*command, :chdir => @output_dir)
      raise "Could not render as .epub to #{output_file} (#{db2epub_cmd})" unless success

      @to_delete.concat(Dir["#{@meta_dir}/*"])
      @to_delete.concat(Dir["#{@oebps_dir}/*"])
    end

    # Copies the supporting assets next to the rendered content and zips
    # mimetype, META-INF and OEBPS into +output_file+.
    def bundle_epub(output_file, verbose)
      mimetype_filename = write_mimetype
      copy_images
      copy_csses
      copy_fonts
      copy_callouts

      # Equivalent to: zip -X -r ../book.epub mimetype META-INF OEBPS
      command = [ZIPPER]
      command << "-q" unless verbose
      command.push("-X", "-r", File.expand_path(output_file), mimetype_filename,
                   File.basename(@meta_dir), File.basename(@oebps_dir))

      puts describe_command(command, @output_dir) if $DEBUG
      success = system(*command, :chdir => @output_dir)
      raise "Could not bundle into .epub file to #{output_file}" unless success
    end

    # Formats an argv Array (and optional working directory) as one line for
    # debug output and error messages.
    def describe_command(command, dir=nil)
      line = command.join(' ')
      dir ? %Q(cd "#{dir}" && #{line}) : line
    end

    # Input must be collapsed because REXML couldn't find figures in files that
    # were XIncluded or added by ENTITY
    # http://sourceforge.net/tracker/?func=detail&aid=2750442&group_id=21935&atid=373747
    #
    # Returns the path of the collapsed copy, written next to the source file.
    def collapse_docbook
      collapsed_file = File.join(File.expand_path(File.dirname(@docbook_file)),
                                 '.collapsed.' + File.basename(@docbook_file))
      # Registered before xmllint runs so a partial file is removed on failure too.
      @to_delete << collapsed_file

      unless system("xmllint", "--loaddtd", "--noent", "-o", collapsed_file, @docbook_file.to_s)
        raise "Could not collapse named entities in #{@docbook_file}"
      end

      unless system("xmllint", "--xinclude", "-o", collapsed_file, collapsed_file)
        raise "Could not collapse XIncludes in #{@docbook_file}"
      end

      collapsed_file
    end

    # Copies the callout graphics into the book when the document uses
    # callouts. Returns the source paths of the copied graphics.
    def copy_callouts
      return [] unless has_callouts?

      target_dir = File.join(@oebps_dir, CALLOUT_PATH)
      Dir.glob("#{CALLOUT_FULL_PATH}/*#{CALLOUT_EXT}").each do |img|
        img_new_filename = File.join(target_dir, File.basename(img))

        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(target_dir)
        FileUtils.cp(img, img_new_filename)
        @to_delete << img_new_filename
      end
    end

    # Copies every embedded font into the OEBPS directory and schedules the
    # copies for cleanup. Returns the source paths of the fonts.
    def copy_fonts
      @embedded_fonts.each do |font_file|
        font_new_filename = File.join(@oebps_dir, File.basename(font_file))
        FileUtils.mkdir_p(@oebps_dir)
        FileUtils.cp(font_file, font_new_filename)
        @to_delete << font_new_filename
      end
    end

    # Copies the CSS file, if one was given, into the OEBPS directory and
    # schedules the copy for cleanup.
    def copy_csses
      return unless @css_file

      css_new_filename = File.join(@oebps_dir, File.basename(@css_file))
      FileUtils.mkdir_p(@oebps_dir)
      FileUtils.cp(@css_file, css_new_filename)
      @to_delete << css_new_filename
      nil
    end

    # Copies every referenced image with a recognised extension into the OEBPS
    # directory, keeping its relative path. Image references are resolved
    # relative to the directory of the DocBook file. Returns the source paths.
    def copy_images
      new_images = []
      source_dir = File.expand_path(File.dirname(@docbook_file))

      get_image_refs.each do |img|
        # TODO: It'd be cooler if we had a filetype lookup rather than just
        # extension
        # Anchored so only the real file extension is considered.
        next unless img =~ /\.(svg|png|gif|jpe?g|xml)\z/i

        img_new_filename = File.join(@oebps_dir, img)
        img_full = File.join(source_dir, img)

        # TODO: What to rescue for these two?
        FileUtils.mkdir_p(File.dirname(img_new_filename))
        puts(img_full + ": " + img_new_filename) if $DEBUG
        FileUtils.cp(img_full, img_new_filename)
        @to_delete << img_new_filename
        new_images << img_full
      end

      new_images
    end

    # Writes the "mimetype" file into the output directory and returns its
    # base name.
    def write_mimetype
      FileUtils.mkdir_p(@output_dir)
      mimetype_filename = File.join(@output_dir, "mimetype")
      # print, not puts: the file must hold MIMETYPE with no trailing newline.
      File.open(mimetype_filename, "w") { |f| f.print MIMETYPE }
      @to_delete << mimetype_filename
      File.basename(mimetype_filename)
    end

    # Recursively removes every path in +file_list+ (nested arrays allowed).
    # Missing paths are ignored.
    def cleanup_files(file_list)
      file_list.flatten.each do |f|
        # Yikes
        FileUtils.rm_r(f, :force => true)
      end
    end

    # Returns an Array of all of the (image) @filerefs in a document
    # (unique, in document order; elements without a fileref are skipped).
    def get_image_refs
      image_refs = []
      # Block form so the file handle is closed once parsing is done.
      File.open(@collapsed_docbook_file) do |io|
        parser = REXML::Parsers::PullParser.new(io)
        while parser.has_next?
          el = parser.pull
          next unless el.start_element? && (el[0] == "imagedata" || el[0] == "graphic")

          fileref = el[1]['fileref']
          image_refs << fileref if fileref
        end
      end
      image_refs.uniq
    end

    # Returns true if the document has code callouts
    def has_callouts?
      File.open(@collapsed_docbook_file) do |io|
        parser = REXML::Parsers::PullParser.new(io)
        while parser.has_next?
          el = parser.pull
          # Returning from inside the block still closes the file.
          return true if el.start_element? && (el[0] == "calloutlist" || el[0] == "co")
        end
      end
      false
    end
  end
end
|