-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathcollection2mobixhtml.py
executable file
·146 lines (116 loc) · 5.13 KB
/
collection2mobixhtml.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# python cm.py -d test-ccap -o result.html
# This script generates the XHTML content for the MOBI converter
# and, as a side effect, the accompanying OPF file.
import sys
import os
import Image
import shutil
from StringIO import StringIO
from tempfile import mkdtemp
import subprocess
import shutil
from lxml import etree
import urllib2
import module2dbk
import collection2dbk
import util
# Working directory captured at import time (not referenced elsewhere in the visible code).
BASE_PATH = os.getcwd()
# Debug switch (not referenced elsewhere in the visible code).
DEBUG=True
# XSL files
# NOTE(review): util.makeXsl presumably loads the named stylesheet and returns a
# callable transform (convert() invokes these as xslDoc(xmlDoc) and reads
# their .error_log) -- confirm against util.
DOCBOOK2XHTML_XSL=util.makeXsl('dbk2xhtml.xsl')
DOCBOOK_CLEANUP_XSL = util.makeXsl('dbk-clean-whole.xsl')
DOCBOOK2OPF = util.makeXsl('dbk2mobiopf.xsl')
# Selects the @document attribute of every col:module element (applied to collection.xml).
MODULES_XPATH = etree.XPath('//col:module/@document', namespaces=util.NAMESPACES)
# Selects every @src attribute on a c:* element whose value does not start with "http:".
IMAGES_XPATH = etree.XPath('//c:*/@src[not(starts-with(.,"http:"))]', namespaces=util.NAMESPACES)
def collection2xhtml(collection_dir, output_xhtml, reduce_quality=False):
    """ Converts an unzipped collection directory into a single xhtml file.

    Parses collection.xml inside collection_dir, loads every referenced
    module whose directory exists (missing ones are skipped), converts the
    collection to Docbook and passes the result on to convert().
    Returns whatever convert() returns. """
    progress = util.Progress()
    collection_xml = etree.parse(os.path.join(collection_dir, 'collection.xml'))

    loaded_modules = {}  # {'m1000': (etree.Element, {'file.jpg':'23947239874'})}
    every_file = {}      # same file contents, keyed by '<moduleId>/<file name>'
    for module_id in MODULES_XPATH(collection_xml):
        module_dir = os.path.join(collection_dir, module_id)
        if not os.path.isdir(module_dir):
            continue
        module_cnxml, module_files = loadModule(module_dir)
        every_file.update(
            (os.path.join(module_id, name), content)
            for name, content in module_files.items())
        loaded_modules[module_id] = (module_cnxml, module_files)

    progress.start(1, 'Converting collection to Docbook')
    # collection_dir is passed where a temp dir used to be
    docbook, generated_files = collection2dbk.convert(
        progress, collection_xml, loaded_modules, collection_dir,
        svg2png=True, math2svg=True, reduce_quality=reduce_quality)
    every_file.update(generated_files)

    progress.tick('Converting Docbook to MOBI')
    result = convert(progress, docbook, every_file, collection_dir, output_xhtml)
    progress.finish()
    return result
def loadModule(moduleDir):
    """ Given a directory of files (containing an index.cnxml)
    load it into memory.

    Returns a tuple (cnxml, files): cnxml is the parsed CNXML tree and files
    is a dict mapping each locally referenced resource (every match of
    IMAGES_XPATH that could be read) to its raw contents. If the directory
    already holds an 'index.included.dbk', it is added to files under that
    key. Unreadable resources are logged to stderr and skipped. """
    # Try autogenerated CNXML 1st
    cnxmlPath = os.path.join(moduleDir, 'index_auto_generated.cnxml')
    if not os.path.exists(cnxmlPath):
        cnxmlPath = os.path.join(moduleDir, 'index.cnxml')
    # 'with' closes each handle deterministically instead of leaking it
    with open(cnxmlPath) as cnxmlFile:
        cnxmlStr = cnxmlFile.read()
    cnxml = etree.parse(StringIO(cnxmlStr))

    files = {}
    for f in IMAGES_XPATH(cnxml):
        try:
            # Referenced resources are binary (images), so read them as bytes
            with open(os.path.join(moduleDir, f), 'rb') as resourceFile:
                files[f] = resourceFile.read()
        except IOError:
            sys.stderr.write("LOG: Image not found %s %s\n" % (os.path.basename(moduleDir), f))

    # If the dbk file has already been generated, include it
    dbkPath = os.path.join(moduleDir, 'index.included.dbk')
    if os.path.exists(dbkPath):
        with open(dbkPath) as dbkFile:
            files['index.included.dbk'] = dbkFile.read()
    return (cnxml, files)
def convert(p, dbk1, files, collection_dir, output_xhtml):
    """ Converts a Docbook Element and a dictionary of files into a xhtml.

    p              -- progress reporter (start/tick/finish are called on it)
    dbk1           -- the Docbook document to convert
    files          -- dictionary of files; currently not used by this function
    collection_dir -- directory handed to the OPF stylesheet as 'opfpath'
    output_xhtml   -- path of the xhtml file to write (relative paths are
                      resolved against the current working directory)

    Returns None. """

    def transform(xslDoc, xmlDoc, **params):
        """ Performs an XSLT transform (with optional stylesheet parameters)
        and prints the <xsl:message /> text to stderr """
        ret = xslDoc(xmlDoc, **params)
        for entry in xslDoc.error_log:
            # TODO: Log the errors (and convert JSON to python) instead of just printing
            sys.stderr.write(entry.message.encode('utf-8') + '\n')
        return ret

    # Step 1 (Cleaning up Docbook)
    p.start(2, 'Cleaning up Docbook')
    dbk2 = transform(DOCBOOK_CLEANUP_XSL, dbk1)

    # Step 2 (Docbook to XHTML)
    p.tick('Converting Docbook to XHTML')
    xhtml_file = os.path.join(os.getcwd(), output_xhtml)
    xhtml = transform(DOCBOOK2XHTML_XSL, dbk2)
    # etree.tostring() yields serialized bytes: write them in binary mode and
    # close the handle as soon as the write is done
    with open(xhtml_file, 'wb') as xhtml_out:
        xhtml_out.write(etree.tostring(xhtml))

    # Step 3 (Generate OPF file)
    # Pass the collection dir (with trailing slash) to the xsl template so it
    # saves the opf file into that folder. strparam() takes care of quoting,
    # so a path containing an apostrophe no longer breaks the parameter.
    colpath = os.path.abspath(collection_dir) + "/"
    transform(DOCBOOK2OPF, dbk2, opfpath=etree.XSLT.strparam(colpath))
    p.finish()
def main():
    """ Command line entry point: converts an unzipped collection to xhtml.

    Options: -d collection directory (required), -o output xhtml file
    (defaults to stdout), -r reduce image quality.

    Returns the process exit status: 0 on success, 1 when -d does not point
    to a directory containing a collection.xml, 2 when argparse is missing. """
    try:
        import argparse
    except ImportError:
        sys.stdout.write("argparse is needed for commandline\n")
        return 2

    parser = argparse.ArgumentParser(description='Convert an unzipped Collection to a .xhtml')
    parser.add_argument('-d', dest='collection_dir', help='Path to an unzipped collection', required=True)
    parser.add_argument('-o', dest='output_xhtml', help='Path to write the xhtml file', nargs='?', type=argparse.FileType('w'), default=sys.stdout)
    parser.add_argument('-r', dest='reduce_quality', help='Reduce image quality', action='store_true')
    args = parser.parse_args()

    # Verify the user pointed to a valid collection dir
    if not os.path.isdir(args.collection_dir) or not os.path.isfile(os.path.join(args.collection_dir, 'collection.xml')):
        sys.stderr.write("Must point to a valid collection directory (with a collection.xml file)\n")
        # Stop here instead of attempting the conversion on a bad directory
        return 1

    # Set the output file
    # A bare "-o" (no value) yields None because of nargs='?'; treat it like stdout
    if args.output_xhtml is None or args.output_xhtml == sys.stdout:
        output_xhtml = '/dev/stdout'
    else:
        output_xhtml = os.path.abspath(args.output_xhtml.name)
        # argparse opened the file only to validate it; the conversion reopens
        # it by name, so release this handle now
        args.output_xhtml.close()

    collection2xhtml(args.collection_dir, output_xhtml, args.reduce_quality)
    return 0


if __name__ == '__main__':
    sys.exit(main())