-
Notifications
You must be signed in to change notification settings - Fork 12
/
Copy pathutil.py
207 lines (167 loc) · 6.65 KB
/
util.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import os
import sys
from StringIO import StringIO
from lxml import etree
from tempfile import mkstemp
import subprocess
# Resolve packaged resources through pkg_resources when it is installed;
# otherwise fall back to a path under the current working directory.
try:
    import pkg_resources
except ImportError:
    def resource_filename(dir, file):
        """ Fallback lookup: returns <cwd>/<dir>/<file> """
        return os.path.join(os.getcwd(), dir, file)
else:
    resource_filename = pkg_resources.resource_filename
### Both inkscape AND imagemagick (convert) are used because:
# Only inkscape can load the STIX fonts from the OS (imagemagick's SVG libs don't)
# Only imagemagick allows changing the color depth of an image (math/SVG use 8 bits)
#
# svg2png() uses rsvg-convert instead of inkscape; svg2png_inkscape() still
# shells out to inkscape.
def _locate_inkscape():
    """ Returns the Inkscape.app binary if that file exists, otherwise the
    bare command name 'inkscape' """
    appBundleBin = '/Applications/Inkscape.app/Contents/Resources/bin/inkscape'
    if os.path.isfile(appBundleBin):
        return appBundleBin
    return 'inkscape'

INKSCAPE_BIN = _locate_inkscape()
CONVERT_BIN = 'convert'
# http://lxml.de/xpathxslt.html
def makeXsl(filename):
    """ Parses the file "xsl/<filename>" (located via resource_filename) and
    returns it compiled as an etree.XSLT transform """
    stylesheet = etree.parse(resource_filename("xsl", filename))
    return etree.XSLT(stylesheet)
# Compile every collxml/docbook stylesheet once, at import time.
# The names on the left line up one-to-one with the file names on the right.
(COLLXML_PARAMS,
 COLLXML2DOCBOOK_XSL,
 DOCBOOK_CLEANUP_XSL,
 DOCBOOK_NORMALIZE_PATHS_XSL,
 DOCBOOK_NORMALIZE_GLOSSARY_XSL) = map(makeXsl, (
    'collxml-params.xsl',
    'collxml2dbk.xsl',
    'dbk-clean-whole.xsl',
    'dbk2epub-normalize-paths.xsl',
    'dbk-clean-whole-remove-duplicate-glossentry.xsl',
))
# XML namespace prefixes used by the XPath expressions in this module
NAMESPACES = dict(
    xhtml='http://www.w3.org/1999/xhtml',
    c='http://cnx.rice.edu/cnxml',
    svg='http://www.w3.org/2000/svg',
    mml='http://www.w3.org/1998/Math/MathML',
    db='http://docbook.org/ns/docbook',
    xi='http://www.w3.org/2001/XInclude',
    col='http://cnx.rice.edu/collxml',
)
# For SVG Cover image
# ('<prefix>.png' / '<prefix>.svg' are the keys dbk2cover looks up in its files dict)
COLLECTION_COVER_PREFIX = '_collection_cover'
DBK2SVG_COVER_XSL = makeXsl('dbk2svg-cover.xsl')

# Used for loading collection/module from the filesystem
MODULES_XPATH = etree.XPath(
    '//col:module/@document',
    namespaces=NAMESPACES)
# Matches every @src on a cnxml element unless it starts with "http:"
IMAGES_XPATH = etree.XPath(
    '//c:*/@src[not(starts-with(.,"http:"))]',
    namespaces=NAMESPACES)
def _reduce_png(pngData):
    """ Pipes PNG data through imagemagick's convert (CONVERT_BIN) with
    "-depth 8" and returns the tuple (converted png, stderr text) """
    argv = [
        CONVERT_BIN,
        '-compose', 'Copy_Opacity',
        '-depth', '8',
        '+dither',
        '-quality', '100',
        'png:/dev/stdin',  # read the image from stdin
        'png:-',           # write the result to stdout
    ]
    convert = subprocess.Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    # communicate() already yields the (stdout, stderr) pair callers expect
    return convert.communicate(pngData)
def svg2png(svgStr):
    """ Converts an SVG string to PNG data by piping it through rsvg-convert
    (96 dpi in both directions) and then through _reduce_png.
    Whatever either tool writes to stderr is discarded. """
    rsvg = subprocess.Popen(['rsvg-convert', '-d', '96', '-p', '96'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
    rawPng = rsvg.communicate(svgStr)[0]
    return _reduce_png(rawPng)[0]
# From http://stackoverflow.com/questions/2932408/
def svg2png_inkscape(svgStr):
    """ Converts an SVG string to PNG data using Inkscape (INKSCAPE_BIN)
    and then reduces the result with _reduce_png.

    The SVG is fed to Inkscape on stdin; the PNG is exported to a temporary
    file which is always removed again, even if Inkscape cannot be started
    or the export cannot be read. Returns the reduced PNG data. """
    # Can't just use stdout because Inkscape outputs text to stdout _and_ stderr
    fd, pngPath = mkstemp(suffix='.png')
    # Only the reserved path is needed; Inkscape opens the file on its own
    os.close(fd)
    try:
        strCmd = [INKSCAPE_BIN, '--without-gui', '-f', '/dev/stdin', '--export-png=%s' % pngPath]
        p = subprocess.Popen(strCmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, close_fds=True)
        p.communicate(svgStr)
        # PNG is binary data, so never read it in text mode
        with open(pngPath, 'rb') as pngFile:
            pngData = pngFile.read()
    finally:
        os.remove(pngPath)
    pngReduced, strError = _reduce_png(pngData)
    return pngReduced
def dbk2cover(dbk, filesDict, svg2pngFlag=True):
    """ Produces the cover image for a collection.

    dbk         -- the docbook document handed to the cover stylesheet
    filesDict   -- dict of file name -> file contents; may already contain
                   '_collection_cover.png' or '_collection_cover.svg'
    svg2pngFlag -- when true the SVG cover is rendered to PNG with Inkscape

    Returns a tuple (cover, newFiles):
      * an existing PNG cover in filesDict is returned as-is;
      * otherwise the PNG data rendered from the SVG (svg2pngFlag true)
        or the SVG as a parsed tree (svg2pngFlag false).
    newFiles holds 'cover.svg' when the SVG had to be generated. """
    newFiles = {}
    pngName = '%s.png' % COLLECTION_COVER_PREFIX
    svgName = '%s.svg' % COLLECTION_COVER_PREFIX

    if pngName in filesDict:
        return filesDict[pngName], newFiles

    svg = None
    if svgName in filesDict:
        svgStr = filesDict[svgName]
    else:
        svg = transform(DBK2SVG_COVER_XSL, dbk)
        svgStr = etree.tostring(svg)
        newFiles['cover.svg'] = svgStr

    if svg2pngFlag:
        return svg2png_inkscape(svgStr), newFiles

    if svg is None:
        # A supplied SVG cover was never parsed; previously this path hit an
        # unbound `svg`. Parse it so both paths return a tree.
        svg = etree.parse(StringIO(svgStr))
    return svg, newFiles
def transform(xslDoc, xmlDoc):
    """ Applies the stylesheet xslDoc to xmlDoc, writes each entry of the
    stylesheet's error_log (the <xsl:message /> text) to stdout on its own
    line and returns the result of the transform """
    result = xslDoc(xmlDoc)
    # TODO: Log the errors (and convert JSON to python) instead of just printing
    for message in xslDoc.error_log:
        sys.stdout.write("%s\n" % (message,))
    return result
### The following functions load files from the filesystem into memory
def loadModule(moduleDir):
    """ Given a directory of files (containing an index.cnxml)
    load it into memory.

    Returns a tuple (cnxml, files): the parsed CNXML tree and a dict mapping
    each local @src referenced by the CNXML (plus 'index.included.dbk' when
    that file exists) to the file's contents. Referenced files that cannot
    be read are reported on stderr and left out of the dict. """
    # Try autogenerated CNXML 1st
    cnxmlPath = os.path.join(moduleDir, 'index_auto_generated.cnxml')
    if not os.path.exists(cnxmlPath):
        cnxmlPath = os.path.join(moduleDir, 'index.cnxml')
    with open(cnxmlPath) as cnxmlFile:
        cnxmlStr = cnxmlFile.read()
    cnxml = etree.parse(StringIO(cnxmlStr))

    files = {}
    for f in IMAGES_XPATH(cnxml):
        try:
            # Referenced resources are images: read them as binary data
            with open(os.path.join(moduleDir, f), 'rb') as imageFile:
                files[f] = imageFile.read()
        except IOError:
            sys.stderr.write("LOG: Image not found %s %s\n" % (os.path.basename(moduleDir), f))

    # If the dbk file has already been generated, include it
    dbkPath = os.path.join(moduleDir, 'index.included.dbk')
    if os.path.exists(dbkPath):
        with open(dbkPath) as dbkFile:
            files['index.included.dbk'] = dbkFile.read()
    return (cnxml, files)
def loadCollection(dir):
    """ Loads "<dir>/collection.xml" plus every module directory it refers to.

    Returns a tuple (collxml, modules, allFiles):
      collxml  -- the parsed collection.xml
      modules  -- {moduleId: (cnxml, files)} as returned by loadModule
      allFiles -- every module file keyed by "<moduleId>/<file name>"
    Modules without a matching directory under dir are skipped. """
    collxml = etree.parse(os.path.join(dir, 'collection.xml'))

    modules = {}  # {'m1000': (etree.Element, {'file.jpg':'23947239874'})}
    allFiles = {}
    for moduleId in MODULES_XPATH(collxml):
        moduleDir = os.path.join(dir, moduleId)
        if not os.path.isdir(moduleDir):
            continue
        cnxml, files = loadModule(moduleDir)
        modules[moduleId] = (cnxml, files)
        for name, data in files.items():
            allFiles[os.path.join(moduleId, name)] = data
    return collxml, modules, allFiles
class Progress(object):
    """ Reports nested progress as "STATUS: <percent>% <message>" lines on stderr.

    Each start() pushes a new level onto self.stack, tick() advances the
    innermost level and finish() completes and pops it. Every change is
    reported immediately through _log(). """

    def __init__(self):
        # One dict per open level: {'done': int, 'total': int, 'msg': str}
        self.stack = []

    def start(self, ticks, msg):
        """ Opens a new level that expects `ticks` calls to tick() """
        # One extra step is reserved so the level only reaches 100% in finish()
        self.stack.append({ 'done': 0, 'total': ticks + 1, 'msg': msg })
        self._log()

    def tick(self, msg):
        """ Advances the innermost level by one step and reports msg """
        level = self.stack[-1]
        level['msg'] = msg
        # Clamp instead of dropping into a debugger when the caller ticks more
        # often than announced; the reported percentage never exceeds 100
        level['done'] = min(level['done'] + 1, level['total'])
        self._log()

    def finish(self):
        """ Marks the innermost level as complete, reports it and pops it """
        level = self.stack[-1]
        level['done'] = level['total']
        level['msg'] = 'Done'
        self._log()
        self.stack = self.stack[:-1]

    def _log(self):
        """ Writes the overall percentage and the joined messages to stderr """
        # Build up the percentage: each level contributes its done/total
        # fraction scaled down by the totals of all the levels enclosing it
        percent = 0.0
        weight = 1.0
        msg = []
        for p in self.stack:
            percent += weight * p['done'] / p['total']
            weight = weight * 1.0 / p['total']
            msg.append(p['msg'])
        # Discard the outermost message since it will never change
        if len(msg) > 1:
            msg = msg[1:]
        sys.stderr.write("STATUS: %d%% %s\n" % (percent * 100, ': '.join(msg)))