forked from airyai/prj2doc
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprj2doc.py
executable file
·391 lines (351 loc) · 12.1 KB
/
prj2doc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
#!/usr/bin/env python
# -*- coding: utf-8 -*-
'''
Project to Document utility.
Written by PWX <[email protected]> in Feb. 2012.
This utility will collect project sources under current
directory, highlight their syntax, and output as a whole
document.
'''
from __future__ import unicode_literals, print_function
import os, sys, fnmatch
import re, getopt
import codecs
#sys.setdefaultencoding('utf-8')
import chardet
from pygments import highlight
from pygments.lexers import guess_lexer_for_filename
from pygments.formatters import get_formatter_for_filename, HtmlFormatter
from pygments.styles import get_style_by_name, get_all_styles
from pygments.util import ClassNotFound
# templates

# Extensions of the files whose leading tabs are turned into spaces before
# highlighting.
ALLOW_WHITESPACE_FILTER = {
    '.c', '.cpp', '.cc', '.h', '.hpp', '.cs', '.vb', '.js', '.php', '.java',
}

# Wildcards that select the input files when none are given on the command
# line.
DEFAULT_PATTERNS = (
    'Makefile*',
    '*.cpp', '*.cc', '*.hpp', '*.c', '*.h',
    '*.py', '*.pyw', '*.java', '*.cs', '*.vb', '*.js',
    '*.php*',
)

# Line-comment marker looked up by language name when building a file
# header; languages that are not listed use DEFAULT_COMMENT_START.
DEFAULT_COMMENT_START = '//'
COMMENT_STARTS = {
    'Python': '#',
    'Makefile': '#',
    'VB.net': "'",
}

# Text of the per-file header: file name, syntax (language) and line count.
FILE_HEADER_TEMPLATE = '''
文件:{filename}
语法:{language}
行数:{line}
'''.strip()

# Prefix of a path that is relative to the current directory ("./").
CURRENT_DIR_PREFIX = '.' + os.sep
def generate_header(filename, language, line):
    """Build the comment banner that introduces one source file.

    The header template is filled in with *filename*, *language* and *line*,
    every resulting line is prefixed with the line-comment marker registered
    for *language* (or the default marker), and the whole text is framed by
    two rules made of '=' characters.  A leading current-directory prefix is
    dropped from *filename*.  When sizing the rules, characters with a code
    point of 256 or more count as two columns.

    Returns the banner as a single newline-joined string.
    """
    if filename.startswith(CURRENT_DIR_PREFIX):
        filename = filename[len(CURRENT_DIR_PREFIX):]
    body = FILE_HEADER_TEMPLATE.format(
        filename=filename, language=language, line=line).split('\n')

    def columns(text):
        # One column for code points below 256, two for everything else.
        return sum(1 if ord(ch) < 256 else 2 for ch in text)

    width = max([0] + [columns(text) for text in body])
    marker = COMMENT_STARTS.get(language, DEFAULT_COMMENT_START)
    rule = '{0}{1}'.format(marker, '=' * (width + 1))

    framed = [rule]
    framed.extend('{0} {1}'.format(marker, text) for text in body)
    framed.append(rule)
    return '\n'.join(framed)
# utilities
def readfile(path):
    """Return the complete content of the file at *path* as raw bytes."""
    with open(path, 'rb') as stream:
        data = stream.read()
    return data
def writefile(path, cnt):
    """Write the text *cnt* to *path* as UTF-8, preceded by a UTF-8 BOM.

    An existing file at *path* is overwritten.
    """
    payload = codecs.BOM_UTF8 + cnt.encode('utf-8')
    with open(path, 'wb') as stream:
        stream.write(payload)
# parse arguments
# Option specifications handed to getopt: a ':' after a short option and a
# '=' after a long option mark options that take a value.
SHORT_OPT_PATTERN = 'ho:s:m:l:'
LONG_OPT_PATTERN = (
    'output=',
    'style=',
    'makefile=',
    'help',
    'list-style',
    'linenos=',
)
def usage():
    """Print the command-line help text to standard output."""
    help_lines = (
        'Usage: prj2doc [选项] ... [输入通配符] ...',
        'Written by 平芜泫 <[email protected]>。',
        '',
        '如果没有指定输入通配符,那么当前目录、以及子目录下的所有文件将被选择。\n'
        '否则将在当前目录和子目录下搜索所有符合通配符的文件。如果所有被选择的\n'
        '文件中存在 Makefile,那么文件的排列顺序将参照 Makefile 中第一次出现的\n'
        '顺序。',
        '',
        ' -o, --output= 输出文档的路径。文档类型会根据扩展名猜测。',
        ' 如果没有指定,则输出 project.html 和 project.doc。',
        ' 支持的扩展名:html, doc, tex。',
        ' 注:doc 只在 Windows 下可用,且转换后需要手工微调。',
        ' -m, --makefile= 指定一个 Makefile 文件。',
        ' -s, --style= 设定代码高亮的配色方案。默认为 colorful。',
        ' --list-style 列出所有支持的配色方案。',
        ' -l, --lineno=[on/off] 打开或关闭源文件每一行的行号。',
        ' -h, --help 显示这个信息。',
    )
    for text in help_lines:
        print(text)
def listStyles():
    """Print the names of all available highlighting styles on one line."""
    style_names = get_all_styles()
    print(' '.join(style_names))
# Parse the command line.  getopt reports long options WITHOUT their trailing
# '=' (e.g. '--output') and resolves unambiguous prefixes to the full name,
# so the documented '--lineno=' spelling arrives here as '--linenos'.
try:
    optlist, args = getopt.getopt(sys.argv[1:], SHORT_OPT_PATTERN,
                                  LONG_OPT_PATTERN)
except getopt.GetoptError as ex:
    # Unknown option or missing value: report it and show the help text
    # instead of terminating with a traceback.
    print(ex)
    usage()
    sys.exit(2)

OUTPUT = []                 # output paths requested with -o / --output
MKPATTERN = '*Makefile*'    # wildcard used to locate the Makefile
MAKEFILE = None             # path of the Makefile, filled in while scanning
STYLE = 'colorful'          # name of the highlighting style
LINENOS = True              # whether source line numbers are emitted


def GET_LINENOS(fmt):
    """Return True when line numbers are enabled (*fmt* is not consulted)."""
    return bool(LINENOS)


for (k, v) in optlist:
    if k in ('-o', '--output'):
        OUTPUT.append(v)
    elif k in ('-s', '--style'):
        STYLE = v
    elif k == '--list-style':
        # Compared with ==: an `in` test against a bare string is a substring
        # test, which also matched '-l'.
        listStyles()
        sys.exit(0)
    elif k in ('-l', '--linenos', '--lineno'):
        LINENOS = (v.lower() == 'on')
    elif k in ('-m', '--makefile'):
        MKPATTERN = v
    elif k in ('-h', '--help'):
        usage()
        sys.exit(0)

# Positional arguments are the input wildcards.
PATTERNS = args
if not PATTERNS:
    PATTERNS = DEFAULT_PATTERNS
if not OUTPUT:
    OUTPUT = ['project.html']
    if sys.platform == 'win32':
        OUTPUT.append('project.doc')
# scan input files
# Files matching any of these wildcards are never used as input or Makefile.
FORBIDS = ('prj2doc*', )


def scan_dir(path, file_list):
    """Recursively collect the input files found below *path*.

    Every regular entry whose name matches one of ``PATTERNS`` (and none of
    ``FORBIDS``) is appended once to *file_list* as a normcase'd path.  The
    first entry matching ``MKPATTERN`` is stored in the global ``MAKEFILE``.
    The files of a directory are handled before its sub-directories.

    Entries are visited in sorted order so that the result (in particular
    which Makefile is picked) does not depend on the order in which the
    operating system lists the directory.
    """
    global MAKEFILE
    sub_dirs = []
    # first list all files under the directory
    for name in sorted(os.listdir(path)):
        full_path = os.path.join(path, name)
        if os.path.isdir(full_path):
            sub_dirs.append(full_path)
            continue
        if any(fnmatch.fnmatch(name, pattern) for pattern in FORBIDS):
            continue
        # any(): a file matching several patterns must be listed only once,
        # otherwise it would appear several times in the generated document.
        if any(fnmatch.fnmatch(name, pattern) for pattern in PATTERNS):
            file_list.append(os.path.normcase(full_path))
        if MAKEFILE is None and fnmatch.fnmatch(name, MKPATTERN):
            MAKEFILE = os.path.normcase(full_path)
    # then recursively scan
    for sub_dir in sub_dirs:
        scan_dir(sub_dir, file_list)
INPUTS = []
print('正在扫描目录下的所有源文件...')
scan_dir('.', INPUTS)

# check Makefile
# phrase_map maps the base name (no directory, no extension) of each existing
# file mentioned in the Makefile to the position of its first mention; the
# Makefile itself is placed at position 0.  It drives the ordering of the
# input files.
phrase_map = {}
if MAKEFILE is not None:
    # The words are deliberately NOT lower-cased: os.path.normcase already
    # folds case on platforms where file names are case-insensitive, and
    # lower-casing everywhere made mixed-case file names unfindable (and
    # unmatched against INPUTS) on case-sensitive file systems.
    phrases = [MAKEFILE] + readfile(MAKEFILE).split()
    for phindex, p in enumerate(phrases):
        p = os.path.normcase(p)
        if os.path.isfile(p):
            n = os.path.splitext(os.path.split(p)[1])[0]
            # setdefault keeps the index of the first mention
            phrase_map.setdefault(n, phindex)
def ext_compare(x, y):
    """Three-way comparison of two file extensions.

    '.h' always sorts before '.cpp'; every other pair is compared as plain
    strings.  Returns -1, 0 or 1.
    """
    if x == '.h' and y == '.cpp':
        return -1
    if x == '.cpp' and y == '.h':
        return 1
    # Portable replacement for the builtin cmp(), which only exists on
    # Python 2.
    return (x > y) - (x < y)
def filename_compare(x, y):
    """Three-way comparison of two input paths for the document order.

    Only the base name and the extension of each path are considered.
    Files whose base name is recorded in ``phrase_map`` sort before files
    that are not, and among themselves by their recorded index; the other
    files sort by base name.  Ties are broken by ``ext_compare`` on the
    extensions.  Returns a negative number, zero or a positive number.
    """
    def three_way(a, b):
        # Portable replacement for the builtin cmp(), which only exists on
        # Python 2.
        return (a > b) - (a < b)

    x_base, x_ext = os.path.splitext(os.path.split(x)[1])
    y_base, y_ext = os.path.splitext(os.path.split(y)[1])
    xi = phrase_map.get(x_base)
    yi = phrase_map.get(y_base)

    # Exactly one of the two is known to the Makefile: it goes first.
    if xi is not None and yi is None:
        return -1
    if xi is None and yi is not None:
        return 1

    if xi is not None:
        ret = three_way(xi, yi)
    else:
        ret = three_way(x_base, y_base)
    if ret != 0:
        return ret
    return ext_compare(x_ext, y_ext)
# Order the inputs with the three-way comparator.  Passing a comparison
# function positionally to list.sort() only works on Python 2; wrapping it
# with functools.cmp_to_key (available since Python 2.7) yields the same
# ordering on Python 2 and Python 3.
import functools
INPUTS.sort(key=functools.cmp_to_key(filename_compare))
# convert via MS Office
try:
    import win32com.client
    WIN32_SUPPORT = True
except ImportError:
    # win32com is not importable: no .doc conversion is attempted.
    WIN32_SUPPORT = False

CONV_TEMP = None    # intermediate HTML file fed to Word
CONV_LIST = []      # requested .doc outputs, produced from CONV_TEMP

if WIN32_SUPPORT:
    # define convert function
    def html2doc(htmlPath, docPath):
        """Convert the HTML file *htmlPath* to *docPath* through Word.

        The document is closed and the Word instance is quit even when
        opening or saving fails, so a failed conversion does not leave an
        orphaned Word process behind.  The original exception propagates.
        """
        fs_encoding = sys.getfilesystemencoding()
        word = win32com.client.Dispatch('Word.Application')
        try:
            doc = word.Documents.Open(
                os.path.abspath(htmlPath).encode(fs_encoding))
            try:
                # NOTE(review): FileFormat=0 is presumably wdFormatDocument
                # of Word's WdSaveFormat enumeration -- confirm.
                doc.SaveAs(os.path.abspath(docPath).encode(fs_encoding),
                           FileFormat=0)
            finally:
                doc.Close()
        finally:
            word.Quit()

    # process list
    # .doc outputs cannot be written directly: they are taken out of OUTPUT
    # and replaced by a single temporary HTML file that is converted later.
    CONV_TEMP = 'prj2doc.temp.html'
    new_output = []
    for out_path in OUTPUT:
        if os.path.splitext(out_path)[1].lower() == '.doc':
            CONV_LIST.append(out_path)
        else:
            new_output.append(out_path)
    if CONV_LIST:
        new_output.append(CONV_TEMP)
    OUTPUT = new_output
# create formatters & lexers for output files
FORMATTERS = {}     # output path -> Pygments formatter
CONTENTS = {}       # input path -> raw file content
LEXERS = {}         # input path -> Pygments lexer
print('读取源文件,并载入代码高亮引擎...')
try:
    STYLE = get_style_by_name(STYLE)
except ClassNotFound:
    print('未定义的配色方案 {0}。'.format(STYLE))
    sys.exit(10)
for o in OUTPUT:
    try:
        # The options are passed to the constructor on purpose: a formatter
        # derives its style data while it is being constructed, so assigning
        # `style` on the finished object comes too late to change the
        # colours it emits.
        FORMATTERS[o] = get_formatter_for_filename(o, style=STYLE,
                                                   encoding='utf-8')
    except ClassNotFound:
        print('不支持的输出格式 {0}。'.format(o))
        sys.exit(12)
def front_tab_to_space(x):
    """Return *x* with each leading tab replaced by a space.

    Only the run of tabs at the very start of the string is affected; tabs
    that follow any other character are kept.
    """
    remainder = x.lstrip('\t')
    leading_tabs = len(x) - len(remainder)
    return ' ' * leading_tabs + remainder
# Load every input file once and pick a lexer for it.  Unreadable or empty
# files are skipped; a file without a recognisable syntax keeps its content
# but gets no lexer, so it is skipped later on.
for i in INPUTS:
    try:
        cnt = readfile(i)
    except (IOError, OSError) as ex:
        print('无法读取源文件 {0}:{1}。'.format(i, ex))
        # Do not fall through: reading the file again for the lexer would
        # raise the same error, this time unhandled.
        continue
    if not cnt:
        continue
    CONTENTS[i] = cnt
    try:
        # Reuse the bytes that were just read instead of reading twice.
        l = guess_lexer_for_filename(i, cnt)
        l.encoding = 'utf-8'
        LEXERS[i] = l
    except ClassNotFound:
        print('不能确定源文件 {0} 的语法类型。'.format(i))
if not CONTENTS:
    print('没有找到任何非空输入文件。')
    sys.exit(0)
# generating sources for each file
# Encoding names reported by chardet that are decoded with a different codec.
CHARDET_REPLACE = {'gb2312': 'gb18030', 'gbk': 'gb18030'}


def detect_encoding(cnt):
    """Return the lower-cased name of the encoding to use for *cnt*.

    The name comes from chardet; when chardet reports no encoding, the file
    system encoding is used.  Names listed in ``CHARDET_REPLACE`` are
    replaced by their substitute.
    """
    guessed = chardet.detect(cnt)['encoding']
    chosen = sys.getfilesystemencoding() if guessed is None else guessed
    name = chosen.lower()
    return CHARDET_REPLACE.get(name, name)
# Highlight every input once per output file.  HIGHLIGHTS[o] alternates a
# highlighted header with the highlighted body of the same source file;
# HIGHLIGHT_STYLES[o] holds the style definitions of formatters that provide
# them.
HIGHLIGHTS = {o: [] for o in OUTPUT}
HIGHLIGHT_STYLES = {}
for k in INPUTS:
    if k not in CONTENTS or k not in LEXERS:
        continue
    print('正在处理 {0} ...'.format(k))
    lexer = LEXERS[k]
    cnt = CONTENTS[k]
    encoding = detect_encoding(cnt)
    if encoding not in ('gb18030', 'utf-8'):
        encoding = sys.getfilesystemencoding() # Special hack!!!
    # 'replace': the encoding is only a guess, and a single undecodable byte
    # must not abort the whole run.
    cnt = cnt.decode(encoding, 'replace')
    if os.path.splitext(k)[1].lower() in ALLOW_WHITESPACE_FILTER:
        cnt = '\n'.join([front_tab_to_space(x) for x in cnt.split('\n')])
    # The header does not depend on the output file: build it once.
    header = generate_header(k, lexer.name, cnt.count('\n') + 1)
    for o in OUTPUT:
        if o not in FORMATTERS:
            continue
        f = FORMATTERS[o]
        # header: never numbered
        f.linenos = False
        HIGHLIGHTS[o].append(highlight(header, lexer, f).decode('utf-8'))
        # body: no line numbers and no background for the temporary file
        # that is handed to the Word conversion
        f.linenos = GET_LINENOS(f) if (o != CONV_TEMP) else False
        f.nobackground = (o == CONV_TEMP)
        HIGHLIGHTS[o].append(highlight(cnt, lexer, f).decode('utf-8'))
        # style
        if o not in HIGHLIGHT_STYLES and hasattr(f, 'get_style_defs'):
            HIGHLIGHT_STYLES[o] = f.get_style_defs('')
# combining outputs
# Skeleton of the combined HTML document.  It is filled in with str.format,
# so the literal braces of the CSS rules are doubled; the only substitution
# fields are {style} (extra style definitions) and {body} (the highlighted
# sources).
COMBINE_TEMPLATE_HTML = '''
<html>
<head>
<title>Project Document</title>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8;" />
<style>
pre {{ margin: 3px 2px; }}
.linenodiv {{
background: #eeeeee;
padding-right: 1px;
margin-right: 2px;
text-align: right;
}}
* {{
font-size: 13px;
font-family: WenQuanYi Micro Hei Mono, 微软雅黑, Droid Sans, DejaVu Sans Mono, monospace;
}}
{style}
</style>
</head>
<body>
{body}
</body>
</html>
'''.strip()
def combine_html(outputs, path):
    """Assemble the complete HTML document for the output file *path*.

    *outputs* holds the highlighted fragments in pairs (header, body); each
    pair becomes one paragraph, and the paragraphs are inserted into the
    HTML template together with the style definitions recorded for *path*
    (an empty string when there are none).
    """
    paragraphs = [
        '<p>{0}\n{1}</p>'.format(outputs[start], outputs[start + 1])
        for start in range(0, len(outputs), 2)
    ]
    document_body = '\n<p> </p>\n'.join(paragraphs)
    style_defs = HIGHLIGHT_STYLES.get(path, '')
    return COMBINE_TEMPLATE_HTML.format(body=document_body, style=style_defs)
def combine_other(outputs, path):
    """Join the highlighted fragments with newlines; *path* is not used."""
    separator = '\n'
    return separator.join(outputs)
# Combiner selected by the lower-cased extension of the output file; every
# extension that is not listed is handled by combine_other.
COMBINE_TABLE = {'.html': combine_html, '.htm': combine_html}
print('将结果写入指定的输出 ...')
for o in OUTPUT:
    try:
        extension = os.path.splitext(o)[1].lower()
        combiner = COMBINE_TABLE.get(extension, combine_other)
        writefile(o, combiner(HIGHLIGHTS[o], o))
    except Exception as ex:
        # A failure on one output is reported and the next one is tried.
        print('写入文件 {0} 失败:{1}。'.format(o, ex))
# do office convert
# Turn the temporary HTML file into every requested .doc file.  When a
# conversion fails, the HTML is kept next to the temporary file as
# "<target>.html" so the user can convert it by hand.
if WIN32_SUPPORT and os.path.isfile(CONV_TEMP):
    try:
        conv_body = readfile(CONV_TEMP).decode('utf-8')
        for cv in CONV_LIST:
            try:
                html2doc(CONV_TEMP, cv)
            except Exception:
                writefile(os.path.join(os.path.dirname(CONV_TEMP), cv + ".html"), conv_body)
                print('转换文件为 {0} 失败,保留中间文件 {0}.html。'.format(cv))
    finally:
        # Always remove the temporary file, even when writing the fallback
        # copy fails.
        os.remove(CONV_TEMP)