import os
import re
import shutil
import misaka
import houdini
import yaml
from datetime import datetime, timezone, timedelta, date
from urllib.parse import urljoin
from pygments import highlight
from pygments.formatters.html import HtmlFormatter
from pygments.lexers import get_lexer_by_name
from pygments.util import ClassNotFound
from config import config as C
class HighlighterRenderer(misaka.HtmlRenderer):
    def blockcode(self, text, lang):
        if not lang:
            return '\n<pre><code>{}</code></pre>\n'.format(houdini.escape_html(text.strip()))
        try:
            lexer = get_lexer_by_name(lang)
        except ClassNotFound:
            # unknown language name: fall back to a plain, escaped code block
            return '\n<pre><code>{}</code></pre>\n'.format(houdini.escape_html(text.strip()))
        formatter = HtmlFormatter()
        return highlight(text, lexer, formatter)
renderer = HighlighterRenderer()
def make_abs_url(root_url, rel_url):
"""
Make an absolute URL from a relative URL
:param root_url: root URL
:param rel_url: relative URL
:return: absolute URL
"""
return urljoin(root_url, rel_url)
def read_md_file(file_path, split=True):
    """
    Read the full content of a markdown file
    :param file_path: path of the file
    :param split: whether to split the YAML head from the markdown body
    :return: a list of two elements (YAML head, markdown body) if split is True,
             or the whole file as a single string if split is False
    """
    with open(file_path, 'r', encoding='utf-8') as stream:
        content = stream.read().split('\n\n', 1) if split else stream.read()
    return content
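# Illustrative sketch (assumed layout, not taken from the original repo): a post file
# is expected to start with a YAML head, then a blank line, then the markdown body:
#
#   title: Hello World
#   date: 2019-01-01 10:00:00
#   tags: [python, flask]
#   categories: blog
#
#   The **markdown** body starts after the first blank line. <!--more-->
#   Everything after the "Read More" flag can be cut on list pages.
#
# read_md_file() splits on the first blank line and returns [head, body].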
def read_md_file_head(file_path):
    """
    Read the YAML head of a markdown file
    :param file_path: path of the file
    :return: the YAML head as a string
    """
    with open(file_path, 'r', encoding='utf-8') as stream:
        last_line = None
        line = None
        head = ''
        while not (line == last_line and line == '\n'):
            line = stream.readline()
            if not line:
                # reached end of file without finding the separator; stop to avoid looping forever
                break
            head += line
            last_line = line
    return head
def read_md_file_body(file_path):
"""
Read the markdown body of a markdown file
:param file_path: path of file
:return: the markdown body
"""
return read_md_file(file_path, split=True)[1]
def render_yaml(yaml_str):
    """
    Render a YAML string
    :param yaml_str: YAML string to render
    :return: a dict
    """
    # safe_load avoids constructing arbitrary Python objects and the PyYAML
    # warning for load() without an explicit Loader
    return yaml.safe_load(yaml_str)
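# Illustrative example (assumed input, not from the original repo):
#   render_yaml('title: Hello World\ntags: [python, flask]')
#   -> {'title': 'Hello World', 'tags': ['python', 'flask']}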
def render_md(md_str):
"""
Render a markdown string
:param md_str: markdown string to render
:return: html text
"""
return misaka.Markdown(
renderer,
extensions=('fenced-code',
'tables',
'footnotes',
'autolink',
'strikethrough',
'underline',
'highlight',
'quote',
'superscript')
)(md_str)
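# Illustrative example (assumed input, not from the original repo):
#   render_md('# Hello') returns roughly '<h1>Hello</h1>\n'; fenced code blocks
#   are routed through HighlighterRenderer.blockcode above for syntax highlighting.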
# regex to match post files in the expected "yyyy-m(m)-d(d)-name.md" format
post_file_regex = re.compile(
    r'(\d{4}|\d{2})-((1[0-2])|(0?[1-9]))-(([12][0-9])|(3[01])|(0?[1-9]))-[\w-]+\.(md|MD|markdown|mdown)$'
)
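# Illustrative filenames (assumed, not from the original repo): the regex accepts
# names such as '2019-01-01-hello-world.md' or '19-1-9-first-post.markdown', and
# rejects names without a leading date such as 'about.md'.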
# regex to match the "<!--more-->" (aka "Read More") flag in md file
more_flag_regex = re.compile(
r'<!--\s*more\s*-->'
)
def get_posts_list():
    """
    Get a list of all post files whose names match the format "yyyy-m(m)-d(d)-name.m(ark)d(own)"
    :return: list of matching post file names, in reverse filename order (newest first)
    """
    file_list = []
    for file in sorted(os.listdir('posts'), reverse=True):
        if post_file_regex.match(file):
            file_list.append(file)
    return file_list
def default_post_info(file):
"""
Get default post info from post file name
:param file: post file name with extension
:return: default post info (a dict)
"""
y, m, d, file_name = os.path.splitext(file)[0].split('-', 3)
# set default post info
entry = {
'layout': 'post',
'author': C.author,
'email': C.email,
# default date, title, url from file name
'date': datetime(int(y), int(m), int(d)),
'title': ' '.join(map(lambda w: w.capitalize(), file_name.split('-'))),
'url': '/'.join(('/post', y, m, d, file_name))
}
return entry
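# Illustrative example (assumed filename, not from the original repo): for
# '2019-01-01-hello-world.md' the defaults are title 'Hello World',
# url '/post/2019/01/01/hello-world' and date datetime(2019, 1, 1); the YAML
# head parsed in parse_posts() can override any of these.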
def parse_posts(start=0, count=0, f_list=None, tag=None, category=None, cut_by_read_more=False):
    """
    Parse posts in the "posts" directory
    :param start: index of the first post to parse
    :param count: number of posts to parse (0 for all)
    :param f_list: specific file list to parse instead of scanning the whole directory
    :param tag: only keep posts with this tag
    :param category: only keep posts with this category
    :param cut_by_read_more: whether to cut the markdown body at the "Read More" flag
    :return: list of parsed posts (a list of dicts)
    """
if not f_list:
# count can't be less than 0
if count < 0:
raise ValueError('The number of posts to parse cannot be less than 0.')
# filter the file list
file_list = get_posts_list()
if count > 0:
            end = start + min(count, len(file_list))  # min() guards against count running past the end of the list
file_list = file_list[start:end]
else:
# parse specific file list
file_list = f_list
# filter tag and category
new_file_list = []
if tag is not None or category is not None:
for file in file_list:
file_path = os.path.join('posts', file)
yml_dict = render_yaml(read_md_file_head(file_path))
if yml_dict:
# make sure that the tags and categories are lists
if 'categories' in yml_dict:
yml_dict['categories'] = to_list(yml_dict['categories'])
if 'tags' in yml_dict:
yml_dict['tags'] = to_list(yml_dict['tags'])
if ('tags' in yml_dict and tag in yml_dict['tags']) or \
('categories' in yml_dict and category in yml_dict['categories']):
new_file_list.append(file)
file_list = new_file_list
entries = []
for file in file_list:
entry = default_post_info(file)
file_path = os.path.join('posts', file)
yml, md = read_md_file(file_path)
# get more detailed post info from yaml head
post_info = render_yaml(yml)
if post_info:
for k, v in post_info.items():
entry[k] = v
        # fix the datetime if it is a bare date object or lacks tzinfo
if 'date' in entry:
entry['date'] = fix_datetime(entry['date'])
if 'updated' in entry:
entry['updated'] = fix_datetime(entry['updated'])
# fix categories and tags
if 'categories' in entry:
entry['categories'] = to_list(entry['categories'])
if 'tags' in entry:
entry['tags'] = to_list(entry['tags'])
# render markdown body to html
entry['read_more'] = cut_by_read_more # default set to cut_by_read_more
if cut_by_read_more:
# find the "Read More" flag position and cut it
indices = [x.start(0) for x in more_flag_regex.finditer(md)]
if indices:
md = md[:indices[0]]
else:
# didn't cut
entry['read_more'] = False
entry['body'] = render_md(md)
entries.append(entry)
return entries
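# Illustrative calls (assumed arguments, not from the original repo):
#   parse_posts(0, 5)            -> the five newest posts
#   parse_posts(tag='python')    -> every post tagged 'python'
#   parse_posts(f_list=['2019-01-01-hello-world.md'])  -> one specific file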
def parse_posts_page(page_id):
    """
    Parse posts on the page with the given page_id
    :param page_id: id of the page to parse
    :return: an info dict to be sent to templates
    """
pg = {
'has_newer': False,
'newer_url': '',
'has_older': False,
'older_url': '',
'entries': []
}
count = C.entry_count_one_page
file_list = get_posts_list()
    if count == 0 and page_id != 1:
        # count == 0 means all posts are shown on page 1, so any other page shows nothing
        return pg
# show specific number of posts or all (only on page 1)
start = (page_id - 1) * count
end = min(page_id * count, len(file_list))
    # determine whether there are newer or older posts
if start > 0:
pg['has_newer'] = True
pg['newer_url'] = '/'.join(('/page', str(page_id - 1))) if page_id - 1 != 1 else '/'
if end < len(file_list) and end != 0:
pg['has_older'] = True
pg['older_url'] = '/'.join(('/page', str(page_id + 1)))
pg['entries'] = parse_posts(start, count, cut_by_read_more=C.get('support_read_more', False))
return pg
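# Illustrative example (assumed config value, not from the original repo): with
# C.entry_count_one_page == 5, parse_posts_page(2) returns posts 6-10, sets
# newer_url to '/' and, if more posts exist, older_url to '/page/3'.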
def default_custom_page_info(file):
"""
Get default custom page info from markdown filename
:param file: page markdown file name with extension
:return: default page info (a dict)
"""
filename = os.path.splitext(file)[0]
    # set default page info
entry = {
'layout': 'page',
'author': C.author,
'email': C.email,
# default title, url from file name
'title': ' '.join(map(lambda w: w.capitalize(), filename.split('-')))
}
return entry
def parse_custom_page(file_path):
"""
Parse custom page in markdown
:param file_path: markdown file path
:return: parsed custom page (a dict)
"""
page = default_custom_page_info(os.path.basename(file_path))
yml, md = read_md_file(file_path)
# get more detailed page info from yaml head
page_info = render_yaml(yml)
if page_info:
for k, v in page_info.items():
page[k] = v
# render markdown body to html
page['body'] = render_md(md)
return page
def timezone_from_str(tz_str):
    """
    Convert a timezone string to a timezone object
    :param tz_str: string in the format 'UTC±hh:mm', e.g. 'UTC+08:00'
    :return: a timezone object
    """
    m = re.match(r'UTC([+-]\d{1,2}):(\d{2})', tz_str)
    delta_h = int(m.group(1))
    delta_m = int(m.group(2)) if delta_h >= 0 else -int(m.group(2))
    return timezone(timedelta(hours=delta_h, minutes=delta_m))
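# Illustrative example (assumed config value, not from the original repo):
#   timezone_from_str('UTC+08:00') -> timezone(timedelta(hours=8)),
#   which is how C.timezone is interpreted in fix_datetime() below.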
def fix_datetime(dt):
    """
    Fix a value that is supposed to be a datetime
    :param dt: datetime (or bare date) to fix
    :return: a timezone-aware datetime object, or None if dt is None
    """
    # a bare date (but not a datetime, which subclasses date) becomes midnight of that day
    if isinstance(dt, date) and not isinstance(dt, datetime):
        dt = datetime(dt.year, dt.month, dt.day)
    if dt is not None and dt.tzinfo is None:
        dt = dt.replace(tzinfo=timezone_from_str(C.timezone))
    return dt
def to_list(item):
    """
    Wrap item in a list if it is not already a list
    :param item: item to convert
    :return: a list, or None if item is None
    """
    if item is not None and not isinstance(item, list):
        return [item]
    return item
def extension_of_markdown_file(file_path_without_ext):
    """
    Find a markdown file with the given path, trying each known extension
    :param file_path_without_ext: file path without extension
    :return: the extension of the file that exists, or None if no such file exists
    """
    for ext in ('md', 'MD', 'mdown', 'markdown'):
        if os.path.exists(file_path_without_ext + '.' + ext):
            return ext
    return None
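# Illustrative example (assumed path, not from the original repo):
#   extension_of_markdown_file('pages/about') -> 'md' if 'pages/about.md' exists,
#   otherwise the first of 'MD', 'mdown', 'markdown' that exists, or None.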
def get_labels_of_posts(label_type):
    """
    Get labels (tags or categories) of all posts
    :param label_type: 'tags' or 'categories'
    :return: list of labels with post count
    """
    file_list = get_posts_list()
    count_dict = {}
    for file in file_list:
        file_path = os.path.join('posts', file)
        yml_dict = render_yaml(read_md_file_head(file_path))
        if yml_dict:
            # posts that do not define this label type are simply skipped
            labels = to_list(yml_dict.get(label_type))
            for label in labels or []:
                count_dict[label] = count_dict.get(label, 0) + 1
    result = []
    for name, count in count_dict.items():
        result.append(dict(name=name, count=count))
    return result
def get_categories():
"""
Get all categories
:return: list of categories with post count
"""
return get_labels_of_posts('categories')
def get_tags():
"""
Get all tags
:return: list of tags with post count
"""
return get_labels_of_posts('tags')
def copytree(src, dst, symlinks=False, ignore=None):
    """
    Copy the contents of a directory into an existing destination directory
    (plain shutil.copytree requires that dst does not exist yet)
    :param src: source dir
    :param dst: destination dir
    :param symlinks: whether to copy symlinks as symlinks (passed through to shutil.copytree)
    :param ignore: ignore callable passed through to shutil.copytree
    """
    for item in os.listdir(src):
        s = os.path.join(src, item)
        d = os.path.join(dst, item)
        if os.path.isdir(s):
            shutil.copytree(s, d, symlinks, ignore)
        else:
            shutil.copy2(s, d)
def search_content(query_text, count_per_page, current_page):
    """
    Search for posts and pages which contain query_text
    :param query_text: text to query
    :param count_per_page: number of results per page
    :param current_page: 1-based index of the result page to return
    :return: an info dict with the matching entries and pagination info
    """
pg = {
'query': query_text,
'entries': [],
'has_next': False,
'next_url': '',
'has_prev': False,
'prev_url': ''
}
entries = []
    def append_if_needed(entry, yml, md, entry_type):
is_result = False
# get more detailed info from yaml head
info = render_yaml(yml)
if info:
for k, v in info.items():
entry[k] = v
if query_text.lower() in entry['title'].lower():
# title contains query text
is_result = True
elif query_text.lower() in md.lower():
# markdown body contains query text
is_result = True
if is_result:
            # fix the datetime if it is a bare date object or lacks tzinfo
if 'date' in entry:
entry['date'] = fix_datetime(entry['date'])
if 'updated' in entry:
entry['updated'] = fix_datetime(entry['updated'])
# fix categories and tags
if 'categories' in entry:
entry['categories'] = to_list(entry['categories'])
if 'tags' in entry:
entry['tags'] = to_list(entry['tags'])
            entry['type'] = entry_type
entries.append(entry)
# search for posts
post_files = get_posts_list()
for file in post_files:
entry = default_post_info(file)
yml, md = read_md_file(os.path.join('posts', file))
append_if_needed(entry, yml, md, 'post')
# search for custom pages
    def list_dir(dir_path):
        results = []
        for f in os.listdir(dir_path):
            full_rel_path = os.path.join(dir_path, f)
            if os.path.isdir(full_rel_path):
                results += list_dir(full_rel_path)
            elif f.endswith(('.md', '.MD', '.markdown', '.mdown')):
                results.append(full_rel_path)
        return results
custom_page_file_paths = list_dir('pages')
for file_path in custom_page_file_paths:
entry = default_custom_page_info(os.path.basename(file_path))
split_path = os.path.splitext(file_path)[0].split(os.sep)[1:]
        if split_path[-1] == 'index':
            # an 'index' page maps to the URL of its parent directory
            split_path[-1] = ''
entry['url'] = '/' + '/'.join(split_path)
yml, md = read_md_file(file_path)
append_if_needed(entry, yml, md, 'page')
total_count = len(entries)
start = count_per_page * (current_page - 1)
end = min(count_per_page * current_page, total_count)
if start < 0 or start >= total_count or end > total_count or end <= 0:
pg['entries'] = None
else:
pg['entries'] = entries[start:end]
if start > 0:
pg['has_prev'] = True
pg['prev_url'] = '/search?q=%s&c=%s&p=%s' % (query_text, count_per_page, current_page - 1)
if end < total_count:
pg['has_next'] = True
pg['next_url'] = '/search?q=%s&c=%s&p=%s' % (query_text, count_per_page, current_page + 1)
return pg
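# Illustrative call (assumed arguments, not from the original repo):
#   search_content('flask', 10, 1) returns the first page of up to ten posts and
#   custom pages whose title or markdown body contains 'flask', plus prev/next page info.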