-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathindex.py
executable file
·140 lines (121 loc) · 4.26 KB
/
index.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#!/usr/bin/env python3
# Copyright: Harald Schilly <[email protected]>
# License: Apache 2.0
from pprint import pprint
import yaml
import json
import os
import itertools as it
# Directory that contains this script. It is handed to init_doc() and
# resolve_references() as the path prefix
# to make all "src" absolute paths!
ROOT = os.path.dirname(os.path.abspath(__file__))
# Work from ROOT, so that the files this script opens by relative name
# (the referenced yaml files, 'index.yaml', 'index.json') are resolved
# against the script's directory instead of the caller's working directory.
os.chdir(ROOT)
# TODO this is silly code, please fix it ...
def update_meta(meta, new_meta):
    '''
    Merge the metadata of a referenced index into ``meta`` (in place).

    A simple dict update would overwrite/remove entries, therefore the
    ``tags``, ``licenses`` and ``categories`` mappings are merged one by
    one. A section which ``meta`` does not have yet is created on demand.
    All other keys of ``new_meta`` are ignored.

    :param meta: metadata dict that receives the entries (modified in place)
    :param new_meta: metadata dict whose sections are merged into ``meta``
    :raises AssertionError: if ``new_meta`` defines a category key that is
        already known to ``meta``; in that case ``meta`` is left untouched
    '''
    if 'categories' in new_meta:
        # if you introduce a new category, it must be unique
        # (checked before anything is merged, to avoid a half-updated meta)
        known = meta.get('categories', {})
        clashes = [nm for nm in new_meta['categories'] if nm in known]
        assert not clashes, 'categories defined twice: {}'.format(clashes)
    for section in ('tags', 'licenses', 'categories'):
        if section in new_meta:
            # setdefault: the receiving meta may not have this section yet
            meta.setdefault(section, {}).update(new_meta[section])
# TODO this is just for a unique id for each document. maybe make it stable?
# ID numbers the generated "doc-<n>" ids, all_ids remembers every id that is
# in use (explicit and generated) across all calls of init_doc.
ID = it.count(0)
all_ids = set()


def init_doc(docs, prefix):
    '''
    Prepare freshly loaded documents (in place).

    The ``src`` and, if present, ``thumbnail`` path of each document is
    joined onto ``prefix``. A document without an ``id`` gets a generated
    one of the form ``doc-<n>``; every id is registered in ``all_ids``.

    :param docs: iterable of document dicts, each of them must have ``src``
    :param prefix: path that is put in front of ``src`` and ``thumbnail``
    :raises AssertionError: if an explicit ``id`` is already in use
    :raises KeyError: if a document has no ``src``
    '''
    for doc in docs:
        doc['src'] = os.path.join(prefix, doc['src'])
        if 'thumbnail' in doc:
            doc['thumbnail'] = os.path.join(prefix, doc['thumbnail'])
        if 'id' in doc:
            assert doc['id'] not in all_ids, \
                'duplicate document id "{}"'.format(doc['id'])
        else:
            # an explicit id seen earlier may look like a generated one,
            # hence keep counting until an unused id comes up
            newid = 'doc-{}'.format(next(ID))
            while newid in all_ids:
                newid = 'doc-{}'.format(next(ID))
            doc['id'] = newid
        all_ids.add(doc['id'])
def resolve_references(meta, docs, prefix=''):
    '''
    Recursively pull in the index files listed in ``meta['references']``.

    For each reference the yaml file is loaded (first yaml document is its
    metadata, the remaining ones are documents), the new documents are
    initialized, nested references are resolved, and finally the metadata
    is merged into ``meta`` and the documents are appended to ``docs``.
    The ``references`` key is removed from ``meta`` afterwards.

    :param meta: metadata dict, modified in place
    :param docs: list of document dicts, extended in place
    :param prefix: directory of the file ``meta`` was loaded from; the
        references are read relative to it and it is the base of the path
        prefix for the documents of the referenced files
    :return: the tuple ``(meta, docs)``, i.e. the same objects passed in
    '''
    # append new documents and merge meta
    if 'references' in meta:
        for ref in meta['references']:
            # Each reference gets its own prefix. Rebinding ``prefix``
            # itself would leak the directory of one reference into all
            # of its following siblings.
            ref_prefix = os.path.join(prefix, os.path.dirname(ref))
            print("resolve_references prefix={}".format(ref_prefix))
            # Read the file relative to the referring file, consistent with
            # how ref_prefix is derived. load_all is lazy, hence unpack
            # while the file is still open; the handle is closed afterwards.
            with open(os.path.join(prefix, ref)) as ref_file:
                new_meta, *new_docs = yaml.load_all(ref_file,
                                                    Loader=yaml.SafeLoader)
            init_doc(new_docs, ref_prefix)
            resolve_references(new_meta, new_docs, prefix=ref_prefix)
            update_meta(meta, new_meta)
            docs.extend(new_docs)
        del meta['references']
    return meta, docs
def consistency_checks(meta, docs):
    '''
    Validate all documents against the metadata.

    Each document must have a ``title``, a ``category`` known to
    ``meta['categories']`` and a ``src`` that ends with a slash and exists
    (relative paths are looked up below ``ROOT``). Only the keys listed in
    ``allowed_keys`` are permitted, each tag must be known to
    ``meta['tags']`` and a ``thumbnail``, if given, must exist. Finally,
    every category and tag in ``meta`` must have a ``name``.

    :param meta: metadata dict with the ``categories`` and ``tags`` mappings
    :param docs: iterable of document dicts
    :raises AssertionError: on the first violated check, with a message
        that names the offending document, category or tag
    :raises KeyError: if ``meta`` has no ``categories`` or ``tags``
    '''
    print('done. running consistency checks ...')
    cats = meta['categories']
    tags = meta['tags']
    allowed_keys = {
        'id',
        'src',
        'title',
        'description',
        'website',
        'author',
        'license',
        'category',
        'tags',
        'thumbnail',
        'subdir',
        'preview',
    }
    for doc in docs:
        # only used for messages; .get keeps the diagnostics working even
        # for a document that never got an id
        doc_id = doc.get('id')
        # must come first: the title is part of all following messages
        assert 'title' in doc, "doc {} misses a title".format(doc_id)
        title = doc['title']
        print('checking {}: {}'.format(doc_id, title))
        unknown = [k for k in doc if k not in allowed_keys]
        assert not unknown, 'doc "{}" has unknown keys {} -- keys: {}'.format(
            title, unknown, list(doc.keys()))
        assert 'category' in doc, f"doc {title} misses category"
        assert 'src' in doc, f"doc {title} misses src"
        src = doc['src']
        assert src.endswith(
            '/'
        ), f'doc "{title}" src must end with a slash to signal it is a directory. single files will be supported later ...'
        assert os.path.exists(
            os.path.join(ROOT, src)
        ), f'doc "{title}" src path "{src}" does not exist!'
        category = doc['category']
        assert category in cats, \
            f'Category "{category}" of document "{doc_id}" not in meta.categories'
        # "or ()" also covers an empty "tags:" entry, which yaml loads as None
        for t in doc.get('tags') or ():
            assert t in tags, f'Tag "{t}" of document "{doc_id}" not in meta.tags'
        if 'thumbnail' in doc:
            thumbnail = doc['thumbnail']
            assert os.path.exists(
                thumbnail
            ), f'Thumbnail {thumbnail} for {doc_id} does not exist'
    for key, cat in cats.items():
        assert 'name' in cat, f'category "{key}" misses a name'
    for key, tag in tags.items():
        assert 'name' in tag, f'tag "{key}" misses a name'
def debug(meta, docs):
    '''
    Debugging aid: pretty-print the metadata under the heading "META:" and
    then each document under the heading "DOCS:".
    '''
    sections = (
        ("META:", (meta,)),
        ("DOCS:", docs),
    )
    for heading, entries in sections:
        print(heading)
        for entry in entries:
            pprint(entry)
def export_json(meta, docs):
    '''
    Dump the metadata and the documents as one JSON object (keys
    ``metadata`` and ``documents``) into ``index.json`` in the current
    working directory.
    '''
    payload = {'metadata': meta, 'documents': docs}
    with open('index.json', 'w') as target:
        json.dump(payload, target, indent=1)
def main(index_fn):
    '''
    Build ``index.json`` from the yaml index file ``index_fn``.

    The first yaml document of the file is the metadata, all remaining
    ones are documents. They are initialized, the referenced index files
    are merged in, everything is checked and finally exported as JSON.

    :param index_fn: file name of the top-level yaml index
    '''
    # load_all is lazy, hence unpack while the file is still open; the
    # with-block makes sure the handle is closed afterwards
    with open(index_fn) as index_file:
        meta, *docs = yaml.load_all(index_file, Loader=yaml.SafeLoader)
    init_doc(docs, ROOT)
    resolve_references(meta, docs, prefix=ROOT)
    #debug(meta, docs)
    consistency_checks(meta, docs)
    export_json(meta, docs)
    print('all done.')
# Script entry point: build the index from the top-level index.yaml.
if "__main__" == __name__:
    main('index.yaml')