forked from munificent/craftinginterpreters
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbook.py
510 lines (451 loc) · 13.3 KB
/
book.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Tracks metadata about the material in the book.
"""
from functools import total_ordering
import os
import re
# Matches the name in a `^code` tag and ignores the rest.
SNIPPET_TAG_PATTERN = re.compile(r'\s*\^code ([-a-z0-9]+).*')
TITLE_PATTERN = re.compile(r'\^title (.*)')
TOC = [
{
'name': '',
'chapters': [
{
'name': 'Crafting Interpreters',
'topics': [],
},
{
'name': 'Table of Contents',
'topics': [],
}
],
},
{
'name': 'Welcome',
'chapters': [
{
'name': 'Introduction',
'topics': [
'Why learn programming languages?',
'How this book is organized'
],
'design_note': "What's in a Name?"
},
{
'name': 'A Map of the Territory',
'topics': [
'Interpreters and compilers', 'Phases of a compiler',
'Transpilers', 'Just-in-time compilation'
],
},
{
'name': 'The Lox Language',
'topics': [
'Dynamic typing', 'Automatic memory management', 'Built-in types',
'Expressions', 'Statements', 'Object-orientation', 'Prototypes'
],
'design_note': "Expressions and Statements"
}
]
},
{
'name': 'A Tree-Walk Interpreter',
'chapters': [
{
'name': 'Scanning',
'topics': [
'Tokens', 'Token types', 'Lexical analysis', 'Regular languages',
'Lookahead', 'Reserved words', 'Error reporting'
],
'design_note': "Implicit Semicolons"
},
{
'name': 'Representing Code',
'topics': [
'Abstract syntax trees', 'Expression trees', 'Generating AST classes',
'The Visitor pattern', 'Pretty printing'
],
},
{
'name': 'Parsing Expressions',
'topics': [
'Expression nodes', 'Recursive descent', 'Precedence',
'Associativity', 'Primary expressions', 'Syntax errors'
],
'design_note': "Logic Versus History"
},
{
'name': 'Evaluating Expressions',
'topics': [
'The Interpreter pattern', 'Tree-walk interpretation',
'Subexpressions', 'Runtime errors', 'Type checking', 'Truthiness'
],
'design_note': 'Static and Dynamic Typing'
},
{
'name': 'Statements and State',
'topics': [
'Statement nodes', 'Blocks', 'Expression statements', 'Variables',
'Assignment', 'Lexical scope', 'Environments'
],
'design_note': 'Implicit Variable Declaration'
},
{
'name': 'Control Flow',
'topics': [
'If statements', 'While statements', 'For statements', 'Desugaring',
'Logical operators', 'Short-circuit evaluation'
],
'design_note': 'Spoonfuls of Syntactic Sugar'
},
{
'name': 'Functions',
'topics': [
'Function declarations', 'Formal parameters', 'Call expressions',
'Arguments', 'Return statements', 'Function objects', 'Closures',
'Arity', 'Native functions'
],
},
{
'name': 'Resolving and Binding',
'topics': ['Name resolution', 'Early binding', 'Static errors'],
},
{
'name': 'Classes',
'topics': [
'Class declarations', 'Fields', 'Properties',
'Get and set expressions', 'Constructors', 'Initializers', 'this',
'Method references'
],
'design_note': 'Prototypes and Power'
},
{
'name': 'Inheritance',
'topics': ['Superclasses', 'Overriding', 'Calling superclass methods'],
}
]
},
{
'name': 'A Bytecode Virtual Machine',
'chapters': [
{
'name': 'Chunks of Bytecode',
'topics': [
'Allocation', 'Dynamic arrays', 'Code chunks', 'Constant tables',
'Instruction arguments', 'Disassembly'
],
'done': False,
},
{
'name': 'A Virtual Machine',
'topics': [
'Bytecode instructions', 'The stack', 'Instruction pointer',
'Loading constants', 'Arithmetic instructions', 'Interpreter loop',
'Instruction dispatch'
],
'done': False,
},
{
'name': 'Scanning on Demand',
'topics': [
'Reading files', 'Token values', 'Source pointers', 'LL(k) grammars'
],
'done': False,
},
{
'name': 'Compiling Expressions',
'topics': [
'Pratt parsers', 'Binary operators', 'Unary operators', 'Precedence',
'Single-pass compilation', 'Code generation'
],
'done': False,
},
{
'name': 'Types of Values',
'topics': [
'Tagged unions', 'Boolean values', 'nil',
'Comparison and equality operators', 'Not operator', 'Runtime errors'
],
'done': False,
},
{
'name': 'Strings',
'topics': [
'Objects', 'Reference types', 'Heap tracing', 'Concatenation',
'Polymorphism'
],
'done': False,
},
{
'name': 'Hash Tables',
'topics': [
'Hash functions', 'String hashing', 'Linear probing',
'Rehashing', 'Reference equality', 'String interning'
],
'done': False,
},
{
'name': 'Global Variables',
'topics': [
'Statements', 'Variable declaration', 'Assignment',
'Global variables table'
],
'done': False,
},
{
'name': 'Local Variables',
'topics': [
'Blocks', 'Scope depth', 'Stack variables', 'Name resolution',
'Byte argument instructions'
],
'done': False,
},
{
'name': 'Jumping Forward and Back',
'topics': [
'Jump instructions', 'Conditional jumps', 'Control flow statements',
'Short-circuiting', 'Backpatching'
],
'done': False,
},
{
'name': 'Calls and Functions',
'topics': [
'Calling convention', 'Arguments', 'Call instructions',
'Native functions', 'Function declarations', 'Parameters',
'Return statements', 'Function objects', 'Call frames',
'Stack overflow'
],
'done': False,
},
{
'name': 'Closures',
'topics': [
'Upvalues', 'Resolving enclosing locals', 'Closure flattening',
'Capturing variables', 'Closing upvalues'
],
'done': False,
},
{
'name': 'Garbage Collection',
'topics': [
'Roots', 'Stress testing', 'Mark-sweep collection', 'Tracing',
'Tri-color marking', 'Weak references', 'Heap growth'
],
'done': False,
},
{
'name': 'Classes and Instances',
'topics': [
'Property expressions', 'Class declarations', 'Instances', 'Fields',
'Undefined fields'
],
'done': False,
},
{
'name': 'Methods and Initializers',
'topics': [
'Invocation expressions', 'This', 'Method declarations',
'Initializers', 'Bound methods'
],
'done': False,
},
{
'name': 'Superclasses',
'topics': [
'Method inheritance', 'Super invocations'
],
'done': False,
},
{
'name': 'Optimization',
'topics': [
'Benchmarking', 'Hash code masking', 'NaN tagging'
],
'done': False,
}
]
},
{
'name': 'Backmatter',
'chapters': [
{
'name': 'Appendix I',
'topics': [],
},
{
'name': 'Appendix II',
'topics': [],
}
],
},
]
def list_code_chapters():
"""Gets the list of titles of the chapters that have code."""
chapters = []
def walk_part(part):
for chapter in part['chapters']:
chapters.append(chapter['name'])
walk_part(TOC[2])
walk_part(TOC[3])
walk_part(TOC[4])
return chapters
CODE_CHAPTERS = list_code_chapters()
def roman(n):
"""Convert n to roman numerals."""
if n <= 3:
return "I" * n
elif n == 4:
return "IV"
elif n < 10:
return "V" + "I" * (n - 5)
else:
raise "Can't convert " + str(n) + " to Roman."
def number_chapters():
"""Determine the part or chapter numbers for each part or chapter."""
numbers = {}
part_num = 1
chapter_num = 1
in_matter = False
for part in TOC:
# Front- and backmatter have no names, pages, or numbers.
in_matter = part['name'] == '' or part['name'] == 'Backmatter'
if not in_matter:
numbers[part['name']] = roman(part_num)
part_num += 1
for chapter in part['chapters']:
if in_matter:
# Front- and backmatter chapters are specially numbered.
name = chapter['name']
if name == 'Appendix I':
numbers[chapter['name']] = 'A1'
elif name == 'Appendix II':
numbers[chapter['name']] = 'A2'
else:
numbers[chapter['name']] = ''
else:
numbers[chapter['name']] = chapter_num
chapter_num += 1
return numbers
NUMBERS = number_chapters()
def flatten_pages():
"""Flatten the tree of parts and chapters to a single linear list of pages."""
pages = []
types = []
for part in TOC:
# There are no part pages for the front- and backmatter.
if part['name']:
pages.append(part['name'])
types.append('Part')
for chapter in part['chapters']:
pages.append(chapter['name'])
types.append('Chapter')
return pages, types
PAGES, TYPES = flatten_pages()
def adjacent_page(title, offset):
'''The title of the page [offset] pages before or after [title].'''
page_index = PAGES.index(title) + offset
if page_index < 0 or page_index >= len(PAGES): return None
return PAGES[page_index]
def adjacent_type(title, offset):
'''Generate template data to link to the previous or next page.'''
page_index = PAGES.index(title) + offset
if page_index < 0 or page_index >= len(PAGES): return None
return TYPES[page_index]
def chapter_number(name):
"""
Given the name of a chapter (or part of end matter page), finds its number.
"""
if name in NUMBERS:
return NUMBERS[name]
# The chapter has no number. This is true for the appendices.
return ""
def get_language(name):
if CODE_CHAPTERS.index(name) < CODE_CHAPTERS.index("Chunks of Bytecode"):
return "java"
return "c"
def get_file_name(title):
"""
Given a title like "Hash Tables", converts it to the corresponding file
name like "hash-tables".
"""
if title == "Crafting Interpreters":
return "index"
if title == "Table of Contents":
return "contents"
# Hack. The introduction has a *subheader* named "Challenges" distinct from
# the challenges section. This function here is also used to generate the
# anchor names for the links, so handle that one specially so it doesn't
# collide with the real "Challenges" section.
if title == "Challenges":
return "challenges_"
title = title.lower().replace(" ", "-")
title = re.sub(r'[,.?!:/"]', '', title)
return title
def get_markdown_path(title):
return os.path.join('book', get_file_name(title) + '.md')
def get_short_name(name):
number = chapter_number(name)
first_word = name.split()[0].lower().replace(',', '')
if first_word == "a" or first_word == "the":
first_word = name.split()[1].lower()
if first_word == "user-defined":
first_word = "user"
return "chap{0:02d}_{1}".format(number, first_word)
@total_ordering
class SnippetTag:
def __init__(self, chapter, name, index):
self.chapter = chapter
self.name = name
self.chapter_index = chapter_number(chapter)
self.index = index
# Hackish. Always want "not-yet" to be the last tag even if it appears
# before a real tag. That ensures we can push it for other tags that have
# been named.
if name == "not-yet":
self.index = 9999
def __eq__(self, other):
return (isinstance(other, SnippetTag) and
self.chapter_index == other.chapter_index and
self.index < other.index)
def __lt__(self, other):
if self.chapter_index != other.chapter_index:
return self.chapter_index < other.chapter_index
return self.index < other.index
def __repr__(self):
return "Tag({}|{}: {} {})".format(
self.chapter_index, self.index, self.chapter, self.name)
def get_chapter_snippet_tags():
"""
Parses the snippet tags from every chapter. Returns a map of chapter names
to maps of snippet names to SnippetTags.
"""
chapters = {}
for chapter in CODE_CHAPTERS:
chapters[chapter] = get_snippet_tags(get_markdown_path(chapter))
# for chapter, tags in chapters.items():
# print(chapter)
# for tag in tags.values():
# print(" {}".format(tag))
return chapters
def get_snippet_tags(path):
"""
Parses the Markdown file at [path] and finds all of the `^code` tags.
Returns a map of snippet names to SnippetTags for them.
"""
tags = {}
with open(path, 'r') as input:
title = None
for line in input:
match = TITLE_PATTERN.match(line)
if match:
title = match.group(1)
match = SNIPPET_TAG_PATTERN.match(line)
if match:
if title == None:
raise Exception("Should have found title first.")
tags[match.group(1)] = SnippetTag(title, match.group(1), len(tags))
return tags