d_lexer.py
# function: turn source characters into tokens
# input:  source program text
# output: token stream (and a parallel value stream)
# by OL 2016.01.05
import sys
import re
RESV = 'RESERVED'
NUM = 'NUM'
ID = 'ID'
# Patterns are tried in order: the first pattern that matches at the current
# position wins, so longer operators (==, >=, <=, !=) must come before their
# one-character prefixes (=, >, <), and keywords use \b so that e.g. "iffy"
# lexes as an identifier rather than "if" + "fy".
token_exprs = [
    (r'[ ]+', None),             # spaces
    (r'\n+', None),              # newlines
    (r'\t+', None),              # tabs
    (r'#[^\n]*', None),          # comments
    (r'==', RESV),
    (r'=', RESV),
    (r'\(', RESV),
    (r'\)', RESV),
    (r';', RESV),
    (r'\+', RESV),
    (r'-', RESV),
    (r'\*', RESV),
    (r'/', RESV),
    (r'>=', RESV),
    (r'<=', RESV),
    (r'!=', RESV),
    (r'<', RESV),
    (r'>', RESV),
    (r'and\b', RESV),
    (r'or\b', RESV),
    (r'xor\b', RESV),
    (r'not\b', RESV),
    (r'if\b', RESV),
    (r'else\b', RESV),
    (r'while\b', RESV),
    (r'for\b', RESV),
    (r'[0-9]+', NUM),
    (r'[A-Za-z][A-Za-z0-9_]*', ID),
]  # all token patterns the lexer recognizes
def lex(characters, token_exprs):
    # Scan left to right; at each position try the patterns in order and
    # take the first one that matches.
    pos = 0
    tokens = []
    values = []
    while pos < len(characters):
        match = None
        for pattern, tag in token_exprs:
            regex = re.compile(pattern)
            match = regex.match(characters, pos)
            if match:
                text = match.group(0)
                if tag:
                    token = [text, tag]
                    tokens.append(token)
                    # value stream: literal text for reserved words,
                    # just the tag for numbers and identifiers
                    if tag == 'RESERVED':
                        values.append(text)
                    elif tag == 'NUM':
                        values.append(tag)
                    elif tag == 'ID':
                        values.append(tag)
                    else:
                        values.append(text)
                break
        if not match:
            sys.stderr.write('Illegal character: %s\n' % characters[pos])
            sys.exit(1)
        else:
            pos = match.end(0)
    return tokens, values
def d_lex(characters):
    return lex(characters, token_exprs)
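
# --- Minimal usage sketch (not part of the original file) ---
# Feeds a small sample program to d_lex and prints the token and value
# streams. The sample source below is only an assumed example of the
# input language this lexer targets; adjust it as needed.
if __name__ == '__main__':
    sample = 'x = 10 + 2\nif x >= 3\n    y = x * 2\n'
    tokens, values = d_lex(sample)
    for text, tag in tokens:
        print(text, tag)
    print(values)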