-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathAutomata.py
612 lines (522 loc) · 21.4 KB
/
Automata.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
"""Automata.py
Manipulation of and conversions between regular expressions,
deterministic finite automata, and nondeterministic finite automata.
D. Eppstein, UC Irvine, November 2003.
"""
from Util import arbitrary_item
import sys
import operator
import unittest
from PartitionRefinement import PartitionRefinement
from Sequence import Sequence
# Hack for Python 3 compatibility
try:
unicode
except:
unicode = str
class LanguageError(Exception): pass
def Language(A):
"""Convert automaton A into an object describing its language.
This is distinct from class RegularLanguage in case we
want to later add other types of automaton and nonregular languages.
"""
return A.language()
class RegularLanguage:
"""Object representing the language recognized by a DFA or NFA.
Available operations are testing whether a string is in the language,
logical combinations, and subset and equality testing.
"""
def __init__(self,arg):
if isinstance(arg,FiniteAutomaton):
self.recognizer = arg
elif isinstance(arg,(str,unicode)):
self.recognizer = RegExp(arg)
else:
raise LanguageError("Unrecognized constructor for RegularLanguage")
def __contains__(self,inputsequence):
return self.recognizer(inputsequence)
def __eq__(self,other):
if not isinstance(other,RegularLanguage):
return None
return self.recognizer.minimize() == other.recognizer.minimize()
def __ne__(self,other):
return not (self == other)
def __le__(self,other):
return not(self &~ other)
def __ge__(self,other):
return not(other &~ self)
def __lt__(self,other):
return self <= other and self != other
def __gt__(self,other):
return self >= other and self != other
def __invert__(self):
"""Complement (with respect to alphabet) of language."""
return Language(self.recognizer.complement())
def __and__(self,other):
"""Intersection of two languages with the same alphabet."""
if not isinstance(other,RegularLanguage):
raise LanguageError("Unable to intersect nonregular language")
return Language(self.recognizer.intersection(other.recognizer))
def __or__(self,other):
"""Union of two languages with the same alphabet."""
if not isinstance(other,RegularLanguage):
raise LanguageError("Unable to intersect nonregular language")
return Language(self.recognizer.union(other.recognizer))
def __xor__(self,other):
"""Symmetric difference of two languages with the same alphabet."""
if not isinstance(other,RegularLanguage):
raise LanguageError("Unable to intersect nonregular language")
return Language(self.recognizer.symmetricDifference(other.recognizer))
def __nonzero__(self):
"""Is this the empty language?"""
for x in self.recognizer.states():
if x.isfinal():
return True
return False
class FiniteAutomaton:
"""Base class for DFA and NFA. This class should not be instantiated
on its own, but dispatches methods that are appropriate to both types
of automaton by calling .asDFA() or .asNFA() to convert the automaton
to the appropriate type. All automaton instances should include the
following instance variables and methods:
- x.initial: initial state (for DFA) or set of states (for NFA)
- x.alphabet: set of input symbols accepted by the automaton
- x.transition(state,symbol): result of transition function,
either a single state (for a DFA) or set of states (for an NFA)
- x.isfinal(state): whether the state is an accepting state
- x.asDFA(): return an equivalent DFA
- x.asNFA(): return an equivalent NFA
"""
initial = alphabet = transition = isfinal = asDFA = asNFA = None
def __len__(self):
"""How many states does this automaton have?"""
return len(list(self.states()))
def __call__(self,symbols):
"""Test whether sequence of symbols is accepted by the DFA."""
return self.asDFA()(symbols)
def language(self):
"""Form language object for language recognized by automaton."""
return RegularLanguage(self)
def states(self):
"""Generate all states reachable from initial state."""
return self.asNFA().states()
def pprint(self,output=sys.stdout):
"""Pretty-print this automaton to an output stream."""
return self.asNFA().pprint(output)
def minimize(self):
"""Return smallest equivalent DFA."""
return _MinimumDFA(self.asDFA())
def reverse(self):
"""Construct NFA for reversal of original NFA's language."""
return _ReverseNFA(self.asNFA())
def renumber(self,offset=0):
"""Replace complicated state objects by small integers."""
return _RenumberNFA(self.asNFA(),offset=offset)
def RegExp(self):
"""Return equivalent regular expression."""
return self.asNFA().RegExp()
def complement(self):
"""Make automaton recognizing complement of given automaton's language."""
return _ComplementDFA(self.asDFA())
def union(self,other):
"""Make automaton recognizing union of two automata's languages."""
return _ProductDFA(self.asDFA(),other.asDFA(),operator.or_)
def intersection(self,other):
"""Make automaton recognizing union of two automata's languages."""
return _ProductDFA(self.asDFA(),other.asDFA(),operator.and_)
def symmetricDifference(self,other):
"""Make automaton recognizing union of two automata's languages."""
return _ProductDFA(self.asDFA(),other.asDFA(),operator.xor)
class DFA(FiniteAutomaton):
"""Base class for deterministic finite automaton. Subclasses are
responsible for filling out the details of the initial state, alphabet,
and transition function.
"""
def asDFA(self):
return self
def asNFA(self):
return _NFAfromDFA(self)
def __call__(self,symbols):
"""Test whether sequence of symbols is accepted by the DFA."""
state = self.initial
for symbol in symbols:
if symbol not in self.alphabet:
raise LanguageError("Symbol " + repr(symbol) +
" not in input alphabet")
state = self.transition(state,symbol)
return self.isfinal(state)
def __eq__(self,other):
"""Report whether these two DFAs have equivalent states."""
if not isinstance(other,DFA) or len(self) != len(other) \
or self.alphabet != other.alphabet:
return False
equivalences = {self.initial:other.initial}
unprocessed = [self.initial]
while unprocessed:
x = unprocessed.pop()
y = equivalences[x]
for c in self.alphabet:
xc = self.transition(x,c)
yc = other.transition(y,c)
if xc not in equivalences:
equivalences[xc] = yc
unprocessed.append(xc)
elif equivalences[xc] != yc:
return False
return True
def __ne__(self,other):
"""Report whether these two DFAs have equivalent states."""
return not (self == other)
class NFA(FiniteAutomaton):
"""Base class for nondeterministic finite automaton. Subclasses are
responsible for filling out the details of the initial state, alphabet,
and transition function. Note that the NFAs defined here do not allow
epsilon-transitions. Results of self.initial and self.transition are
assumed to be represented as frozenset instances.
"""
def asNFA(self):
return self
def asDFA(self):
return _DFAfromNFA(self)
def states(self):
visited = set()
unvisited = set(self.initial)
while unvisited:
state = arbitrary_item(unvisited)
yield state
unvisited.remove(state)
visited.add(state)
for symbol in self.alphabet:
unvisited |= self.transition(state,symbol) - visited
def pprint(self,output=sys.stdout):
"""Pretty-print this NFA to an output stream."""
for state in self.states():
adjectives = []
if state in self.initial:
adjectives.append("initial")
if self.isfinal(state):
adjectives.append("accepting")
if not [c for c in self.alphabet if self.transition(state,c)]:
adjectives.append("terminal")
if not adjectives:
print >>output, state
else:
print >>output, state, "(" + ", ".join(adjectives) + ")"
for c in self.alphabet:
for neighbor in self.transition(state,c):
print >>output, " --[" + str(c) + "]-->", neighbor
def RegExp(self):
"""Convert to regular expression and return as a string.
See Sipser for an explanation of this algorithm."""
# create artificial initial and final states
initial = object()
final = object()
states = {initial,final} | set(self.states())
# 2d matrix of expressions connecting each pair of states
expr = {}
for x in states:
for y in states:
expr[x,y] = None
for x in self.states():
if x in self.initial:
expr[initial,x] = ''
if self.isfinal(x):
expr[x,final] = ''
expr[x,x] = ''
for x in self.states():
for c in self.alphabet:
for y in self.transition(x,c):
if expr[x,y]:
expr[x,y] += '+' + str(c)
else:
expr[x,y] = str(c)
# eliminate states one at a time
for s in self.states():
states.remove(s)
for x in states:
for y in states:
if expr[x,s] is not None and expr[s,y] is not None:
xsy = []
if expr[x,s]:
xsy += self._parenthesize(expr[x,s])
if expr[s,s]:
xsy += self._parenthesize(expr[s,s],True) + ['*']
if expr[s,y]:
xsy += self._parenthesize(expr[s,y])
if expr[x,y] is not None:
xsy += ['+',expr[x,y] or '()']
expr[x,y] = ''.join(xsy)
return expr[initial,final]
def _parenthesize(self,expr,starring=False):
"""Return list of strings with or without parens for use in RegExp.
This is only for the purpose of simplifying the expressions returned,
by omitting parentheses or other expression features when unnecessary;
it would always be correct simply to return ['(',expr,')'].
"""
if len(expr) == 1 or (not starring and '+' not in expr):
return [expr]
elif starring and expr.endswith('+()'):
return ['(',expr[:-3],')'] # +epsilon redundant when starring
else:
return ['(',expr,')']
class _DFAfromNFA(DFA):
"""Conversion of NFA to DFA. We create a DFA state for each set
of NFA states. A DFA state is final if it contains at least one
final NFA set, and the transition function for a DFA state is the
union of the transition functions of the NFA states it contains.
"""
def __init__(self,N):
self.initial = frozenset(N.initial)
self.alphabet = N.alphabet
self.NFA = N
def transition(self,stateset,symbol):
result = set()
for state in stateset:
result |= self.NFA.transition(state,symbol)
return frozenset(result)
def isfinal(self,stateset):
for state in stateset:
if self.NFA.isfinal(state):
return True
return False
class _NFAfromDFA(NFA):
"""Conversion of DFA to NFA. We convert the initial state and the
results of each transition function into single-element sets.
"""
def __init__(self,D):
self.initial = frozenset([D.initial])
self.alphabet = D.alphabet
self.DFA = D
def transition(self,state,symbol):
return frozenset([self.DFA.transition(state,symbol)])
def isfinal(self,state):
return self.DFA.isfinal(state)
class RegExp(NFA):
"""Convert regular expression to NFA."""
def __init__(self,expr):
self.expr = expr
self.pos = 0
self.nstates = 0
self.expect = {}
self.successor = {}
self.alphabet = set()
self.initial,penultimate,epsilon = self.expression()
final = self.newstate(None)
for state in penultimate:
self.successor[state].add(final)
self.final = frozenset([final])
if epsilon:
self.final = self.final | self.initial
def transition(self,state,c):
"""Implement NFA transition function."""
if c != self.expect[state]:
return frozenset()
else:
return self.successor[state]
def isfinal(self,state):
"""Implement NFA acceptance test."""
return state in self.final
# Recursive-descent parser for regular expressions.
# Each function uses self.pos as a pointer into self.expr,
# updates self.expect and self.successor,
# and returns a tuple (initial,penultimate,epsilon), where
# initial = the initial states of the subexpression
# penultimate = states one step away from an accepting state
# epsilon = true if the subexpression accepts the empty string
def epsilon(self):
"""Parse an empty string and return an empty automaton."""
return frozenset(),frozenset(),True
def newstate(self,expect):
"""Allocate a new state in which we expect to see the given letter."""
state = self.nstates
self.successor[state] = set()
self.expect[state] = expect
self.nstates += 1
return state
def base(self):
"""Parse a subexpression that can be starred: single letter or group."""
if self.pos == len(self.expr) or self.expr[self.pos] == ')':
return self.epsilon()
if self.expr[self.pos] == '(':
self.pos += 1
ret = self.expression()
if self.pos == len(self.expr) or self.expr[self.pos] != ')':
raise LanguageError("Close paren expected at char " + str(self.pos))
self.pos += 1
return ret
if self.expr[self.pos] == '\\':
self.pos += 1
if self.pos == len(self.expr):
raise RegExpError("Character expected after backslash")
self.alphabet.add(self.expr[self.pos])
state = self.newstate(self.expr[self.pos])
self.pos += 1
state = frozenset([state])
return state,state,False
def factor(self):
"""Parse a catenable expression: base or starred base."""
initial,penultimate,epsilon = self.base()
while self.pos < len(self.expr) and self.expr[self.pos] == '*':
self.pos += 1
for state in penultimate:
self.successor[state] |= initial
epsilon = True
return initial,penultimate,epsilon
def term(self):
"""Parse a summable expression: factor or concatenation."""
initial,penultimate,epsilon = self.factor()
while self.pos < len(self.expr) and self.expr[self.pos] not in ')+':
Fi,Fp,Fe = self.factor()
for state in penultimate:
self.successor[state] |= Fi
if epsilon:
initial = initial | Fi
if Fe:
penultimate = penultimate | Fp
else:
penultimate = Fp
epsilon = epsilon and Fe
return initial,penultimate,epsilon
def expression(self):
"""Parse a whole regular expression or grouped subexpression."""
initial,penultimate,epsilon = self.term()
while self.pos < len(self.expr) and self.expr[self.pos] == '+':
self.pos += 1
Ti,Tp,Te = self.term()
initial = initial | Ti
penultimate = penultimate | Tp
epsilon = epsilon or Te
return initial,penultimate,epsilon
class LookupNFA(NFA):
"""Construct NFA with precomputed lookup table of transitions."""
def __init__(self,alphabet,initial,ttable,final):
self.alphabet = alphabet
self.initial = frozenset(initial)
self.ttable = ttable
self.final = frozenset(final)
def transition(self,state,symbol):
return frozenset(self.ttable[state,symbol])
def isfinal(self,state):
return state in self.final
def _RenumberNFA(N,offset=0):
"""Replace NFA state objects with small integers."""
replacements = {}
for x in N.states():
replacements[x] = offset
offset += 1
initial = [replacements[x] for x in N.initial]
ttable = {}
for state in N.states():
for symbol in N.alphabet:
ttable[replacements[state],symbol] = [replacements[x]
for x in N.transition(state,symbol)]
final = [replacements[x] for x in N.states() if N.isfinal(x)]
return LookupNFA(N.alphabet,initial,ttable,final)
class _ProductDFA(DFA):
"""DFA that simulates D1 and D2 and combines their outputs with op."""
def __init__(self,D1,D2,op):
if D1.alphabet != D2.alphabet:
raise LanguageError("DFAs have incompatible alphabets")
self.alphabet = D1.alphabet
self.initial = (D1.initial,D2.initial)
self.D1 = D1
self.D2 = D2
self.op = op
def transition(self,state,symbol):
s1,s2 = state
return self.D1.transition(s1,symbol), \
self.D2.transition(s2,symbol)
def isfinal(self,state):
s1,s2 = state
f1 = self.D1.isfinal(s1) and 1 or 0
f2 = self.D2.isfinal(s2) and 1 or 0
return self.op(f1,f2)
def _ReverseNFA(N):
"""Construct NFA for reversal of original NFA's language."""
initial = [s for s in N.states() if N.isfinal(s)]
ttable = {(s,c):[] for s in N.states() for c in N.alphabet}
for s in N.states():
for c in N.alphabet:
for t in N.transition(s,c):
ttable[t,c].append(s)
return LookupNFA(N.alphabet,initial,ttable,N.initial)
class _ComplementDFA(DFA):
"""DFA for complementary language."""
def __init__(self,D):
self.DFA = D
self.initial = D.initial
self.alphabet = D.alphabet
def transition(self,state,symbol):
return self.DFA.transition(state,symbol)
def isfinal(self,state):
return not self.DFA.isfinal(state)
class _MinimumDFA(DFA):
"""Construct equivalent DFA with minimum number of states,
using Hopcroft's O(ns log n) partition-refinement algorithm.
"""
def __init__(self,D):
# refine partition of states by reversed neighborhoods
N = D.reverse()
P = PartitionRefinement(D.states())
P.refine([s for s in D.states() if D.isfinal(s)])
unrefined = Sequence(P,key=id)
while unrefined:
part = arbitrary_item(unrefined)
unrefined.remove(part)
for symbol in D.alphabet:
neighbors = set()
for state in part:
neighbors |= N.transition(state,symbol)
for new,old in P.refine(neighbors):
if old in unrefined or len(new) < len(old):
unrefined.append(new)
else:
unrefined.append(old)
# convert partition to DFA
P.freeze()
self.partition = P
self.initial = P[D.initial]
self.alphabet = D.alphabet
self.DFA = D
def transition(self,state,symbol):
rep = arbitrary_item(state)
return self.partition[self.DFA.transition(rep,symbol)]
def isfinal(self,state):
rep = arbitrary_item(state)
return self.DFA.isfinal(rep)
# If called as standalone routine, run some unit tests
class RegExpTest(unittest.TestCase):
# tuples (L,[strings in L],[strings not in L])
languages = [
(RegularLanguage("0"), ["0"], ["","00"]),
(RegularLanguage("(10+0)*"), ["","0","010"], ["1"]),
(RegularLanguage("(0+1)*1(0+1)(0+1)"), ["000100"], ["0011"]),
]
def testMembership(self):
"""membership tests for RegularLanguage(expression)"""
for L,Li,Lx in self.languages:
for S in Li:
self.assertTrue(S in L)
for S in Lx:
self.assertTrue(S not in L)
def testComplement(self):
"""membership tests for ~RegularLanguage"""
for L,Li,Lx in self.languages:
L = ~L
for S in Lx:
self.assertTrue(S in L)
for S in Li:
self.assertTrue(S not in L)
def testEquivalent(self):
"""test that converting NFA->expr->NFA produces same language"""
for L1,Li,Lx in self.languages:
L2 = RegularLanguage(L1.recognizer.RegExp())
self.assertEqual(L1,L2)
def testInequivalent(self):
"""test that different regular languages are recognized as different"""
for i in range(len(self.languages)):
for j in range(i):
self.assertNotEqual(self.languages[i][0],
self.languages[j][0])
if __name__ == "__main__":
unittest.main()