diff --git a/README.md b/README.md
index 2052777..8f8f54a 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,11 @@
-# lsbasi
-Let's Build A Simple Interpreter
+Source code for the series **Let's Build A Simple Interpreter**
+
++ [Let's Build A Simple Interpreter. Part 1.](http://ruslanspivak.com/lsbasi-part1/)
++ [Let's Build A Simple Interpreter. Part 2.](http://ruslanspivak.com/lsbasi-part2/)
++ [Let's Build A Simple Interpreter. Part 3.](http://ruslanspivak.com/lsbasi-part3/)
++ [Let's Build A Simple Interpreter. Part 4.](http://ruslanspivak.com/lsbasi-part4/)
++ [Let's Build A Simple Interpreter. Part 5.](http://ruslanspivak.com/lsbasi-part5/)
++ [Let's Build A Simple Interpreter. Part 6.](http://ruslanspivak.com/lsbasi-part6/)
++ [Let's Build A Simple Interpreter. Part 7.](http://ruslanspivak.com/lsbasi-part7/)
++ [Let's Build A Simple Interpreter. Part 8.](http://ruslanspivak.com/lsbasi-part8/)
++ [Let's Build A Simple Interpreter. Part 9.](http://ruslanspivak.com/lsbasi-part9/)
diff --git a/part1/calc1.py b/part1/calc1.py
new file mode 100644
index 0000000..e31f145
--- /dev/null
+++ b/part1/calc1.py
@@ -0,0 +1,131 @@
+# Token types
+#
+# EOF (end-of-file) token is used to indicate that
+# there is no more input left for lexical analysis
+INTEGER, PLUS, EOF = 'INTEGER', 'PLUS', 'EOF'
+
+
+class Token(object):
+    def __init__(self, ttype, value):
+        # token type: INTEGER, PLUS, or EOF
+        self.ttype = ttype
+        # token value: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, '+', or None
+        self.value = value
+
+    def __str__(self):
+        """String representation of the class instance.
+
+        Examples:
+            Token(INTEGER, 3)
+            Token(PLUS, '+')
+        """
+        return 'Token({ttype}, {value})'.format(
+            ttype=self.ttype,
+            value=repr(self.value)
+        )
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class Interpreter(object):
+    def __init__(self, text):
+        # client string input, e.g. "3+5"
+        self.text = text
+        # self.pos is an index into self.text
+        self.pos = 0
+        # current token instance
+        self.current_token = None
+
+    def error(self):
+        raise Exception('Error parsing input')
+
+    def get_next_token(self):
+        """Lexical analyzer (also known as scanner or tokenizer)
+
+        This method is responsible for breaking a sentence
+        apart into tokens. One token at a time.
+        """
+        text = self.text
+
+        # is self.pos index past the end of the self.text ?
+        # if so, then return EOF token because there is no more
+        # input left to convert into tokens
+        if self.pos > len(text) - 1:
+            return Token(EOF, None)
+
+        # get a character at the position self.pos and decide
+        # what token to create based on the single character
+        current_char = text[self.pos]
+
+        # if the character is a digit then convert it to
+        # integer, create an INTEGER token, increment self.pos
+        # index to point to the next character after the digit,
+        # and return the INTEGER token
+        if current_char.isdigit():
+            token = Token(INTEGER, int(current_char))
+            self.pos += 1
+            return token
+
+        if current_char == '+':
+            token = Token(PLUS, current_char)
+            self.pos += 1
+            return token
+
+        self.error()
+
+    def eat(self, token_type):
+        # compare the current token type with the passed token
+        # type and if they match then "eat" the current token
+        # and assign the next token to the self.current_token,
+        # otherwise raise an exception.
+        if self.current_token.ttype == token_type:
+            self.current_token = self.get_next_token()
+        else:
+            self.error()
+
+    def expr(self):
+        """expr -> INTEGER PLUS INTEGER"""
+        # set current token to the first token taken from the input
+        self.current_token = self.get_next_token()
+
+        # we expect the current token to be a single-digit integer
+        left = self.current_token
+        self.eat(INTEGER)
+
+        # we expect the current token to be a '+' token
+        op = self.current_token
+        self.eat(PLUS)
+
+        # we expect the current token to be a single-digit integer
+        right = self.current_token
+        self.eat(INTEGER)
+        # after the above call the self.current_token is set to
+        # EOF token
+
+        # at this point INTEGER PLUS INTEGER sequence of tokens
+        # has been successfully found and the method can just
+        # return the result of adding two integers, thus
+        # effectively interpreting client input
+        result = left.value + right.value
+        return result
+
+
+def main():
+    while True:
+        try:
+            try:
+                text = raw_input('calc> ')
+            except NameError:  # Python3
+                text = input('calc> ')
+        except EOFError:
+            break
+        if not text:
+            continue
+        interpreter = Interpreter(text)
+        result = interpreter.expr()
+        print(result)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/part1/factorial.pas b/part1/factorial.pas
new file mode 100644
index 0000000..16e771d
--- /dev/null
+++ b/part1/factorial.pas
@@ -0,0 +1,17 @@
+program factorial;
+
+function factorial(n: integer): longint;
+begin
+    if n = 0 then
+        factorial := 1
+    else
+        factorial := n * factorial(n - 1);
+end;
+
+var
+    n: integer;
+
+begin
+    for n := 0 to 16 do
+        writeln(n, '! = ', factorial(n));
+end.
diff --git a/part2/calc2.py b/part2/calc2.py
new file mode 100644
index 0000000..b9f3483
--- /dev/null
+++ b/part2/calc2.py
@@ -0,0 +1,155 @@
+# Token types
+# EOF (end-of-file) token is used to indicate that
+# there is no more input left for lexical analysis
+INTEGER, PLUS, MINUS, EOF = 'INTEGER', 'PLUS', 'MINUS', 'EOF'
+
+
+class Token(object):
+    def __init__(self, ttype, value):
+        # token type: INTEGER, PLUS, MINUS, or EOF
+        self.ttype = ttype
+        # token value: non-negative integer value, '+', '-', or None
+        self.value = value
+
+    def __str__(self):
+        """String representation of the class instance.
+
+        Examples:
+            Token(INTEGER, 3)
+            Token(PLUS, '+')
+        """
+        return 'Token({ttype}, {value})'.format(
+            ttype=self.ttype,
+            value=repr(self.value)
+        )
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class Interpreter(object):
+    def __init__(self, text):
+        # client string input, e.g. "3 + 5", "12 - 5", etc
+        self.text = text
+        # self.pos is an index into self.text
+        self.pos = 0
+        # current token instance
+        self.current_token = None
+        self.current_char = self.text[self.pos]
+
+    def error(self):
+        raise Exception('Error parsing input')
+
+    def advance(self):
+        """Advance the 'pos' pointer and set the 'current_char' variable."""
+        self.pos += 1
+        if self.pos > len(self.text) - 1:
+            self.current_char = None  # Indicates end of input
+        else:
+            self.current_char = self.text[self.pos]
+
+    def skip_whitespace(self):
+        while self.current_char is not None and self.current_char.isspace():
+            self.advance()
+
+    def integer(self):
+        """Return a (multidigit) integer consumed from the input."""
+        result = ''
+        while self.current_char is not None and self.current_char.isdigit():
+            result += self.current_char
+            self.advance()
+        return int(result)
+
+    def get_next_token(self):
+        """Lexical analyzer (also known as scanner or tokenizer)
+
+        This method is responsible for breaking a sentence
+        apart into tokens.
+        """
+        while self.current_char is not None:
+
+            if self.current_char.isspace():
+                self.skip_whitespace()
+                continue
+
+            if self.current_char.isdigit():
+                return Token(INTEGER, self.integer())
+
+            if self.current_char == '+':
+                self.advance()
+                return Token(PLUS, '+')
+
+            if self.current_char == '-':
+                self.advance()
+                return Token(MINUS, '-')
+
+            self.error()
+
+        return Token(EOF, None)
+
+    def eat(self, token_type):
+        # compare the current token type with the passed token
+        # type and if they match then "eat" the current token
+        # and assign the next token to the self.current_token,
+        # otherwise raise an exception.
+        if self.current_token.ttype == token_type:
+            self.current_token = self.get_next_token()
+        else:
+            self.error()
+
+    def expr(self):
+        """Parser / Interpreter
+
+        expr -> INTEGER PLUS INTEGER
+        expr -> INTEGER MINUS INTEGER
+        """
+        # set current token to the first token taken from the input
+        self.current_token = self.get_next_token()
+
+        # we expect the current token to be an integer
+        left = self.current_token
+        self.eat(INTEGER)
+
+        # we expect the current token to be either a '+' or '-'
+        op = self.current_token
+        if op.ttype == PLUS:
+            self.eat(PLUS)
+        else:
+            self.eat(MINUS)
+
+        # we expect the current token to be an integer
+        right = self.current_token
+        self.eat(INTEGER)
+        # after the above call the self.current_token is set to
+        # EOF token
+
+        # at this point either the INTEGER PLUS INTEGER or
+        # the INTEGER MINUS INTEGER sequence of tokens
+        # has been successfully found and the method can just
+        # return the result of adding or subtracting two integers,
+        # thus effectively interpreting client input
+        if op.ttype == PLUS:
+            result = left.value + right.value
+        else:
+            result = left.value - right.value
+        return result
+
+
+def main():
+    while True:
+        try:
+            try:
+                text = raw_input('calc> ')
+            except NameError:  # Python3
+                text = input('calc> ')
+        except EOFError:
+            break
+        if not text:
+            continue
+        interpreter = Interpreter(text)
+        result = 
interpreter.expr()
+            print(result)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/part2/test_calc2.py b/part2/test_calc2.py
new file mode 100644
index 0000000..61a96f1
--- /dev/null
+++ b/part2/test_calc2.py
@@ -0,0 +1,96 @@
+import unittest
+
+
+class CalcTestCase(unittest.TestCase):
+
+    def makeInterpreter(self, text):
+        from calc2 import Interpreter
+        interpreter = Interpreter(text)
+        return interpreter
+
+    def test_lexer_integer(self):
+        from calc2 import INTEGER
+        lexer = self.makeInterpreter('234')
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, INTEGER)
+        self.assertEqual(token.value, 234)
+
+    def test_lexer_plus(self):
+        from calc2 import PLUS
+        lexer = self.makeInterpreter('+')
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, PLUS)
+        self.assertEqual(token.value, '+')
+
+    def test_lexer_minus(self):
+        from calc2 import MINUS
+        lexer = self.makeInterpreter('-')
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, MINUS)
+        self.assertEqual(token.value, '-')
+
+    def test_lexer_eof(self):
+        from calc2 import EOF
+        lexer = self.makeInterpreter('-')
+        token = lexer.get_next_token()
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, EOF)
+
+    def test_lexer_whitespace(self):
+        from calc2 import INTEGER
+        lexer = self.makeInterpreter(' 23')
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, INTEGER)
+        self.assertEqual(token.value, 23)
+
+    def test_lexer_addition(self):
+        from calc2 import INTEGER, PLUS, EOF
+        lexer = self.makeInterpreter('2+3')
+
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, INTEGER)
+        self.assertEqual(token.value, 2)
+
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, PLUS)
+        self.assertEqual(token.value, '+')
+
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, INTEGER)
+        self.assertEqual(token.value, 3)
+
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, EOF)
+
+    def test_lexer_subtraction(self):
+        from calc2 import INTEGER, MINUS, EOF
+        lexer = self.makeInterpreter(' 27 - 7 ')
+
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, INTEGER)
+        self.assertEqual(token.value, 27)
+
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, MINUS)
+        self.assertEqual(token.value, '-')
+
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, INTEGER)
+        self.assertEqual(token.value, 7)
+
+        token = lexer.get_next_token()
+        self.assertEqual(token.ttype, EOF)
+
+    def test_interpreter_addition(self):
+        interpreter = self.makeInterpreter(' 23 + 7')
+        result = interpreter.expr()
+        self.assertEqual(result, 30)
+
+    def test_interpreter_subtraction(self):
+        interpreter = self.makeInterpreter(' 27 - 7 ')
+        result = interpreter.expr()
+        self.assertEqual(result, 20)
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/part3/calc3.py b/part3/calc3.py
new file mode 100644
index 0000000..5a041a9
--- /dev/null
+++ b/part3/calc3.py
@@ -0,0 +1,148 @@
+# Token types
+#
+# EOF (end-of-file) token is used to indicate that
+# there is no more input left for lexical analysis
+INTEGER, PLUS, MINUS, EOF = 'INTEGER', 'PLUS', 'MINUS', 'EOF'
+
+
+class Token(object):
+    def __init__(self, ttype, value):
+        # token type: INTEGER, PLUS, MINUS, or EOF
+        self.ttype = ttype
+        # token value: non-negative integer value, '+', '-', or None
+        self.value = value
+
+    def __str__(self):
+        """String representation of the class instance.
+
+        Examples:
+            Token(INTEGER, 3)
+            Token(PLUS, '+')
+        """
+        return 'Token({ttype}, {value})'.format(
+            ttype=self.ttype,
+            value=repr(self.value)
+        )
+
+    def __repr__(self):
+        return self.__str__()
+
+
+class Interpreter(object):
+    def __init__(self, text):
+        # client string input, e.g. 
"3 + 5", "12 - 5 + 3", etc + self.text = text + # self.pos is an index into self.text + self.pos = 0 + # current token instance + self.current_token = None + self.current_char = self.text[self.pos] + + ########################################################## + # Lexer code # + ########################################################## + def error(self): + raise Exception('Invalid syntax') + + def advance(self): + """Advance the `pos` pointer and set the `current_char` variable.""" + self.pos += 1 + if self.pos > len(self.text) - 1: + self.current_char = None # Indicates end of input + else: + self.current_char = self.text[self.pos] + + def skip_whitespace(self): + while self.current_char is not None and self.current_char.isspace(): + self.advance() + + def integer(self): + """Return a (multidigit) integer consumed from the input.""" + result = '' + while self.current_char is not None and self.current_char.isdigit(): + result += self.current_char + self.advance() + return int(result) + + def get_next_token(self): + """Lexical analyzer (also known as scanner or tokenizer) + + This method is responsible for breaking a sentence + apart into tokens. One token at a time. + """ + while self.current_char is not None: + + if self.current_char.isspace(): + self.skip_whitespace() + continue + + if self.current_char.isdigit(): + return Token(INTEGER, self.integer()) + + if self.current_char == '+': + self.advance() + return Token(PLUS, '+') + + if self.current_char == '-': + self.advance() + return Token(MINUS, '-') + + self.error() + + return Token(EOF, None) + + ########################################################## + # Parser / Interpreter code # + ########################################################## + def eat(self, token_type): + # compare the current token type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. 
+ if self.current_token.ttype == token_type: + self.current_token = self.get_next_token() + else: + self.error() + + def term(self): + """Return an INTEGER token value.""" + token = self.current_token + self.eat(INTEGER) + return token.value + + def expr(self): + """Arithmetic expression parser / interpreter.""" + # set current token to the first token taken from the input + self.current_token = self.get_next_token() + + result = self.term() + while self.current_token.ttype in (PLUS, MINUS): + token = self.current_token + if token.ttype == PLUS: + self.eat(PLUS) + result = result + self.term() + elif token.ttype == MINUS: + self.eat(MINUS) + result = result - self.term() + + return result + + +def main(): + while True: + try: + try: + text = raw_input('calc> ') + except NameError: # Python3 + text = input('calc> ') + except EOFError: + break + if not text: + continue + interpreter = Interpreter(text) + result = interpreter.expr() + print(result) + + +if __name__ == '__main__': + main() diff --git a/part4/calc4.py b/part4/calc4.py new file mode 100644 index 0000000..53e6519 --- /dev/null +++ b/part4/calc4.py @@ -0,0 +1,156 @@ +# Token types +# +# EOF (end-of-file) token is used to indicate that +# there is no more input left for lexical analysis +INTEGER, MUL, DIV, EOF = 'INTEGER', 'MUL', 'DIV', 'EOF' + + +class Token(object): + def __init__(self, ttype, value): + # token type: INTEGER, MUL, DIV, or EOF + self.ttype = ttype + # token value: non-negative integer value, '*', '/', or None + self.value = value + + def __str__(self): + """String representation of the class instance. + + Examples: + Token(INTEGER, 3) + Token(MUL, '*') + """ + return 'Token({ttype}, {value})'.format( + ttype=self.ttype, + value=repr(self.value) + ) + + def __repr__(self): + return self.__str__() + + +class Lexer(object): + def __init__(self, text): + # client string input, e.g. 
"3 * 5", "12 / 3 * 4", etc + self.text = text + # self.pos is an index into self.text + self.pos = 0 + self.current_char = self.text[self.pos] + + def error(self): + raise Exception('Invalid character') + + def advance(self): + """Advance the `pos` pointer and set the `current_char` variable.""" + self.pos += 1 + if self.pos > len(self.text) - 1: + self.current_char = None # Indicates end of input + else: + self.current_char = self.text[self.pos] + + def skip_whitespace(self): + while self.current_char is not None and self.current_char.isspace(): + self.advance() + + def integer(self): + """Return a (multidigit) integer consumed from the input.""" + result = '' + while self.current_char is not None and self.current_char.isdigit(): + result += self.current_char + self.advance() + return int(result) + + def get_next_token(self): + """Lexical analyzer (also known as scanner or tokenizer) + + This method is responsible for breaking a sentence + apart into tokens. One token at a time. + """ + while self.current_char is not None: + + if self.current_char.isspace(): + self.skip_whitespace() + continue + + if self.current_char.isdigit(): + return Token(INTEGER, self.integer()) + + if self.current_char == '*': + self.advance() + return Token(MUL, '*') + + if self.current_char == '/': + self.advance() + return Token(DIV, '/') + + self.error() + + return Token(EOF, None) + + +class Interpreter(object): + def __init__(self, lexer): + self.lexer = lexer + # set current token to the first token taken from the input + self.current_token = self.lexer.get_next_token() + + def error(self): + raise Exception('Invalid syntax') + + def eat(self, token_type): + # compare the current token type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. 
+ if self.current_token.ttype == token_type: + self.current_token = self.lexer.get_next_token() + else: + self.error() + + def factor(self): + """Return an INTEGER token value. + + factor : INTEGER + """ + token = self.current_token + self.eat(INTEGER) + return token.value + + def expr(self): + """Arithmetic expression parser / interpreter. + + expr : factor ((MUL | DIV) factor)* + factor : INTEGER + """ + result = self.factor() + + while self.current_token.ttype in (MUL, DIV): + token = self.current_token + if token.ttype == MUL: + self.eat(MUL) + result = result * self.factor() + elif token.ttype == DIV: + self.eat(DIV) + result = result / self.factor() + + return result + + +def main(): + while True: + try: + try: + text = raw_input('calc> ') + except NameError: # Python3 + text = input('calc> ') + except EOFError: + break + if not text: + continue + lexer = Lexer(text) + interpreter = Interpreter(lexer) + result = interpreter.expr() + print(result) + + +if __name__ == '__main__': + main() diff --git a/part4/parser.py b/part4/parser.py new file mode 100644 index 0000000..b6f8baf --- /dev/null +++ b/part4/parser.py @@ -0,0 +1,155 @@ +# Token types +# +# EOF (end-of-file) token is used to indicate that +# there is no more input left for lexical analysis +INTEGER, MUL, DIV, EOF = 'INTEGER', 'MUL', 'DIV', 'EOF' + + +class Token(object): + def __init__(self, ttype, value): + # token type: INTEGER, MUL, DIV, or EOF + self.ttype = ttype + # token value: non-negative integer value, '*', '/', or None + self.value = value + + def __str__(self): + """String representation of the class instance. + + Examples: + Token(INTEGER, 3) + Token(MUL, '*') + """ + return 'Token({ttype}, {value})'.format( + ttype=self.ttype, + value=repr(self.value) + ) + + def __repr__(self): + return self.__str__() + + +class Lexer(object): + def __init__(self, text): + # client string input, e.g. 
"3 * 5", "12 / 3 * 4", etc + self.text = text + # self.pos is an index into self.text + self.pos = 0 + self.current_char = self.text[self.pos] + + def error(self): + raise Exception('Invalid character') + + def advance(self): + """Advance the `pos` pointer and set the `current_char` variable.""" + self.pos += 1 + if self.pos > len(self.text) - 1: + self.current_char = None # Indicates end of input + else: + self.current_char = self.text[self.pos] + + def skip_whitespace(self): + while self.current_char is not None and self.current_char.isspace(): + self.advance() + + def integer(self): + """Return a (multidigit) integer consumed from the input.""" + result = '' + while self.current_char is not None and self.current_char.isdigit(): + result += self.current_char + self.advance() + return int(result) + + def get_next_token(self): + """Lexical analyzer (also known as scanner or tokenizer) + + This method is responsible for breaking a sentence + apart into tokens. One token at a time. + """ + while self.current_char is not None: + + if self.current_char.isspace(): + self.skip_whitespace() + continue + + if self.current_char.isdigit(): + return Token(INTEGER, self.integer()) + + if self.current_char == '*': + self.advance() + return Token(MUL, '*') + + if self.current_char == '/': + self.advance() + return Token(DIV, '/') + + self.error() + + return Token(EOF, None) + + +class Parser(object): + def __init__(self, lexer): + self.lexer = lexer + # set current token to the first token taken from the input + self.current_token = self.lexer.get_next_token() + + def error(self): + raise Exception('Invalid syntax') + + def eat(self, token_type): + # compare the current token type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. 
+ if self.current_token.ttype == token_type: + self.current_token = self.lexer.get_next_token() + else: + self.error() + + def factor(self): + """Parse integer. + + factor : INTEGER + """ + self.eat(INTEGER) + + def expr(self): + """Arithmetic expression parser. + + Grammar: + + expr : factor ((MUL | DIV) factor)* + factor : INTEGER + """ + self.factor() + + while self.current_token.ttype in (MUL, DIV): + token = self.current_token + if token.ttype == MUL: + self.eat(MUL) + self.factor() + elif token.ttype == DIV: + self.eat(DIV) + self.factor() + + def parse(self): + self.expr() + + +def main(): + while True: + try: + # To run under Python3 replace 'raw_input' call + # with 'input' + text = raw_input('calc> ') + except EOFError: + break + if not text: + continue + + parser = Parser(Lexer(text)) + parser.parse() + + +if __name__ == '__main__': + main() diff --git a/part4/test_interpreter.py b/part4/test_interpreter.py new file mode 100644 index 0000000..0876441 --- /dev/null +++ b/part4/test_interpreter.py @@ -0,0 +1,56 @@ +import unittest + + +class LexerTestCase(unittest.TestCase): + def makeLexer(self, text): + from calc4 import Lexer + lexer = Lexer(text) + return lexer + + def test_lexer_integer(self): + from calc4 import INTEGER + lexer = self.makeLexer('234') + token = lexer.get_next_token() + self.assertEqual(token.ttype, INTEGER) + self.assertEqual(token.value, 234) + + def test_lexer_mul(self): + from calc4 import MUL + lexer = self.makeLexer('*') + token = lexer.get_next_token() + self.assertEqual(token.ttype, MUL) + self.assertEqual(token.value, '*') + + def test_lexer_div(self): + from calc4 import DIV + lexer = self.makeLexer(' / ') + token = lexer.get_next_token() + self.assertEqual(token.ttype, DIV) + self.assertEqual(token.value, '/') + + +class InterpreterTestCase(unittest.TestCase): + def makeInterpreter(self, text): + from calc4 import Lexer, Interpreter + lexer = Lexer(text) + interpreter = Interpreter(lexer) + return interpreter + + def 
test_expression1(self): + interpreter = self.makeInterpreter('7 * 4 / 2') + result = interpreter.expr() + self.assertEqual(result, 14) + + def test_expression2(self): + interpreter = self.makeInterpreter('7 * 4 / 2 * 3') + result = interpreter.expr() + self.assertEqual(result, 42) + + def test_expression3(self): + interpreter = self.makeInterpreter('10 * 4 * 2 * 3 / 8') + result = interpreter.expr() + self.assertEqual(result, 30) + + +if __name__ == '__main__': + unittest.main() diff --git a/part4/test_parser.py b/part4/test_parser.py new file mode 100644 index 0000000..2ba1e03 --- /dev/null +++ b/part4/test_parser.py @@ -0,0 +1,34 @@ +import unittest + + +class ParserTestCase(unittest.TestCase): + def makeParser(self, text): + from parser import Lexer, Parser + lexer = Lexer(text) + parser = Parser(lexer) + return parser + + def test_expression1(self): + parser = self.makeParser('7') + parser.parse() + + def test_expression2(self): + parser = self.makeParser('7 * 4 / 2') + parser.parse() + + def test_expression3(self): + parser = self.makeParser('7 * 4 / 2 * 3') + parser.parse() + + def test_expression4(self): + parser = self.makeParser('10 * 4 * 2 * 3 / 8') + parser.parse() + + def test_expression_invalid_syntax(self): + parser = self.makeParser('10 *') + with self.assertRaises(Exception): + parser.parse() + + +if __name__ == '__main__': + unittest.main() diff --git a/part5/calc5.py b/part5/calc5.py new file mode 100644 index 0000000..e490321 --- /dev/null +++ b/part5/calc5.py @@ -0,0 +1,183 @@ +# Token types +# +# EOF (end-of-file) token is used to indicate that +# there is no more input left for lexical analysis +INTEGER, PLUS, MINUS, MUL, DIV, EOF = ( + 'INTEGER', 'PLUS', 'MINUS', 'MUL', 'DIV', 'EOF' +) + + +class Token(object): + def __init__(self, ttype, value): + # token type: INTEGER, PLUS, MINUS, MUL, DIV, or EOF + self.ttype = ttype + # token value: non-negative integer value, '+', '-', '*', '/', or None + self.value = value + + def __str__(self): + 
"""String representation of the class instance. + + Examples: + Token(INTEGER, 3) + Token(PLUS, '+') + Token(MUL, '*') + """ + return 'Token({ttype}, {value})'.format( + ttype=self.ttype, + value=repr(self.value) + ) + + def __repr__(self): + return self.__str__() + + +class Lexer(object): + def __init__(self, text): + # client string input, e.g. "3 * 5", "12 / 3 * 4", etc + self.text = text + # self.pos is an index into self.text + self.pos = 0 + self.current_char = self.text[self.pos] + + def error(self): + raise Exception('Invalid character') + + def advance(self): + """Advance the `pos` pointer and set the `current_char` variable.""" + self.pos += 1 + if self.pos > len(self.text) - 1: + self.current_char = None # Indicates end of input + else: + self.current_char = self.text[self.pos] + + def skip_whitespace(self): + while self.current_char is not None and self.current_char.isspace(): + self.advance() + + def integer(self): + """Return a (multidigit) integer consumed from the input.""" + result = '' + while self.current_char is not None and self.current_char.isdigit(): + result += self.current_char + self.advance() + return int(result) + + def get_next_token(self): + """Lexical analyzer (also known as scanner or tokenizer) + + This method is responsible for breaking a sentence + apart into tokens. One token at a time. 
+ """ + while self.current_char is not None: + + if self.current_char.isspace(): + self.skip_whitespace() + continue + + if self.current_char.isdigit(): + return Token(INTEGER, self.integer()) + + if self.current_char == '+': + self.advance() + return Token(PLUS, '+') + + if self.current_char == '-': + self.advance() + return Token(MINUS, '-') + + if self.current_char == '*': + self.advance() + return Token(MUL, '*') + + if self.current_char == '/': + self.advance() + return Token(DIV, '/') + + self.error() + + return Token(EOF, None) + + +class Interpreter(object): + def __init__(self, lexer): + self.lexer = lexer + # set current token to the first token taken from the input + self.current_token = self.lexer.get_next_token() + + def error(self): + raise Exception('Invalid syntax') + + def eat(self, token_type): + # compare the current token type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. + if self.current_token.ttype == token_type: + self.current_token = self.lexer.get_next_token() + else: + self.error() + + def factor(self): + """factor : INTEGER""" + token = self.current_token + self.eat(INTEGER) + return token.value + + def term(self): + """term : factor ((MUL | DIV) factor)*""" + result = self.factor() + + while self.current_token.ttype in (MUL, DIV): + token = self.current_token + if token.ttype == MUL: + self.eat(MUL) + result = result * self.factor() + elif token.ttype == DIV: + self.eat(DIV) + result = result / self.factor() + + return result + + def expr(self): + """Arithmetic expression parser / interpreter. 
+ + calc> 14 + 2 * 3 - 6 / 2 + 17 + + expr : term ((PLUS | MINUS) term)* + term : factor ((MUL | DIV) factor)* + factor : INTEGER + """ + result = self.term() + + while self.current_token.ttype in (PLUS, MINUS): + token = self.current_token + if token.ttype == PLUS: + self.eat(PLUS) + result = result + self.term() + elif token.ttype == MINUS: + self.eat(MINUS) + result = result - self.term() + + return result + + +def main(): + while True: + try: + try: + text = raw_input('calc> ') + except NameError: # Python3 + text = input('calc> ') + except EOFError: + break + if not text: + continue + lexer = Lexer(text) + interpreter = Interpreter(lexer) + result = interpreter.expr() + print(result) + + +if __name__ == '__main__': + main() diff --git a/part5/test_interpreter.py b/part5/test_interpreter.py new file mode 100644 index 0000000..24ec3fc --- /dev/null +++ b/part5/test_interpreter.py @@ -0,0 +1,80 @@ +import unittest + + +class LexerTestCase(unittest.TestCase): + def makeLexer(self, text): + from calc5 import Lexer + lexer = Lexer(text) + return lexer + + def test_lexer_integer(self): + from calc5 import INTEGER + lexer = self.makeLexer('234') + token = lexer.get_next_token() + self.assertEqual(token.ttype, INTEGER) + self.assertEqual(token.value, 234) + + def test_lexer_mul(self): + from calc5 import MUL + lexer = self.makeLexer('*') + token = lexer.get_next_token() + self.assertEqual(token.ttype, MUL) + self.assertEqual(token.value, '*') + + def test_lexer_div(self): + from calc5 import DIV + lexer = self.makeLexer(' / ') + token = lexer.get_next_token() + self.assertEqual(token.ttype, DIV) + self.assertEqual(token.value, '/') + + def test_lexer_plus(self): + from calc5 import PLUS + lexer = self.makeLexer('+') + token = lexer.get_next_token() + self.assertEqual(token.ttype, PLUS) + self.assertEqual(token.value, '+') + + def test_lexer_minus(self): + from calc5 import MINUS + lexer = self.makeLexer('-') + token = lexer.get_next_token() + 
self.assertEqual(token.ttype, MINUS) + self.assertEqual(token.value, '-') + + +class InterpreterTestCase(unittest.TestCase): + def makeInterpreter(self, text): + from calc5 import Lexer, Interpreter + lexer = Lexer(text) + interpreter = Interpreter(lexer) + return interpreter + + def test_expression1(self): + interpreter = self.makeInterpreter('3') + result = interpreter.expr() + self.assertEqual(result, 3) + + def test_expression2(self): + interpreter = self.makeInterpreter('2 + 7 * 4') + result = interpreter.expr() + self.assertEqual(result, 30) + + def test_expression3(self): + interpreter = self.makeInterpreter('7 - 8 / 4') + result = interpreter.expr() + self.assertEqual(result, 5) + + def test_expression4(self): + interpreter = self.makeInterpreter('14 + 2 * 3 - 6 / 2') + result = interpreter.expr() + self.assertEqual(result, 17) + + def test_expression_invalid_syntax(self): + interpreter = self.makeInterpreter('10 *') + with self.assertRaises(Exception): + interpreter.expr() + + +if __name__ == '__main__': + unittest.main() diff --git a/part6/calc6.py b/part6/calc6.py new file mode 100644 index 0000000..febd7e7 --- /dev/null +++ b/part6/calc6.py @@ -0,0 +1,195 @@ +# Token types +# +# EOF (end-of-file) token is used to indicate that +# there is no more input left for lexical analysis +INTEGER, PLUS, MINUS, MUL, DIV, LPAREN, RPAREN, EOF = ( + 'INTEGER', 'PLUS', 'MINUS', 'MUL', 'DIV', '(', ')', 'EOF' +) + + +class Token(object): + def __init__(self, ttype, value): + self.ttype = ttype + self.value = value + + def __str__(self): + """String representation of the class instance. + + Examples: + Token(INTEGER, 3) + Token(PLUS, '+') + Token(MUL, '*') + """ + return 'Token({ttype}, {value})'.format( + ttype=self.ttype, + value=repr(self.value) + ) + + def __repr__(self): + return self.__str__() + + +class Lexer(object): + def __init__(self, text): + # client string input, e.g. 
"4 + 2 * 3 - 6 / 2" + self.text = text + # self.pos is an index into self.text + self.pos = 0 + self.current_char = self.text[self.pos] + + def error(self): + raise Exception('Invalid character') + + def advance(self): + """Advance the `pos` pointer and set the `current_char` variable.""" + self.pos += 1 + if self.pos > len(self.text) - 1: + self.current_char = None # Indicates end of input + else: + self.current_char = self.text[self.pos] + + def skip_whitespace(self): + while self.current_char is not None and self.current_char.isspace(): + self.advance() + + def integer(self): + """Return a (multidigit) integer consumed from the input.""" + result = '' + while self.current_char is not None and self.current_char.isdigit(): + result += self.current_char + self.advance() + return int(result) + + def get_next_token(self): + """Lexical analyzer (also known as scanner or tokenizer) + + This method is responsible for breaking a sentence + apart into tokens. One token at a time. + """ + while self.current_char is not None: + + if self.current_char.isspace(): + self.skip_whitespace() + continue + + if self.current_char.isdigit(): + return Token(INTEGER, self.integer()) + + if self.current_char == '+': + self.advance() + return Token(PLUS, '+') + + if self.current_char == '-': + self.advance() + return Token(MINUS, '-') + + if self.current_char == '*': + self.advance() + return Token(MUL, '*') + + if self.current_char == '/': + self.advance() + return Token(DIV, '/') + + if self.current_char == '(': + self.advance() + return Token(LPAREN, '(') + + if self.current_char == ')': + self.advance() + return Token(RPAREN, ')') + + self.error() + + return Token(EOF, None) + + +class Interpreter(object): + def __init__(self, lexer): + self.lexer = lexer + # set current token to the first token taken from the input + self.current_token = self.lexer.get_next_token() + + def error(self): + raise Exception('Invalid syntax') + + def eat(self, token_type): + # compare the current token 
type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. + if self.current_token.ttype == token_type: + self.current_token = self.lexer.get_next_token() + else: + self.error() + + def factor(self): + """factor : INTEGER | LPAREN expr RPAREN""" + token = self.current_token + if token.ttype == INTEGER: + self.eat(INTEGER) + return token.value + elif token.ttype == LPAREN: + self.eat(LPAREN) + result = self.expr() + self.eat(RPAREN) + return result + + def term(self): + """term : factor ((MUL | DIV) factor)*""" + result = self.factor() + + while self.current_token.ttype in (MUL, DIV): + token = self.current_token + if token.ttype == MUL: + self.eat(MUL) + result = result * self.factor() + elif token.ttype == DIV: + self.eat(DIV) + result = result / self.factor() + + return result + + def expr(self): + """Arithmetic expression parser / interpreter. + + calc> 7 + 3 * (10 / (12 / (3 + 1) - 1)) + 22 + + expr : term ((PLUS | MINUS) term)* + term : factor ((MUL | DIV) factor)* + factor : INTEGER | LPAREN expr RPAREN + """ + result = self.term() + + while self.current_token.ttype in (PLUS, MINUS): + token = self.current_token + if token.ttype == PLUS: + self.eat(PLUS) + result = result + self.term() + elif token.ttype == MINUS: + self.eat(MINUS) + result = result - self.term() + + return result + + +def main(): + while True: + try: + try: + text = raw_input('calc> ') + except NameError: # Python3 + text = input('calc> ') + except EOFError: + break + if not text: + continue + lexer = Lexer(text) + interpreter = Interpreter(lexer) + result = interpreter.expr() + print(result) + + +if __name__ == '__main__': + main() diff --git a/part6/test_interpreter.py b/part6/test_interpreter.py new file mode 100644 index 0000000..bc066db --- /dev/null +++ b/part6/test_interpreter.py @@ -0,0 +1,111 @@ +import unittest + + +class LexerTestCase(unittest.TestCase): + def 
makeLexer(self, text): + from calc6 import Lexer + lexer = Lexer(text) + return lexer + + def test_lexer_integer(self): + from calc6 import INTEGER + lexer = self.makeLexer('234') + token = lexer.get_next_token() + self.assertEqual(token.ttype, INTEGER) + self.assertEqual(token.value, 234) + + def test_lexer_mul(self): + from calc6 import MUL + lexer = self.makeLexer('*') + token = lexer.get_next_token() + self.assertEqual(token.ttype, MUL) + self.assertEqual(token.value, '*') + + def test_lexer_div(self): + from calc6 import DIV + lexer = self.makeLexer(' / ') + token = lexer.get_next_token() + self.assertEqual(token.ttype, DIV) + self.assertEqual(token.value, '/') + + def test_lexer_plus(self): + from calc6 import PLUS + lexer = self.makeLexer('+') + token = lexer.get_next_token() + self.assertEqual(token.ttype, PLUS) + self.assertEqual(token.value, '+') + + def test_lexer_minus(self): + from calc6 import MINUS + lexer = self.makeLexer('-') + token = lexer.get_next_token() + self.assertEqual(token.ttype, MINUS) + self.assertEqual(token.value, '-') + + def test_lexer_lparen(self): + from calc6 import LPAREN + lexer = self.makeLexer('(') + token = lexer.get_next_token() + self.assertEqual(token.ttype, LPAREN) + self.assertEqual(token.value, '(') + + def test_lexer_rparen(self): + from calc6 import RPAREN + lexer = self.makeLexer(')') + token = lexer.get_next_token() + self.assertEqual(token.ttype, RPAREN) + self.assertEqual(token.value, ')') + + +class InterpreterTestCase(unittest.TestCase): + def makeInterpreter(self, text): + from calc6 import Lexer, Interpreter + lexer = Lexer(text) + interpreter = Interpreter(lexer) + return interpreter + + def test_expression1(self): + interpreter = self.makeInterpreter('3') + result = interpreter.expr() + self.assertEqual(result, 3) + + def test_expression2(self): + interpreter = self.makeInterpreter('2 + 7 * 4') + result = interpreter.expr() + self.assertEqual(result, 30) + + def test_expression3(self): + interpreter = 
self.makeInterpreter('7 - 8 / 4') + result = interpreter.expr() + self.assertEqual(result, 5) + + def test_expression4(self): + interpreter = self.makeInterpreter('14 + 2 * 3 - 6 / 2') + result = interpreter.expr() + self.assertEqual(result, 17) + + def test_expression5(self): + interpreter = self.makeInterpreter('7 + 3 * (10 / (12 / (3 + 1) - 1))') + result = interpreter.expr() + self.assertEqual(result, 22) + + def test_expression6(self): + interpreter = self.makeInterpreter( + '7 + 3 * (10 / (12 / (3 + 1) - 1)) / (2 + 3) - 5 - 3 + (8)' + ) + result = interpreter.expr() + self.assertEqual(result, 10) + + def test_expression7(self): + interpreter = self.makeInterpreter('7 + (((3 + 2)))') + result = interpreter.expr() + self.assertEqual(result, 12) + + def test_expression_invalid_syntax(self): + interpreter = self.makeInterpreter('10 *') + with self.assertRaises(Exception): + interpreter.expr() + + +if __name__ == '__main__': + unittest.main() diff --git a/part7/python/ex1.py b/part7/python/ex1.py new file mode 100644 index 0000000..4f8a7d4 --- /dev/null +++ b/part7/python/ex1.py @@ -0,0 +1,60 @@ +############################################################################### +# Exercise 1: Infix to Postfix Translator # +############################################################################### +import unittest + +from spi import Lexer, Parser, NodeVisitor + + +class Infix2PostfixTranslator(NodeVisitor): + def __init__(self, tree): + self.tree = tree + + def visit_BinOp(self, node): + left_val = self.visit(node.left) + right_val = self.visit(node.right) + return '{left} {right} {op}'.format( + left=left_val, + right=right_val, + op=node.op.value, + ) + + def visit_Num(self, node): + return node.value + + def translate(self): + return self.visit(self.tree) + + +def infix2postfix(s): + lexer = Lexer(s) + parser = Parser(lexer) + tree = parser.parse() + translator = Infix2PostfixTranslator(tree) + translation = translator.translate() + return translation + + 
+class Infix2PostfixTestCase(unittest.TestCase): + + def test_1(self): + self.assertEqual(infix2postfix('2 + 3'), '2 3 +') + + def test_2(self): + self.assertEqual(infix2postfix('2 + 3 * 5'), '2 3 5 * +') + + def test_3(self): + self.assertEqual( + infix2postfix('5 + ((1 + 2) * 4) - 3'), + '5 1 2 + 4 * + 3 -', + ) + + def test_4(self): + self.assertEqual( + infix2postfix('(5 + 3) * 12 / 3'), + '5 3 + 12 * 3 /', + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/part7/python/ex2.py b/part7/python/ex2.py new file mode 100644 index 0000000..7f5a628 --- /dev/null +++ b/part7/python/ex2.py @@ -0,0 +1,63 @@ +############################################################################### +# Exercise 2: Infix to LISP style Translator # +############################################################################### +import unittest + +from spi import Lexer, Parser, NodeVisitor + + +class Infix2LispTranslator(NodeVisitor): + def __init__(self, tree): + self.tree = tree + + def visit_BinOp(self, node): + left_val = self.visit(node.left) + right_val = self.visit(node.right) + return '({op} {left} {right})'.format( + left=left_val, + right=right_val, + op=node.op.value, + ) + + def visit_Num(self, node): + return node.value + + def translate(self): + return self.visit(self.tree) + + +def infix2lisp(s): + lexer = Lexer(s) + parser = Parser(lexer) + tree = parser.parse() + translator = Infix2LispTranslator(tree) + translation = translator.translate() + return translation + + +class Infix2LispTestCase(unittest.TestCase): + + def test_1(self): + self.assertEqual(infix2lisp('1 + 2'), '(+ 1 2)') + + def test_2(self): + self.assertEqual(infix2lisp('2 * 7'), '(* 2 7)') + + def test_3(self): + self.assertEqual(infix2lisp('2 * 7 + 3'), '(+ (* 2 7) 3)') + + def test_4(self): + self.assertEqual(infix2lisp('2 + 3 * 5'), '(+ 2 (* 3 5))') + + def test_5(self): + self.assertEqual(infix2lisp('7 + 5 * 2 - 3'), '(- (+ 7 (* 5 2)) 3)') + + def test_6(self): + self.assertEqual( + 
infix2lisp('1 + 2 + 3 + 4 + 5'), + '(+ (+ (+ (+ 1 2) 3) 4) 5)' + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/part7/python/genastdot.py b/part7/python/genastdot.py new file mode 100644 index 0000000..0de2048 --- /dev/null +++ b/part7/python/genastdot.py @@ -0,0 +1,71 @@ +############################################################################### +# AST visualizer - generates a DOT file for Graphviz. # +# # +# To generate an image from the DOT file run $ dot -Tpng -o ast.png ast.dot # +# # +############################################################################### +import argparse +import textwrap + +from spi import Lexer, Parser, NodeVisitor + + +class ASTVisualizer(NodeVisitor): + def __init__(self, parser): + self.parser = parser + self.ncount = 1 + self.dot_header = [textwrap.dedent("""\ + digraph astgraph { + node [shape=circle, fontsize=12, fontname="Courier", height=.1]; + ranksep=.3; + edge [arrowsize=.5] + + """)] + self.dot_body = [] + self.dot_footer = ['}'] + + def visit_Num(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.token.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + def visit_BinOp(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.op.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + self.visit(node.left) + self.visit(node.right) + + for child_node in (node.left, node.right): + s = ' node{} -> node{}\n'.format(node._num, child_node._num) + self.dot_body.append(s) + + def gendot(self): + tree = self.parser.parse() + self.visit(tree) + return ''.join(self.dot_header + self.dot_body + self.dot_footer) + + +def main(): + argparser = argparse.ArgumentParser( + description='Generate an AST DOT file.' 
+ ) + argparser.add_argument( + 'text', + help='Arithmetic expression (in quotes): "1 + 2 * 3"' + ) + args = argparser.parse_args() + text = args.text + + lexer = Lexer(text) + parser = Parser(lexer) + viz = ASTVisualizer(parser) + content = viz.gendot() + print(content) + + +if __name__ == '__main__': + main() diff --git a/part7/python/genptdot.py b/part7/python/genptdot.py new file mode 100644 index 0000000..6e2456a --- /dev/null +++ b/part7/python/genptdot.py @@ -0,0 +1,188 @@ +############################################################################### +# # +# Parse Tree visualizer # +# # +# To generate an image from the DOT file run: # +# $ dot -Tpng -o parsetree.png parsetree.dot # +# # +############################################################################### +import argparse +import textwrap + +from spi import PLUS, MINUS, MUL, DIV, INTEGER, LPAREN, RPAREN, Lexer + + +class Node(object): + def __init__(self, name): + self.name = name + self.children = [] + + def add(self, node): + self.children.append(node) + + +class RuleNode(Node): + pass + + +class TokenNode(Node): + pass + + +class Parser(object): + """Parses the input and builds a parse tree.""" + + def __init__(self, lexer): + self.lexer = lexer + # set current token to the first token taken from the input + self.current_token = self.lexer.get_next_token() + + # Parse tree root + self.root = None + self.current_node = None + + def error(self): + raise Exception('Invalid syntax') + + def eat(self, token_type): + # compare the current token type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. 
+ if self.current_token.ttype == token_type: + self.current_node.add(TokenNode(self.current_token.value)) + self.current_token = self.lexer.get_next_token() + else: + self.error() + + def factor(self): + """factor : INTEGER | LPAREN expr RPAREN""" + node = RuleNode('factor') + self.current_node.add(node) + _save = self.current_node + self.current_node = node + + token = self.current_token + if token.ttype == INTEGER: + self.eat(INTEGER) + elif token.ttype == LPAREN: + self.eat(LPAREN) + self.expr() + self.eat(RPAREN) + + self.current_node = _save + + def term(self): + """term : factor ((MUL | DIV) factor)*""" + node = RuleNode('term') + self.current_node.add(node) + _save = self.current_node + self.current_node = node + + self.factor() + + while self.current_token.ttype in (MUL, DIV): + token = self.current_token + if token.ttype == MUL: + self.eat(MUL) + elif token.ttype == DIV: + self.eat(DIV) + + self.factor() + + self.current_node = _save + + def expr(self): + """ + expr : term ((PLUS | MINUS) term)* + term : factor ((MUL | DIV) factor)* + factor : INTEGER | LPAREN expr RPAREN + """ + node = RuleNode('expr') + if self.root is None: + self.root = node + else: + self.current_node.add(node) + + _save = self.current_node + self.current_node = node + + self.term() + + while self.current_token.ttype in (PLUS, MINUS): + token = self.current_token + if token.ttype == PLUS: + self.eat(PLUS) + elif token.ttype == MINUS: + self.eat(MINUS) + + self.term() + + self.current_node = _save + + def parse(self): + self.expr() + return self.root + + +class ParseTreeVisualizer(object): + def __init__(self, parser): + self.parser = parser + self.ncount = 1 + self.dot_header = [textwrap.dedent("""\ + digraph astgraph { + node [shape=none, fontsize=12, fontname="Courier", height=.1]; + ranksep=.3; + edge [arrowsize=.5] + + """)] + self.dot_body = [] + self.dot_footer = ['}'] + + def bfs(self, node): + ncount = 1 + queue = [] + queue.append(node) + s = ' node{} 
[label="{}"]\n'.format(ncount, node.name) + self.dot_body.append(s) + node._num = ncount + ncount += 1 + + while queue: + node = queue.pop(0) + for child_node in node.children: + s = ' node{} [label="{}"]\n'.format(ncount, child_node.name) + self.dot_body.append(s) + child_node._num = ncount + ncount += 1 + s = ' node{} -> node{}\n'.format(node._num, child_node._num) + self.dot_body.append(s) + queue.append(child_node) + + def gendot(self): + tree = self.parser.parse() + self.bfs(tree) + return ''.join(self.dot_header + self.dot_body + self.dot_footer) + + +def main(): + argparser = argparse.ArgumentParser( + description='Generate a Parse Tree DOT file.' + ) + argparser.add_argument( + 'text', + help='Arithmetic expression (in quotes): "1 + 2 * 3"' + ) + args = argparser.parse_args() + text = args.text + + lexer = Lexer(text) + parser = Parser(lexer) + + viz = ParseTreeVisualizer(parser) + content = viz.gendot() + print(content) + + +if __name__ == '__main__': + main() diff --git a/part7/python/spi.py b/part7/python/spi.py new file mode 100644 index 0000000..5e8463d --- /dev/null +++ b/part7/python/spi.py @@ -0,0 +1,268 @@ +""" SPI - Simple Pascal Interpreter """ + +############################################################################### +# # +# LEXER # +# # +############################################################################### + +# Token types +# +# EOF (end-of-file) token is used to indicate that +# there is no more input left for lexical analysis +INTEGER, PLUS, MINUS, MUL, DIV, LPAREN, RPAREN, EOF = ( + 'INTEGER', 'PLUS', 'MINUS', 'MUL', 'DIV', '(', ')', 'EOF' +) + + +class Token(object): + def __init__(self, ttype, value): + self.ttype = ttype + self.value = value + + def __str__(self): + """String representation of the class instance. 
+ + Examples: + Token(INTEGER, 3) + Token(PLUS, '+') + Token(MUL, '*') + """ + return 'Token({ttype}, {value})'.format( + ttype=self.ttype, + value=repr(self.value) + ) + + def __repr__(self): + return self.__str__() + + +class Lexer(object): + def __init__(self, text): + # client string input, e.g. "4 + 2 * 3 - 6 / 2" + self.text = text + # self.pos is an index into self.text + self.pos = 0 + self.current_char = self.text[self.pos] + + def error(self): + raise Exception('Invalid character') + + def advance(self): + """Advance the `pos` pointer and set the `current_char` variable.""" + self.pos += 1 + if self.pos > len(self.text) - 1: + self.current_char = None # Indicates end of input + else: + self.current_char = self.text[self.pos] + + def skip_whitespace(self): + while self.current_char is not None and self.current_char.isspace(): + self.advance() + + def integer(self): + """Return a (multidigit) integer consumed from the input.""" + result = '' + while self.current_char is not None and self.current_char.isdigit(): + result += self.current_char + self.advance() + return int(result) + + def get_next_token(self): + """Lexical analyzer (also known as scanner or tokenizer) + + This method is responsible for breaking a sentence + apart into tokens. One token at a time. 
+ """ + while self.current_char is not None: + + if self.current_char.isspace(): + self.skip_whitespace() + continue + + if self.current_char.isdigit(): + return Token(INTEGER, self.integer()) + + if self.current_char == '+': + self.advance() + return Token(PLUS, '+') + + if self.current_char == '-': + self.advance() + return Token(MINUS, '-') + + if self.current_char == '*': + self.advance() + return Token(MUL, '*') + + if self.current_char == '/': + self.advance() + return Token(DIV, '/') + + if self.current_char == '(': + self.advance() + return Token(LPAREN, '(') + + if self.current_char == ')': + self.advance() + return Token(RPAREN, ')') + + self.error() + + return Token(EOF, None) + + +############################################################################### +# # +# PARSER # +# # +############################################################################### + +class AST(object): + pass + + +class BinOp(AST): + def __init__(self, left, op, right): + self.left = left + self.token = self.op = op + self.right = right + + +class Num(AST): + def __init__(self, token): + self.token = token + self.value = token.value + + +class Parser(object): + def __init__(self, lexer): + self.lexer = lexer + # set current token to the first token taken from the input + self.current_token = self.lexer.get_next_token() + + def error(self): + raise Exception('Invalid syntax') + + def eat(self, token_type): + # compare the current token type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. 
+ if self.current_token.ttype == token_type: + self.current_token = self.lexer.get_next_token() + else: + self.error() + + def factor(self): + """factor : INTEGER | LPAREN expr RPAREN""" + token = self.current_token + if token.ttype == INTEGER: + self.eat(INTEGER) + return Num(token) + elif token.ttype == LPAREN: + self.eat(LPAREN) + node = self.expr() + self.eat(RPAREN) + return node + + def term(self): + """term : factor ((MUL | DIV) factor)*""" + node = self.factor() + + while self.current_token.ttype in (MUL, DIV): + token = self.current_token + if token.ttype == MUL: + self.eat(MUL) + elif token.ttype == DIV: + self.eat(DIV) + + node = BinOp(left=node, op=token, right=self.factor()) + + return node + + def expr(self): + """ + expr : term ((PLUS | MINUS) term)* + term : factor ((MUL | DIV) factor)* + factor : INTEGER | LPAREN expr RPAREN + """ + node = self.term() + + while self.current_token.ttype in (PLUS, MINUS): + token = self.current_token + if token.ttype == PLUS: + self.eat(PLUS) + elif token.ttype == MINUS: + self.eat(MINUS) + + node = BinOp(left=node, op=token, right=self.term()) + + return node + + def parse(self): + node = self.expr() + if self.current_token.ttype != EOF: + self.error() + return node + + +############################################################################### +# # +# INTERPRETER # +# # +############################################################################### + +class NodeVisitor(object): + def visit(self, node): + method_name = 'visit_' + type(node).__name__ + visitor = getattr(self, method_name, self.generic_visit) + return visitor(node) + + def generic_visit(self, node): + raise Exception('No visit_{} method'.format(type(node).__name__)) + + +class Interpreter(NodeVisitor): + def __init__(self, parser): + self.parser = parser + + def visit_BinOp(self, node): + if node.op.ttype == PLUS: + return self.visit(node.left) + self.visit(node.right) + elif node.op.ttype == MINUS: + return self.visit(node.left) - 
self.visit(node.right) + elif node.op.ttype == MUL: + return self.visit(node.left) * self.visit(node.right) + elif node.op.ttype == DIV: + return self.visit(node.left) / self.visit(node.right) + + def visit_Num(self, node): + return node.value + + def interpret(self): + tree = self.parser.parse() + return self.visit(tree) + + +def main(): + while True: + try: + try: + text = raw_input('spi> ') + except NameError: # Python3 + text = input('spi> ') + except EOFError: + break + if not text: + continue + + lexer = Lexer(text) + parser = Parser(lexer) + interpreter = Interpreter(parser) + result = interpreter.interpret() + print(result) + + +if __name__ == '__main__': + main() diff --git a/part7/python/test_interpreter.py b/part7/python/test_interpreter.py new file mode 100644 index 0000000..433eecd --- /dev/null +++ b/part7/python/test_interpreter.py @@ -0,0 +1,117 @@ +import unittest + + +class LexerTestCase(unittest.TestCase): + def makeLexer(self, text): + from spi import Lexer + lexer = Lexer(text) + return lexer + + def test_lexer_integer(self): + from spi import INTEGER + lexer = self.makeLexer('234') + token = lexer.get_next_token() + self.assertEqual(token.ttype, INTEGER) + self.assertEqual(token.value, 234) + + def test_lexer_mul(self): + from spi import MUL + lexer = self.makeLexer('*') + token = lexer.get_next_token() + self.assertEqual(token.ttype, MUL) + self.assertEqual(token.value, '*') + + def test_lexer_div(self): + from spi import DIV + lexer = self.makeLexer(' / ') + token = lexer.get_next_token() + self.assertEqual(token.ttype, DIV) + self.assertEqual(token.value, '/') + + def test_lexer_plus(self): + from spi import PLUS + lexer = self.makeLexer('+') + token = lexer.get_next_token() + self.assertEqual(token.ttype, PLUS) + self.assertEqual(token.value, '+') + + def test_lexer_minus(self): + from spi import MINUS + lexer = self.makeLexer('-') + token = lexer.get_next_token() + self.assertEqual(token.ttype, MINUS) + self.assertEqual(token.value, '-') 
+ + def test_lexer_lparen(self): + from spi import LPAREN + lexer = self.makeLexer('(') + token = lexer.get_next_token() + self.assertEqual(token.ttype, LPAREN) + self.assertEqual(token.value, '(') + + def test_lexer_rparen(self): + from spi import RPAREN + lexer = self.makeLexer(')') + token = lexer.get_next_token() + self.assertEqual(token.ttype, RPAREN) + self.assertEqual(token.value, ')') + + +class InterpreterTestCase(unittest.TestCase): + def makeInterpreter(self, text): + from spi import Lexer, Parser, Interpreter + lexer = Lexer(text) + parser = Parser(lexer) + interpreter = Interpreter(parser) + return interpreter + + def test_expression1(self): + interpreter = self.makeInterpreter('3') + result = interpreter.interpret() + self.assertEqual(result, 3) + + def test_expression2(self): + interpreter = self.makeInterpreter('2 + 7 * 4') + result = interpreter.interpret() + self.assertEqual(result, 30) + + def test_expression3(self): + interpreter = self.makeInterpreter('7 - 8 / 4') + result = interpreter.interpret() + self.assertEqual(result, 5) + + def test_expression4(self): + interpreter = self.makeInterpreter('14 + 2 * 3 - 6 / 2') + result = interpreter.interpret() + self.assertEqual(result, 17) + + def test_expression5(self): + interpreter = self.makeInterpreter('7 + 3 * (10 / (12 / (3 + 1) - 1))') + result = interpreter.interpret() + self.assertEqual(result, 22) + + def test_expression6(self): + interpreter = self.makeInterpreter( + '7 + 3 * (10 / (12 / (3 + 1) - 1)) / (2 + 3) - 5 - 3 + (8)' + ) + result = interpreter.interpret() + self.assertEqual(result, 10) + + def test_expression7(self): + interpreter = self.makeInterpreter('7 + (((3 + 2)))') + result = interpreter.interpret() + self.assertEqual(result, 12) + + def test_expression_invalid_syntax1(self): + interpreter = self.makeInterpreter('10 *') + with self.assertRaises(Exception): + interpreter.interpret() + + def test_expression_invalid_syntax2(self): + interpreter = self.makeInterpreter('1 (1 + 
2)') + with self.assertRaises(Exception): + interpreter.interpret() + + +if __name__ == '__main__': + unittest.main() diff --git a/part7/rust/README.md b/part7/rust/README.md new file mode 100644 index 0000000..510cd81 --- /dev/null +++ b/part7/rust/README.md @@ -0,0 +1,11 @@ +Rust implementation of a simple Pascal interpreter: [Let's Build A Simple Interpreter. Part 7.](http://ruslanspivak.com/lsbasi-part7/) + +To run the interpreter: + + $ cd spi + $ cargo run + +To run the tests: + + $ cd spi + $ cargo test \ No newline at end of file diff --git a/part7/rust/spi/Cargo.toml b/part7/rust/spi/Cargo.toml new file mode 100644 index 0000000..00c1c15 --- /dev/null +++ b/part7/rust/spi/Cargo.toml @@ -0,0 +1,4 @@ +[package] +name = "spi" +version = "0.1.0" +authors = ["Ruslan Spivak "] diff --git a/part7/rust/spi/src/main.rs b/part7/rust/spi/src/main.rs new file mode 100644 index 0000000..8e8e817 --- /dev/null +++ b/part7/rust/spi/src/main.rs @@ -0,0 +1,386 @@ +use std::io; +use std::io::Write; + +#[derive(Clone, Debug, Eq, PartialEq)] +enum Token { + INTEGER(i32), + PLUS, + MINUS, + MUL, + DIV, + LPAREN, + RPAREN, + EOF, +} + + +pub struct Lexer { + text: String, + pos: i32, + current_char: Option, +} + +impl Lexer { + fn new(text: String) -> Lexer { + let mut lexer = Lexer { + text: text, + pos: 0, + current_char: None, + }; + if lexer.text.len() > 0 { + lexer.current_char = Some(lexer.text.as_bytes()[0] as char); + } + + lexer + } + + fn advance(&mut self) { + self.pos += 1; + if self.pos > self.text.len() as i32 - 1 { + self.current_char = None; // Indicates end of input + } else { + self.current_char = Some(self.text.as_bytes()[self.pos as usize] as char); + } + } + + fn skip_whitespace(&mut self) { + while let Some(ch) = self.current_char { + if ch.is_whitespace() { + self.advance(); + } else { + break; + } + } + } + + fn integer(&mut self) -> i32 { + let mut result = String::new(); + while let Some(ch) = self.current_char { + if ch.is_digit(10) { + 
result.push(ch); + self.advance(); + } else { + break; + } + } + + result.parse::().unwrap() + } + + fn get_next_token(&mut self) -> Token { + while let Some(ch) = self.current_char { + if ch.is_whitespace() { + self.skip_whitespace(); + continue; + } + + if ch.is_digit(10) { + return Token::INTEGER(self.integer()); + } + + match ch { + '+' => { + self.advance(); + return Token::PLUS; + }, + '-' => { + self.advance(); + return Token::MINUS; + }, + '*' => { + self.advance(); + return Token::MUL; + }, + '/' => { + self.advance(); + return Token::DIV; + }, + '(' => { + self.advance(); + return Token::LPAREN; + }, + ')' => { + self.advance(); + return Token::RPAREN; + }, + _ => {} + } + + panic!("Invalid character"); + } + + Token::EOF + } + +} + + +struct AST { + token: Token, + children: Vec, +} + +impl AST { + fn new(token: Token, children: Vec) -> AST { + AST { + token: token, + children: children, + } + } +} + + +pub struct Parser { + lexer: Lexer, + current_token: Option, +} + +impl Parser { + fn new(lexer: Lexer) -> Parser { + let mut parser = Parser { + lexer: lexer, + current_token: None, + }; + parser.current_token = Some(parser.lexer.get_next_token()); + + parser + } + + fn eat(&mut self, token: Token) { + if token == self.current_token.clone().unwrap() { + self.current_token = Some(self.lexer.get_next_token()); + } else { + panic!("Invalid syntax"); + } + } + + fn factor(&mut self) -> AST { + // factor : INTEGER | LPAREN expr RPAREN + let token = self.current_token.clone().unwrap(); + match token { + Token::INTEGER(i) => { + self.eat(Token::INTEGER(i)); + return AST::new(token, vec![]); + }, + Token::LPAREN => { + self.eat(Token::LPAREN); + let node = self.expr(); + self.eat(Token::RPAREN); + return node; + }, + _ => panic!("Invalid syntax"), + } + } + + fn term(&mut self) -> AST { + // term : factor ((MUL | DIV) factor)* + let mut node = self.factor(); + + while self.current_token == Some(Token::MUL) || + self.current_token == Some(Token::DIV) { + + match 
self.current_token { + Some(Token::MUL) => { + self.eat(Token::MUL); + let children: Vec = vec![node, self.factor()]; + node = AST::new(Token::MUL, children); + }, + Some(Token::DIV) => { + self.eat(Token::DIV); + let children: Vec = vec![node, self.factor()]; + node = AST::new(Token::DIV, children); + }, + _ => panic!("Invalid syntax"), + } + } + + node + } + + fn expr(&mut self) -> AST { + // expr : term ((PLUS | MINUS) term)* + // term : factor ((MUL | DIV) factor)* + // factor : INTEGER | LPAREN expr RPAREN + + let mut node = self.term(); + + while self.current_token == Some(Token::PLUS) || + self.current_token == Some(Token::MINUS) { + + match self.current_token { + Some(Token::PLUS) => { + self.eat(Token::PLUS); + let children: Vec = vec![node, self.term()]; + node = AST::new(Token::PLUS, children); + }, + Some(Token::MINUS) => { + self.eat(Token::MINUS); + let children: Vec = vec![node, self.term()]; + node = AST::new(Token::MINUS, children); + }, + _ => panic!("Invalid syntax"), + } + } + + node + } + + fn parse(&mut self) -> AST { + self.expr() + } +} + + +pub struct Interpreter { + parser: Parser, +} + +impl Interpreter { + fn new(parser: Parser) -> Interpreter { + Interpreter { + parser: parser, + } + } + + fn visit_num(&self, node: &AST) -> i32 { + match node.token { + Token::INTEGER(i) => { return i; }, + _ => panic!("Error"), + } + } + + fn visit_binop(&self, node: &AST) -> i32 { + let left_val = self.visit(&node.children[0]); + let right_val = self.visit(&node.children[1]); + + match node.token { + Token::PLUS => { + return left_val + right_val; + }, + Token::MINUS => { + return left_val - right_val; + }, + Token::MUL => { + return left_val * right_val; + }, + Token::DIV => { + return left_val / right_val; + }, + _ => panic!("Error"), + } + } + + fn visit(&self, node: &AST) -> i32 { + match node.token { + Token::INTEGER(i) => { + return self.visit_num(node); + } + Token::PLUS | Token::MINUS | Token::MUL | Token::DIV => { + return 
self.visit_binop(node); + }, + _ => panic!("Error"), + } + } + + fn interpret(&mut self) -> i32 { + let tree = self.parser.parse(); + let result = self.visit(&tree); + + result + } +} + + +fn main() { + + loop { + let mut input = String::new(); + + let _ = io::stdout().write(b"spi> "); + let _ = io::stdout().flush(); + + io::stdin().read_line(&mut input).unwrap(); + + let text = String::from(input.trim()); + let lexer = Lexer::new(text); + let parser = Parser::new(lexer); + + let mut interpreter = Interpreter::new(parser); + let result = interpreter.interpret(); + println!("{}", result); + } + +} + + +#[cfg(test)] +mod tests { + use super::*; + + fn make_interpreter(text: &str) -> Interpreter { + let lexer = Lexer::new(String::from(text)); + let parser = Parser::new(lexer); + let interpreter = Interpreter::new(parser); + + interpreter + } + + #[test] + fn test_expression1() { + let mut interpreter = make_interpreter("3"); + let result = interpreter.interpret(); + assert_eq!(result, 3); + } + + #[test] + fn test_expression2() { + let mut interpreter = make_interpreter("2 + 7 * 4"); + let result = interpreter.interpret(); + assert_eq!(result, 30); + } + + #[test] + fn test_expression3() { + let mut interpreter = make_interpreter("7 - 8 / 4"); + let result = interpreter.interpret(); + assert_eq!(result, 5); + } + + #[test] + fn test_expression4() { + let mut interpreter = make_interpreter("14 + 2 * 3 - 6 / 2"); + let result = interpreter.interpret(); + assert_eq!(result, 17); + } + + #[test] + fn test_expression5() { + let mut interpreter = make_interpreter("7 + 3 * (10 / (12 / (3 + 1) - 1))"); + let result = interpreter.interpret(); + assert_eq!(result, 22); + } + + #[test] + fn test_expression6() { + let mut interpreter = make_interpreter( + "7 + 3 * (10 / (12 / (3 + 1) - 1)) / (2 + 3) - 5 - 3 + (8)" + ); + let result = interpreter.interpret(); + assert_eq!(result, 10); + } + + #[test] + fn test_expression7() { + let mut interpreter = make_interpreter("7 + (((3 + 
2)))"); + let result = interpreter.interpret(); + assert_eq!(result, 12); + } + + #[test] + #[should_panic] + fn test_expression_invalid_syntax() { + let mut interpreter = make_interpreter("10 *"); + interpreter.interpret(); + } + +} diff --git a/part8/python/genastdot.py b/part8/python/genastdot.py new file mode 100644 index 0000000..0449da9 --- /dev/null +++ b/part8/python/genastdot.py @@ -0,0 +1,81 @@ +############################################################################### +# AST visualizer - generates a DOT file for Graphviz. # +# # +# To generate an image from the DOT file run $ dot -Tpng -o ast.png ast.dot # +# # +############################################################################### +import argparse +import textwrap + +from spi import Lexer, Parser, NodeVisitor + + +class ASTVisualizer(NodeVisitor): + def __init__(self, parser): + self.parser = parser + self.ncount = 1 + self.dot_header = [textwrap.dedent("""\ + digraph astgraph { + node [shape=circle, fontsize=12, fontname="Courier", height=.1]; + ranksep=.3; + edge [arrowsize=.5] + + """)] + self.dot_body = [] + self.dot_footer = ['}'] + + def visit_Num(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.token.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + def visit_BinOp(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.op.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + self.visit(node.left) + self.visit(node.right) + + for child_node in (node.left, node.right): + s = ' node{} -> node{}\n'.format(node._num, child_node._num) + self.dot_body.append(s) + + def visit_UnaryOp(self, node): + s = ' node{} [label="unary {}"]\n'.format(self.ncount, node.op.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + self.visit(node.expr) + s = ' node{} -> node{}\n'.format(node._num, node.expr._num) + self.dot_body.append(s) + + def gendot(self): + tree = self.parser.parse() 
+ self.visit(tree) + return ''.join(self.dot_header + self.dot_body + self.dot_footer) + + +def main(): + argparser = argparse.ArgumentParser( + description='Generate an AST DOT file.' + ) + argparser.add_argument( + 'text', + help='Arithmetic expression (in quotes): "1 + 2 * 3"' + ) + args = argparser.parse_args() + text = args.text + + lexer = Lexer(text) + parser = Parser(lexer) + viz = ASTVisualizer(parser) + content = viz.gendot() + print(content) + + +if __name__ == '__main__': + main() diff --git a/part8/python/spi.py b/part8/python/spi.py new file mode 100644 index 0000000..6c4a663 --- /dev/null +++ b/part8/python/spi.py @@ -0,0 +1,291 @@ +""" SPI - Simple Pascal Interpreter """ + +############################################################################### +# # +# LEXER # +# # +############################################################################### + +# Token types +# +# EOF (end-of-file) token is used to indicate that +# there is no more input left for lexical analysis +INTEGER, PLUS, MINUS, MUL, DIV, LPAREN, RPAREN, EOF = ( + 'INTEGER', 'PLUS', 'MINUS', 'MUL', 'DIV', '(', ')', 'EOF' +) + + +class Token(object): + def __init__(self, ttype, value): + self.ttype = ttype + self.value = value + + def __str__(self): + """String representation of the class instance. + + Examples: + Token(INTEGER, 3) + Token(PLUS, '+') + Token(MUL, '*') + """ + return 'Token({ttype}, {value})'.format( + ttype=self.ttype, + value=repr(self.value) + ) + + def __repr__(self): + return self.__str__() + + +class Lexer(object): + def __init__(self, text): + # client string input, e.g. 
"4 + 2 * 3 - 6 / 2" + self.text = text + # self.pos is an index into self.text + self.pos = 0 + self.current_char = self.text[self.pos] + + def error(self): + raise Exception('Invalid character') + + def advance(self): + """Advance the `pos` pointer and set the `current_char` variable.""" + self.pos += 1 + if self.pos > len(self.text) - 1: + self.current_char = None # Indicates end of input + else: + self.current_char = self.text[self.pos] + + def skip_whitespace(self): + while self.current_char is not None and self.current_char.isspace(): + self.advance() + + def integer(self): + """Return a (multidigit) integer consumed from the input.""" + result = '' + while self.current_char is not None and self.current_char.isdigit(): + result += self.current_char + self.advance() + return int(result) + + def get_next_token(self): + """Lexical analyzer (also known as scanner or tokenizer) + + This method is responsible for breaking a sentence + apart into tokens. One token at a time. + """ + while self.current_char is not None: + + if self.current_char.isspace(): + self.skip_whitespace() + continue + + if self.current_char.isdigit(): + return Token(INTEGER, self.integer()) + + if self.current_char == '+': + self.advance() + return Token(PLUS, '+') + + if self.current_char == '-': + self.advance() + return Token(MINUS, '-') + + if self.current_char == '*': + self.advance() + return Token(MUL, '*') + + if self.current_char == '/': + self.advance() + return Token(DIV, '/') + + if self.current_char == '(': + self.advance() + return Token(LPAREN, '(') + + if self.current_char == ')': + self.advance() + return Token(RPAREN, ')') + + self.error() + + return Token(EOF, None) + + +############################################################################### +# # +# PARSER # +# # +############################################################################### + +class AST(object): + pass + + +class BinOp(AST): + def __init__(self, left, op, right): + self.left = left + self.token = 
self.op = op + self.right = right + + +class Num(AST): + def __init__(self, token): + self.token = token + self.value = token.value + + +class UnaryOp(AST): + def __init__(self, op, expr): + self.token = self.op = op + self.expr = expr + + +class Parser(object): + def __init__(self, lexer): + self.lexer = lexer + # set current token to the first token taken from the input + self.current_token = self.lexer.get_next_token() + + def error(self): + raise Exception('Invalid syntax') + + def eat(self, token_type): + # compare the current token type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. + if self.current_token.ttype == token_type: + self.current_token = self.lexer.get_next_token() + else: + self.error() + + def factor(self): + """factor : (PLUS | MINUS) factor | INTEGER | LPAREN expr RPAREN""" + token = self.current_token + if token.ttype == PLUS: + self.eat(PLUS) + node = UnaryOp(token, self.factor()) + return node + elif token.ttype == MINUS: + self.eat(MINUS) + node = UnaryOp(token, self.factor()) + return node + elif token.ttype == INTEGER: + self.eat(INTEGER) + return Num(token) + elif token.ttype == LPAREN: + self.eat(LPAREN) + node = self.expr() + self.eat(RPAREN) + return node + + def term(self): + """term : factor ((MUL | DIV) factor)*""" + node = self.factor() + + while self.current_token.ttype in (MUL, DIV): + token = self.current_token + if token.ttype == MUL: + self.eat(MUL) + elif token.ttype == DIV: + self.eat(DIV) + + node = BinOp(left=node, op=token, right=self.factor()) + + return node + + def expr(self): + """ + expr : term ((PLUS | MINUS) term)* + term : factor ((MUL | DIV) factor)* + factor : (PLUS | MINUS) factor | INTEGER | LPAREN expr RPAREN + """ + node = self.term() + + while self.current_token.ttype in (PLUS, MINUS): + token = self.current_token + if token.ttype == PLUS: + self.eat(PLUS) + elif token.ttype == MINUS: 
+ self.eat(MINUS) + + node = BinOp(left=node, op=token, right=self.term()) + + return node + + def parse(self): + node = self.expr() + if self.current_token.ttype != EOF: + self.error() + return node + + +############################################################################### +# # +# INTERPRETER # +# # +############################################################################### + +class NodeVisitor(object): + def visit(self, node): + method_name = 'visit_' + type(node).__name__ + visitor = getattr(self, method_name, self.generic_visit) + return visitor(node) + + def generic_visit(self, node): + raise Exception('No visit_{} method'.format(type(node).__name__)) + + +class Interpreter(NodeVisitor): + def __init__(self, parser): + self.parser = parser + + def visit_BinOp(self, node): + if node.op.ttype == PLUS: + return self.visit(node.left) + self.visit(node.right) + elif node.op.ttype == MINUS: + return self.visit(node.left) - self.visit(node.right) + elif node.op.ttype == MUL: + return self.visit(node.left) * self.visit(node.right) + elif node.op.ttype == DIV: + return self.visit(node.left) / self.visit(node.right) + + def visit_Num(self, node): + return node.value + + def visit_UnaryOp(self, node): + op = node.op.ttype + if op == PLUS: + return +self.visit(node.expr) + elif op == MINUS: + return -self.visit(node.expr) + + def interpret(self): + tree = self.parser.parse() + if tree is None: + return '' + return self.visit(tree) + + +def main(): + while True: + try: + try: + text = raw_input('spi> ') + except NameError: # Python3 + text = input('spi> ') + except EOFError: + break + if not text: + continue + + lexer = Lexer(text) + parser = Parser(lexer) + interpreter = Interpreter(parser) + result = interpreter.interpret() + print(result) + + +if __name__ == '__main__': + main() diff --git a/part8/python/test_interpreter.py b/part8/python/test_interpreter.py new file mode 100644 index 0000000..e4e2868 --- /dev/null +++ b/part8/python/test_interpreter.py 
@@ -0,0 +1,142 @@ +import unittest + + +class LexerTestCase(unittest.TestCase): + def makeLexer(self, text): + from spi import Lexer + lexer = Lexer(text) + return lexer + + def test_lexer_integer(self): + from spi import INTEGER + lexer = self.makeLexer('234') + token = lexer.get_next_token() + self.assertEqual(token.ttype, INTEGER) + self.assertEqual(token.value, 234) + + def test_lexer_mul(self): + from spi import MUL + lexer = self.makeLexer('*') + token = lexer.get_next_token() + self.assertEqual(token.ttype, MUL) + self.assertEqual(token.value, '*') + + def test_lexer_div(self): + from spi import DIV + lexer = self.makeLexer(' / ') + token = lexer.get_next_token() + self.assertEqual(token.ttype, DIV) + self.assertEqual(token.value, '/') + + def test_lexer_plus(self): + from spi import PLUS + lexer = self.makeLexer('+') + token = lexer.get_next_token() + self.assertEqual(token.ttype, PLUS) + self.assertEqual(token.value, '+') + + def test_lexer_minus(self): + from spi import MINUS + lexer = self.makeLexer('-') + token = lexer.get_next_token() + self.assertEqual(token.ttype, MINUS) + self.assertEqual(token.value, '-') + + def test_lexer_lparen(self): + from spi import LPAREN + lexer = self.makeLexer('(') + token = lexer.get_next_token() + self.assertEqual(token.ttype, LPAREN) + self.assertEqual(token.value, '(') + + def test_lexer_rparen(self): + from spi import RPAREN + lexer = self.makeLexer(')') + token = lexer.get_next_token() + self.assertEqual(token.ttype, RPAREN) + self.assertEqual(token.value, ')') + + +class InterpreterTestCase(unittest.TestCase): + def makeInterpreter(self, text): + from spi import Lexer, Parser, Interpreter + lexer = Lexer(text) + parser = Parser(lexer) + interpreter = Interpreter(parser) + return interpreter + + def test_expression1(self): + interpreter = self.makeInterpreter('3') + result = interpreter.interpret() + self.assertEqual(result, 3) + + def test_expression2(self): + interpreter = self.makeInterpreter('2 + 7 * 4') + 
result = interpreter.interpret() + self.assertEqual(result, 30) + + def test_expression3(self): + interpreter = self.makeInterpreter('7 - 8 / 4') + result = interpreter.interpret() + self.assertEqual(result, 5) + + def test_expression4(self): + interpreter = self.makeInterpreter('14 + 2 * 3 - 6 / 2') + result = interpreter.interpret() + self.assertEqual(result, 17) + + def test_expression5(self): + interpreter = self.makeInterpreter('7 + 3 * (10 / (12 / (3 + 1) - 1))') + result = interpreter.interpret() + self.assertEqual(result, 22) + + def test_expression6(self): + interpreter = self.makeInterpreter( + '7 + 3 * (10 / (12 / (3 + 1) - 1)) / (2 + 3) - 5 - 3 + (8)' + ) + result = interpreter.interpret() + self.assertEqual(result, 10) + + def test_expression7(self): + interpreter = self.makeInterpreter('7 + (((3 + 2)))') + result = interpreter.interpret() + self.assertEqual(result, 12) + + def test_expression8(self): + interpreter = self.makeInterpreter('- 3') + result = interpreter.interpret() + self.assertEqual(result, -3) + + def test_expression9(self): + interpreter = self.makeInterpreter('+ 3') + result = interpreter.interpret() + self.assertEqual(result, 3) + + def test_expression10(self): + interpreter = self.makeInterpreter('5 - - - + - 3') + result = interpreter.interpret() + self.assertEqual(result, 8) + + def test_expression11(self): + interpreter = self.makeInterpreter('5 - - - + - (3 + 4) - +2') + result = interpreter.interpret() + self.assertEqual(result, 10) + + def test_no_expression(self): + interpreter = self.makeInterpreter(' ') + result = interpreter.interpret() + self.assertEqual(result, '') + + def test_expression_invalid_syntax1(self): + interpreter = self.makeInterpreter('10 *') + with self.assertRaises(Exception): + interpreter.interpret() + + def test_expression_invalid_syntax2(self): + interpreter = self.makeInterpreter('1 (1 + 2)') + with self.assertRaises(Exception): + interpreter.interpret() + + +if __name__ == '__main__': + 
unittest.main() diff --git a/part8/python/testunary.pas b/part8/python/testunary.pas new file mode 100644 index 0000000..8d6a9a2 --- /dev/null +++ b/part8/python/testunary.pas @@ -0,0 +1,10 @@ +{ Install Free Pascal: http://www.freepascal.org/ } +{ Compile and run: $ fpc testunary.pas && ./testunary } +program testunary; + +begin + writeln('Expected -3. Got ', - 3); + writeln('Expected 3. Got ', + 3); + writeln('Expected 8. Got ', 5 - - - + - 3); + writeln('Expected 10. Got ', 5 - - - + - (3 + 4) - +2); +end. diff --git a/part9/python/assignments.txt b/part9/python/assignments.txt new file mode 100644 index 0000000..ce120a7 --- /dev/null +++ b/part9/python/assignments.txt @@ -0,0 +1,11 @@ +BEGIN + + BEGIN + number := 2; + a := number; + b := 10 * a + 10 * number / 4; + c := a - - b + END; + + x := 11; +END. diff --git a/part9/python/genastdot.py b/part9/python/genastdot.py new file mode 100644 index 0000000..019246d --- /dev/null +++ b/part9/python/genastdot.py @@ -0,0 +1,118 @@ +############################################################################### +# AST visualizer - generates a DOT file for Graphviz. 
# +# # +# To generate an image from the DOT file run $ dot -Tpng -o ast.png ast.dot # +# # +############################################################################### +import argparse +import textwrap + +from spi import Lexer, Parser, NodeVisitor + + +class ASTVisualizer(NodeVisitor): + def __init__(self, parser): + self.parser = parser + self.ncount = 1 + self.dot_header = [textwrap.dedent("""\ + digraph astgraph { + node [shape=circle, fontsize=12, fontname="Courier", height=.1]; + ranksep=.3; + edge [arrowsize=.5] + + """)] + self.dot_body = [] + self.dot_footer = ['}'] + + def visit_Num(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.token.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + def visit_BinOp(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.op.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + self.visit(node.left) + self.visit(node.right) + + for child_node in (node.left, node.right): + s = ' node{} -> node{}\n'.format(node._num, child_node._num) + self.dot_body.append(s) + + def visit_UnaryOp(self, node): + s = ' node{} [label="unary {}"]\n'.format(self.ncount, node.op.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + self.visit(node.expr) + s = ' node{} -> node{}\n'.format(node._num, node.expr._num) + self.dot_body.append(s) + + def visit_Compound(self, node): + s = ' node{} [label="Compound"]\n'.format(self.ncount) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + for child in node.children: + self.visit(child) + s = ' node{} -> node{}\n'.format(node._num, child._num) + self.dot_body.append(s) + + def visit_Assign(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.op.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + self.visit(node.left) + self.visit(node.right) + + for child_node in (node.left, node.right): + s = ' node{} -> 
node{}\n'.format(node._num, child_node._num) + self.dot_body.append(s) + + def visit_Var(self, node): + s = ' node{} [label="{}"]\n'.format(self.ncount, node.value) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + def visit_NoOp(self, node): + s = ' node{} [label="NoOp"]\n'.format(self.ncount) + self.dot_body.append(s) + node._num = self.ncount + self.ncount += 1 + + def gendot(self): + tree = self.parser.parse() + self.visit(tree) + return ''.join(self.dot_header + self.dot_body + self.dot_footer) + + +def main(): + argparser = argparse.ArgumentParser( + description='Generate an AST DOT file.' + ) + argparser.add_argument( + 'fname', + help='Pascal source file' + ) + args = argparser.parse_args() + fname = args.fname + text = open(fname, 'r').read() + + lexer = Lexer(text) + parser = Parser(lexer) + viz = ASTVisualizer(parser) + content = viz.gendot() + print(content) + + +if __name__ == '__main__': + main() diff --git a/part9/python/spi.py b/part9/python/spi.py new file mode 100644 index 0000000..924825f --- /dev/null +++ b/part9/python/spi.py @@ -0,0 +1,480 @@ +""" SPI - Simple Pascal Interpreter. Part 9.""" + +############################################################################### +# # +# LEXER # +# # +############################################################################### + +# Token types +# +# EOF (end-of-file) token is used to indicate that +# there is no more input left for lexical analysis +(INTEGER, PLUS, MINUS, MUL, DIV, LPAREN, RPAREN, ID, ASSIGN, + BEGIN, END, SEMI, DOT, EOF) = ( + 'INTEGER', 'PLUS', 'MINUS', 'MUL', 'DIV', '(', ')', 'ID', 'ASSIGN', + 'BEGIN', 'END', 'SEMI', 'DOT', 'EOF' +) + + +class Token(object): + def __init__(self, ttype, value): + self.ttype = ttype + self.value = value + + def __str__(self): + """String representation of the class instance. 
+ + Examples: + Token(INTEGER, 3) + Token(PLUS, '+') + Token(MUL, '*') + """ + return 'Token({ttype}, {value})'.format( + ttype=self.ttype, + value=repr(self.value) + ) + + def __repr__(self): + return self.__str__() + + +RESERVED_KEYWORDS = { + 'BEGIN': Token('BEGIN', 'BEGIN'), + 'END': Token('END', 'END'), +} + + +class Lexer(object): + def __init__(self, text): + # client string input, e.g. "4 + 2 * 3 - 6 / 2" + self.text = text + # self.pos is an index into self.text + self.pos = 0 + self.current_char = self.text[self.pos] + + def error(self): + raise Exception('Invalid character') + + def advance(self): + """Advance the `pos` pointer and set the `current_char` variable.""" + self.pos += 1 + if self.pos > len(self.text) - 1: + self.current_char = None # Indicates end of input + else: + self.current_char = self.text[self.pos] + + def peek(self): + peek_pos = self.pos + 1 + if peek_pos > len(self.text) - 1: + return None + else: + return self.text[peek_pos] + + def skip_whitespace(self): + while self.current_char is not None and self.current_char.isspace(): + self.advance() + + def integer(self): + """Return a (multidigit) integer consumed from the input.""" + result = '' + while self.current_char is not None and self.current_char.isdigit(): + result += self.current_char + self.advance() + return int(result) + + def _id(self): + """Handle identifiers and reserved keywords""" + result = '' + while self.current_char is not None and self.current_char.isalnum(): + result += self.current_char + self.advance() + + token = RESERVED_KEYWORDS.get(result, Token(ID, result)) + return token + + def get_next_token(self): + """Lexical analyzer (also known as scanner or tokenizer) + + This method is responsible for breaking a sentence + apart into tokens. One token at a time. 
+ """ + while self.current_char is not None: + + if self.current_char.isspace(): + self.skip_whitespace() + continue + + if self.current_char.isalpha(): + return self._id() + + if self.current_char.isdigit(): + return Token(INTEGER, self.integer()) + + if self.current_char == ':' and self.peek() == '=': + self.advance() + self.advance() + return Token(ASSIGN, ':=') + + if self.current_char == ';': + self.advance() + return Token(SEMI, ';') + + if self.current_char == '+': + self.advance() + return Token(PLUS, '+') + + if self.current_char == '-': + self.advance() + return Token(MINUS, '-') + + if self.current_char == '*': + self.advance() + return Token(MUL, '*') + + if self.current_char == '/': + self.advance() + return Token(DIV, '/') + + if self.current_char == '(': + self.advance() + return Token(LPAREN, '(') + + if self.current_char == ')': + self.advance() + return Token(RPAREN, ')') + + if self.current_char == '.': + self.advance() + return Token(DOT, '.') + + self.error() + + return Token(EOF, None) + + +############################################################################### +# # +# PARSER # +# # +############################################################################### + +class AST(object): + pass + + +class BinOp(AST): + def __init__(self, left, op, right): + self.left = left + self.token = self.op = op + self.right = right + + +class Num(AST): + def __init__(self, token): + self.token = token + self.value = token.value + + +class UnaryOp(AST): + def __init__(self, op, expr): + self.token = self.op = op + self.expr = expr + + +class Compound(AST): + """Represents a 'BEGIN ... 
END' block""" + def __init__(self): + self.children = [] + + +class Assign(AST): + def __init__(self, left, op, right): + self.left = left + self.token = self.op = op + self.right = right + + +class Var(AST): + """The Var node is constructed out of ID token.""" + def __init__(self, token): + self.token = token + self.value = token.value + + +class NoOp(AST): + pass + + +class Parser(object): + def __init__(self, lexer): + self.lexer = lexer + # set current token to the first token taken from the input + self.current_token = self.lexer.get_next_token() + + def error(self): + raise Exception('Invalid syntax') + + def eat(self, token_type): + # compare the current token type with the passed token + # type and if they match then "eat" the current token + # and assign the next token to the self.current_token, + # otherwise raise an exception. + if self.current_token.ttype == token_type: + self.current_token = self.lexer.get_next_token() + else: + self.error() + + def program(self): + """program : compound_statement DOT""" + node = self.compound_statement() + self.eat(DOT) + return node + + def compound_statement(self): + """ + compound_statement: BEGIN statement_list END + """ + self.eat(BEGIN) + nodes = self.statement_list() + self.eat(END) + + root = Compound() + for node in nodes: + root.children.append(node) + + return root + + def statement_list(self): + """ + statement_list : statement + | statement SEMI statement_list + """ + node = self.statement() + + results = [node] + + while self.current_token.ttype == SEMI: + self.eat(SEMI) + results.append(self.statement()) + + if self.current_token.ttype == ID: + self.error() + + return results + + def statement(self): + """ + statement : compound_statement + | assignment_statement + | empty + """ + if self.current_token.ttype == BEGIN: + node = self.compound_statement() + elif self.current_token.ttype == ID: + node = self.assignment_statement() + else: + node = self.empty() + return node + + def 
assignment_statement(self): + """ + assignment_statement : variable ASSIGN expr + """ + left = self.variable() + token = self.current_token + self.eat(ASSIGN) + right = self.expr() + node = Assign(left, token, right) + return node + + def variable(self): + """ + variable : ID + """ + node = Var(self.current_token) + self.eat(ID) + return node + + def empty(self): + """An empty production""" + return NoOp() + + def expr(self): + """ + expr : term ((PLUS | MINUS) term)* + """ + node = self.term() + + while self.current_token.ttype in (PLUS, MINUS): + token = self.current_token + if token.ttype == PLUS: + self.eat(PLUS) + elif token.ttype == MINUS: + self.eat(MINUS) + + node = BinOp(left=node, op=token, right=self.term()) + + return node + + def term(self): + """term : factor ((MUL | DIV) factor)*""" + node = self.factor() + + while self.current_token.ttype in (MUL, DIV): + token = self.current_token + if token.ttype == MUL: + self.eat(MUL) + elif token.ttype == DIV: + self.eat(DIV) + + node = BinOp(left=node, op=token, right=self.factor()) + + return node + + def factor(self): + """factor : PLUS factor + | MINUS factor + | INTEGER + | LPAREN expr RPAREN + | variable + """ + token = self.current_token + if token.ttype == PLUS: + self.eat(PLUS) + node = UnaryOp(token, self.factor()) + return node + elif token.ttype == MINUS: + self.eat(MINUS) + node = UnaryOp(token, self.factor()) + return node + elif token.ttype == INTEGER: + self.eat(INTEGER) + return Num(token) + elif token.ttype == LPAREN: + self.eat(LPAREN) + node = self.expr() + self.eat(RPAREN) + return node + else: + node = self.variable() + return node + + def parse(self): + """ + program : compound_statement DOT + + compound_statement : BEGIN statement_list END + + statement_list : statement + | statement SEMI statement_list + + statement : compound_statement + | assignment_statement + | empty + + assignment_statement : variable ASSIGN expr + + empty : + + expr: term ((PLUS | MINUS) term)* + + term: factor 
((MUL | DIV) factor)* + + factor : PLUS factor + | MINUS factor + | INTEGER + | LPAREN expr RPAREN + | variable + + variable: ID + """ + node = self.program() + if self.current_token.ttype != EOF: + self.error() + + return node + + +############################################################################### +# # +# INTERPRETER # +# # +############################################################################### + +class NodeVisitor(object): + def visit(self, node): + method_name = 'visit_' + type(node).__name__ + visitor = getattr(self, method_name, self.generic_visit) + return visitor(node) + + def generic_visit(self, node): + raise Exception('No visit_{} method'.format(type(node).__name__)) + + +class Interpreter(NodeVisitor): + + GLOBAL_SCOPE = {} + + def __init__(self, parser): + self.parser = parser + + def visit_BinOp(self, node): + if node.op.ttype == PLUS: + return self.visit(node.left) + self.visit(node.right) + elif node.op.ttype == MINUS: + return self.visit(node.left) - self.visit(node.right) + elif node.op.ttype == MUL: + return self.visit(node.left) * self.visit(node.right) + elif node.op.ttype == DIV: + return self.visit(node.left) / self.visit(node.right) + + def visit_Num(self, node): + return node.value + + def visit_UnaryOp(self, node): + op = node.op.ttype + if op == PLUS: + return +self.visit(node.expr) + elif op == MINUS: + return -self.visit(node.expr) + + def visit_Compound(self, node): + for child in node.children: + self.visit(child) + + def visit_Assign(self, node): + var_name = node.left.value + self.GLOBAL_SCOPE[var_name] = self.visit(node.right) + + def visit_Var(self, node): + var_name = node.value + val = self.GLOBAL_SCOPE.get(var_name) + if val is None: + raise NameError(repr(var_name)) + else: + return val + + def visit_NoOp(self, node): + pass + + def interpret(self): + tree = self.parser.parse() + if tree is None: + return '' + return self.visit(tree) + + +def main(): + import sys + text = open(sys.argv[1], 'r').read() + + 
lexer = Lexer(text) + parser = Parser(lexer) + interpreter = Interpreter(parser) + result = interpreter.interpret() + print(interpreter.GLOBAL_SCOPE) + + +if __name__ == '__main__': + main() diff --git a/part9/python/test_interpreter.py b/part9/python/test_interpreter.py new file mode 100644 index 0000000..28f5371 --- /dev/null +++ b/part9/python/test_interpreter.py @@ -0,0 +1,140 @@ +import unittest + + +class LexerTestCase(unittest.TestCase): + def makeLexer(self, text): + from spi import Lexer + lexer = Lexer(text) + return lexer + + def test_lexer_integer(self): + from spi import INTEGER + lexer = self.makeLexer('234') + token = lexer.get_next_token() + self.assertEqual(token.ttype, INTEGER) + self.assertEqual(token.value, 234) + + def test_lexer_mul(self): + from spi import MUL + lexer = self.makeLexer('*') + token = lexer.get_next_token() + self.assertEqual(token.ttype, MUL) + self.assertEqual(token.value, '*') + + def test_lexer_div(self): + from spi import DIV + lexer = self.makeLexer(' / ') + token = lexer.get_next_token() + self.assertEqual(token.ttype, DIV) + self.assertEqual(token.value, '/') + + def test_lexer_plus(self): + from spi import PLUS + lexer = self.makeLexer('+') + token = lexer.get_next_token() + self.assertEqual(token.ttype, PLUS) + self.assertEqual(token.value, '+') + + def test_lexer_minus(self): + from spi import MINUS + lexer = self.makeLexer('-') + token = lexer.get_next_token() + self.assertEqual(token.ttype, MINUS) + self.assertEqual(token.value, '-') + + def test_lexer_lparen(self): + from spi import LPAREN + lexer = self.makeLexer('(') + token = lexer.get_next_token() + self.assertEqual(token.ttype, LPAREN) + self.assertEqual(token.value, '(') + + def test_lexer_rparen(self): + from spi import RPAREN + lexer = self.makeLexer(')') + token = lexer.get_next_token() + self.assertEqual(token.ttype, RPAREN) + self.assertEqual(token.value, ')') + + def test_lexer_new_tokens(self): + from spi import ASSIGN, DOT, ID, SEMI, BEGIN, END + 
records = ( + (':=', ASSIGN, ':='), + ('.', DOT, '.'), + ('number', ID, 'number'), + (';', SEMI, ';'), + ('BEGIN', BEGIN, 'BEGIN'), + ('END', END, 'END'), + ) + for text, tok_type, tok_val in records: + lexer = self.makeLexer(text) + token = lexer.get_next_token() + self.assertEqual(token.ttype, tok_type) + self.assertEqual(token.value, tok_val) + + +class InterpreterTestCase(unittest.TestCase): + def makeInterpreter(self, text): + from spi import Lexer, Parser, Interpreter + lexer = Lexer(text) + parser = Parser(lexer) + interpreter = Interpreter(parser) + return interpreter + + def test_arithmetic_expressions(self): + for expr, result in ( + ('3', 3), + ('2 + 7 * 4', 30), + ('7 - 8 / 4', 5), + ('14 + 2 * 3 - 6 / 2', 17), + ('7 + 3 * (10 / (12 / (3 + 1) - 1))', 22), + ('7 + 3 * (10 / (12 / (3 + 1) - 1)) / (2 + 3) - 5 - 3 + (8)', 10), + ('7 + (((3 + 2)))', 12), + ('- 3', -3), + ('+ 3', 3), + ('5 - - - + - 3', 8), + ('5 - - - + - (3 + 4) - +2', 10), + ): + interpreter = self.makeInterpreter('BEGIN a := %s END.' % expr) + interpreter.interpret() + globals = interpreter.GLOBAL_SCOPE + self.assertEqual(globals['a'], result) + + def test_expression_invalid_syntax1(self): + interpreter = self.makeInterpreter('BEGIN a := 10 * ; END.') + with self.assertRaises(Exception): + interpreter.interpret() + + def test_expression_invalid_syntax2(self): + interpreter = self.makeInterpreter('BEGIN a := 1 (1 + 2); END.') + with self.assertRaises(Exception): + interpreter.interpret() + + def test_statements(self): + text = """\ +BEGIN + + BEGIN + number := 2; + a := number; + b := 10 * a + 10 * number / 4; + c := a - - b + END; + + x := 11; +END. 
+""" + interpreter = self.makeInterpreter(text) + interpreter.interpret() + + globals = interpreter.GLOBAL_SCOPE + self.assertEqual(len(globals.keys()), 5) + self.assertEqual(globals['number'], 2) + self.assertEqual(globals['a'], 2) + self.assertEqual(globals['b'], 25) + self.assertEqual(globals['c'], 27) + self.assertEqual(globals['x'], 11) + + +if __name__ == '__main__': + unittest.main()