proof_parser.py
import nltk
from nltk.tokenize import word_tokenize

# word_tokenize needs the NLTK "punkt" tokenizer data; if it is missing,
# run nltk.download('punkt') once before calling parser().


def get_assumptions(statement):
    # Drop the leading "Let" and keep the rest of the assumption.
    return " ".join(statement.split(" ")[1:])


def parser(proof):
    # One statement per line of the proof.
    sent_token = proof.split("\n")
    variables = []
    sets = []
    assumptions = []
    statements = []
    question = ""
    for statement in sent_token:
        # Single uppercase letters are treated as set names,
        # single lowercase letters as variables.
        for word in statement.split(" "):
            if len(word) == 1:
                if word.isupper():
                    sets.append(word)
                elif word.islower():
                    variables.append(word)
        # Skip blank or one-character lines.
        if len(statement) < 2:
            continue
        # A "Question:" line carries the goal of the proof.
        if "Question:" in statement:
            question = statement.split("Question:")[1].strip()
        word_token = word_tokenize(statement)
        # Lines starting with "Let" are assumptions; lines starting with
        # "⇒" or "∴" are derived statements.
        if word_token and word_token[0] == "Let":
            assumptions.append(get_assumptions(statement).rstrip())
        elif statement[0] == "⇒" or statement[0] == "∴":
            statements.append(statement[1:].rstrip())
    # Deduplicate the collected symbols.
    variables = list(set(variables))
    sets = list(set(sets))
    return question, sets, variables, assumptions, statements
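

# A minimal usage sketch, assuming a proof written one statement per line with
# "Let" assumptions, "⇒"/"∴" derivation lines, and a "Question:" line; the
# sample proof below is hypothetical. Expected outputs are shown in the
# comments (set/variable order may vary, since duplicates are removed via set()).
if __name__ == "__main__":
    sample_proof = (
        "Let x be an element of A\n"
        "Let A ⊆ B\n"
        "⇒ x is an element of B\n"
        "∴ x ∈ B\n"
        "Question: Is x an element of B?"
    )
    question, sets, variables, assumptions, statements = parser(sample_proof)
    print(question)     # Is x an element of B?
    print(sets)         # e.g. ['A', 'B']
    print(variables)    # ['x']
    print(assumptions)  # ['x be an element of A', 'A ⊆ B']
    print(statements)   # [' x is an element of B', ' x ∈ B']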