-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
187 lines (169 loc) · 7.04 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import imaplib
import email
from email.header import decode_header
import webbrowser
import os
import pip
# SECURITY: credentials do not belong in source control. The literals below
# are kept only as fallbacks so existing setups keep working; set the
# POLYBOT_USERNAME / POLYBOT_PASSWORD environment variables to override them,
# and rotate the committed password.
# NOTE(review): the address literal looks like a redaction placeholder rather
# than a real mailbox name - confirm before relying on the fallback.
username = os.environ.get("POLYBOT_USERNAME", "[email protected]")
password = os.environ.get("POLYBOT_PASSWORD", "thePolyBot2000")
# Accumulates the spell-check notes; handed to advanceCorrection.py at the end.
log = ""
# Raw "From" header of the fetched email; stays "" when nothing was read, which
# makes the rest of the script skip all processing.
sender = ""
# Credit for Gmail Parser: https://www.thepythoncode.com/article/reading-emails-in-python
def clean(text: str) -> str:
    """Return *text* with every non-alphanumeric character replaced by "_".

    The result is used as a folder name, so anything that is not a letter
    or digit (spaces, punctuation, path separators, ...) is neutralised.
    """
    return "".join(ch if ch.isalnum() else "_" for ch in text)
# Open an SSL connection to Gmail's IMAP endpoint and authenticate.
imap = imaplib.IMAP4_SSL("imap.gmail.com")
imap.login(username, password)
# select() reports a refused mailbox through its status instead of raising,
# so check it here; otherwise the int() below fails with an unrelated-looking
# ValueError on the server's error text.
status, messages = imap.select("INBOX")
if status != "OK":
    raise imaplib.IMAP4.error("could not select INBOX: %r" % (messages,))
# number of top emails to fetch
N = 1
# total number of emails in the mailbox (select() returns the count as bytes)
messages = int(messages[0])
# words of the text/plain body; filled in by the fetch loop that follows
resulting_message = []
# Fetch and parse the newest N messages. On success this leaves:
#   resulting_message - the text/plain body split on single spaces
#   sender            - the raw "From" header of the last parsed message
# On failure the connection is closed and sender stays "", so the rest of the
# script skips processing.
# NOTE(review): the indentation of this section was reconstructed. Confirm
# that close()/logout() belong to the error path only and that sender is meant
# to be recorded for every parsed message (not just HTML ones).
try:
    for i in range(messages, messages - N, -1):
        # fetch the email message by its sequence number
        res, fetched = imap.fetch(str(i), "(RFC822)")
        for response in fetched:
            if not isinstance(response, tuple):
                continue
            # parse the raw bytes into a message object
            msg = email.message_from_bytes(response[1])
            # decode the subject; decode_header() reports charset None for
            # un-encoded chunks, so fall back to UTF-8 instead of decode(None)
            subject, encoding = decode_header(msg.get("Subject") or "")[0]
            if isinstance(subject, bytes):
                subject = subject.decode(encoding or "utf-8")
            # decode email sender
            From, encoding = decode_header(msg.get("From"))[0]
            if isinstance(From, bytes):
                From = From.decode(encoding or "utf-8")
            body = None
            if msg.is_multipart():
                # iterate over email parts
                for part in msg.walk():
                    content_type = part.get_content_type()
                    content_disposition = str(part.get("Content-Disposition"))
                    try:
                        body = part.get_payload(decode=True).decode(
                            part.get_content_charset() or "utf-8"
                        )
                    except (AttributeError, LookupError, UnicodeDecodeError):
                        # container parts have no payload (None) and binary
                        # parts are not text; never reuse an earlier body
                        body = None
                        continue
                    if content_type == "text/plain" and "attachment" not in content_disposition:
                        # keep text/plain bodies and skip attachments
                        resulting_message = body.split(' ')
            else:
                content_type = msg.get_content_type()
                body = msg.get_payload(decode=True).decode(
                    msg.get_content_charset() or "utf-8"
                )
                if content_type == "text/plain":
                    resulting_message = body.split(' ')
            if content_type == "text/html" and body is not None:
                # dump the HTML body to <subject>/index.html
                # an empty subject would otherwise lead to os.mkdir("")
                folder_name = clean(subject) or "no_subject"
                if not os.path.isdir(folder_name):
                    os.mkdir(folder_name)
                filepath = os.path.join(folder_name, "index.html")
                with open(filepath, "w") as html_file:
                    html_file.write(body)
            sender = msg['From']
except Exception as exc:
    # Best effort: the usual cause is an empty inbox (fetching message "0"
    # fails), but report the real exception so other failures are not hidden.
    print("Error: No new emails to read in...")
    print("Cause: " + repr(exc))
    # close the connection and logout
    imap.close()
    imap.logout()
# Post-process the fetched email: tokenise and spell-check the body, split the
# sender into name and address, delete the email from the inbox, then hand the
# results to advanceCorrection.py through two temp files and argv.
# NOTE(review): the indentation of this section was reconstructed; everything
# here is assumed to run only when an email was actually read.
if sender != "":
    # Line breaks become spaces. This punctuation-preserving version is the
    # one written to tempGramParsed.txt.
    resulting_message = [chunk.replace("\r\n", " ") for chunk in resulting_message]
    grammar_msg = resulting_message
    # For the word list, commas and periods act as separators too.
    resulting_message = [
        chunk.replace(",", " ").replace(".", " ") for chunk in resulting_message
    ]
    # Split every chunk on spaces and drop the empty strings that runs of
    # spaces leave behind.
    parsed_msg = []
    for chunk in resulting_message:
        parsed_msg.extend(word for word in chunk.split(" ") if word)

    # Basic spell checking on all-lowercase words only; anything containing an
    # uppercase letter, or no letters at all, is left untouched.
    from autocorrect import Speller
    spell = Speller(lang='en')
    # Words whose speller suggestion is always discarded.
    whyAreTheseCorrections = ['iterated', 'codebase', 'rpi']
    for index, word in enumerate(parsed_msg):
        if not word.islower() or word in whyAreTheseCorrections:
            continue
        suggestion = spell(word)
        if suggestion != word:
            parsed_msg[index] = suggestion
            note = "Basic Spelling Check: Possible Correction: " + word + " to " + suggestion
            print(note)
            log += note + "\n"

    # Parse sender information: the last token containing "@" is the address
    # (angle brackets stripped); all other tokens form the display name.
    sender = sender.split(" ")
    nameOfSender = " ".join(token for token in sender if "@" not in token).rstrip()
    emailOfSender = ""
    for token in sender:
        if "@" in token:
            emailOfSender = token
    emailOfSender = emailOfSender.replace('<', '').replace('>', '')

    # To ease debugging...
    skipDeletion = False
    if not skipDeletion:
        # Delete the newest email in the inbox. The message is not re-fetched
        # first: its subject was only decoded and never used, and that decode
        # could abort the script on a non-UTF-8 subject.
        # NOTE(review): this removes whatever is newest *now*; if another mail
        # arrived since the fetch, that one is deleted instead - confirm
        # whether the processed sequence number should be used.
        status, messages = imap.search(None, "ALL")
        # convert the result to a list of message sequence numbers
        messages = messages[0].split()
        if messages:
            mail = messages[-1]
            # mark the mail as deleted
            imap.store(mail, "+FLAGS", "\\Deleted")
            # permanently remove mails that are marked as deleted
            # from the selected mailbox (in this case, INBOX)
            imap.expunge()
    # close the connection and logout
    imap.close()
    imap.logout()

    from subprocess import call
    import time
    # The with-blocks close the files; no explicit close() is needed.
    with open("tempParsed.txt", 'w') as f:
        f.write('#'.join(parsed_msg))
    time.sleep(.5)
    with open("tempGramParsed.txt", 'w') as f:
        f.write('#'.join(grammar_msg))
    time.sleep(.5)
    call(["python", "advanceCorrection.py", nameOfSender, emailOfSender, log])