Skip to content

Commit

Permalink
feat: parse body data while fetching ERP
Browse files Browse the repository at this point in the history
  • Loading branch information
proffapt committed Nov 27, 2024
1 parent afd146e commit ff07a80
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 32 deletions.
16 changes: 6 additions & 10 deletions mftp/mail.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import re
import logging
from email import encoders
from bs4 import BeautifulSoup as bs
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from endpoints import NOTICE_CONTENT_URL
from email.mime.multipart import MIMEMultipart
from env import FROM_EMAIL, FROM_EMAIL_PASS, BCC_EMAIL_S

Expand Down Expand Up @@ -64,21 +62,22 @@ def send(mails, smtp, gmail_api, notice_db):
break


def format_notice(notices, session):
def format_notice(notices):
print('[FORMATTING MAILS]', flush=True)

formatted_notifs = []
for notice in reversed(notices):
id_, year = notice['UID'].split('_')
id_ = notice['UID'].split('_')[0]

message = MIMEMultipart()
message["Subject"] = f"#{id_} | {notice['Type']} | {notice['Subject']} | {notice['Company']}"
message["From"] = f'MFTP < {FROM_EMAIL} >'
message["Bcc"] = ", ".join(BCC_EMAIL_S)

try:
body = parseBody(session, year, id_)
body = parse_body(notice['BodyData'])
notice['Body'] = body
notice.pop('BodyData', None) # Remove unparsed body data
except Exception as e:
logging.error(f" Failed to parse mail body ~ {str(e)}")
break
Expand Down Expand Up @@ -121,11 +120,8 @@ def format_notice(notices, session):
return formatted_notifs


def parseBody(session, year, id_):
content = session.get(NOTICE_CONTENT_URL.format(year, id_))
content_html = bs(content.text, 'html.parser')
content_html_div = bs.find_all(content_html, 'div', {'id': 'printableArea'})[0]
body = content_html_div.decode_contents(formatter='html')
def parse_body(body_data):
    """Serialize a notice's body element to an HTML string.

    body_data: a BeautifulSoup tag (the notice page's printable-area
        div, as produced by notice.parse_body_data).
    Returns the tag's inner HTML, rendered with the 'html' formatter.
    """
    return str(body_data.decode_contents(formatter='html'))

Expand Down
4 changes: 2 additions & 2 deletions mftp/mftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@
notices = notice.fetch(headers, session, ssoToken, notice_db)
if notices:
if args.ntfy:
notifications = ntfy.format_notice(notices, session)
notifications = ntfy.format_notice(notices)
if notifications:
ntfy.send(notifications, notice_db)
else:
mails = mail.format_notice(notices, session)
mails = mail.format_notice(notices)
if mails:
mail.send(mails, args.smtp, args.gmail_api, notice_db)
else:
Expand Down
27 changes: 21 additions & 6 deletions mftp/notice.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup as bs
from endpoints import TPSTUDENT_URL, NOTICEBOARD_URL, NOTICES_URL, ATTACHMENT_URL
from endpoints import TPSTUDENT_URL, NOTICEBOARD_URL, NOTICES_URL, ATTACHMENT_URL, NOTICE_CONTENT_URL


LAST_NOTICES_CHECK_COUNT = 30
Expand Down Expand Up @@ -40,16 +40,23 @@ def fetch(headers, session, ssoToken, notice_db):
'Company': row.find('cell[4]').text.strip(),
}

# Handling Body
try:
body_data = parse_body_data(session, year, id_)
notice['BodyData'] = body_data
except Exception as e:
logging.error(f" Failed to parse mail body ~ {str(e)}")
break

# Handling attachment
try:
attachment = parseAttachment(session, year, id_)
attachment = parse_attachment(session, year, id_)
if attachment:
notice['Attachment'] = attachment
except Exception as e:
logging.error(f" Failed to parse mail attachment ~ {str(e)}")
break

if attachment:
notice['Attachment'] = attachment

latest_notices.append(notice)

# This is done to reduce DB queries
Expand All @@ -64,7 +71,15 @@ def fetch(headers, session, ssoToken, notice_db):
return new_notices


def parseAttachment(session, year, id_):
def parse_body_data(session, year, id_):
    """Fetch a notice's content page and return its printable-area div.

    session: authenticated requests session for the ERP.
    year, id_: notice identifiers substituted into NOTICE_CONTENT_URL.
    Returns the BeautifulSoup tag for div#printableArea.
    Raises IndexError when the page has no such div (the caller in
    fetch() catches any exception, logs it, and aborts the batch).
    """
    response = session.get(NOTICE_CONTENT_URL.format(year, id_))
    content_html = bs(response.text, 'html.parser')
    # Idiomatic instance call; the original used the unbound form
    # bs.find_all(content_html, ...), which works but obscures intent.
    body_data = content_html.find_all('div', {'id': 'printableArea'})[0]

    return body_data


def parse_attachment(session, year, id_):
stream = session.get(ATTACHMENT_URL.format(year, id_), stream=True)
attachment = b''
for chunk in stream.iter_content(4096):
Expand Down
23 changes: 9 additions & 14 deletions mftp/ntfy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import logging
import requests
from urllib.parse import quote
from bs4 import BeautifulSoup as bs
from endpoints import NOTICE_CONTENT_URL
from env import NTFY_BASE_URL, NTFY_TOPICS, NTFY_TOPIC_ICON, NTFY_USER, NTFY_PASS, HEIMDALL_COOKIE


Expand Down Expand Up @@ -43,17 +41,18 @@ def ntfy_emoji(subject):
return emoji


def format_notice(notices, session):
def format_notice(notices):
print('[FORMATTING NOTIFICATIONS]', flush=True)

formatted_notifs = []
for notice in reversed(notices):
id_, year = notice['UID'].split('_')
id_ = notice['UID'].split('_')[0]

try:
data = parseBody(notice, session, year, id_)
data = parse_body(notice['BodyData'], notice['Time'])
notice['Body'] = data
body, links = parseLinks(data)
notice.pop('BodyData', None) # Remove unparsed body data
body, links = parse_links(data)
body += '''
--------------
Expand Down Expand Up @@ -208,21 +207,17 @@ def delete_file(file_name):
return False


def parseBody(notice, session, year, id_):
content = session.get(NOTICE_CONTENT_URL.format(year, id_))
content_html = bs(content.text, 'html.parser')
content_html_div = bs.find_all(content_html, 'div', {'id': 'printableArea'})[0]

def parse_body(body_data, time):
    """Flatten a notice body into plain text for an ntfy notification.

    body_data: BeautifulSoup tag for the notice's printable area; the
        text of interest sits immediately after each <br> tag.
    time: timestamp string appended as the final segment of the body.
    Returns the stripped text lines (one per <br>, newline-terminated)
    followed by *time*.
    """
    # Collect the text that follows each <br> and join once, instead of
    # repeated += string concatenation (quadratic in the original).
    # NOTE(review): like the original, this assumes each <br> has a
    # string next_sibling; a trailing/doubled <br> (next_sibling None or
    # a Tag) raises here and is caught by the caller — TODO confirm.
    lines = [br.next_sibling.strip() + '\n' for br in body_data.find_all('br')]

    return ''.join(lines) + time


def parseLinks(data):
def parse_links(data):
body = data
links = re.findall(r'(https?://[^\s]+)', data)
action_template = "view, Link {}, {}"
Expand Down

0 comments on commit ff07a80

Please sign in to comment.