Skip to content

Commit

Permalink
feat: parse body data while fetching ERP
Browse files Browse the repository at this point in the history
  • Loading branch information
proffapt committed Nov 27, 2024
1 parent afd146e commit ff07a80
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 32 deletions.
16 changes: 6 additions & 10 deletions mftp/mail.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import re
import logging
from email import encoders
from bs4 import BeautifulSoup as bs
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from endpoints import NOTICE_CONTENT_URL
from email.mime.multipart import MIMEMultipart
from env import FROM_EMAIL, FROM_EMAIL_PASS, BCC_EMAIL_S

Expand Down Expand Up @@ -64,21 +62,22 @@ def send(mails, smtp, gmail_api, notice_db):
break


def format_notice(notices, session):
def format_notice(notices):
print('[FORMATTING MAILS]', flush=True)

formatted_notifs = []
for notice in reversed(notices):
id_, year = notice['UID'].split('_')
id_ = notice['UID'].split('_')[0]

message = MIMEMultipart()
message["Subject"] = f"#{id_} | {notice['Type']} | {notice['Subject']} | {notice['Company']}"
message["From"] = f'MFTP < {FROM_EMAIL} >'
message["Bcc"] = ", ".join(BCC_EMAIL_S)

try:
body = parseBody(session, year, id_)
body = parse_body(notice['BodyData'])
notice['Body'] = body
notice.pop('BodyData', None) # Remove unparsed body data
except Exception as e:
logging.error(f" Failed to parse mail body ~ {str(e)}")
break
Expand Down Expand Up @@ -121,11 +120,8 @@ def format_notice(notices, session):
return formatted_notifs


def parseBody(session, year, id_):
content = session.get(NOTICE_CONTENT_URL.format(year, id_))
content_html = bs(content.text, 'html.parser')
content_html_div = bs.find_all(content_html, 'div', {'id': 'printableArea'})[0]
body = content_html_div.decode_contents(formatter='html')
def parse_body(body_data):
    """Serialize a notice's body element to an HTML string.

    body_data: a BeautifulSoup tag (the notice page's printable-area
        div, as produced by notice.parse_body_data).
    Returns the tag's inner HTML, rendered with the 'html' formatter.
    """
    return str(body_data.decode_contents(formatter='html'))

Expand Down
4 changes: 2 additions & 2 deletions mftp/mftp.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,11 @@
notices = notice.fetch(headers, session, ssoToken, notice_db)
if notices:
if args.ntfy:
notifications = ntfy.format_notice(notices, session)
notifications = ntfy.format_notice(notices)
if notifications:
ntfy.send(notifications, notice_db)
else:
mails = mail.format_notice(notices, session)
mails = mail.format_notice(notices)
if mails:
mail.send(mails, args.smtp, args.gmail_api, notice_db)
else:
Expand Down
27 changes: 21 additions & 6 deletions mftp/notice.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import xml.etree.ElementTree as ET
from bs4 import BeautifulSoup as bs
from endpoints import TPSTUDENT_URL, NOTICEBOARD_URL, NOTICES_URL, ATTACHMENT_URL
from endpoints import TPSTUDENT_URL, NOTICEBOARD_URL, NOTICES_URL, ATTACHMENT_URL, NOTICE_CONTENT_URL


LAST_NOTICES_CHECK_COUNT = 30
Expand Down Expand Up @@ -40,16 +40,23 @@ def fetch(headers, session, ssoToken, notice_db):
'Company': row.find('cell[4]').text.strip(),
}

# Handling Body
try:
body_data = parse_body_data(session, year, id_)
notice['BodyData'] = body_data
except Exception as e:
logging.error(f" Failed to parse mail body ~ {str(e)}")
break

# Handling attachment
try:
attachment = parseAttachment(session, year, id_)
attachment = parse_attachment(session, year, id_)
if attachment:
notice['Attachment'] = attachment
except Exception as e:
logging.error(f" Failed to parse mail attachment ~ {str(e)}")
break

if attachment:
notice['Attachment'] = attachment

latest_notices.append(notice)

# This is done to reduce DB queries
Expand All @@ -64,7 +71,15 @@ def fetch(headers, session, ssoToken, notice_db):
return new_notices


def parseAttachment(session, year, id_):
def parse_body_data(session, year, id_):
    """Fetch a notice's content page and return its printable-area div.

    session: authenticated requests session for the ERP.
    year, id_: notice identifiers substituted into NOTICE_CONTENT_URL.
    Returns the BeautifulSoup tag for div#printableArea.
    Raises IndexError when the page has no such div (the caller in
    fetch() catches any exception, logs it, and aborts the batch).
    """
    response = session.get(NOTICE_CONTENT_URL.format(year, id_))
    content_html = bs(response.text, 'html.parser')
    # Idiomatic instance call; the original used the unbound form
    # bs.find_all(content_html, ...), which works but obscures intent.
    body_data = content_html.find_all('div', {'id': 'printableArea'})[0]

    return body_data


def parse_attachment(session, year, id_):
stream = session.get(ATTACHMENT_URL.format(year, id_), stream=True)
attachment = b''
for chunk in stream.iter_content(4096):
Expand Down
23 changes: 9 additions & 14 deletions mftp/ntfy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import logging
import requests
from urllib.parse import quote
from bs4 import BeautifulSoup as bs
from endpoints import NOTICE_CONTENT_URL
from env import NTFY_BASE_URL, NTFY_TOPICS, NTFY_TOPIC_ICON, NTFY_USER, NTFY_PASS, HEIMDALL_COOKIE


Expand Down Expand Up @@ -43,17 +41,18 @@ def ntfy_emoji(subject):
return emoji


def format_notice(notices, session):
def format_notice(notices):
print('[FORMATTING NOTIFICATIONS]', flush=True)

formatted_notifs = []
for notice in reversed(notices):
id_, year = notice['UID'].split('_')
id_ = notice['UID'].split('_')[0]

try:
data = parseBody(notice, session, year, id_)
data = parse_body(notice['BodyData'], notice['Time'])
notice['Body'] = data
body, links = parseLinks(data)
notice.pop('BodyData', None) # Remove unparsed body data
body, links = parse_links(data)
body += '''
--------------
Expand Down Expand Up @@ -208,21 +207,17 @@ def delete_file(file_name):
return False


def parseBody(notice, session, year, id_):
content = session.get(NOTICE_CONTENT_URL.format(year, id_))
content_html = bs(content.text, 'html.parser')
content_html_div = bs.find_all(content_html, 'div', {'id': 'printableArea'})[0]

def parse_body(body_data, time):
    """Flatten a notice body into plain text for an ntfy notification.

    body_data: BeautifulSoup tag for the notice's printable area; the
        text of interest sits immediately after each <br> tag.
    time: timestamp string appended as the final segment of the body.
    Returns the stripped text lines (one per <br>, newline-terminated)
    followed by *time*.
    """
    # Collect the text that follows each <br> and join once, instead of
    # repeated += string concatenation (quadratic in the original).
    # NOTE(review): like the original, this assumes each <br> has a
    # string next_sibling; a trailing/doubled <br> (next_sibling None or
    # a Tag) raises here and is caught by the caller — TODO confirm.
    lines = [br.next_sibling.strip() + '\n' for br in body_data.find_all('br')]

    return ''.join(lines) + time


def parseLinks(data):
def parse_links(data):
body = data
links = re.findall(r'(https?://[^\s]+)', data)
action_template = "view, Link {}, {}"
Expand Down

0 comments on commit ff07a80

Please sign in to comment.