Skip to content

Commit

Permalink
Merge pull request #94 from filips123/xlsx-parsing
Browse files Browse the repository at this point in the history
Reintroduce pdf menu parser
  • Loading branch information
PetJer authored Jan 3, 2024
2 parents 5594438 + 586b862 commit ab436cd
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 2 deletions.
6 changes: 4 additions & 2 deletions API/gimvicurnik/updaters/eclassroom.py
Original file line number Diff line number Diff line change
Expand Up @@ -656,7 +656,7 @@ def _parse_lunch_schedule_xlsx(self, stream: BytesIO, effective: date) -> None:
"""

# Extract workbook from an XLSX stream
wb = with_span(op="extract")(load_workbook)(stream, read_only=True, data_only=True)
wb = with_span(op="extract")(load_workbook)(stream, data_only=True)

lunch_schedule = []

Expand All @@ -665,6 +665,9 @@ def _parse_lunch_schedule_xlsx(self, stream: BytesIO, effective: date) -> None:
if ws.title != "kosilo":
continue

while not ws["A1"].value:
ws.delete_cols(1)

for wr in ws.iter_rows(min_row=3, max_col=5):
if not wr[3].value:
break
Expand All @@ -674,7 +677,6 @@ def _parse_lunch_schedule_xlsx(self, stream: BytesIO, effective: date) -> None:
assert isinstance(wr[0].value, datetime)
assert isinstance(wr[1].value, str)
assert isinstance(wr[2].value, str)
assert isinstance(wr[3].value, int)
assert isinstance(wr[4].value, str)

# Ignore rows that do not contain a class name
Expand Down
73 changes: 73 additions & 0 deletions API/gimvicurnik/updaters/menu.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .base import BaseMultiUpdater, DocumentInfo
from ..database import DocumentType, LunchMenu, SnackMenu
from ..errors import MenuApiError, MenuDateError, MenuFormatError
from ..utils.pdf import extract_tables
from ..utils.sentry import with_span

if typing.TYPE_CHECKING:
Expand Down Expand Up @@ -120,6 +121,10 @@ def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: dat
span.set_tag("document.format", document.extension)

match (document.type, document.extension):
case (DocumentType.SNACK_MENU, "pdf"):
self._parse_snack_menu_pdf(stream, effective)
case (DocumentType.LUNCH_MENU, "pdf"):
self._parse_lunch_menu_pdf(stream, effective)
case (DocumentType.SNACK_MENU, "xlsx"):
self._parse_snack_menu_xlsx(stream, effective)
case (DocumentType.LUNCH_MENU, "xlsx"):
Expand All @@ -131,6 +136,41 @@ def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: dat
case _:
raise KeyError("Unknown document type for menu")

def _parse_snack_menu_pdf(self, stream: BytesIO, effective: datetime.date) -> None:
    """
    Parse the snack menu PDF document and store one menu per table row.

    Rows whose second column is empty or contains "NV in N" are skipped.
    Every remaining row receives the next consecutive date, starting at
    `effective`, and its columns are written to the `SnackMenu` record
    for that date. An existing record is updated in place; otherwise
    a new one is created.
    """

    # Extract all tables from the PDF stream, wrapped by with_span(op="extract")
    extract = with_span(op="extract")(extract_tables)
    tables = extract(stream)

    # Number of rows accepted so far; doubles as the day offset from `effective`
    offset = 0

    # Walk over every row of every table, in document order
    for row in (row for table in tables for row in table):
        normal = row[1]

        # Skip rows without a normal menu and rows carrying the "NV in N" marker
        # NOTE(review): the marker presumably identifies the header row - confirm
        if not normal or "NV in N" in normal:
            continue

        current = effective + datetime.timedelta(days=offset)
        offset += 1

        # Read the remaining columns before touching the database
        poultry = row[2]
        vegetarian = row[3]
        fruitvegetable = row[4]

        # Reuse the stored menu for this date if there is one
        model = self.session.query(SnackMenu).filter(SnackMenu.date == current).first() or SnackMenu()

        model.date = current
        model.normal = normal
        model.poultry = poultry
        model.vegetarian = vegetarian
        model.fruitvegetable = fruitvegetable

        self.session.add(model)

def _parse_snack_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> None:
"""Parse the snack menu XLSX document."""

Expand Down Expand Up @@ -201,6 +241,39 @@ def _parse_snack_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> N

wb.close()

def _parse_lunch_menu_pdf(self, stream: BytesIO, effective: datetime.date) -> None:
    """
    Parse the lunch menu PDF document and store one menu per table row.

    Rows whose second column is empty or contains "N KOSILO" are skipped.
    Every remaining row receives the next consecutive date, starting at
    `effective`, and its columns are written to the `LunchMenu` record
    for that date. An existing record is updated in place; otherwise
    a new one is created.
    """

    # Extract all tables from the PDF stream, wrapped by with_span(op="extract")
    extract = with_span(op="extract")(extract_tables)
    tables = extract(stream)

    # Number of rows accepted so far; doubles as the day offset from `effective`
    offset = 0

    # Walk over every row of every table, in document order
    for row in (row for table in tables for row in table):
        normal = row[1]

        # Skip rows without a normal menu and rows carrying the "N KOSILO" marker
        # NOTE(review): the marker presumably identifies the header row - confirm
        if not normal or "N KOSILO" in normal:
            continue

        current = effective + datetime.timedelta(days=offset)
        offset += 1

        # Read the remaining column before touching the database
        vegetarian = row[2]

        # Reuse the stored menu for this date if there is one
        model = self.session.query(LunchMenu).filter(LunchMenu.date == current).first() or LunchMenu()

        model.date = current
        model.normal = normal
        model.vegetarian = vegetarian

        self.session.add(model)

def _parse_lunch_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> None:
"""Parse the lunch menu XLSX document."""

Expand Down

0 comments on commit ab436cd

Please sign in to comment.