diff --git a/API/gimvicurnik/updaters/eclassroom.py b/API/gimvicurnik/updaters/eclassroom.py index 95f30ed..25012bf 100644 --- a/API/gimvicurnik/updaters/eclassroom.py +++ b/API/gimvicurnik/updaters/eclassroom.py @@ -267,10 +267,6 @@ def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: dat span.set_tag("document.type", document.type.value) span.set_tag("document.format", document.extension) - # Only parse xlsx lunch schedules - a guard for now - if document.type == DocumentType.LUNCH_SCHEDULE and document.extension != "xlsx": - return - match (document.type, document.extension): case (DocumentType.SUBSTITUTIONS, "pdf"): self._parse_substitutions_pdf(stream, effective) @@ -772,27 +768,26 @@ def _parse_lunch_schedule_xlsx(self, stream: BytesIO, effective: date) -> None: assert isinstance(wr[2].value, str) assert isinstance(wr[4].value, str) - # Schedule for specific class - class_schedule: dict[str, Any] = {} + schedule: dict[str, Any] = {} # Time in format H:M - class_schedule["time"] = wr[0].value + schedule["time"] = wr[0].value # Notes - class_schedule["notes"] = wr[1].value.strip() if wr[1].value else None + schedule["notes"] = wr[1].value.strip() if wr[1].value else None # Class name (class id) if wr[2].value: - class_schedule["class_id"] = get_or_create( - self.session, model=Class, name=wr[2].value.strip() - )[0].id + schedule["class_id"] = get_or_create(self.session, model=Class, name=wr[2].value.strip())[ + 0 + ].id # Location - class_schedule["location"] = wr[4].value.strip() if wr[4].value else None + schedule["location"] = wr[4].value.strip() if wr[4].value else None # Effective date - class_schedule["date"] = effective - lunch_schedule.append(class_schedule) + schedule["date"] = effective + lunch_schedule.append(schedule) wb.close() diff --git a/API/gimvicurnik/updaters/menu.py b/API/gimvicurnik/updaters/menu.py index c73481d..466ee1d 100644 --- a/API/gimvicurnik/updaters/menu.py +++ b/API/gimvicurnik/updaters/menu.py @@ -9,6 +9,7 @@ from bs4 import BeautifulSoup, ParserRejectedMarkup from openpyxl import load_workbook +from sqlalchemy import insert from .base import BaseMultiUpdater, DocumentInfo from ..database import DocumentType, LunchMenu, SnackMenu @@ -85,7 +86,9 @@ def get_document_effective(self, document: DocumentInfo) -> datetime.date: # jedilnik-kosilo-YYYY-MM-DD(-popravek).pdf # jedilnik-malica-YYYY-MM-DD(-popravek).pdf - date = re.search(r"jedilnik-(?:kosilo|malica)-(\d+)-(\d+)-(\d+)(?:-[\w-]*)?.pdf", document.url) + date = re.search( + r"jedilnik-(?:kosilo|malica)-(\d+)-(\d+)-(\d+)(?:-[\w-]*)?\.(?:pdf|xlsx)", document.url + ) # The specified date is commonly Monday of the effective week # However, in some cases, it may also be another day of that week @@ -174,68 +177,67 @@ def _parse_snack_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> N # Extract workbook from an XLSX stream wb = with_span(op="extract")(load_workbook)(stream, read_only=True, data_only=True) - menu: dict[str, Any] = {} + snack_menu: dict[str, Any] = { + "normal": [], + "poultry": [], + "vegetarian": [], + "fruitvegetable": [], + } days = 0 - # Parse tables into menus and store them + # Parse menus and store them for ws in wb: - for wr in ws.iter_rows(min_row=1, max_col=3): - if not hasattr(wr[0].border, "bottom"): + for wr in ws.iter_rows(min_row=2, max_col=5): + if days == 5: + break + + # Ignore blank cells + if not wr[1].value: continue - # Make mypy not complain about incorrect types for cell values - # If the cell has an incorrect type, we should fail anyway + # Check for correct cell value type (else mypy complains) if typing.TYPE_CHECKING: assert isinstance(wr[1].value, str) assert isinstance(wr[2].value, str) assert isinstance(wr[3].value, str) assert isinstance(wr[4].value, str) - # Store the menu after the end of table - if wr[0].border.bottom.color: - if menu and menu["date"]: - # fmt: off - model = ( - self.session.query(SnackMenu) - .filter(SnackMenu.date == menu["date"]) - .first() - ) - # fmt: on - - if not model: - model = SnackMenu() - - model.date = menu["date"] - model.normal = "\n".join(menu["normal"][1:]) - model.poultry = "\n".join(menu["poultry"][1:]) - model.vegetarian = "\n".join(menu["vegetarian"][1:]) - model.fruitvegetable = "\n".join(menu["fruitvegetable"][1:]) - - self.session.add(model) - days += 1 - - menu = { - "date": None, - "normal": [], - "poultry": [], - "vegetarian": [], - "fruitvegetable": [], - } - - if wr[0].value and isinstance(wr[0].value, datetime.datetime): - menu["date"] = effective + datetime.timedelta(days=days) + # Ignore information cells + if "NV in N" in wr[1].value: + continue if wr[1].value: - menu["normal"].append(wr[1].value.strip()) + snack_menu["normal"].append(wr[1].value.strip()) if wr[2].value: - menu["poultry"].append(wr[2].value.strip()) + snack_menu["poultry"].append(wr[2].value.strip()) if wr[3].value: - menu["vegetarian"].append(wr[3].value.strip()) + snack_menu["vegetarian"].append(wr[3].value.strip()) if wr[4].value: - menu["fruitvegetable"].append(wr[4].value.strip()) + snack_menu["fruitvegetable"].append(wr[4].value.strip()) + + # Store the menu after the end of day + if wr[0].border.bottom.color: + snack_menu["date"] = effective + datetime.timedelta(days=days) + self.session.query(SnackMenu).filter(SnackMenu.date == snack_menu["date"]).delete() + + snack_menu["normal"] = "\n".join(snack_menu["normal"]) + snack_menu["poultry"] = "\n".join(snack_menu["poultry"]) + snack_menu["vegetarian"] = "\n".join(snack_menu["vegetarian"]) + snack_menu["fruitvegetable"] = "\n".join(snack_menu["fruitvegetable"]) + + self.session.execute(insert(SnackMenu), snack_menu) + + # Set for next day + days += 1 + snack_menu = { + "normal": [], + "poultry": [], + "vegetarian": [], + "fruitvegetable": [], + } wb.close() @@ -278,56 +280,53 @@ def _parse_lunch_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> N # Extract workbook from an XLSX stream wb = with_span(op="extract")(load_workbook)(stream, read_only=True, data_only=True) - menu: dict[str, Any] = {} + lunch_menu: dict[str, Any] = { + "normal": [], + "vegetarian": [], + } days = 0 - # Parse tables into menus and store them + # Parse menus and store them for ws in wb: - for wr in ws.iter_rows(min_row=1, max_col=3): - if not hasattr(wr[0].border, "bottom"): + for wr in ws.iter_rows(min_row=2, max_col=3): + if days == 5: + break + + # Ignore blank cells + if not wr[1].value: continue - # Make mypy not complain about incorrect types for cell values - # If the cell has an incorrect type, we should fail anyway + # Check for correct cell value type (else mypy complains) if typing.TYPE_CHECKING: assert isinstance(wr[1].value, str) assert isinstance(wr[2].value, str) - # Store the menu after the end of table - if wr[0].border.bottom.color: - if menu and menu["date"]: - # fmt: off - model = ( - self.session.query(LunchMenu) - .filter(LunchMenu.date == menu["date"]) - .first() - ) - # fmt: on - - if not model: - model = LunchMenu() - - model.date = menu["date"] - model.normal = "\n".join(menu["normal"][1:]) - model.vegetarian = "\n".join(menu["vegetarian"][1:]) - - self.session.add(model) - days += 1 - - menu = { - "date": None, - "normal": [], - "vegetarian": [], - } - - if wr[0].value and isinstance(wr[0].value, datetime.datetime): - menu["date"] = effective + datetime.timedelta(days=days) + # Ignore information cells + if "N KOSILO" in wr[1].value: + continue if wr[1].value: - menu["normal"].append(wr[1].value.strip()) + lunch_menu["normal"].append(wr[1].value.strip()) if wr[2].value: - menu["vegetarian"].append(wr[2].value.strip()) + lunch_menu["vegetarian"].append(wr[2].value.strip()) + + # Store the menu after the end of day + if wr[0].border.bottom.color: + lunch_menu["date"] = effective + datetime.timedelta(days=days) + self.session.query(LunchMenu).filter(LunchMenu.date == lunch_menu["date"]).delete() + + lunch_menu["normal"] = "\n".join(lunch_menu["normal"]) + lunch_menu["vegetarian"] = "\n".join(lunch_menu["vegetarian"]) + + self.session.execute(insert(LunchMenu), lunch_menu) + + # Set for next day + days += 1 + lunch_menu = { + "normal": [], + "vegetarian": [], + } wb.close()