Skip to content

Commit

Permalink
Merge pull request #84 from filips123/xlsx-parsing
Browse files (browse the repository at this point in the history)
Xlsx parsing
  • Loading branch information
PetJer authored Sep 17, 2023
2 parents 0a233e2 + f2bc5b5 commit 3be64c5
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 95 deletions.
23 changes: 9 additions & 14 deletions API/gimvicurnik/updaters/eclassroom.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -267,10 +267,6 @@ def parse_document(self, document: DocumentInfo, stream: BytesIO, effective: dat
span.set_tag("document.type", document.type.value)
span.set_tag("document.format", document.extension)

# Only parse xlsx lunch schedules - a guard for now
if document.type == DocumentType.LUNCH_SCHEDULE and document.extension != "xlsx":
return

match (document.type, document.extension):
case (DocumentType.SUBSTITUTIONS, "pdf"):
self._parse_substitutions_pdf(stream, effective)
Expand Down Expand Up @@ -772,27 +768,26 @@ def _parse_lunch_schedule_xlsx(self, stream: BytesIO, effective: date) -> None:
assert isinstance(wr[2].value, str)
assert isinstance(wr[4].value, str)

# Schedule for specific class
class_schedule: dict[str, Any] = {}
schedule: dict[str, Any] = {}

# Time in format H:M
class_schedule["time"] = wr[0].value
schedule["time"] = wr[0].value

# Notes
class_schedule["notes"] = wr[1].value.strip() if wr[1].value else None
schedule["notes"] = wr[1].value.strip() if wr[1].value else None

# Class name (class id)
if wr[2].value:
class_schedule["class_id"] = get_or_create(
self.session, model=Class, name=wr[2].value.strip()
)[0].id
schedule["class_id"] = get_or_create(self.session, model=Class, name=wr[2].value.strip())[
0
].id

# Location
class_schedule["location"] = wr[4].value.strip() if wr[4].value else None
schedule["location"] = wr[4].value.strip() if wr[4].value else None

# Effective date
class_schedule["date"] = effective
lunch_schedule.append(class_schedule)
schedule["date"] = effective
lunch_schedule.append(schedule)

wb.close()

Expand Down
161 changes: 80 additions & 81 deletions API/gimvicurnik/updaters/menu.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,6 +9,7 @@

from bs4 import BeautifulSoup, ParserRejectedMarkup
from openpyxl import load_workbook
from sqlalchemy import insert

from .base import BaseMultiUpdater, DocumentInfo
from ..database import DocumentType, LunchMenu, SnackMenu
Expand Down Expand Up @@ -85,7 +86,9 @@ def get_document_effective(self, document: DocumentInfo) -> datetime.date:

# jedilnik-kosilo-YYYY-MM-DD(-popravek).pdf
# jedilnik-malica-YYYY-MM-DD(-popravek).pdf
date = re.search(r"jedilnik-(?:kosilo|malica)-(\d+)-(\d+)-(\d+)(?:-[\w-]*)?.pdf", document.url)
date = re.search(
r"jedilnik-(?:kosilo|malica)-(\d+)-(\d+)-(\d+)(?:-[\w-]*)?\.(?:pdf|xlsx)", document.url
)

# The specified date is commonly Monday of the effective week
# However, in some cases, it may also be another day of that week
Expand Down Expand Up @@ -174,68 +177,67 @@ def _parse_snack_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> N
# Extract workbook from an XLSX stream
wb = with_span(op="extract")(load_workbook)(stream, read_only=True, data_only=True)

menu: dict[str, Any] = {}
snack_menu: dict[str, Any] = {
"normal": [],
"poultry": [],
"vegetarian": [],
"fruitvegetable": [],
}
days = 0

# Parse tables into menus and store them
# Parse menus and store them
for ws in wb:
for wr in ws.iter_rows(min_row=1, max_col=3):
if not hasattr(wr[0].border, "bottom"):
for wr in ws.iter_rows(min_row=2, max_col=5):
if days == 5:
break

# Ignore blank cells
if not wr[1].value:
continue

# Make mypy not complain about incorrect types for cell values
# If the cell has an incorrect type, we should fail anyway
# Check for correct cell value type (else mypy complains)
if typing.TYPE_CHECKING:
assert isinstance(wr[1].value, str)
assert isinstance(wr[2].value, str)
assert isinstance(wr[3].value, str)
assert isinstance(wr[4].value, str)

# Store the menu after the end of table
if wr[0].border.bottom.color:
if menu and menu["date"]:
# fmt: off
model = (
self.session.query(SnackMenu)
.filter(SnackMenu.date == menu["date"])
.first()
)
# fmt: on

if not model:
model = SnackMenu()

model.date = menu["date"]
model.normal = "\n".join(menu["normal"][1:])
model.poultry = "\n".join(menu["poultry"][1:])
model.vegetarian = "\n".join(menu["vegetarian"][1:])
model.fruitvegetable = "\n".join(menu["fruitvegetable"][1:])

self.session.add(model)
days += 1

menu = {
"date": None,
"normal": [],
"poultry": [],
"vegetarian": [],
"fruitvegetable": [],
}

if wr[0].value and isinstance(wr[0].value, datetime.datetime):
menu["date"] = effective + datetime.timedelta(days=days)
# Ignore information cells
if "NV in N" in wr[1].value:
continue

if wr[1].value:
menu["normal"].append(wr[1].value.strip())
snack_menu["normal"].append(wr[1].value.strip())

if wr[2].value:
menu["poultry"].append(wr[2].value.strip())
snack_menu["poultry"].append(wr[2].value.strip())

if wr[3].value:
menu["vegetarian"].append(wr[3].value.strip())
snack_menu["vegetarian"].append(wr[3].value.strip())

if wr[4].value:
menu["fruitvegetable"].append(wr[4].value.strip())
snack_menu["fruitvegetable"].append(wr[4].value.strip())

# Store the menu after the end of day
if wr[0].border.bottom.color:
snack_menu["date"] = effective + datetime.timedelta(days=days)
self.session.query(SnackMenu).filter(SnackMenu.date == snack_menu["date"]).delete()

snack_menu["normal"] = "\n".join(snack_menu["normal"])
snack_menu["poultry"] = "\n".join(snack_menu["poultry"])
snack_menu["vegetarian"] = "\n".join(snack_menu["vegetarian"])
snack_menu["fruitvegetable"] = "\n".join(snack_menu["fruitvegetable"])

self.session.execute(insert(SnackMenu), snack_menu)

# Set for next day
days += 1
snack_menu = {
"normal": [],
"poultry": [],
"vegetarian": [],
"fruitvegetable": [],
}

wb.close()

Expand Down Expand Up @@ -278,56 +280,53 @@ def _parse_lunch_menu_xlsx(self, stream: BytesIO, effective: datetime.date) -> N
# Extract workbook from an XLSX stream
wb = with_span(op="extract")(load_workbook)(stream, read_only=True, data_only=True)

menu: dict[str, Any] = {}
lunch_menu: dict[str, Any] = {
"normal": [],
"vegetarian": [],
}
days = 0

# Parse tables into menus and store them
# Parse menus and store them
for ws in wb:
for wr in ws.iter_rows(min_row=1, max_col=3):
if not hasattr(wr[0].border, "bottom"):
for wr in ws.iter_rows(min_row=2, max_col=3):
if days == 5:
break

# Ignore blank cells
if not wr[1].value:
continue

# Make mypy not complain about incorrect types for cell values
# If the cell has an incorrect type, we should fail anyway
# Check for correct cell value type (else mypy complains)
if typing.TYPE_CHECKING:
assert isinstance(wr[1].value, str)
assert isinstance(wr[2].value, str)

# Store the menu after the end of table
if wr[0].border.bottom.color:
if menu and menu["date"]:
# fmt: off
model = (
self.session.query(LunchMenu)
.filter(LunchMenu.date == menu["date"])
.first()
)
# fmt: on

if not model:
model = LunchMenu()

model.date = menu["date"]
model.normal = "\n".join(menu["normal"][1:])
model.vegetarian = "\n".join(menu["vegetarian"][1:])

self.session.add(model)
days += 1

menu = {
"date": None,
"normal": [],
"vegetarian": [],
}

if wr[0].value and isinstance(wr[0].value, datetime.datetime):
menu["date"] = effective + datetime.timedelta(days=days)
# Ignore information cells
if "N KOSILO" in wr[1].value:
continue

if wr[1].value:
menu["normal"].append(wr[1].value.strip())
lunch_menu["normal"].append(wr[1].value.strip())

if wr[2].value:
menu["vegetarian"].append(wr[2].value.strip())
lunch_menu["vegetarian"].append(wr[2].value.strip())

# Store the menu after the end of day
if wr[0].border.bottom.color:
lunch_menu["date"] = effective + datetime.timedelta(days=days)
self.session.query(LunchMenu).filter(LunchMenu.date == lunch_menu["date"]).delete()

lunch_menu["normal"] = "\n".join(lunch_menu["normal"])
lunch_menu["vegetarian"] = "\n".join(lunch_menu["vegetarian"])

self.session.execute(insert(LunchMenu), lunch_menu)

# Set for next day
days += 1
lunch_menu = {
"normal": [],
"vegetarian": [],
}

wb.close()

Expand Down

0 comments on commit 3be64c5

Please sign in to comment.