Skip to content

Commit

Permalink
feat: add barebones cli
Browse files Browse the repository at this point in the history
  • Loading branch information
benjamin-awd committed Nov 8, 2023
1 parent e4e8053 commit a61edc6
Show file tree
Hide file tree
Showing 12 changed files with 116 additions and 5 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ dist
# allowed pdf files/fixtures
!tests/integration/banks/**/*.csv
!tests/integration/banks/**/*.pdf
!tests/unit/test_cli/**/*.pdf
!monopoly/examples/*.pdf

# john files
Expand Down
2 changes: 1 addition & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pymupdf = "^1.23.3"
pydantic = "^2.4.2"
pdf2john = "^0.1.8"
pdftotext = "^2.2.2"
click = "^8.1.7"


[tool.poetry.group.dev.dependencies]
Expand Down Expand Up @@ -84,3 +85,6 @@ ignore_missing_imports = true
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.scripts]
monopoly = "monopoly.cli:monopoly"
3 changes: 2 additions & 1 deletion src/monopoly/banks/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from dataclasses import Field, fields
from pathlib import Path
from typing import Type

from monopoly.constants import EncryptionIdentifier, MetadataIdentifier
Expand All @@ -14,7 +15,7 @@
banks: list[Type[BankBase]] = [Citibank, Dbs, Hsbc, Ocbc, StandardChartered]


def auto_detect_bank(file_path: str) -> BankBase:
def auto_detect_bank(file_path: Path) -> BankBase:
"""
Reads the encryption metadata or actual metadata (if the PDF is not encrypted),
and checks for a bank based on unique identifiers.
Expand Down
3 changes: 2 additions & 1 deletion src/monopoly/banks/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path
from typing import Optional

from monopoly.constants import EncryptionIdentifier, MetadataIdentifier
Expand All @@ -11,7 +12,7 @@ class BankBase(StatementProcessor):

def __init__(
self,
file_path: str,
file_path: Path,
identifiers: Optional[list[EncryptionIdentifier | MetadataIdentifier]] = None,
password: Optional[str] = None,
parser: Optional[PdfParser] = None,
Expand Down
46 changes: 46 additions & 0 deletions src/monopoly/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from pathlib import Path
from typing import Collection, Iterable

import click

from monopoly.banks import auto_detect_bank


def run(files: Collection[Path]):
    """
    Processes every statement file in `files`, one after another.

    For each path a bank is resolved with `auto_detect_bank`, and that bank's
    `extract`, `transform` and `load` steps are invoked in that order.
    """
    for statement_path in files:
        detected_bank = auto_detect_bank(statement_path)
        extracted = detected_bank.extract()
        # `load` receives both the transformed frame and the extracted statement
        detected_bank.load(detected_bank.transform(extracted), extracted)


def get_statement_paths(files: Iterable[Path]) -> set[Path]:
    """
    Recursively collects the PDF files reachable from the given paths.

    Each entry of `files` may be a file or a directory:
    - directories are walked recursively via `Path.iterdir()`
    - files are kept when their name ends in ".pdf"; the comparison is
      case-insensitive, so "STATEMENT.PDF" is picked up as well
    - anything else (missing paths, non-PDF files) is ignored

    Returns the matching paths as a set, so duplicates collapse.
    """
    matched_files: set[Path] = set()
    for path in files:
        # A path is either a directory or a file, never both, so a single
        # if/elif avoids a redundant filesystem check per entry.
        if path.is_dir():
            matched_files |= get_statement_paths(path.iterdir())
        elif path.is_file() and path.name.lower().endswith(".pdf"):
            matched_files.add(path)

    return matched_files


@click.command()
@click.argument(
    "files",
    nargs=-1,
    type=click.Path(exists=True, allow_dash=True, resolve_path=True, path_type=Path),
)
def monopoly(files: list[Path]):
    """
    Monopoly helps convert your bank statements from PDF to CSV.
    A file or directory can be passed in via the FILES argument
    """
    # NOTE: the docstring above is what click shows for `--help`,
    # so it is user-facing text rather than developer documentation.

    # Nothing was passed on the command line: report it and stop.
    if not files:
        print("No command received")
        return

    # Expand any directories into the PDF files they contain, then process them.
    run(get_statement_paths(files))
3 changes: 2 additions & 1 deletion src/monopoly/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from dataclasses import dataclass
from functools import cached_property
from io import BytesIO
from pathlib import Path
from typing import Optional

import fitz
Expand Down Expand Up @@ -32,7 +33,7 @@ def lines(self) -> list[str]:
class PdfParser:
def __init__(
self,
file_path: str,
file_path: Path,
brute_force_config: Optional[BruteForceConfig] = None,
pdf_config: Optional[PdfConfig] = None,
):
Expand Down
3 changes: 2 additions & 1 deletion src/monopoly/processor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional

from pandas import DataFrame
Expand Down Expand Up @@ -37,7 +38,7 @@ class StatementProcessor(PdfParser):
allows for the parser to be reused and avoid re-opening the PDF.
"""

def __init__(self, file_path: str, parser: Optional[PdfParser] = None, **kwargs):
def __init__(self, file_path: Path, parser: Optional[PdfParser] = None, **kwargs):
keys = [
"statement_config",
"transaction_config",
Expand Down
56 changes: 56 additions & 0 deletions tests/unit/test_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from pathlib import Path
from unittest.mock import DEFAULT, MagicMock, patch

import pytest

from monopoly.cli import get_statement_paths, run


@pytest.fixture
def test_directory() -> Path:
    """Absolute path of the `tests/unit/test_cli` fixture directory."""
    fixture_dir = Path("tests") / "unit" / "test_cli"
    return fixture_dir.resolve()


class MockBank(MagicMock):
    """
    Minimal stand-in for a bank object.

    The three methods are empty placeholders that return None; `test_run`
    swaps them for mocks with `patch.multiple` before asserting on calls.
    """

    def extract(self):
        return None

    def transform(self):
        return None

    def load(self):
        return None


def test_run(monkeypatch):
    """`run` should call extract, transform and load exactly once for a single file."""

    def mock_auto_detect_bank(file_path: Path):
        # The path handed to bank detection must be the one passed to `run`
        assert "input.pdf" in str(file_path)
        return MockBank()

    monkeypatch.setattr("monopoly.cli.auto_detect_bank", mock_auto_detect_bank)

    # Mock paths
    statement_files = [Path("tests/integration/banks/example/input.pdf").resolve()]

    pipeline_steps = ("extract", "transform", "load")
    with patch.multiple(MockBank, **dict.fromkeys(pipeline_steps, DEFAULT)):
        run(statement_files)

        # Assertions: checked while the patch is active, since the class
        # attributes are only MagicMock objects inside this block.
        for step_name in pipeline_steps:
            patched_step = getattr(MockBank, step_name)
            assert isinstance(patched_step, MagicMock)
            patched_step.assert_called_once()


def test_get_statement_paths(test_directory: Path) -> None:
    """PDFs at the top level and in a nested directory should all be found."""
    fixture_pdfs = (
        "top_level.pdf",
        "top_level_2.pdf",
        "nested_directory/nested.pdf",
    )
    expected = {test_directory / relative_path for relative_path in fixture_pdfs}

    assert get_statement_paths(test_directory.iterdir()) == expected
Empty file.
Empty file.
Empty file.

0 comments on commit a61edc6

Please sign in to comment.