From fabc835661b95282773defa15c84f09af5a5d438 Mon Sep 17 00:00:00 2001 From: cyclotruc Date: Tue, 31 Dec 2024 06:30:01 +0000 Subject: [PATCH] Update README.md test: added unit tests for clone.py (#82) ci: disable windows tests (#86) Signed-off-by: joydeep049 test: added unit test for parse_query (#81) Refactor project structure, enhance logic, update configurations, and improve code quality Refactoring and Logic Improvements - Refactored the `_scan_directory` function in `src/gitingest/ingest_from_query.py` by extracting loop logic into the new `_process_item` function, and further separating functionality into `_process_symlink` and `_process_file` - Replaced multiple return statements with error raising and catching, introducing custom exceptions (`MaxFilesReachedError`, `MaxFileSizeReachedError`, `AlreadyVisitedError`) in the `_process_item` and `_scan_directory` functions - Enhanced the logic in the `process_query` function in `src/process_query.py` for better flow and maintainability - Improved the logic in `_generate_token_string` in `src/gitingest/ingest_from_query.py` - Refined the `download_ingest` function in `src/routers/download.py` for better clarity and functionality Exception Handling Enhancements - Replaced broad `Exception` handling with specific `OSError` in the `_read_file_content` function in `src/gitingest/ingest_from_query.py` - Refined exception handling throughout the codebase, including removing redundant try-except-raise blocks, e.g., in `clone_repo` function in `src/gitingest/clone.py` - Added custom exceptions to `src/gitingest/exceptions.py`: `MaxFilesReachedError`, `MaxFileSizeReachedError`, and `AlreadyVisitedError` - Included explicit re-raising of exceptions in various functions for improved error propagation Test Suite Refactoring - Cleaned up and reorganized test files: - Moved tests from `src/gitingest/tests/` to `tests/` - Consolidated fixtures from `tests/test_ingest.py` into `tests/conftest.py` - Removed redundant content from 
`tests/conftest.py` - Migrated configuration from `pytest.ini` to `pyproject.toml`, deleted `pytest.ini`, and updated `.dockerignore` Documentation Improvements - Added `darglint` for enforcing `numpy` docstring style in `.pre-commit-config.yaml` for `src/` files - Updated docstrings throughout the codebase, including adding module docstrings where needed - Updated `README.md`: - Added "GitHub stars" badge - Moved the "Discord" badge to its own line - Replaced occurrences of "Gitingest" with "GitIngest" for consistency and clarity Linting and Code Quality - Integrated `pylint` into `.pre-commit-config.yaml` for both `src/` and `tests/` directories - Created `tests/.pylintrc` for linting configuration specific to test files Code Clean-up - Removed the redundant `src/__init__.py` file Naming Conventions and Code Style - Renamed `logSliderToSize` to `log_slider_to_size` in `src/server_utils.py` for consistency with Python's naming conventions - Added explicit encoding specification in multiple instances of `open` throughout the code --- .github/workflows/ci.yml | 2 +- .pre-commit-config.yaml | 2 + README.md | 67 +++++++++++++++++++++++--- tests/test_clone.py | 95 +++++++++++++++++++++++++++++++++++++ tests/test_parse_query.py | 98 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 256 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 163c2a8..9fbbf5d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: true matrix: - os: [ubuntu-latest, windows-latest, macos-latest] + os: [ubuntu-latest, macos-latest] python-version: ["3.10", "3.11", "3.12", "3.13"] steps: diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0eb3fda..fedf41f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -102,6 +102,7 @@ repos: [ click, fastapi-analytics, + pytest-asyncio, python-dotenv, slowapi, starlette, @@ -118,6 +119,7 @@ repos: click, 
fastapi-analytics, pytest, + pytest-asyncio, python-dotenv, slowapi, starlette, diff --git a/README.md b/README.md index 583740b..293fbfe 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,6 @@ [![Discord](https://dcbadge.limes.pink/api/server/https://discord.com/invite/zerRaGK9EC)](https://discord.com/invite/zerRaGK9EC) - Turn any Git repository into a prompt-friendly text ingest for LLMs. You can also replace `hub` with `ingest` in any github url to access the coresponding digest @@ -111,11 +110,42 @@ GitIngest aims to be friendly for first time contributors, with a simple python - **`ALLOWED_HOSTS`**: Specify allowed hostnames for the application. Default: `"gitingest.com,*.gitingest.com,gitdigest.dev,localhost"`. You can configure the application using the following environment variables: -```bash -ALLOWED_HOSTS="gitingest.local,localhost" -``` + ```bash + # Default: "gitingest.com, *.gitingest.com, localhost, 127.0.0.1". + ALLOWED_HOSTS="example.com, localhost, 127.0.0.1" + ``` + +## 🛠️ Stack + +- [Tailwind CSS](https://tailwindcss.com/) - Frontend +- [FastAPI](https://github.com/fastapi/fastapi) - Backend framework +- [Jinja2](https://jinja.palletsprojects.com/) - HTML templating +- [tiktoken](https://github.com/openai/tiktoken) - Token estimation +- [apianalytics.dev](https://www.apianalytics.dev/) - Simple Analytics + +## ✔️ Contributing to GitIngest + +GitIngest aims to be friendly for first time contributors, with a simple python and html codebase. + If you need any help while working with the code, reach out to us on [discord](https://discord.com/invite/zerRaGK9EC) + +### Ways to help (non-technical) + +- Provide your feedback and ideas on discord +- Open an Issue on github to report a bug / submit a feature request +- Talk about GitIngest on social media + +### How to submit a PR + +1. Fork the repository & clone it locally +2. Set up the dev environment (see Development section below) +3. Run unit tests with `pytest` +4.
Commit your changes and run `pre-commit` +5. Open a pull request on GitHub for review and feedback +6. (Optional) Invite project maintainer to your branch for easier collaboration -#### Run locally +## 🔧 Development + +### Run web UI locally 1. Clone the repository @@ -127,7 +157,10 @@ ALLOWED_HOSTS="gitingest.local,localhost" 2. Install dependencies ```bash - pip install -r requirements.txt + python -m venv .venv + source .venv/bin/activate + pip install -r requirements-dev.txt + pre-commit install ``` 3. Run the application: @@ -136,3 +169,25 @@ ALLOWED_HOSTS="gitingest.local,localhost" cd src uvicorn main:app --reload ``` + +4. Run unit tests + + ```bash + pytest + ``` + +The application should be available at `http://localhost:8000` + +### Working on the CLI + +1. Install the package in dev mode + + ```bash + pip install -e . + ``` + +2. Run the CLI + + ```bash + gitingest --help + ``` diff --git a/tests/test_clone.py b/tests/test_clone.py index 5383555..2e58a0f 100644 --- a/tests/test_clone.py +++ b/tests/test_clone.py @@ -76,3 +76,98 @@ async def test_check_repo_exists() -> None: # Test failed request mock_process.returncode = 1 assert await _check_repo_exists(url) is False + + +@pytest.mark.asyncio +async def test_clone_repo_invalid_url() -> None: + clone_config = CloneConfig( + url="", + local_path="/tmp/repo", + ) + with pytest.raises(ValueError, match="The 'url' parameter is required."): + await clone_repo(clone_config) + + +@pytest.mark.asyncio +async def test_clone_repo_invalid_local_path() -> None: + clone_config = CloneConfig( + url="https://github.com/user/repo", + local_path="", + ) + with pytest.raises(ValueError, match="The 'local_path' parameter is required."): + await clone_repo(clone_config) + + +@pytest.mark.asyncio +async def test_clone_repo_with_custom_branch() -> None: + clone_config = CloneConfig( + url="https://github.com/user/repo", + local_path="/tmp/repo", + branch="feature-branch", + ) + with
patch("gitingest.clone._check_repo_exists", return_value=True): + with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: + await clone_repo(clone_config) + mock_exec.assert_called_once_with( + "git", + "clone", + "--depth=1", + "--single-branch", + "--branch", + "feature-branch", + clone_config.url, + clone_config.local_path, + ) + + +@pytest.mark.asyncio +async def test_git_command_failure() -> None: + clone_config = CloneConfig( + url="https://github.com/user/repo", + local_path="/tmp/repo", + ) + with patch("gitingest.clone._check_repo_exists", return_value=True): + with patch("gitingest.clone._run_git_command", side_effect=RuntimeError("Git command failed")): + with pytest.raises(RuntimeError, match="Git command failed"): + await clone_repo(clone_config) + + +@pytest.mark.asyncio +async def test_clone_repo_default_shallow_clone() -> None: + clone_config = CloneConfig( + url="https://github.com/user/repo", + local_path="/tmp/repo", + ) + with patch("gitingest.clone._check_repo_exists", return_value=True): + with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: + await clone_repo(clone_config) + mock_exec.assert_called_once_with( + "git", "clone", "--depth=1", "--single-branch", clone_config.url, clone_config.local_path + ) + + +@pytest.mark.asyncio +async def test_clone_repo_commit_without_branch() -> None: + clone_config = CloneConfig( + url="https://github.com/user/repo", + local_path="/tmp/repo", + commit="a" * 40, # Simulating a valid commit hash + ) + with patch("gitingest.clone._check_repo_exists", return_value=True): + with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec: + await clone_repo(clone_config) + assert mock_exec.call_count == 2 # Clone and checkout calls + mock_exec.assert_any_call("git", "clone", "--single-branch", clone_config.url, clone_config.local_path) + mock_exec.assert_any_call("git", "-C", clone_config.local_path, "checkout", 
clone_config.commit) + + +@pytest.mark.asyncio +async def test_check_repo_exists_with_redirect() -> None: + url = "https://github.com/user/repo" + with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec: + mock_process = AsyncMock() + mock_process.communicate.return_value = (b"HTTP/1.1 302 Found\n", b"") + mock_process.returncode = 0 # Simulate successful request + mock_exec.return_value = mock_process + + assert await _check_repo_exists(url) diff --git a/tests/test_parse_query.py b/tests/test_parse_query.py index d4756f0..0a162a4 100644 --- a/tests/test_parse_query.py +++ b/tests/test_parse_query.py @@ -3,7 +3,7 @@ import pytest from gitingest.ignore_patterns import DEFAULT_IGNORE_PATTERNS -from gitingest.parse_query import _parse_url, parse_query +from gitingest.parse_query import _parse_patterns, _parse_url, parse_query def test_parse_url_valid() -> None: @@ -46,3 +46,99 @@ def test_parse_query_invalid_pattern() -> None: url = "https://github.com/user/repo" with pytest.raises(ValueError, match="Pattern.*contains invalid characters"): parse_query(url, max_file_size=50, from_web=True, include_patterns="*.py;rm -rf") + + +def test_parse_url_with_subpaths() -> None: + url = "https://github.com/user/repo/tree/main/subdir/file" + result = _parse_url(url) + assert result["user_name"] == "user" + assert result["repo_name"] == "repo" + assert result["branch"] == "main" + assert result["subpath"] == "/subdir/file" + + +def test_parse_url_invalid_repo_structure() -> None: + url = "https://github.com/user" + with pytest.raises(ValueError, match="Invalid repository URL"): + _parse_url(url) + + +def test_parse_patterns_valid() -> None: + patterns = "*.py, *.md, docs/*" + result = _parse_patterns(patterns) + assert result == ["*.py", "*.md", "docs/*"] + + +def test_parse_patterns_invalid_characters() -> None: + patterns = "*.py;rm -rf" + with pytest.raises(ValueError, match="Pattern.*contains invalid characters"): + _parse_patterns(patterns) + + +def 
test_parse_query_with_large_file_size() -> None: + url = "https://github.com/user/repo" + result = parse_query(url, max_file_size=10**9, from_web=True) + assert result["max_file_size"] == 10**9 + assert result["ignore_patterns"] == DEFAULT_IGNORE_PATTERNS + + +def test_parse_query_empty_patterns() -> None: + url = "https://github.com/user/repo" + result = parse_query(url, max_file_size=50, from_web=True, include_patterns="", ignore_patterns="") + assert result["include_patterns"] is None + assert result["ignore_patterns"] == DEFAULT_IGNORE_PATTERNS + + +def test_parse_query_include_and_ignore_overlap() -> None: + url = "https://github.com/user/repo" + result = parse_query( + url, + max_file_size=50, + from_web=True, + include_patterns="*.py", + ignore_patterns=["*.py", "*.txt"], + ) + assert result["include_patterns"] == ["*.py"] + assert "*.py" not in result["ignore_patterns"] + assert "*.txt" in result["ignore_patterns"] + + +def test_parse_query_local_path() -> None: + path = "/home/user/project" + result = parse_query(path, max_file_size=100, from_web=False) + assert result["local_path"] == "/home/user/project" + assert result["id"] is not None + assert result["slug"] == "user/project" + + +def test_parse_query_relative_path() -> None: + path = "./project" + result = parse_query(path, max_file_size=100, from_web=False) + assert result["local_path"].endswith("project") + assert result["slug"].endswith("project") + + +def test_parse_query_empty_source() -> None: + with pytest.raises(ValueError, match="Invalid repository URL"): + parse_query("", max_file_size=100, from_web=True) + + +def test_parse_url_branch_and_commit_distinction() -> None: + url_branch = "https://github.com/user/repo/tree/main" + url_commit = "https://github.com/user/repo/tree/abcd1234abcd1234abcd1234abcd1234abcd1234" + + result_branch = _parse_url(url_branch) + result_commit = _parse_url(url_commit) + + assert result_branch["branch"] == "main" + assert result_branch["commit"] is None + + 
assert result_commit["branch"] is None + assert result_commit["commit"] == "abcd1234abcd1234abcd1234abcd1234abcd1234" + + +def test_parse_query_uuid_uniqueness() -> None: + path = "/home/user/project" + result1 = parse_query(path, max_file_size=100, from_web=False) + result2 = parse_query(path, max_file_size=100, from_web=False) + assert result1["id"] != result2["id"]