Skip to content

Commit

Permalink
Replace dict-based query with ParsedQuery dataclass (cyclotruc#133)
Browse files Browse the repository at this point in the history
- Introduce ParsedQuery dataclass to store query parameters and metadata
- Update ingestion and parser modules to use ParsedQuery instead of dict[str, Any]
- Convert ignore_patterns and include_patterns to sets
- Clean up references to max size and pattern handling
- Update tests to reflect new dataclass usage
  • Loading branch information
filipchristiansen authored Jan 17, 2025
1 parent 3ce8e7e commit d721b00
Show file tree
Hide file tree
Showing 12 changed files with 369 additions and 375 deletions.
5 changes: 5 additions & 0 deletions src/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@

from pathlib import Path

MAX_FILE_SIZE = 10 * 1024 * 1024 # 10 MB
MAX_DIRECTORY_DEPTH = 20 # Maximum depth of directory traversal
MAX_FILES = 10_000 # Maximum number of files to process
MAX_TOTAL_SIZE_BYTES = 500 * 1024 * 1024 # 500 MB

MAX_DISPLAY_SIZE: int = 300_000
TMP_BASE_PATH = Path("/tmp/gitingest")
DELETE_REPO_AFTER: int = 60 * 60 # In seconds
Expand Down
8 changes: 4 additions & 4 deletions src/gitingest/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

import click

from gitingest.query_ingestion import MAX_FILE_SIZE
from config import MAX_FILE_SIZE
from gitingest.repository_ingest import ingest


Expand Down Expand Up @@ -49,8 +49,8 @@ async def main(
"""
try:
# Combine default and custom ignore patterns
exclude_patterns = list(exclude_pattern)
include_patterns = list(set(include_pattern))
exclude_patterns = set(exclude_pattern)
include_patterns = set(include_pattern)

if not output:
output = "digest.txt"
Expand All @@ -61,7 +61,7 @@ async def main(
click.echo(summary)

except Exception as e:
click.echo(f"Error: {str(e)}", err=True)
click.echo(f"Error: {e}", err=True)
raise click.Abort()


Expand Down
22 changes: 8 additions & 14 deletions src/gitingest/ignore_patterns.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
""" Default ignore patterns for Gitingest. """

DEFAULT_IGNORE_PATTERNS: list[str] = [
DEFAULT_IGNORE_PATTERNS: set[str] = {
# Python
"*.pyc",
"*.pyo",
Expand Down Expand Up @@ -29,18 +29,17 @@
"*.war",
"*.ear",
"*.nar",
"target/",
".gradle/",
"build/",
".settings/",
".project",
".classpath",
"gradle-app.setting",
"*.gradle",
# IDEs and editors / Java
".project",
# C/C++
"*.o",
"*.obj",
"*.so",
"*.dll",
"*.dylib",
"*.exe",
Expand Down Expand Up @@ -68,21 +67,22 @@
".ruby-gemset",
".rvmrc",
# Rust
"target/",
"Cargo.lock",
"**/*.rs.bk",
# Java / Rust
"target/",
# Go
"bin/",
"pkg/",
# .NET/C#
"bin/",
"obj/",
"*.suo",
"*.user",
"*.userosscache",
"*.sln.docstates",
"packages/",
"*.nupkg",
# Go / .NET / C#
"bin/",
# Version control
".git",
".svn",
Expand Down Expand Up @@ -112,12 +112,9 @@
".idea",
".vscode",
".vs",
"*.swp",
"*.swo",
"*.swn",
".settings",
".project",
".classpath",
"*.sublime-*",
# Temporary and cache files
"*.log",
Expand All @@ -140,9 +137,6 @@
"*.egg",
"*.whl",
"*.so",
"*.dylib",
"*.dll",
"*.class",
# Documentation
"site-packages",
".docusaurus",
Expand All @@ -159,4 +153,4 @@
"*.tfstate*",
## Dependencies in various languages
"vendor/",
]
}
Loading

0 comments on commit d721b00

Please sign in to comment.