Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add PAT support with custom Git server compatibility #69

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 47 additions & 15 deletions src/gitingest/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,23 @@ class CloneConfig:
local_path: str
commit: str | None = None
branch: str | None = None
pat: str | None = None


@async_timeout(CLONE_TIMEOUT)
async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
"""
Clones a repository to a local path based on the provided query parameters.
Clones a repository to a local path based on the provided configuration.

Parameters
----------
config : CloneConfig
A dictionary containing the following keys:
Configuration object containing:
- url (str): The URL of the repository.
- local_path (str): The local path to clone the repository to.
- commit (Optional[str]): The specific commit hash to checkout.
- branch (Optional[str]): The branch to clone. Defaults to 'main' or 'master' if not provided.
- branch (Optional[str]): The branch to clone.
- pat (Optional[str]): Personal Access Token for authentication.

Returns
-------
Expand All @@ -36,17 +38,18 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
Raises
------
ValueError
If the repository does not exist or if required query parameters are missing.
If the repository does not exist or if required parameters are missing.
RuntimeError
If any git command fails during execution.
AsyncTimeoutError
If the cloning process exceeds the specified timeout.
"""
# Extract and validate query parameters
# Extract and validate parameters
url: str = config.url
local_path: str = config.local_path
commit: str | None = config.commit
branch: str | None = config.branch
pat: str | None = config.pat

if not url:
raise ValueError("The 'url' parameter is required.")
Expand All @@ -55,52 +58,81 @@ async def clone_repo(config: CloneConfig) -> tuple[bytes, bytes]:
raise ValueError("The 'local_path' parameter is required.")

# Check if the repository exists
if not await _check_repo_exists(url):
raise ValueError("Repository not found, make sure it is public")
if not await _check_repo_exists(url, pat):
raise ValueError("Repository not found, make sure it is public or provide valid PAT")

try:
if commit:
# Scenario 1: Clone and checkout a specific commit
# Clone the repository without depth to ensure full history for checkout
clone_cmd = ["git", "clone", "--single-branch", url, local_path]
clone_cmd = ["git", "clone", "--single-branch"]
if pat:
url = url.replace("https://", f"https://oauth2:{pat}@")
clone_cmd.extend([url, local_path])
await _run_git_command(*clone_cmd)

# Checkout the specific commit
checkout_cmd = ["git", "-C", local_path, "checkout", commit]
return await _run_git_command(*checkout_cmd)

if branch and branch.lower() not in ("main", "master"):

# Scenario 2: Clone a specific branch with shallow depth
clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch, url, local_path]
clone_cmd = ["git", "clone", "--depth=1", "--single-branch", "--branch", branch]
if pat:
url = url.replace("https://", f"https://oauth2:{pat}@")
clone_cmd.extend([url, local_path])
return await _run_git_command(*clone_cmd)

# Scenario 3: Clone the default branch with shallow depth
clone_cmd = ["git", "clone", "--depth=1", "--single-branch", url, local_path]
clone_cmd = ["git", "clone", "--depth=1", "--single-branch"]
if pat:
url = url.replace("https://", f"https://oauth2:{pat}@")
clone_cmd.extend([url, local_path])
return await _run_git_command(*clone_cmd)

except (RuntimeError, asyncio.TimeoutError, AsyncTimeoutError):
raise # Re-raise the exception


async def _check_repo_exists(url: str) -> bool:
async def _check_repo_exists(url: str, pat: str | None = None) -> bool:
"""
Check if a repository exists at the given URL using an HTTP HEAD request.

Parameters
----------
url : str
The URL of the repository.
pat : str | None
Personal Access Token for authentication, optional.

Returns
-------
bool
True if the repository exists, False otherwise.
"""
# Parse URL to get components
parts = url.split('/')
if len(parts) < 5: # Need at least protocol, empty, host, username, repo
return False

host = parts[2]
username = parts[3]
repo = parts[4]

# Construct API URL based on host
if 'github.com' in host:
api_url = url
else:
# For custom Git servers, use API v1 endpoint
api_url = f"https://{host}/api/v1/repos/{username}/{repo}"

cmd = ["curl", "-I"]
if pat:
cmd.extend(["-H", f"Authorization: token {pat}"])
cmd.append(api_url)

proc = await asyncio.create_subprocess_exec(
"curl",
"-I",
url,
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
)
Expand Down
85 changes: 80 additions & 5 deletions src/gitingest/tests/test_clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,23 +19,24 @@ async def test_clone_repo_with_commit() -> None:
mock_process = AsyncMock()
mock_process.communicate.return_value = (b"output", b"error")
mock_exec.return_value = mock_process

await clone_repo(clone_config)
mock_check.assert_called_once_with(clone_config.url)
mock_check.assert_called_once_with(clone_config.url, None)
assert mock_exec.call_count == 2 # Clone and checkout calls


@pytest.mark.asyncio
async def test_clone_repo_without_commit() -> None:
query = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main")
clone_config = CloneConfig(url="https://github.com/user/repo", local_path="/tmp/repo", commit=None, branch="main")

with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check:
with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec:
mock_process = AsyncMock()
mock_process.communicate.return_value = (b"output", b"error")
mock_exec.return_value = mock_process

await clone_repo(query)
mock_check.assert_called_once_with(query.url)
await clone_repo(clone_config)
mock_check.assert_called_once_with(clone_config.url, None)
assert mock_exec.call_count == 1 # Only clone call


Expand All @@ -50,7 +51,7 @@ async def test_clone_repo_nonexistent_repository() -> None:
with patch("gitingest.clone._check_repo_exists", return_value=False) as mock_check:
with pytest.raises(ValueError, match="Repository not found"):
await clone_repo(clone_config)
mock_check.assert_called_once_with(clone_config.url)
mock_check.assert_called_once_with(clone_config.url, None)


@pytest.mark.asyncio
Expand All @@ -74,3 +75,77 @@ async def test_check_repo_exists() -> None:
# Test failed request
mock_process.returncode = 1
assert await _check_repo_exists(url) is False


@pytest.mark.asyncio
async def test_check_repo_exists_with_pat() -> None:
    """Check that a PAT is passed to curl as an ``Authorization: token`` header.

    For a github.com URL the repository URL itself is the address probed,
    so it must appear unchanged as the last positional curl argument.
    """
    url = "https://github.com/user/repo"
    pat = "test_token_123"

    with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec:
        mock_process = AsyncMock()
        mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"")
        mock_process.returncode = 0
        mock_exec.return_value = mock_process

        # The helper is private in gitingest.clone; the un-prefixed name
        # `check_repo_exists` does not exist and would raise NameError.
        await _check_repo_exists(url, pat)

        # Verify curl command includes authorization header
        mock_exec.assert_called_with(
            "curl",
            "-I",
            "-H",
            f"Authorization: token {pat}",
            url,
            stdout=-1,  # asyncio.subprocess.PIPE
            stderr=-1,  # asyncio.subprocess.PIPE
        )


@pytest.mark.asyncio
async def test_check_repo_exists_custom_git_server() -> None:
    """Check that a non-GitHub host is probed through its ``/api/v1/repos`` endpoint.

    The PAT must still be sent as an ``Authorization: token`` header, and the
    address handed to curl must be the rewritten API URL, not the repository URL.
    """
    url = "https://git.custom.com/user/repo"
    pat = "test_token_123"

    with patch("asyncio.create_subprocess_exec", new_callable=AsyncMock) as mock_exec:
        mock_process = AsyncMock()
        mock_process.communicate.return_value = (b"HTTP/1.1 200 OK\n", b"")
        mock_process.returncode = 0
        mock_exec.return_value = mock_process

        # The helper is private in gitingest.clone; the un-prefixed name
        # `check_repo_exists` does not exist and would raise NameError.
        await _check_repo_exists(url, pat)

        # Verify curl command uses correct API endpoint and includes authorization header
        mock_exec.assert_called_with(
            "curl",
            "-I",
            "-H",
            f"Authorization: token {pat}",
            "https://git.custom.com/api/v1/repos/user/repo",
            stdout=-1,  # asyncio.subprocess.PIPE
            stderr=-1,  # asyncio.subprocess.PIPE
        )


@pytest.mark.asyncio
async def test_clone_repo_with_pat() -> None:
    """Check that ``clone_repo`` forwards the PAT to the existence check and embeds it in the clone URL.

    With ``branch="main"`` and no commit, ``clone_repo`` takes its default-branch
    path, so exactly one shallow single-branch ``git clone`` is expected.
    """
    clone_config = CloneConfig(
        url="https://git.custom.com/user/repo",
        local_path="/tmp/repo",
        commit=None,
        branch="main",
        pat="test_token_123",
    )

    # patch() must name the attributes that actually exist on gitingest.clone.
    # Both helpers are underscore-prefixed there; patching the un-prefixed
    # names raises AttributeError before the test body runs.
    with patch("gitingest.clone._check_repo_exists", return_value=True) as mock_check:
        with patch("gitingest.clone._run_git_command", new_callable=AsyncMock) as mock_exec:
            mock_process = AsyncMock()
            mock_process.communicate.return_value = (b"output", b"error")
            mock_exec.return_value = mock_process

            await clone_repo(clone_config)
            mock_check.assert_called_once_with(clone_config.url, clone_config.pat)

            # Verify git clone command includes PAT in URL
            expected_url = clone_config.url.replace("https://", f"https://oauth2:{clone_config.pat}@")
            # Check that the command was called with the correct arguments
            mock_exec.assert_called_with(
                "git",
                "clone",
                "--depth=1",
                "--single-branch",
                expected_url,
                clone_config.local_path,
            )
2 changes: 2 additions & 0 deletions src/process_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ async def process_query(
slider_position: int,
pattern_type: str = "exclude",
pattern: str = "",
pat: str | None = None,
is_index: bool = False,
) -> _TemplateResponse:
"""
Expand Down Expand Up @@ -140,6 +141,7 @@ async def process_query(
local_path=query["local_path"],
commit=query.get("commit"),
branch=query.get("branch"),
pat=pat,
)
await clone_repo(clone_config)
summary, tree, content = ingest_from_query(query)
Expand Down
39 changes: 37 additions & 2 deletions src/templates/components/github_form.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
<div class="rounded-xl relative z-20 pl-8 sm:pl-10 pr-8 sm:pr-16 py-8 border-[3px] border-gray-900 bg-[#fff4da]">
<img src="https://cdn.devdojo.com/images/january2023/shape-1.png"
class="absolute md:block hidden left-0 h-[4.5rem] w-[4.5rem] bottom-0 -translate-x-full ml-3">
<form class="flex md:flex-row flex-col w-full h-full justify-center items-stretch space-y-5 md:space-y-0 md:space-x-5"
<form class="flex flex-col w-full h-full justify-center items-stretch space-y-5"
id="ingestForm"
onsubmit="handleSubmit(event{% if is_index %}, true{% endif %})">
<!-- Repository URL input -->
<div class="relative w-full h-full">
<div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0 z-10"></div>
<input type="text"
Expand All @@ -16,16 +17,50 @@
required
class="border-[3px] w-full relative z-20 border-gray-900 placeholder-gray-600 text-lg font-medium focus:outline-none py-3.5 px-6 rounded">
</div>
<div class="relative w-auto flex-shrink-0 h-full group">

<!-- Access Settings Checkbox -->
<div class="flex items-center space-x-2 ml-2">
<input type="checkbox"
id="showAccessSettings"
class="w-4 h-4 rounded border-gray-900"
onchange="toggleAccessSettings()">
<label for="showAccessSettings" class="text-gray-900">Access Settings</label>
</div>

<!-- PAT input (hidden by default) -->
<div id="accessSettingsContainer" class="hidden">
<div class="relative w-full h-full">
<div class="w-full h-full rounded bg-gray-900 translate-y-1 translate-x-1 absolute inset-0 z-10"></div>
<input type="password"
name="pat"
id="pat"
placeholder="Personal Access Token"
value="{{ pat if pat else '' }}"
class="border-[3px] w-full relative z-20 border-gray-900 placeholder-gray-600 text-lg font-medium focus:outline-none py-3.5 px-6 rounded">
</div>
</div>

<!-- Submit Button -->
<div class="relative w-full sm:w-auto flex-shrink-0 h-full group">
<div class="w-full h-full rounded bg-gray-800 translate-y-1 translate-x-1 absolute inset-0 z-10"></div>
<button type="submit"
class="py-3.5 rounded px-6 group-hover:-translate-y-px group-hover:-translate-x-px ease-out duration-300 z-20 relative w-full border-[3px] border-gray-900 font-medium bg-[#ffc480] tracking-wide text-lg flex-shrink-0 text-gray-900">
Ingest
</button>
</div>

<input type="hidden" name="pattern_type" value="exclude">
<input type="hidden" name="pattern" value="">
</form>

<script>
function toggleAccessSettings() {
    // Reveal the PAT input while the "Access Settings" checkbox is ticked,
    // and hide it again when the box is cleared.
    const panel = document.getElementById('accessSettingsContainer');
    const settingsToggle = document.getElementById('showAccessSettings');
    const shouldHide = !settingsToggle.checked;
    panel.classList.toggle('hidden', shouldHide);
}
</script>

<div class="mt-4 relative z-20 flex flex-wrap gap-4 items-start">
<!-- Pattern selector -->
<div class="w-[200px] sm:w-[250px] mr-9 mt-4">
Expand Down
Loading