Skip to content

Commit

Permalink
Merge pull request #40 from memreok/fix-github-api-rate-limit
Browse files Browse the repository at this point in the history
Fix GitHub api rate limit
  • Loading branch information
berkingurcan authored Oct 15, 2024
2 parents 7fdff3b + 2db0d5d commit 746675a
Show file tree
Hide file tree
Showing 7 changed files with 215 additions and 48 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,5 @@ dmypy.json

# Pyright type checker
.pyright/

.vscode/settings.json
111 changes: 65 additions & 46 deletions github_tracker_bot/process_commits.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,20 @@
import os
import sys
import time
import asyncio
import aiohttp
from datetime import datetime
from dateutil import parser
from github import Github
from typing import List, Optional, Dict, Any

from tenacity import retry, wait_fixed, stop_after_attempt, retry_if_exception_type
from tenacity import (
retry,
wait_fixed,
stop_after_attempt,
retry_if_exception_type,
RetryError,
)
from asyncio import Semaphore

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))

Expand All @@ -22,47 +29,54 @@
GITHUB_TOKEN = config.GITHUB_TOKEN
g = Github(GITHUB_TOKEN)


CONCURRENT_REQUESTS = 8
semaphore = Semaphore(CONCURRENT_REQUESTS)

retry_conditions = (
retry_if_exception_type(aiohttp.ClientError)
retry_if_exception_type(
aiohttp.ClientError,
)
| retry_if_exception_type(asyncio.TimeoutError)
| retry_if_exception_type(Exception)
| retry_if_exception_type(aiohttp.ClientConnectorError)
)


@retry(wait=wait_fixed(2), stop=stop_after_attempt(5), retry=retry_conditions)
@retry(wait=wait_fixed(5), stop=stop_after_attempt(8), retry=retry_conditions)
async def fetch_diff(repo: str, sha: str) -> Optional[str]:
url = f"https://api.github.com/repos/{repo}/commits/{sha}"
headers = {"Authorization": f"token {GITHUB_TOKEN}"}

try:
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status == 200:
commit_data = await response.json()
diff_url = commit_data["html_url"] + ".diff"

async with session.get(diff_url, headers=headers) as diff_response:
if diff_response.status == 200:
return await diff_response.text()
else:
logger.error(
f"Failed to fetch diff: {await diff_response.text()}"
)
return None
else:
logger.error(
f"Failed to fetch commit data: {await response.text()}"
)
return None
except aiohttp.ClientError as e:
logger.error(f"Client error while fetching diff for repo {repo}: {e}")
raise
except asyncio.TimeoutError:
logger.error("Request timed out while fetching diff")
raise
except Exception as e:
logger.error(f"Unexpected error while fetching diff: {e}")
raise
headers = {
"Authorization": f"token {GITHUB_TOKEN}",
"Accept": "application/vnd.github.v3.diff",
}

async with semaphore:
try:
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status == 200:
diff = await response.text()
return diff
elif response.status == 403:
reset_time = response.headers.get("X-RateLimit-Reset")
sleep_time = (
int(reset_time) - int(time.time()) + 1 if reset_time else 60
)
logger.warning(
f"Rate limit exceeded. Sleeping for {sleep_time} seconds."
)
await asyncio.sleep(sleep_time)
raise aiohttp.ClientError("Rate limit exceeded, retrying...")
else:
error_text = await response.text()
logger.error(
f"Failed to fetch diff: {response.status}, {error_text}"
)
return None
except Exception as e:
logger.error(f"Error while fetching diff for repo {repo}: {e}")
raise


def concatenate_diff_to_commit_info(
Expand Down Expand Up @@ -109,12 +123,15 @@ async def process_commits(commit_infos: List[Dict[str, Any]]):
for commit_info in commit_infos
]

diffs = await asyncio.gather(*tasks)
diffs = await asyncio.gather(*tasks, return_exceptions=True)

processed_commits = [
concatenate_diff_to_commit_info(commit_info, diff)
for commit_info, diff in zip(commit_infos, diffs)
]
processed_commits = []
for commit_info, diff in zip(commit_infos, diffs):
if isinstance(diff, Exception):
logger.error(f"Failed to fetch diff for {commit_info['sha']}: {diff}")
diff = None
processed_commit = concatenate_diff_to_commit_info(commit_info, diff)
processed_commits.append(processed_commit)

grouped_commits = group_and_sort_commits(processed_commits)
for daily_commit in grouped_commits.values():
Expand All @@ -126,9 +143,11 @@ async def process_commits(commit_infos: List[Dict[str, Any]]):
if __name__ == "__main__":
repo_name = "UmstadAI/zkAppUmstad"
sha = "092c20a73859e0b4a4591f815efbdcab08df4df8"
diff = asyncio.run(fetch_diff(repo_name, sha))

if diff:
logger.info(f"Fetched diff successfully: {diff}")
else:
logger.error("Failed to fetch diff")
try:
diff = asyncio.run(fetch_diff(repo_name, sha))
if diff:
logger.info(f"Fetched diff successfully.")
else:
logger.error("Failed to fetch diff")
except RetryError as e:
logger.error(f"Failed after retries: {e}")
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ ujson==5.10.0
uritemplate==4.1.1
urllib3==2.2.2
uvicorn==0.30.3
uvloop==0.19.0
vulture==2.11
watchfiles==0.22.0
websockets==12.0
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@

setup(
name="pgt_leaderbot",
version="0.3.6",
version="0.3.7",
packages=find_packages(),
)
5 changes: 5 additions & 0 deletions tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@ def testmongo(ctx):
ctx.run(f"python -m unittest tests/test_mongo_data_handler.py")


@task
def testfc(ctx):
ctx.run(f"python -m unittest tests/test_process_commits.py")


@task
def testmongoint(ctx):
ctx.run(f"python -m unittest tests/test_mongo_integration.py")
Expand Down
3 changes: 3 additions & 0 deletions tests/test_commit_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
import asyncio
import aiohttp
from github.GithubException import GithubException
import sys
import os

sys.path.append(os.path.abspath(os.path.dirname(__file__) + "/../"))
from github_tracker_bot.commit_scraper import fetch_commits, get_user_commits_in_repo


Expand Down
139 changes: 139 additions & 0 deletions tests/test_process_commits.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
import unittest
from unittest.mock import patch, AsyncMock, call
import asyncio
import aiohttp

import time
import os
import sys

sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from github_tracker_bot.process_commits import fetch_diff


class TestFetchDiff(unittest.IsolatedAsyncioTestCase):

@patch("aiohttp.ClientSession.get")
async def test_successful_response(self, mock_get):
# Mock the response object
mock_response = AsyncMock()
mock_response.status = 200
mock_response.text = AsyncMock(return_value="diff content")
mock_get.return_value.__aenter__.return_value = mock_response

# Call the fetch_diff function
repo = "memreok/PGT_LeaderBot"
sha = "244a732c324d993d62dcb0017f8fd1b0d39d99e0"
diff = await fetch_diff(repo, sha)

# Assert the result
self.assertEqual(diff, "diff content")

@patch("aiohttp.ClientSession.get")
async def test_retry_on_client_error(self, mock_get):
first_attempt = AsyncMock()
first_attempt.__aenter__.side_effect = aiohttp.ClientError()

# Mock a successful response on retry
mock_response_success = AsyncMock()
mock_response_success.status = 200
mock_response_success.text = AsyncMock(return_value="diff content")
second_attempt = AsyncMock()
second_attempt.__aenter__.return_value = mock_response_success

# Set side effects for consecutive calls
mock_get.side_effect = [first_attempt, second_attempt]

repo = "memreok/PGT_LeaderBot"
sha = "244a732c324d993d62dcb0017f8fd1b0d39d99e0"
diff = await fetch_diff(repo, sha)

# Assert the result after retry
self.assertEqual(mock_get.call_count, 2)
self.assertEqual(diff, "diff content")

@patch("aiohttp.ClientSession.get")
async def test_retry_on_timeout_error(self, mock_get):
first_attempt = AsyncMock()
first_attempt.__aenter__.side_effect = asyncio.TimeoutError()

# Mock a successful response on retry
mock_response_success = AsyncMock()
mock_response_success.status = 200
mock_response_success.text = AsyncMock(return_value="diff content")
second_attempt = AsyncMock()
second_attempt.__aenter__.return_value = mock_response_success

# Set side effects for consecutive calls
mock_get.side_effect = [first_attempt, second_attempt]

repo = "memreok/PGT_LeaderBot"
sha = "244a732c324d993d62dcb0017f8fd1b0d39d99e0"
diff = await fetch_diff(repo, sha)

# Assert the result after retry
self.assertEqual(mock_get.call_count, 2)
self.assertEqual(diff, "diff content")

@patch("asyncio.sleep", new_callable=AsyncMock)
@patch("time.time", return_value=1000)
@patch("aiohttp.ClientSession.get")
async def test_handle_403_api_rate_limit(self, mock_get, mock_time, mock_sleep):
# Set a fixed current time
current_time = 1000
mock_time.return_value = current_time
reset_time_in_future = current_time + 120 # 2 minutes in the future

# Mock a 403 response with rate limit reset header
mock_response_403 = AsyncMock()
mock_response_403.status = 403
mock_response_403.text = AsyncMock(return_value="API Rate Limit Exceeded")
mock_response_403.headers = {"X-RateLimit-Reset": str(reset_time_in_future)}

# Mock a successful response on retry
mock_response_success = AsyncMock()
mock_response_success.status = 200
mock_response_success.text = AsyncMock(return_value="diff content")

# Set side effects for consecutive calls
first_attempt = AsyncMock()
first_attempt.__aenter__.return_value = mock_response_403
second_attempt = AsyncMock()
second_attempt.__aenter__.return_value = mock_response_success
mock_get.side_effect = [first_attempt, second_attempt]

repo = "memreok/PGT_LeaderBot"
sha = "244a732c324d993d62dcb0017f8fd1b0d39d99e0"
diff = await fetch_diff(repo, sha)

expected_sleep_time = reset_time_in_future - current_time + 1 # Should be 121

# Assert that sleep was called twice:
# 1. Once for the rate limit (expected_sleep_time)
# 2. Once for the tenacity retry (fixed 2 seconds)
self.assertEqual(mock_sleep.call_count, 2)
mock_sleep.assert_has_calls([call(expected_sleep_time), call(2.0)])

# Ensure that the second call to `aiohttp.get` was successful
self.assertEqual(mock_get.call_count, 2)
self.assertEqual(diff, "diff content")

@patch("aiohttp.ClientSession.get")
async def test_general_exception_handling(self, mock_get):
# Mock an unknown exception
mock_get.side_effect = Exception("Unknown Error")

repo = "memreok/PGT_LeaderBot"
sha = "244a732c324d993d62dcb0017f8fd1b0d39d99e0"

with self.assertRaises(Exception):
await fetch_diff(repo, sha)


def run_async_tests():
loop = asyncio.get_event_loop()
loop.run_until_complete(unittest.main())


if __name__ == "__main__":
run_async_tests()

0 comments on commit 746675a

Please sign in to comment.