Skip to content

Commit

Permalink
0.96.1: Fix #1404: Handle transient errors from GitHub repo collabora…
Browse files Browse the repository at this point in the history
…tors sync (#1406)

### Summary

Fix for #1404. Made this as a separate PR from
#1405 since I can't
push up to the etsy fork.

### Related issues or links

- #1404


### Checklist

Provide proof that this works (this makes reviews move faster). Please
perform one or more of the following:
- [x] Update/add unit or integration tests.
- [x] Include a screenshot showing what the graph looked like before and
after your changes.
- [ ] Include console log trace showing what happened before and after
your changes.

**Not applicable** If you are changing a node or relationship:
- [ ] Update the
[schema](https://github.com/lyft/cartography/tree/master/docs/root/modules)
and
[readme](https://github.com/lyft/cartography/blob/master/docs/schema/README.md).

**Not applicable** If you are implementing a new intel module:
- [ ] Use the NodeSchema [data
model](https://cartography-cncf.github.io/cartography/dev/writing-intel-modules.html#defining-a-node).

---------

Signed-off-by: Daniel Brauer <[email protected]>
Co-authored-by: Daniel Brauer <[email protected]>
  • Loading branch information
Alex Chantavy and danbrauer authored Dec 13, 2024
1 parent 5d5f856 commit e68522e
Show file tree
Hide file tree
Showing 6 changed files with 124 additions and 33 deletions.
53 changes: 43 additions & 10 deletions cartography/intel/github/repos.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

from cartography.intel.github.util import fetch_all
from cartography.intel.github.util import PaginatedGraphqlData
from cartography.util import backoff_handler
from cartography.util import retries_with_backoff
from cartography.util import run_cleanup_job
from cartography.util import timeit

Expand Down Expand Up @@ -134,13 +136,35 @@
"""


def _get_repo_collaborators_inner_func(
org: str,
api_url: str,
token: str,
repo_name: str,
affiliation: str,
collab_users: list[dict[str, Any]],
collab_permission: list[str],
) -> None:
logger.info(f"Loading {affiliation} collaborators for repo {repo_name}.")
collaborators = _get_repo_collaborators(token, api_url, org, repo_name, affiliation)

# nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
# however sometimes GitHub returns None, as in issue 1334 and 1404.
for collab in collaborators.nodes or []:
collab_users.append(collab)

# The `or []` is because `.edges` can be None.
for perm in collaborators.edges or []:
collab_permission.append(perm['permission'])


def _get_repo_collaborators_for_multiple_repos(
repo_raw_data: list[dict[str, Any]],
affiliation: str,
org: str,
api_url: str,
token: str,
) -> dict[str, List[UserAffiliationAndRepoPermission]]:
) -> dict[str, list[UserAffiliationAndRepoPermission]]:
"""
For every repo in the given list, retrieve the collaborators.
:param repo_raw_data: A list of dicts representing repos. See tests.data.github.repos.GET_REPOS for data shape.
Expand All @@ -151,7 +175,7 @@ def _get_repo_collaborators_for_multiple_repos(
:param token: The Github API token as string.
:return: A dictionary of repo URL to list of UserAffiliationAndRepoPermission
"""
result: dict[str, List[UserAffiliationAndRepoPermission]] = {}
result: dict[str, list[UserAffiliationAndRepoPermission]] = {}
for repo in repo_raw_data:
repo_name = repo['name']
repo_url = repo['url']
Expand All @@ -162,14 +186,23 @@ def _get_repo_collaborators_for_multiple_repos(
result[repo_url] = []
continue

collab_users = []
collab_permission = []
collaborators = _get_repo_collaborators(token, api_url, org, repo_name, affiliation)
# nodes and edges are expected to always be present given that we only call for them if totalCount is > 0
for collab in collaborators.nodes:
collab_users.append(collab)
for perm in collaborators.edges:
collab_permission.append(perm['permission'])
collab_users: List[dict[str, Any]] = []
collab_permission: List[str] = []

retries_with_backoff(
_get_repo_collaborators_inner_func,
TypeError,
5,
backoff_handler,
)(
org=org,
api_url=api_url,
token=token,
repo_name=repo_name,
affiliation=affiliation,
collab_users=collab_users,
collab_permission=collab_permission,
)

result[repo_url] = [
UserAffiliationAndRepoPermission(user, permission, affiliation)
Expand Down
49 changes: 32 additions & 17 deletions cartography/intel/github/teams.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,26 @@ def get_teams(org: str, api_url: str, token: str) -> Tuple[PaginatedGraphqlData,
return fetch_all(token, api_url, org, org_teams_gql, 'teams')


def _get_teams_repos_inner_func(
org: str,
api_url: str,
token: str,
team_name: str,
repo_urls: list[str],
repo_permissions: list[str],
) -> None:
logger.info(f"Loading team repos for {team_name}.")
team_repos = _get_team_repos(org, api_url, token, team_name)

# The `or []` is because `.nodes` can be None. See:
# https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
for repo in team_repos.nodes or []:
repo_urls.append(repo['url'])
# The `or []` is because `.edges` can be None.
for edge in team_repos.edges or []:
repo_permissions.append(edge['permission'])


@timeit
def _get_team_repos_for_multiple_teams(
team_raw_data: list[dict[str, Any]],
Expand All @@ -85,23 +105,18 @@ def _get_team_repos_for_multiple_teams(
repo_urls: List[str] = []
repo_permissions: List[str] = []

def get_teams_repos_inner_func(
org: str, api_url: str, token: str, team_name: str,
repo_urls: List[str], repo_permissions: List[str],
) -> None:
logger.info(f"Loading team repos for {team_name}.")
team_repos = _get_team_repos(org, api_url, token, team_name)
# The `or []` is because `.nodes` can be None. See:
# https://docs.github.com/en/graphql/reference/objects#teamrepositoryconnection
for repo in team_repos.nodes or []:
repo_urls.append(repo['url'])
# The `or []` is because `.edges` can be None.
for edge in team_repos.edges or []:
repo_permissions.append(edge['permission'])

retries_with_backoff(get_teams_repos_inner_func, TypeError, 5, backoff_handler)(
org=org, api_url=api_url, token=token, team_name=team_name,
repo_urls=repo_urls, repo_permissions=repo_permissions,
retries_with_backoff(
_get_teams_repos_inner_func,
TypeError,
5,
backoff_handler,
)(
org=org,
api_url=api_url,
token=token,
team_name=team_name,
repo_urls=repo_urls,
repo_permissions=repo_permissions,
)
# Shape = [(repo_url, 'WRITE'), ...]]
result[team_name] = [RepoPermission(url, perm) for url, perm in zip(repo_urls, repo_permissions)]
Expand Down
8 changes: 5 additions & 3 deletions cartography/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,16 +290,18 @@ def inner_function(*args, **kwargs): # type: ignore


def retries_with_backoff(
func: Callable,
exceptionType: Type[Exception], max_tries: int, on_backoff: Callable,
func: Callable,
exception_type: Type[Exception],
max_tries: int,
on_backoff: Callable,
) -> Callable:
"""
Adds retry with backoff to the given function. (Could expand the possible input parameters as needed.)
"""
@wraps(func)
@backoff.on_exception(
backoff.expo,
exceptionType,
exception_type,
max_tries=max_tries,
on_backoff=on_backoff,
)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from setuptools import find_packages
from setuptools import setup

__version__ = '0.96.0'
__version__ = '0.96.1'


setup(
Expand Down
37 changes: 37 additions & 0 deletions tests/unit/cartography/intel/github/test_repos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from unittest.mock import MagicMock
from unittest.mock import patch

import pytest

from cartography.intel.github.repos import _get_repo_collaborators_for_multiple_repos


@patch('time.sleep', return_value=None)
@patch('cartography.intel.github.repos._get_repo_collaborators')
@patch('cartography.intel.github.repos.backoff_handler', spec=True)
def test_get_team_users_github_returns_none(mock_backoff_handler, mock_get_team_collaborators, mock_sleep):
"""
This test happens to use 'OUTSIDE' affiliation, but it's irrelevant for the test, it just needs either valid value.
"""
# Arrange
repo_data = [{'name': 'repo1', 'url': 'https://github.com/repo1', 'outsideCollaborators': {'totalCount': 1}}]
mock_repo_collabs = MagicMock()
# Set up the condition where GitHub returns a None url and None edge as in #1334.
mock_repo_collabs.nodes = [None]
mock_repo_collabs.edges = [None]
mock_get_team_collaborators.return_value = mock_repo_collabs

# Assert we raise an exception
with pytest.raises(TypeError):
_get_repo_collaborators_for_multiple_repos(
repo_data,
'OUTSIDE',
'test-org',
'https://api.github.com',
'test-token',
)

# Assert that we retry and give up
assert mock_sleep.call_count == 4
assert mock_get_team_collaborators.call_count == 5
assert mock_backoff_handler.call_count == 4
8 changes: 6 additions & 2 deletions tests/unit/cartography/intel/github/test_teams.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ def test_get_team_users_happy_path(mock_get_team_users):
mock_get_team_users.assert_called_once_with('test-org', 'https://api.github.com', 'test-token', 'team1')


@patch('time.sleep', return_value=None)
@patch('cartography.intel.github.teams._get_team_repos')
@patch('cartography.intel.github.teams.backoff_handler', spec=True)
def test_get_team_repos_github_returns_none(mock_backoff_handler, mock_get_team_repos):
def test_get_team_repos_github_returns_none(mock_backoff_handler, mock_get_team_repos, mock_sleep):
# Arrange
team_data = [{'slug': 'team1', 'repositories': {'totalCount': 1}}]
mock_team_repos = MagicMock()
Expand All @@ -143,13 +144,15 @@ def test_get_team_repos_github_returns_none(mock_backoff_handler, mock_get_team_
)

# Assert that we retry and give up
assert mock_sleep.call_count == 4
assert mock_get_team_repos.call_count == 5
assert mock_backoff_handler.call_count == 4


@patch('time.sleep', return_value=None)
@patch('cartography.intel.github.teams._get_team_users')
@patch('cartography.intel.github.teams.backoff_handler', spec=True)
def test_get_team_users_github_returns_none(mock_backoff_handler, mock_get_team_users):
def test_get_team_users_github_returns_none(mock_backoff_handler, mock_get_team_users, mock_sleep):
# Arrange
team_data = [{'slug': 'team1', 'members': {'totalCount': 1}}]
mock_team_users = MagicMock()
Expand All @@ -168,6 +171,7 @@ def test_get_team_users_github_returns_none(mock_backoff_handler, mock_get_team_
)

# Assert that we retry and give up
assert mock_sleep.call_count == 4
assert mock_get_team_users.call_count == 5
assert mock_backoff_handler.call_count == 4

Expand Down

0 comments on commit e68522e

Please sign in to comment.