From f821594f3dff737461d28ed3c7164956f0073eeb Mon Sep 17 00:00:00 2001 From: alalkamys Date: Thu, 21 Mar 2024 19:22:22 +0200 Subject: [PATCH 1/8] docs: fix `CODE_MIGRATION_ASSISTANT_USER_AGENT` default style --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 7e9a980..efef639 100644 --- a/README.md +++ b/README.md @@ -292,7 +292,7 @@ Below is an explanation of each field in the configuration file: | `AZURE_DEVOPS_PAT` | Azure DevOps Personal Access Token (PAT) | `None` | | `GITHUB_TOKEN` | GitHub Personal Access Token (PAT) | `None` | | `GITHUB_ENTERPRISE_TOKEN` | GitHub Enterprise Personal Access Token (PAT) | `None` | -| `CODE_MIGRATION_ASSISTANT_USER_AGENT` | User agent used for HTTP requests by Code Migration Assistant | `alalkamys`/code-migration-assistant | +| `CODE_MIGRATION_ASSISTANT_USER_AGENT` | User agent used for HTTP requests by Code Migration Assistant | `alalkamys/code-migration-assistant` | From 5fafab38b837fc38227dad2e927797b0b263eb0d Mon Sep 17 00:00:00 2001 From: alalkamys Date: Fri, 22 Mar 2024 16:34:13 +0200 Subject: [PATCH 2/8] fix: helpers.git.checkout_branch() This change enhances the checkout_branch function and fixes it. Previously, checkout_branch was unable to check out a branch that was created by a previous run when its local repo had been deleted from remote-targets/. When you clone a remote repo, you can only see the refs/remotes/origin/* branches and only refs/heads/. checkout_branch used to look for the branch only in refs/heads, and hence was unable to find it and would then create a refs/heads branch based on the from_branch. This was wrong. Additionally, I used to think from_branch would work, but I had only tested it with refs/heads/main, where it worked; had I supplied a different from_branch it wouldn't have worked, because it only searched the refs/heads/ branches. 
--- app/helpers/git.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/app/helpers/git.py b/app/helpers/git.py index a87ab65..3e1c0b3 100644 --- a/app/helpers/git.py +++ b/app/helpers/git.py @@ -158,37 +158,57 @@ def identity_setup(repo: Repo, actor_username: str, actor_email: str) -> None: return False -def checkout_branch(repo: Repo, branch_name: str, from_branch: str = None) -> bool: - """Checkout or create a new branch in the local repository. +def checkout_branch(repo: Repo, branch_name: str, from_branch: str = None, remote_name: str = "origin") -> bool: + """Checkout an existing branch or create a new branch in the local repository. + + This function checks out an existing branch or creates a new branch in the local repository. + If the specified branch already exists locally, it switches to that branch. + If the branch does not exist locally but exists in the remote repository, it creates a new local branch + tracking the remote branch and switches to it. + If the specified branch does not exist locally or remotely, it attempts to create a new branch + based on the provided base branch (or the current branch if not specified). Args: repo (Repo): The GitPython Repo object representing the local repository. branch_name (str): The name of the branch to checkout or create. from_branch (str, optional): The name of the base branch to create the new branch from. If None, create the new branch from the current branch. Defaults to None. + remote_name (str, optional): The name of the remote repository. Defaults to "origin". Returns: bool: True if the branch was successfully checked out or created, False otherwise. """ try: + remote_branch_name = f"{remote_name}/{branch_name}" if branch_name in repo.branches: _logger.info( - f"'{branch_name}' branch already exists, checking out..") + f"'{branch_name}' branch already exists. 
Switching..") branch = repo.branches[branch_name] + elif remote_branch_name in repo.refs: + _logger.info(f"'{remote_branch_name}' exists.") + branch = repo.create_head(branch_name, commit=remote_branch_name) + _logger.info(f"Branch '{branch_name}' set up to track '{ + remote_branch_name}'") + branch.set_tracking_branch(repo.refs[remote_branch_name]) else: _logger.info(f"'{branch_name}' doesn't exist, creating..") from_branch = from_branch or repo.active_branch.name + remote_from_branch = f"{remote_name}/{from_branch}" if from_branch in repo.branches: branch = repo.create_head(branch_name, commit=from_branch) _logger.info(f"Created new branch '{ - branch_name}' based on '{from_branch}' branch") + branch_name}' based on '{from_branch}' branch. Switching..") + elif remote_from_branch in repo.refs: + branch = repo.create_head(branch_name, commit=remote_from_branch) + _logger.info(f"Created new branch '{ + branch_name}' based on '{remote_from_branch}' branch. Switching..") else: _logger.error( f"Error: '{from_branch}' based on branch doesn't exist") return False branch.checkout() - _logger.info(f"Checked out branch '{branch_name}' successfully.") + _logger.info(f"Switched to branch '{branch_name}' successfully.") return True except GitCommandError as e: From eab5985ffbb1a09c8749116c22dec755396a715f Mon Sep 17 00:00:00 2001 From: alalkamys Date: Fri, 22 Mar 2024 16:40:28 +0200 Subject: [PATCH 3/8] fix: helpers.git.identity_setup() signature return type --- app/helpers/git.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/helpers/git.py b/app/helpers/git.py index 3e1c0b3..d6b0083 100644 --- a/app/helpers/git.py +++ b/app/helpers/git.py @@ -133,7 +133,7 @@ def load_target_repos(repos: list[dict]) -> list[Repo]: return result -def identity_setup(repo: Repo, actor_username: str, actor_email: str) -> None: +def identity_setup(repo: Repo, actor_username: str, actor_email: str) -> bool: """Set up identity configuration for a GitPython repository. 
Args: From 17bb17901f361b18411f1d4ed89fdf9fa31a6fae Mon Sep 17 00:00:00 2001 From: alalkamys Date: Fri, 22 Mar 2024 16:43:34 +0200 Subject: [PATCH 4/8] fix: divergent HEADS reconciliation method Added app.helpers.git.configure_divergent_branches_reconciliation_method function to configure how to resolve divergent HEADs --- app/helpers/git.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/app/helpers/git.py b/app/helpers/git.py index d6b0083..e518adb 100644 --- a/app/helpers/git.py +++ b/app/helpers/git.py @@ -158,6 +158,39 @@ def identity_setup(repo: Repo, actor_username: str, actor_email: str) -> bool: return False +def configure_divergent_branches_reconciliation_method(repo: Repo, rebase: bool = False, fast_forward_only: bool = False) -> bool: + """Configure the reconciliation method for handling divergent branches in a GitPython repository. + + This function configures the reconciliation method to handle situations where the local and remote branches have diverged during pull operations. + + Args: + repo (Repo): The GitPython repository object. + rebase (bool, optional): If True, set the reconciliation method to rebase. Defaults to False. + fast_forward_only (bool, optional): If True, set the reconciliation method to fast-forward only. Ignored if 'rebase' is True. Defaults to False. + + Returns: + bool: True if the reconciliation method was configured successfully, False otherwise. 
+ """ + try: + config_writer = repo.config_writer() + if fast_forward_only: + _logger.debug( + "Setting reconciliation method to fast-forward only..") + config_writer.set_value('pull', 'ff', 'only').release() + elif rebase: + _logger.debug("Setting reconciliation method to rebase..") + config_writer.set_value('pull', 'rebase', 'true').release() + else: + _logger.debug("Setting reconciliation method to merge..") + config_writer.set_value('pull', 'rebase', 'false').release() + del (config_writer) + return True + except Exception as e: + _logger.error(f"An error occurred while setting up reconciliation method: { + str(e).strip()}") + return False + + def checkout_branch(repo: Repo, branch_name: str, from_branch: str = None, remote_name: str = "origin") -> bool: """Checkout an existing branch or create a new branch in the local repository. @@ -199,7 +232,8 @@ def checkout_branch(repo: Repo, branch_name: str, from_branch: str = None, remot _logger.info(f"Created new branch '{ branch_name}' based on '{from_branch}' branch. Switching..") elif remote_from_branch in repo.refs: - branch = repo.create_head(branch_name, commit=remote_from_branch) + branch = repo.create_head( + branch_name, commit=remote_from_branch) _logger.info(f"Created new branch '{ branch_name}' based on '{remote_from_branch}' branch. 
Switching..") else: From 0bedd9a79a062f319c39607cfe228dd341864cbf Mon Sep 17 00:00:00 2001 From: alalkamys Date: Fri, 22 Mar 2024 16:45:33 +0200 Subject: [PATCH 5/8] fix: configure pull.rebase to true --- app/helpers/git.py | 4 ++-- main.py | 12 ++++++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/app/helpers/git.py b/app/helpers/git.py index e518adb..ba8e9db 100644 --- a/app/helpers/git.py +++ b/app/helpers/git.py @@ -3,11 +3,11 @@ from git import Actor from git import Repo +from git.exc import GitCommandError +from git.exc import NoSuchPathError from git.refs.head import Head from git.remote import PushInfo from git.remote import PushInfoList -from git.exc import GitCommandError -from git.exc import NoSuchPathError from typing import Any import json import logging diff --git a/main.py b/main.py index 7866920..8170db0 100644 --- a/main.py +++ b/main.py @@ -1,6 +1,7 @@ from app.config import app_config from app.helpers.git import checkout_branch from app.helpers.git import commit_changes +from app.helpers.git import configure_divergent_branches_reconciliation_method from app.helpers.git import get_files_count from app.helpers.git import has_tracking_branch from app.helpers.git import identity_setup @@ -82,6 +83,17 @@ _logger.info("Exiting..") sys.exit(2) + reconciliation_method_configured = configure_divergent_branches_reconciliation_method( + repo=repo, rebase=True) + if not reconciliation_method_configured: + _logger.error( + f"Failed to configure the reconciliation method for '{repo_name}'. 
Review the logs for more details") + if repo != TARGET_REPOS[-1]: + _logger.info("Skipping to the next migration..") + continue + _logger.info("Exiting..") + sys.exit(7) + if check_branch: branch_checked = checkout_branch( repo=repo, branch_name=TARGET_BRANCH['name'], from_branch=TARGET_BRANCH.get('from', None)) From 994a060259267e7a4bca0e9e3616ee6165468a50 Mon Sep 17 00:00:00 2001 From: alalkamys Date: Fri, 22 Mar 2024 16:48:25 +0200 Subject: [PATCH 6/8] fix: helpers.git.push_changes This work is to fix app.helpers.git.push_changes function. Previously, push_changes function was only pushing which was working in cases where there is no divergent heads, I added a pull functionality to fix this and now we can say code-migration-assistant is idempotent --- app/helpers/git.py | 106 ++++++++++++++++++++++++++++----------------- 1 file changed, 66 insertions(+), 40 deletions(-) diff --git a/app/helpers/git.py b/app/helpers/git.py index ba8e9db..4df7ddc 100644 --- a/app/helpers/git.py +++ b/app/helpers/git.py @@ -298,7 +298,7 @@ def commit_changes(repo: Repo, title: str, description: str = None, author: Acto def push_changes(repo: Repo, remote_name: str = 'origin', remote_branch_name: str | None = None, timeout: int | None = 180) -> bool: - """Push changes to the remote repository. + """Push changes to the remote repository, pulling changes from the remote branch if it exists. Args: repo (Repo): The GitPython Repo object representing the local repository. @@ -318,7 +318,8 @@ def push_changes(repo: Repo, remote_name: str = 'origin', remote_branch_name: st This function pushes changes from the active local branch to the specified remote branch. It handles various scenarios such as existing and non-existing remotes, and provides detailed logging information during the push operation. The timeout parameter allows customization of the maximum time - allowed for the push operation. + allowed for the push operation. 
Before pushing, if the specified remote branch exists, it pulls changes + from that branch to ensure synchronization. Example: # Push changes from the active branch to the 'main' branch of the remote repository 'origin' @@ -330,44 +331,69 @@ def push_changes(repo: Repo, remote_name: str = 'origin', remote_branch_name: st remote = repo.remotes[remote_name] branch_name = repo.active_branch.name remote_branch_name = remote_branch_name if remote_branch_name else branch_name - _logger.info(f"Pushing changes to '{ - remote_branch_name}' branch of remote '{remote_name}'...") - result: PushInfoList = remote.push( - refspec=f"{branch_name}:{remote_branch_name}", progress=RemoteProgressReporter(_logger), kill_after_timeout=timeout) - try: - assert len(result) != 0 - VALID_PUSH_INFO_FLAGS: list[int] = [PushInfo.FAST_FORWARD, PushInfo.NEW_HEAD, - PushInfo.UP_TO_DATE, PushInfo.FORCED_UPDATE, PushInfo.NEW_TAG] - for push_info in result: - _logger.debug("+------------+") - _logger.debug("| Push Info: |") - _logger.debug("+------------+") - _logger.debug(f"Flag: {push_info.flags}") - _logger.debug(f"Local ref: {push_info.local_ref}") - _logger.debug(f"Remote Ref: {push_info.remote_ref}") - _logger.debug(f"Remote ref string: { - push_info.remote_ref_string}") - _logger.debug(f"Old Commit: {push_info.old_commit}") - _logger.debug(f"Summary: {push_info.summary.strip()}") - if push_info.flags not in VALID_PUSH_INFO_FLAGS: - if push_info.flags == PushInfo.ERROR: - _logger.error( - f"Incomplete push error: Push contains rejected heads. Check your internet connection and run in 'debug' mode to see more details.") - else: - _logger.error( - "Unexpected push error, maybe the remote rejected heads. Check your internet connection and run in 'debug' mode to see more details.") - return False - except AssertionError: - _logger.error(f"Pushing changes to remote '{ - remote_name}' completely failed. 
Check your internet connection and run in 'debug' mode to see the remote push progress.") - return False - _logger.info(f"Changes pushed successfully to '{ - branch_name}' branch of remote '{remote_name}'.") - - _logger.debug(f"Setting '{branch_name}' upstream branch to '{remote_name}/{ - remote_branch_name}'..") - repo.active_branch.set_tracking_branch( - repo.refs[f"{remote_name}/{remote_branch_name}"]) + + push_is_needed = True + + remote_refs = remote.refs + if remote_branch_name in remote_refs: + _logger.debug( + f"'{remote_name}/{remote_branch_name}' remote branch exists.") + if not has_tracking_branch(repo.active_branch): + _logger.debug( + f"'{branch_name}' has no tracking branch. Setting..") + repo.active_branch.set_tracking_branch( + repo.refs[f"{remote_name}/{remote_branch_name}"]) + _logger.debug(f"Pulling changes from '{ + remote_branch_name}' branch of remote '{remote_name}' to '{branch_name}'...") + remote.pull( + refspec=remote_branch_name, kill_after_timeout=timeout) + + push_is_needed = needs_push(repo=repo, branch_name=branch_name) + + if push_is_needed: + _logger.info(f"Pushing changes to '{ + remote_branch_name}' branch of remote '{remote_name}'...") + result: PushInfoList = remote.push( + refspec=f"{branch_name}:{remote_branch_name}", progress=RemoteProgressReporter(_logger), kill_after_timeout=timeout) + + try: + assert len(result) != 0 + VALID_PUSH_INFO_FLAGS: list[int] = [PushInfo.FAST_FORWARD, PushInfo.NEW_HEAD, + PushInfo.UP_TO_DATE, PushInfo.FORCED_UPDATE, PushInfo.NEW_TAG] + for push_info in result: + _logger.debug("+------------+") + _logger.debug("| Push Info: |") + _logger.debug("+------------+") + _logger.debug(f"Flag: {push_info.flags}") + _logger.debug(f"Local ref: {push_info.local_ref}") + _logger.debug(f"Remote Ref: {push_info.remote_ref}") + _logger.debug(f"Remote ref string: { + push_info.remote_ref_string}") + _logger.debug(f"Old Commit: {push_info.old_commit}") + _logger.debug(f"Summary: {push_info.summary.strip()}") + if 
push_info.flags not in VALID_PUSH_INFO_FLAGS: + if push_info.flags == PushInfo.ERROR: + _logger.error( + "Incomplete push error: Push contains rejected heads. Check your internet connection and run in 'debug' mode to see more details.") + else: + _logger.error( + "Unexpected push error, maybe the remote rejected heads. Check your internet connection and run in 'debug' mode to see more details.") + return False + except AssertionError: + _logger.error(f"Pushing changes to remote '{ + remote_name}' completely failed. Check your internet connection and run in 'debug' mode to see the remote push progress.") + return False + + _logger.info(f"Changes pushed successfully to '{ + branch_name}' branch of remote '{remote_name}'.") + if not has_tracking_branch(repo.active_branch): + _logger.debug(f"Setting '{branch_name}' upstream branch to '{ + remote_name}/{remote_branch_name}'..") + repo.active_branch.set_tracking_branch( + repo.refs[f"{remote_name}/{remote_branch_name}"]) + + else: + _logger.info("Already up-to-date. Skipping..") return True except IndexError: _logger.error(f"Error accessing remote '{ From b973b08c0d2837dd1d79d8956c61d885040aec5c Mon Sep 17 00:00:00 2001 From: alalkamys Date: Fri, 22 Mar 2024 17:16:04 +0200 Subject: [PATCH 7/8] feat: add helpers.git.get_default_branch_name Added helper funciton to query the default branch name of a repository, unfortunately, there is not an official way to do this. 
This is a workaround found in https://stackoverflow.com/questions/69651536/how-to-get-master-main-branch-from-gitpython --- app/helpers/git.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/app/helpers/git.py b/app/helpers/git.py index 4df7ddc..50ef55f 100644 --- a/app/helpers/git.py +++ b/app/helpers/git.py @@ -9,9 +9,11 @@ from git.remote import PushInfo from git.remote import PushInfoList from typing import Any +from typing import Union import json import logging import os +import re import sys _logger = logging.getLogger(app_config.APP_NAME) @@ -457,3 +459,24 @@ def needs_push(repo: Repo, branch_name: str | None = None) -> bool: if tracking_branch: return any(repo.iter_commits(f"{tracking_branch.name}..{branch.name}")) return False + + +def get_default_branch_name(repo: Repo, remote_name: str = "origin") -> Union[str, None]: + """Get the default branch name of a Git repository. + + Args: + repo (Repo): The GitPython Repo object representing the local repository. + remote_name (str, optional): The name of the remote repository. Defaults to "origin". + + Returns: + Union[str, None]: The name of the default branch, or None if not found. + """ + try: + show_result = repo.git.remote("show", remote_name) + matches = re.search(r"\s*HEAD branch:\s*(.*)", show_result) + if matches: + return matches.group(1) + except Exception as e: + _logger.error(f"Error while querying the default branch: {e}") + + return None From 2315722165ecad68965a1ac71183272cb7eea519 Mon Sep 17 00:00:00 2001 From: alalkamys Date: Fri, 22 Mar 2024 17:18:43 +0200 Subject: [PATCH 8/8] fix: helpers.git.checkout_branch from_branch functionality I used to query repo.active_branch.name by default, which works only when you clone the repo for the first time; consecutive runs will not work. This change ensures the default branch name is queried from the SCM provider. 
--- app/helpers/git.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/helpers/git.py b/app/helpers/git.py index 50ef55f..d867c15 100644 --- a/app/helpers/git.py +++ b/app/helpers/git.py @@ -227,7 +227,7 @@ def checkout_branch(repo: Repo, branch_name: str, from_branch: str = None, remot branch.set_tracking_branch(repo.refs[remote_branch_name]) else: _logger.info(f"'{branch_name}' doesn't exist, creating..") - from_branch = from_branch or repo.active_branch.name + from_branch = from_branch or get_default_branch_name(repo=repo, remote_name=remote_name) remote_from_branch = f"{remote_name}/{from_branch}" if from_branch in repo.branches: branch = repo.create_head(branch_name, commit=from_branch)