diff --git a/.gitignore b/.gitignore index 2f91f4ed0..df6f37aca 100644 --- a/.gitignore +++ b/.gitignore @@ -8,9 +8,16 @@ manual_test/ # other local dev info .vscode/ +.history/ # Mac OS-specific storage files .DS_Store +Icon? +Icon +Icon[\r] + +# ruff +.ruff_cache/ # vim *.swp diff --git a/ccds-help.json b/ccds-help.json index 84b26b8cd..98a68b2ac 100644 --- a/ccds-help.json +++ b/ccds-help.json @@ -277,5 +277,42 @@ } } ] + }, + { + "field": "version_control", + "help": { + "description": "What kind of version control system (vcs) and repository host to use.", + "more_information": "" + }, + "choices": [ + { + "choice": "none", + "help": { + "description": "No version control.", + "more_information": "" + } + }, + { + "choice": "git (local)", + "help": { + "description": "Initialize project as a local git repository.", + "more_information": "[Git CLI](https://git-scm.com/downloads) Required" + } + }, + { + "choice": "git (github private)", + "help": { + "description": "Initialize project and upload to GitHub as a **private** repo.", + "more_information": "[Git CLI](https://git-scm.com/downloads) + [GitHub CLI](https://cli.github.com/) & [Auth](https://cli.github.com/manual/gh_auth_login) Required" + } + }, + { + "choice": "git (github public)", + "help": { + "description": "Initialize project and upload to GitHub as a **public** repo.", + "more_information": "[Git CLI](https://git-scm.com/downloads) + [GitHub CLI](https://cli.github.com/) & [Auth](https://cli.github.com/manual/gh_auth_login) Required" + } + } + ] } ] diff --git a/ccds.json b/ccds.json index 2ad0a0ee2..9d3434bd1 100644 --- a/ccds.json +++ b/ccds.json @@ -28,5 +28,11 @@ ], "open_source_license": ["No license file", "MIT", "BSD-3-Clause"], "docs": ["mkdocs", "none"], - "include_code_scaffold": ["Yes", "No"] + "include_code_scaffold": ["Yes", "No"], + "version_control": [ + "none", + "git (local)", + "git (github private)", + "git (github public)" + ] } \ No newline at end of file diff --git 
a/ccds/hook_utils/configure_vcs.py b/ccds/hook_utils/configure_vcs.py
new file mode 100644
index 000000000..c098017d0
--- /dev/null
+++ b/ccds/hook_utils/configure_vcs.py
@@ -0,0 +1,192 @@
+import os
+import shlex
+import subprocess
+from pathlib import Path
+from typing import Literal, Union
+
+# ---------------------------------------------------------------------------- #
+#                                      Git                                     #
+# ---------------------------------------------------------------------------- #
+
+
+def init_local_git_repo(
+    directory: Union[str, Path], _make_initial_commit: bool = True
+) -> bool:
+    """
+    Initialize a local git repository without any GitHub integration.
+
+    Changes the process working directory to ``directory``. If a ``.git``
+    folder already exists there, nothing is initialized or committed.
+
+    Args:
+        directory: Directory where the repository will be created
+        _make_initial_commit: Whether to make initial commit (for testing)
+
+    Returns:
+        bool: True if initialization was successful, False otherwise
+    """
+    try:
+        if not _check_git_cli_installed():
+            raise RuntimeError("git CLI is required but not installed")
+
+        directory = Path(directory)
+        if not directory.is_dir():
+            raise ValueError(f"Directory '{directory}' does not exist.")
+
+        # Later git/gh calls (e.g. `--source=.`) rely on the working directory.
+        os.chdir(directory)
+
+        if not (directory / ".git").is_dir():
+            _git("init")
+            if _make_initial_commit:
+                _git("add .")
+                _git("commit -m 'Initial commit'")
+
+        return True
+    except Exception as e:
+        print(f"Error during repository initialization: {e}")
+        return False
+
+
+def _git(command: str, **kwargs) -> subprocess.CompletedProcess:
+    """Run a git command and return the result.
+
+    The command string is split with ``shlex`` and executed without a shell so
+    that quoting behaves the same on every platform.
+
+    Raises:
+        subprocess.CalledProcessError: If git exits with a non-zero status.
+        OSError: If the git executable cannot be started.
+    """
+    return subprocess.run(["git", *shlex.split(command)], check=True, **kwargs)
+
+
+def _check_git_cli_installed() -> bool:
+    """Check whether git cli is installed"""
+    try:
+        _git("--version", capture_output=True)
+    except (subprocess.CalledProcessError, OSError):
+        # OSError (e.g. FileNotFoundError) means the executable is missing.
+        return False
+    return True
+
+
+# ---------------------------------------------------------------------------- #
+#                                 Git + Github                                 #
+# ---------------------------------------------------------------------------- #
+
+
+def configure_github_repo(
+    directory: Union[str, Path],
+    repo_name: str,
+    visibility: Literal["private", "public"] = "private",
+) -> bool:
+    """
+    Initialize a local git repository and publish it as a new GitHub repository.
+
+    Args:
+        directory: Directory where the repository will be created or updated
+        repo_name: Name of the repository
+        visibility: Whether to upload to github as a public or private repo
+
+    Returns:
+        bool: True if configuration was successful, False otherwise
+
+    Raises:
+        RuntimeError: If the GitHub CLI is not installed or not authenticated.
+    """
+    try:
+        _gh("--version", capture_output=True)
+    except (subprocess.CalledProcessError, OSError) as e:
+        raise RuntimeError(
+            "GitHub CLI is not installed. Please install and try again."
+        ) from e
+    try:
+        _gh("auth status", capture_output=True)
+    except subprocess.CalledProcessError as e:
+        raise RuntimeError(
+            "GitHub CLI not authenticated. Please run `gh auth login` and try again."
+        ) from e
+
+    try:
+        # GitHub operations
+        github_username = _gh(
+            "api user -q .login", capture_output=True, text=True
+        ).stdout.strip()
+
+        # Create or update GitHub repository
+        if not _github_repo_exists(github_username, repo_name):
+            # Initialize local repository
+            if not init_local_git_repo(directory):
+                return False
+            _gh(
+                f"repo create {shlex.quote(repo_name)} --{visibility}"
+                " --source=. --remote=origin --push"
+            )
+        else:
+            remote_url = _get_gh_remote_url(github_username, repo_name)
+            raise RuntimeError(f"GitHub repo already exists at {remote_url}")
+            # TODO: Prompt user if they would like to set existing repo as origin.
+            # remote_url = _get_gh_remote_url(github_username, repo_name)
+            # try:
+            #     _git(f"remote set-url origin {remote_url}")
+            # except subprocess.CalledProcessError:
+            #     _git(f"remote add origin {remote_url}")
+
+        # Push the current branch rather than a hard-coded `main`: the default
+        # branch name depends on the user's `init.defaultBranch` setting.
+        _git("push -u origin HEAD")
+
+        print("Repository configuration complete on GitHub!")
+
+        return True
+
+    except Exception as e:
+        print(f"Error during repository configuration: {e}")
+        return False
+
+
+def _gh(command: str, **kwargs) -> subprocess.CompletedProcess:
+    """Run a GitHub CLI command and return the result.
+
+    The command string is split with ``shlex`` and executed without a shell.
+
+    Raises:
+        subprocess.CalledProcessError: If gh exits with a non-zero status.
+        OSError: If the gh executable cannot be started.
+    """
+    return subprocess.run(["gh", *shlex.split(command)], check=True, **kwargs)
+
+
+def _get_gh_remote_url(github_username: str, repo_name: str) -> str:
+    """Return the repository's remote URL for the user's configured gh protocol.
+
+    Falls back to the https URL when the protocol cannot be read from gh.
+
+    Raises:
+        ValueError: If gh reports a protocol other than "ssh" or "https".
+    """
+    https_url = f"https://github.com/{github_username}/{repo_name}"
+    try:
+        protocol = _gh(
+            "config get git_protocol", capture_output=True, text=True
+        ).stdout.strip()
+    except (subprocess.CalledProcessError, OSError):
+        # Default to https if not set
+        return https_url
+
+    if protocol == "ssh":
+        return f"git@github.com:{github_username}/{repo_name}.git"
+    if protocol == "https":
+        return https_url
+    raise ValueError(f"Unexpected GitHub protocol {protocol}")
+
+
+def _github_repo_exists(username: str, repo_name: str) -> bool:
+    """Check if a GitHub repository exists."""
+    repo_slug = shlex.quote(f"{username}/{repo_name}")
+    try:
+        _gh(f"repo view {repo_slug}", capture_output=True)
+    except subprocess.CalledProcessError:
+        return False
+    return True
diff --git a/hooks/post_gen_project.py b/hooks/post_gen_project.py
index 2e0bc89b6..7b1b33752 100644
--- a/hooks/post_gen_project.py
+++ b/hooks/post_gen_project.py
@@ -2,6 +2,8 @@
 from copy import copy
 from pathlib import Path
 
+from ccds.hook_utils.configure_vcs import configure_github_repo, init_local_git_repo
+
 # https://github.com/cookiecutter/cookiecutter/issues/824
 # our workaround is to include these utility functions in the CCDS package
 from ccds.hook_utils.custom_config import
write_custom_config @@ -80,3 +82,19 @@ # remove any content in __init__.py since it won't be available generated_path.write_text("") # {% endif %} + +# +# VERSION CONTROL +# + +# {% if cookiecutter.version_control == "git (local)" %} +init_local_git_repo(directory=Path.cwd()) +# {% elif cookiecutter.version_control == "git (github private)" %} +configure_github_repo( + directory=Path.cwd(), repo_name="{{ cookiecutter.repo_name }}", visibility="private" +) +# {% elif cookiecutter.version_control == "git (github public)" %} +configure_github_repo( + directory=Path.cwd(), repo_name="{{ cookiecutter.repo_name }}", visibility="public" +) +# {% endif %} diff --git a/pyproject.toml b/pyproject.toml index 86619ee4e..207a6daab 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,3 +42,7 @@ ccds = "ccds.__main__:main" "Source Code" = "https://github.com/drivendataorg/cookiecutter-data-science/" "Bug Tracker" = "https://github.com/drivendataorg/cookiecutter-data-science/issues" "DrivenData" = "https://drivendata.co" + +[tool.pytest.ini_options] +testpaths = "./tests" +addopts = "-vv --color=yes" \ No newline at end of file diff --git a/tests/conftest.py b/tests/conftest.py index 7e9913874..2ef181c82 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,6 +19,7 @@ "module_name": "project_module", "author_name": "DrivenData", "description": "A test project", + "version_control": "git (local)", } @@ -38,6 +39,8 @@ def config_generator(fast=False): ], [("dependency_file", opt) for opt in cookiecutter_json["dependency_file"]], [("pydata_packages", opt) for opt in cookiecutter_json["pydata_packages"]], + [("version_control", opt) for opt in ("none", "git (local)")], + # TODO: Tests for "version_control": "git (github)" ) def _is_valid(config): diff --git a/tests/test_creation.py b/tests/test_creation.py index 1dee39845..e8d5c4912 100644 --- a/tests/test_creation.py +++ b/tests/test_creation.py @@ -57,7 +57,7 @@ def test_baking_configs(config, fast): def 
verify_folders(root, config): """Tests that expected folders and only expected folders exist.""" - expected_dirs = [ + expected_dirs = { ".", "data", "data/external", @@ -71,32 +71,54 @@ def verify_folders(root, config): "reports", "reports/figures", config["module_name"], - ] + } + ignored_dirs = set() if config["include_code_scaffold"] == "Yes": - expected_dirs += [ - f"{config['module_name']}/modeling", - ] + expected_dirs.add(f"{config['module_name']}/modeling") if config["docs"] == "mkdocs": - expected_dirs += ["docs/docs"] + expected_dirs.add("docs/docs") + + if config["version_control"] in ( + "git (local)", + "git (github public)", + "git (github private)", + ): + # Expected after `git init` + expected_dirs.update( + { + ".git", + ".git/hooks", + ".git/info", + ".git/objects", + ".git/refs", + } + ) + # Expected after initial git commit + expected_dirs.update({".git/logs", ".git/logs/refs"}) + git_patterns = [".git/objects/**/*", ".git/refs/**/*", ".git/logs/refs/**/*"] + ignored_dirs.update( + { + d.relative_to(root) + for pattern in git_patterns + for d in root.glob(pattern) + if d.is_dir() + } + ) - expected_dirs = [ - # (root / d).resolve().relative_to(root) for d in expected_dirs - Path(d) - for d in expected_dirs - ] + expected_dirs = {Path(d) for d in expected_dirs} - existing_dirs = [ + existing_dirs = { d.resolve().relative_to(root) for d in root.glob("**") if d.is_dir() - ] + } - assert sorted(existing_dirs) == sorted(expected_dirs) + assert sorted(existing_dirs - ignored_dirs) == sorted(expected_dirs) def verify_files(root, config): """Test that expected files and only expected files exist.""" - expected_files = [ + expected_files = { "Makefile", "README.md", "pyproject.toml", @@ -114,41 +136,100 @@ def verify_files(root, config): "reports/figures/.gitkeep", "models/.gitkeep", f"{config['module_name']}/__init__.py", - ] + } + + ignored_files = set() # conditional files if not config["open_source_license"].startswith("No license"): - 
expected_files.append("LICENSE") + expected_files.add("LICENSE") if config["include_code_scaffold"] == "Yes": - expected_files += [ - f"{config['module_name']}/config.py", - f"{config['module_name']}/dataset.py", - f"{config['module_name']}/features.py", - f"{config['module_name']}/modeling/__init__.py", - f"{config['module_name']}/modeling/train.py", - f"{config['module_name']}/modeling/predict.py", - f"{config['module_name']}/plots.py", - ] + expected_files.update( + { + f"{config['module_name']}/config.py", + f"{config['module_name']}/dataset.py", + f"{config['module_name']}/features.py", + f"{config['module_name']}/modeling/__init__.py", + f"{config['module_name']}/modeling/train.py", + f"{config['module_name']}/modeling/predict.py", + f"{config['module_name']}/plots.py", + } + ) if config["docs"] == "mkdocs": - expected_files += [ - "docs/mkdocs.yml", - "docs/README.md", - "docs/docs/index.md", - "docs/docs/getting-started.md", - ] + expected_files.update( + { + "docs/mkdocs.yml", + "docs/README.md", + "docs/docs/index.md", + "docs/docs/getting-started.md", + } + ) + + expected_files.add(config["dependency_file"]) + + if config["version_control"] in ( + "git (local)", + "git (github public)", + "git (github private)", + ): + # Expected after `git init` + expected_files.update( + { + ".git/config", + ".git/description", + ".git/HEAD", + ".git/hooks/applypatch-msg.sample", + ".git/hooks/commit-msg.sample", + ".git/hooks/fsmonitor-watchman.sample", + ".git/hooks/post-update.sample", + ".git/hooks/pre-applypatch.sample", + ".git/hooks/pre-commit.sample", + ".git/hooks/pre-merge-commit.sample", + ".git/hooks/pre-push.sample", + ".git/hooks/pre-rebase.sample", + ".git/hooks/pre-receive.sample", + ".git/hooks/prepare-commit-msg.sample", + ".git/hooks/push-to-checkout.sample", + ".git/hooks/sendemail-validate.sample", + ".git/hooks/update.sample", + ".git/info/exclude", + } + ) + # Expected after initial git commit + expected_files.update( + { + ".git/COMMIT_EDITMSG", 
+ ".git/index", + ".git/logs/HEAD", + } + ) + git_patterns = [".git/objects/**/*", ".git/refs/**/*", ".git/logs/refs/**/*"] + ignored_files.update( + { + f.relative_to(root) + for pattern in git_patterns + for f in root.glob(pattern) + if f.is_file() + } + ) + + expected_files = {Path(f) for f in expected_files} - expected_files.append(config["dependency_file"]) + existing_files = {f.relative_to(root) for f in root.glob("**/*") if f.is_file()} - expected_files = [Path(f) for f in expected_files] + checked_files = existing_files - ignored_files - existing_files = [f.relative_to(root) for f in root.glob("**/*") if f.is_file()] + assert sorted(checked_files) == sorted(expected_files) - assert sorted(existing_files) == sorted(expected_files) + # Ignore files where curlies may exist but aren't unrendered jinja tags + ignore_curly_files = { + Path(".git/hooks/fsmonitor-watchman.sample"), + Path(".git/index"), + } - for f in existing_files: - assert no_curlies(root / f) + assert all(no_curlies(root / f) for f in checked_files - ignore_curly_files) def verify_makefile_commands(root, config):