konstantinjdobler · konstantinjdobler · Nov 7, 2023 · Nov 2, 2023 · Nov 3, 2023 · Nov 3, 2023
diff --git a/.github/scripts/extract_version_info.py b/.github/scripts/extract_version_info.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import sys
+import yaml
+
+if len(sys.argv) != 2:
+    print("Usage: python extract_pytorch_version.py <package_name>")
+    sys.exit(1)
+
+package_name = sys.argv[1]
+
+# Load the lock file
+try:
+    with open('conda-lock.yml', 'r') as lock_file:
+        lock_data = yaml.safe_load(lock_file)
+except FileNotFoundError:
+    print("Lock file 'conda-lock.yml' not found.")
+    sys.exit(1)
+
+# Extract the version of the specified package
+package_version = None
+for package in lock_data['package']:
+    if package['name'] == package_name and package['platform'] == 'linux-64':
+        package_version = package['version']
+        break
+
+if package_version:
+    print(package_version)
+else:
+    print(f"{package_name}-not-found")
diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml
@@ -0,0 +1,52 @@
+name: Build and push Docker image
+on:
+  push:
+    branches:
+      - main
+      - add/docker_ci # NOTE(review): looks like a feature branch used to test this workflow; confirm it should stay
+    paths: # only pushes that touch one of these files trigger the workflow
+      - Dockerfile
+      - conda-lock.yml
+jobs:
+  docker:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Maximize build space
+        uses: easimon/maximize-build-space@master # NOTE(review): tracks a moving branch; consider pinning to a release tag or commit SHA
+        with:
+          remove-dotnet: "true"
+          remove-codeql: "true"
+          remove-haskell: "true"
+          remove-android: "true"
+          overprovision-lvm: "true"
+          remove-docker-images: "true"
+      - name: Checkout
+        uses: actions/checkout@v3
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v2
+        with:
+          platforms: amd64 # matches the single linux/amd64 platform built below
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v2
+      - name: Login to Docker Hub
+        uses: docker/login-action@v2
+        with:
+          registry: ${{secrets.DOCKER_REGISTRY}} # if not set, default to Docker Hub
+          username: konstantinjdobler # change this
+          password: ${{secrets.DOCKER_REGISTRY_TOKEN}}
+      - name: Get torch version for amd
+        run: "echo TORCH_VERSION_AMD=$(.github/scripts/extract_version_info.py pytorch) >> $GITHUB_ENV" # script stdout becomes the variable value; NOTE(review): invoked directly, so the script needs its executable bit
+      - name: Get cuda version for amd
+        run: "echo CUDA_VERSION_AMD=$(.github/scripts/extract_version_info.py pytorch-cuda) >> $GITHUB_ENV" # same mechanism, for the pytorch-cuda package
+      - name: Print version info
+        run: |
+          echo "TORCH_VERSION_AMD=$TORCH_VERSION_AMD"
+          echo "CUDA_VERSION_AMD=$CUDA_VERSION_AMD"
+      - name: Build and push AMD
+        uses: docker/build-push-action@v4
+        with:
+          context: . # build from the repository root checked out above
+          platforms: linux/amd64
+          push: true
+          # change these tags to match your Docker Hub repo
+          tags: konstantinjdobler/nlp-research-template:pytorch${{env.TORCH_VERSION_AMD}}-cuda${{env.CUDA_VERSION_AMD}}, konstantinjdobler/nlp-research-template:latest, konstantinjdobler/nlp-research-template:git-${{ github.sha }}
diff --git a/Dockerfile b/Dockerfile
@@ -13,7 +13,6 @@ ARG OS_SELECTOR=ubi8
 # Load micromamba container to copy from later
 FROM --platform=$TARGETPLATFORM mambaorg/micromamba:1.5.1 as micromamba
 
-
 ####################################################
 ################ BASE IMAGES #######################
 ####################################################
@@ -57,6 +56,8 @@ COPY ppc64le.conda-lock.yml /locks/conda-lock.yml
 # -----------------
 ARG TARGETARCH
 FROM ${TARGETARCH}${OS_SELECTOR} as nvidia-cuda-with-micromamba
+
+# ---------
 # From https://github.com/mamba-org/micromamba-docker#adding-micromamba-to-an-existing-docker-image
 # The commands below add micromamba to an existing image to give the capability to ad-hoc install new dependencies
 USER root
@@ -93,6 +94,7 @@ ENTRYPOINT ["/usr/local/bin/_entrypoint.sh"]
 
 # You can modify the CMD statement as needed....
 CMD ["/bin/bash"]
+# ------------
 
 
 ############################################################
@@ -114,6 +116,7 @@ RUN --mount=type=cache,target=$MAMBA_ROOT_PREFIX/pkgs,id=conda-$TARGETPLATFORM,u
     micromamba install --name base --yes --file /locks/conda-lock.yml 
 
 # Install optional tricky pip dependencies that do not work with conda-lock
+# `--no-deps --no-cache-dir` prevents conflicts with micromamba; you might have to remove these flags depending on your use case
 # RUN micromamba run -n research pip install example-dependency --no-deps --no-cache-dir
 
 
@@ -141,4 +144,4 @@ USER $MAMBA_USER
 ARG MAMBA_DOCKERFILE_ACTIVATE=1
 RUN micromamba config prepend channels conda-forge --env
 # Disable micromamba banner at every command
-RUN micromamba config set show_banner false --env
+RUN micromamba config set show_banner false --env
diff --git a/README.md b/README.md
@@ -1,4 +1,4 @@
-# An opinionated template for NLP research code
+# An opinionated template for reproducible NLP research code
 
 [![Docker Hub](https://img.shields.io/docker/v/konstantinjdobler/nlp-research-template/latest?color=blue&label=docker&logo=docker)](https://hub.docker.com/r/konstantinjdobler/nlp-research-template/tags)
 [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
@@ -215,7 +215,6 @@ After having installed the [Remote-SSH-](https://code.visualstudio.com/docs/remo
 
 There is a bit of setup: for a proper dev environment, you will need to configure mounts (cache directories, your datasets, ...) and environment variables like for a regular docker run command, have a look inside [`.devcontainer/devcontainer.json`](.devcontainer/devcontainer.json).
 
-
 `conda-lock` is automatically installed for you but you have to add the `--micromamba` flag inside the Dev Container (e.g. `conda-lock --micromamba -f environment.yml`). Otherwise, conda-lock uses an anaconda installation, which takes over 8 hours to resolve the packages in the environments.
 
 We automatically mount the `~/.gitconfig` and `~/.netrc` files for ease of use of Git and W&B; however, these files have to exist on your host machine. They are created when executing `git config --global user.email you@example.com` and `wandb login`, respectively.
@@ -232,3 +231,31 @@ Sometimes it's just quicker or unavoidable to create an environment via `conda-l
 ### Code style
 
 We use the `ruff` linter and `black` formatter. You should install their VS Code extensions and enable "Format on Save" inside VS Code.
+
+## Continuous Integration and Deployment
+
+Our project uses GitHub Actions for CI/CD to automate the building and pushing of our Docker images to Docker Hub. This ensures that our Docker images are always up-to-date with the latest dependencies specified in `conda-lock.yml`.
+
+### Prerequisites for CI/CD
+
+To work with this CI/CD setup, you need to:
+
+- Set the following secrets in your GitHub repository:
+  - `DOCKER_REGISTRY`: The Docker registry URL (if using Docker Hub, this is not needed).
+  - `DOCKER_REGISTRY_TOKEN`: Your Docker Hub access token or password.
+- Replace `konstantinjdobler` with your own Docker ID and `nlp-research-template` with the name of your own image repository in the workflow file [`.github/workflows/docker.yml`](./.github/workflows/docker.yml)
+
+If you do not want to automatically build and push images, just delete the workflow file.
+
+### How to Update Docker Images
+
+To update the Docker image:
+
+1. Make necessary changes to the `Dockerfile` or update dependencies in the `environment.yml`.
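+2. Generate a new `conda-lock.yml` by running `conda-lock -f environment.yml` (inside the Dev Container, add the `--micromamba` flag: `conda-lock --micromamba -f environment.yml`).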
+3. Commit and push the changes to the `main` branch.
+4. The GitHub Actions workflow will automatically build and push the new Docker image to Docker Hub.
+
+### Docker Tags
+
+Each build is pushed with three tags: one with the PyTorch and CUDA versions extracted from `conda-lock.yml` (`pytorch<version>-cuda<version>`), one with the commit hash (`git-<sha>`), and `latest` for the most recent build. Use a specific tag if you need a particular version of PyTorch or CUDA or an exact commit, or use the `latest` tag for the most recent build.