From f5f8c40a25ea30494002affa924a5c4114899a30 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Thu, 28 Mar 2024 15:50:48 +0000 Subject: [PATCH 01/14] Adds docker github actions --- .github/workflows/docker_push.yml | 46 ++++++++++++++++++ README.md | 4 +- docker/.dockerignore | 3 ++ docker/Dockerfile | 79 +++++++++++++++++++++++++++++++ docker/entrypoint.sh | 5 ++ 5 files changed, 135 insertions(+), 2 deletions(-) create mode 100644 .github/workflows/docker_push.yml create mode 100644 docker/.dockerignore create mode 100644 docker/Dockerfile create mode 100644 docker/entrypoint.sh diff --git a/.github/workflows/docker_push.yml b/.github/workflows/docker_push.yml new file mode 100644 index 0000000..d7f9650 --- /dev/null +++ b/.github/workflows/docker_push.yml @@ -0,0 +1,46 @@ +name: Build and push Docker image + +on: + push: + branches: + - 'main' + tags: + - 'v*' + pull_request: + branches: + - 'main' + create: + tags: + - v* + +jobs: + docker-upload: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + - name: Docker meta + id: meta + uses: docker/metadata-action@v4 + with: + images: samhorsfield96/celebrimbor + - name: Set up QEMU + uses: docker/setup-qemu-action@v1 + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v1 + - name: Login to DockerHub + uses: docker/login-action@v1 + with: + username: ${{ secrets.DOCKER_REGISTRY_USERNAME }} + password: ${{ secrets.DOCKER_REGISTRY_PASSWORD }} + - name: Build and push + id: docker_build + uses: docker/build-push-action@v3 + with: + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + file: docker/Dockerfile + provenance: false + - name: Image digest + run: echo ${{ steps.docker_build.outputs.digest }} \ No newline at end of file diff --git a/README.md b/README.md index db43c35..4838fd5 100644 --- a/README.md +++ b/README.md @@ -38,9 +38,9 @@ You can also use the light bakta database if using a suitable version of bakta: bakta_db download --output /path/to/database --type light ``` -Install [cgt](https://github.com/bacpop/cgt) +Install [cgt](https://github.com/bacpop/cgt) (will install `cgt_bacpop` executable in `./bin` directory) ``` -cargo add cgt_bacpop +cargo install cgt_bacpop --root . ``` Or to build from source: diff --git a/docker/.dockerignore b/docker/.dockerignore new file mode 100644 index 0000000..e125579 --- /dev/null +++ b/docker/.dockerignore @@ -0,0 +1,3 @@ +../test/* +test/* +.git \ No newline at end of file diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..b0e4cfb --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,79 @@ +# From https://github.com/kaust-vislab/python-data-science-project +FROM ubuntu:20.04 + +LABEL maintainer="samhorsfield96 " + +SHELL [ "/bin/bash", "--login", "-c" ] + +RUN apt-get update --fix-missing && \ + apt-get install -y wget bzip2 curl git && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Create a non-root user +ARG username=celebrimbor-usr +ARG uid=1000 +ARG gid=100 +ENV USER $username +ENV UID $uid +ENV GID $gid +ENV HOME /home/$USER + +RUN adduser --disabled-password \ + --gecos "Non-root user" \ + --uid $UID \ + --gid $GID \ + --home $HOME \ + $USER + +COPY environment.yml /tmp/ +RUN chown $UID:$GID /tmp/environment.yml + +COPY docker/entrypoint.sh /usr/local/bin/ +RUN chown $UID:$GID /usr/local/bin/entrypoint.sh && \ + chmod u+x /usr/local/bin/entrypoint.sh + +USER $USER + +# install miniconda +ENV MINICONDA_VERSION latest +ENV CONDA_DIR $HOME/miniconda3 +RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-$MINICONDA_VERSION-Linux-x86_64.sh -O ~/miniconda.sh && \ + chmod +x ~/miniconda.sh && \ + ~/miniconda.sh -b -p $CONDA_DIR && \ + rm ~/miniconda.sh + +# make non-activate conda commands available +ENV PATH=$CONDA_DIR/bin:$PATH + +# make conda activate command available from /bin/bash --login shells +RUN echo ". $CONDA_DIR/etc/profile.d/conda.sh" >> ~/.profile + +# make conda activate command available from /bin/bash --interative shells +RUN conda init bash + +# create a project directory inside user home +# (this isn't used with a clone running snakemake) +ENV PROJECT_DIR $HOME/app +RUN mkdir $PROJECT_DIR +# copy the code in +COPY . $PROJECT_DIR +WORKDIR $PROJECT_DIR + +# build the conda environment +ENV ENV_PREFIX $PROJECT_DIR/env +RUN conda update --name base --channel defaults conda && \ + conda env create --prefix $ENV_PREFIX --file /tmp/environment.yml --force && \ + conda clean --all --yes \ + conda activate celebrimbor \ + cargo install cgt_bacpop --root . + +# copy cgt executable to root bin +COPY ./bin/cgt_bacpop /usr/local/bin/ + +# use an entrypoint script to insure conda environment is properly activated at runtime +USER root +ENTRYPOINT [ "/usr/local/bin/entrypoint.sh" ] + +# default of running shell is fine +#CMD [ "bash" ] \ No newline at end of file diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh new file mode 100644 index 0000000..b026041 --- /dev/null +++ b/docker/entrypoint.sh @@ -0,0 +1,5 @@ +#!/bin/bash --login +set -e + +conda activate $HOME/app/env +exec "$@" \ No newline at end of file From 53d71069144b343f0017d968acf9aeada21a4511 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Thu, 28 Mar 2024 15:57:36 +0000 Subject: [PATCH 02/14] Corrects copy of cgt_bacpop for docker --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index b0e4cfb..4899a0e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -69,7 +69,7 @@ RUN conda update --name base --channel defaults conda && \ cargo install cgt_bacpop --root . # copy cgt executable to root bin -COPY ./bin/cgt_bacpop /usr/local/bin/ +COPY cgt_bacpop /usr/local/bin/ # use an entrypoint script to insure conda environment is properly activated at runtime USER root From 6c805225ac7cf79086ddc9ff9f46a23f0335b225 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Thu, 28 Mar 2024 16:06:17 +0000 Subject: [PATCH 03/14] Allows docker build on push to docker branch --- .github/workflows/docker_push.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docker_push.yml b/.github/workflows/docker_push.yml index d7f9650..5ea7299 100644 --- a/.github/workflows/docker_push.yml +++ b/.github/workflows/docker_push.yml @@ -4,6 +4,7 @@ on: push: branches: - 'main' + - 'docker' tags: - 'v*' pull_request: From 01f17155c1990b15aba6f078d3c5c6c30f863453 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Thu, 28 Mar 2024 16:13:36 +0000 Subject: [PATCH 04/14] Corrects path to rust binaries in docker --- docker/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 4899a0e..d9f5b1d 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,7 +1,7 @@ # From https://github.com/kaust-vislab/python-data-science-project FROM ubuntu:20.04 -LABEL maintainer="samhorsfield96 " +LABEL maintainer="Sam Horsfield " SHELL [ "/bin/bash", "--login", "-c" ] @@ -66,10 +66,10 @@ RUN conda update --name base --channel defaults conda && \ conda env create --prefix $ENV_PREFIX --file /tmp/environment.yml --force && \ conda clean --all --yes \ conda activate celebrimbor \ - cargo install cgt_bacpop --root . + cargo install cgt_bacpop -# copy cgt executable to root bin -COPY cgt_bacpop /usr/local/bin/ +# add rust binary to path +ENV PATH="$HOME/.cargo/bin:${PATH}" # use an entrypoint script to insure conda environment is properly activated at runtime USER root From 619382c40f61857b499ff7126ec084773ad0c383 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Thu, 28 Mar 2024 16:18:15 +0000 Subject: [PATCH 05/14] Corrects path to rust binaries in docker --- docker/Dockerfile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index d9f5b1d..503c819 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -63,9 +63,9 @@ WORKDIR $PROJECT_DIR # build the conda environment ENV ENV_PREFIX $PROJECT_DIR/env RUN conda update --name base --channel defaults conda && \ - conda env create --prefix $ENV_PREFIX --file /tmp/environment.yml --force && \ + conda env create --prefix $ENV_PREFIX --file /tmp/environment.yml --yes && \ conda clean --all --yes \ - conda activate celebrimbor \ + conda activate celebrimbo cargo install cgt_bacpop # add rust binary to path From 106e066ef8c052d637c0cd1f334457a6359f914e Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Thu, 28 Mar 2024 16:21:01 +0000 Subject: [PATCH 06/14] Corrects path to rust binaries in docker --- docker/Dockerfile | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 503c819..bbc89db 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -65,8 +65,9 @@ ENV ENV_PREFIX $PROJECT_DIR/env RUN conda update --name base --channel defaults conda && \ conda env create --prefix $ENV_PREFIX --file /tmp/environment.yml --yes && \ conda clean --all --yes \ - conda activate celebrimbo - cargo install cgt_bacpop + conda activate celebrimbor \ + cargo install cgt_bacpop \ + conda deactivate # add rust binary to path ENV PATH="$HOME/.cargo/bin:${PATH}" From ca6b971d87add1acf97b4f78b4b47101d012ae2a Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Thu, 28 Mar 2024 16:34:19 +0000 Subject: [PATCH 07/14] Corrects path to rust binaries in docker --- docker/Dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index bbc89db..afb13fd 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -64,9 +64,9 @@ WORKDIR $PROJECT_DIR ENV ENV_PREFIX $PROJECT_DIR/env RUN conda update --name base --channel defaults conda && \ conda env create --prefix $ENV_PREFIX --file /tmp/environment.yml --yes && \ - conda clean --all --yes \ - conda activate celebrimbor \ - cargo install cgt_bacpop \ + conda clean --all --yes && \ + conda activate celebrimbor && \ + cargo install cgt_bacpop && \ conda deactivate # add rust binary to path From e676888137b306717648ca2fa0430684bc9c6414 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Thu, 28 Mar 2024 17:12:50 +0000 Subject: [PATCH 08/14] Corrects conda initialisation for docker --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index afb13fd..a997020 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -65,7 +65,7 @@ ENV ENV_PREFIX $PROJECT_DIR/env RUN conda update --name base --channel defaults conda && \ conda env create --prefix $ENV_PREFIX --file /tmp/environment.yml --yes && \ conda clean --all --yes && \ - conda activate celebrimbor && \ + conda activate $ENV_PREFIX && \ cargo install cgt_bacpop && \ conda deactivate From dff717659894c94f273c5bd2eb5953ad1a257b60 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Wed, 3 Apr 2024 10:48:18 +0100 Subject: [PATCH 09/14] Updates branches to be built by docker --- .github/workflows/docker_push.yml | 1 - README.md | 9 +++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker_push.yml b/.github/workflows/docker_push.yml index 5ea7299..d7f9650 100644 --- a/.github/workflows/docker_push.yml +++ b/.github/workflows/docker_push.yml @@ -4,7 +4,6 @@ on: push: branches: - 'main' - - 'docker' tags: - 'v*' pull_request: diff --git a/README.md b/README.md index 4838fd5..6db2fb2 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,15 @@ cd cgt cargo install --path "." ``` +### Running inside a container + +An alternative, if you are having trouble with the above, is to use the CELEBRIMBOR docker +container. If you are comfortable running commands inside docker containers and mounting +your external files, the whole pipeline is in the container available by running: +``` +docker pull samhorsfield96/celebrimbor:main +``` + ## Quick start: Update `config.yaml` to specify workflow and directory paths. From fea0dd7f0f3156c9406b1bf4cb68393eabd80d54 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Wed, 3 Apr 2024 14:57:26 +0100 Subject: [PATCH 10/14] Adds bakta database download --- .github/workflows/docker_push.yml | 1 + docker/Dockerfile | 2 ++ 2 files changed, 3 insertions(+) diff --git a/.github/workflows/docker_push.yml b/.github/workflows/docker_push.yml index d7f9650..5ea7299 100644 --- a/.github/workflows/docker_push.yml +++ b/.github/workflows/docker_push.yml @@ -4,6 +4,7 @@ on: push: branches: - 'main' + - 'docker' tags: - 'v*' pull_request: diff --git a/docker/Dockerfile b/docker/Dockerfile index a997020..412b00f 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -67,6 +67,8 @@ RUN conda update --name base --channel defaults conda && \ conda clean --all --yes && \ conda activate $ENV_PREFIX && \ cargo install cgt_bacpop && \ + mkdir bakta_db && \ + bakta_db download --output bakta_db && \ conda deactivate # add rust binary to path From 4ad5c20dfb7379a3b4f86e3761b7893bcd924d3b Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Wed, 3 Apr 2024 15:00:39 +0100 Subject: [PATCH 11/14] Adds bakta database download --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index 412b00f..1787172 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -68,7 +68,7 @@ RUN conda update --name base --channel defaults conda && \ conda activate $ENV_PREFIX && \ cargo install cgt_bacpop && \ mkdir bakta_db && \ - bakta_db download --output bakta_db && \ + bakta_db download --output bakta_db --type light && \ conda deactivate # add rust binary to path From 35f86500ecc8cfbacde323ce28565070ae2f2730 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Wed, 3 Apr 2024 17:13:28 +0100 Subject: [PATCH 12/14] Adds docker guide --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 6db2fb2..63b3193 100644 --- a/README.md +++ b/README.md @@ -55,10 +55,17 @@ cargo install --path "." An alternative, if you are having trouble with the above, is to use the CELEBRIMBOR docker container. If you are comfortable running commands inside docker containers and mounting your external files, the whole pipeline is in the container available by running: + ``` docker pull samhorsfield96/celebrimbor:main ``` +To run within the container, use the below command, replacing `path to output dir` and `path to fasta dir` with absolute paths: + +``` +docker run -v :/output -v :/data samhorsfield96/celebrimbor:main snakemake --cores 4 --config genome_fasta=/data output_dir=/output bakta_db=bakta_db/db-light cgt_exe=cgt_bacpop cgt_breaks=0.05,0.95 cgt_error=0.06 clustering_method=panaroo panaroo_stringency=moderate +``` + ## Quick start: Update `config.yaml` to specify workflow and directory paths. From 916faa6d3ce36aece452110bc234ad227b40ba3b Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Wed, 3 Apr 2024 17:13:45 +0100 Subject: [PATCH 13/14] Adds docker guide --- README.md | 6 ++++-- config.yaml | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 63b3193..3ba7c3a 100644 --- a/README.md +++ b/README.md @@ -60,12 +60,14 @@ your external files, the whole pipeline is in the container available by running docker pull samhorsfield96/celebrimbor:main ``` -To run within the container, use the below command, replacing `path to output dir` and `path to fasta dir` with absolute paths: +To run within the container, use the below command, replacing `path to output dir` and `path to fasta dir` with absolute paths and changing other parameters as required: ``` -docker run -v :/output -v :/data samhorsfield96/celebrimbor:main snakemake --cores 4 --config genome_fasta=/data output_dir=/output bakta_db=bakta_db/db-light cgt_exe=cgt_bacpop cgt_breaks=0.05,0.95 cgt_error=0.06 clustering_method=panaroo panaroo_stringency=moderate +docker run -v :/output -v :/data samhorsfield96/celebrimbor:main snakemake --cores 4 --config genome_fasta=/data output_dir=/output bakta_db=bakta_db/db-light cgt_exe=cgt_bacpop cgt_breaks=0.05,0.95 cgt_error=0.05 clustering_method=panaroo panaroo_stringency=moderate ``` +Note: ensure that `clustering_method` and `panaroo_stringency` parameters are not in quotes. + ## Quick start: Update `config.yaml` to specify workflow and directory paths. diff --git a/config.yaml b/config.yaml index 488e5d4..d25a557 100644 --- a/config.yaml +++ b/config.yaml @@ -9,7 +9,7 @@ bakta_db: path/to/bakta/db/dir # cgt executable parameters cgt_exe: /path/to/cgt/exe -cgt_breaks: 0.1,0.95 +cgt_breaks: 0.05,0.95 cgt_error: 0.05 # choice of clustering method, either 'mmseqs2' or 'panaroo' From 263f0e76323140e7fad89cd2e7dc659ac5163153 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Wed, 3 Apr 2024 17:13:58 +0100 Subject: [PATCH 14/14] Adds docker guide --- config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.yaml b/config.yaml index d25a557..17e3980 100644 --- a/config.yaml +++ b/config.yaml @@ -12,8 +12,8 @@ cgt_exe: /path/to/cgt/exe cgt_breaks: 0.05,0.95 cgt_error: 0.05 -# choice of clustering method, either 'mmseqs2' or 'panaroo' +# choice of clustering method, either "mmseqs2" or "panaroo" clustering_method: "panaroo" -# must be one of 'strict', 'moderate' or 'sensitive' +# must be one of "strict", "moderate" or "sensitive" panaroo_stringency: "strict" \ No newline at end of file