Skip to content

Commit

Permalink
Merge pull request #21 from pangenome/Dockerfile
Browse files Browse the repository at this point in the history
Dockerfile
  • Loading branch information
subwaystation authored Apr 1, 2021
2 parents 0dcc4b7 + 19103e0 commit 8c162e1
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 25 deletions.
6 changes: 4 additions & 2 deletions .github/workflows/build_and_test_docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,7 @@ jobs:
- uses: actions/checkout@v2
- name: Build the Docker image
run: docker build . --file Dockerfile --tag pgge
- name: Run a test on the DRB1-3123 dataset
run: docker run -v ${PWD}/data/:/data pgge "pgge -g "/data/HLA/DRB1-3123/*.consensus*.gfa" -f /data/HLA/DRB1-3123/DRB1-3123.fa -r /scripts/beehave.R -t 2 -o /data/HLA/DRB1-3123/pgge_docker -l 1000 -s 1000 -p 100"
- name: Run a test on the DRB1-3123 dataset dbg
run: docker run -v ${PWD}/data/:/data pgge "pgge -g '/data/HLA/DRB1-3123/*.consensus*.gfa' -f /data/HLA/DRB1-3123/DRB1-3123.fa -r /scripts/beehave.R -t 2 -o /data/HLA/DRB1-3123/pgge_docker -l 1000 -s 1000 -p 100"
- name: Run a test on the DRB1-3123 dataset vg
run: docker run -v ${PWD}/data/:/data pgge "pgge -g '/data/HLA/DRB1-3123/*.consensus*.gfa' -f /data/HLA/DRB1-3123/DRB1-3123.fa -r /scripts/beehave.R -t 2 -o /data/HLA/DRB1-3123/pgge_docker -l 1000 -s 2000 -p 10 --graphaligner-vg-mode"
20 changes: 1 addition & 19 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,12 @@ LABEL about.home="https://github.com/pangenome/pgge"
LABEL about.license="SPDX:MIT"

# Required dependencies
# samtools
# TODO add samtools from Bioconda?
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
wget \
curl \
less \
gcc \
samtools \
tzdata \
make \
git \
Expand Down Expand Up @@ -62,21 +59,6 @@ RUN wget \
ENV PATH /miniconda/bin:$PATH
SHELL ["/bin/bash", "-c"]

# GraphAligner
# Unfortunately, the current Bioconda version of GraphAligner emits a 15-column GAF, whereas the most recent commit on github emits a 16-column GAF
# Therefore, we can't use the Bioconda version as of now
RUN git clone --recursive https://github.com/maickrau/GraphAligner \
&& cd GraphAligner \
&& git pull \
&& git checkout 48143da \
&& git submodule update --init --recursive \
&& conda env create -f CondaEnvironment.yml \
&& source activate GraphAligner \
&& make bin/GraphAligner \
&& cp bin/GraphAligner /usr/local/bin/GraphAligner \
&& cd ../ \
&& exit

# Install the conda environment
COPY environment.yml /
RUN conda env create --quiet -f /environment.yml && conda clean -a
Expand All @@ -93,4 +75,4 @@ RUN mkdir /scripts
COPY scripts/beehave.R /scripts/beehave.R
RUN chmod 777 /usr/local/bin/pgge && chmod 777 /scripts/beehave.R

ENTRYPOINT [ "/bin/bash", "-l", "-c" ]
ENTRYPOINT [ "/bin/sh", "-c", "-l" ]
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,9 @@ cd pgge

you can run the container using the example [DRB1-3123](data/HLA/DRB1-3123) provided in this repo:
```sh
docker run -it -v ${PWD}/data/:/data pangenome/pgge "pgge -g "/data/HLA/DRB1-3123/*.consensus*.gfa" -f /data/HLA/DRB1-3123/DRB1-3123.fa -r /scripts/beehave.R -t 16 -o /data/HLA/DRB1-3123/pgge_docker -l 1000 -s 1000 -p 100"
docker run -it -v ${PWD}/data/:/data pangenome/pgge "pgge -g '/data/HLA/DRB1-3123/*.consensus*.gfa' -f /data/HLA/DRB1-3123/DRB1-3123.fa -r /scripts/beehave.R -t 16 -o /data/HLA/DRB1-3123/pgge_docker -l 1000 -s 1000 -p 100"
```
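:warning: In contrast to running `pgge` directly from the command line, when running it in a Docker container the whole `pgge` command is already wrapped in `"`. The glob pattern given to `-g` must therefore be quoted with `'` instead of `"`; otherwise the inner `"` end the outer quoting early and the pattern does not reach `pgge` intact.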

The `-v` argument of `docker run` always expects a full path: `If you intended to pass a host directory, use absolute path.` This is taken care of by using `${PWD}`.

Expand All @@ -173,7 +174,7 @@ docker build -t ${USER}/pgge:latest .
Staying in the `pgge` directory, we can run `pgge` with the locally built image:

```sh
docker run -it -v ${PWD}/data/:/data ${USER}/pgge "pgge -g "/data/HLA/DRB1-3123/*.consensus*.gfa" -f /data/HLA/DRB1-3123/DRB1-3123.fa -r /scripts/beehave.R -t 16 -o /data/HLA/DRB1-3123/pgge_docker -l 1000 -s 1000 -p 100"
docker run -it -v ${PWD}/data/:/data ${USER}/pgge "pgge -g '/data/HLA/DRB1-3123/*.consensus*.gfa' -f /data/HLA/DRB1-3123/DRB1-3123.fa -r /scripts/beehave.R -t 16 -o /data/HLA/DRB1-3123/pgge_docker -l 1000 -s 1000 -p 100"
```
## TODOs
Expand Down
3 changes: 3 additions & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ dependencies:
- conda-forge::r-ggrepel=0.9.1
- conda-forge::r-gridextra=2.3
- conda-forge::r-tidyverse=1.3.0
- conda-forge::open-fonts=0.7.0
- bioconda::samtools=1.12
- bioconda::graphaligner=1.0.13
13 changes: 11 additions & 2 deletions pgge
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ beehave_R=false
peanut_bed=false
subsample_percentage=false
subsample_number=false
alignment_mode="dbg"
graphaligner_vg_mode=false
threads=1

num_gfas=0
Expand All @@ -29,7 +31,7 @@ fi

# read the options
cmd=$0" "$@
TEMP=`getopt -o g:f:i:n:r:o:l:s:p:u:t:hb --long input-gfa:,input-fasta:,input-gaf:,input-graph-names:,beehave-r:,output-dir:,seq-length:,step:,subsample-percentage:,subsample-number:,threads:,help,peanut-bed -n 'pgge' -- "$@"`
TEMP=`getopt -o g:f:i:n:r:o:l:s:p:u:t:hbv --long input-gfa:,input-fasta:,input-gaf:,input-graph-names:,beehave-r:,output-dir:,seq-length:,step:,subsample-percentage:,subsample-number:,threads:,help,peanut-bed,graphaligner-vg-mode -n 'pgge' -- "$@"`
eval set -- "$TEMP"

# extract options and their arguments into variables.
Expand All @@ -46,6 +48,7 @@ while true ; do
-p|--subsample-percentage) subsample_percentage=$2 ; shift 2 ;;
-u|--subsample-number) subsample_number=$2 ; shift 2 ;;
-b|--peanut-bed) peanut_bed=true ; shift ;;
-v|--graphaligner-vg-mode) graphaligner_vg_mode=true ; shift ;;
-t|--threads) threads=$2 ; shift 2 ;;
-h|--help) show_help=true ; shift ;;
--) shift ; break ;;
Expand Down Expand Up @@ -132,6 +135,11 @@ then
prefix_pgge="$prefix_pgge"-p"$subsample_percentage"
fi

# Select the value later passed to GraphAligner via -x: alignment_mode is
# initialised to "dbg" and is switched to "vg" here when -v/--graphaligner-vg-mode
# was given (the option parser sets graphaligner_vg_mode=true in that case).
# Any value other than the literal string "false" selects "vg".
if [[ "$graphaligner_vg_mode" != false ]];
then
alignment_mode="vg"
fi

if [ $show_help ];
then
#padding=`printf %${#0}s` # prints as many spaces as the length of $0
Expand All @@ -147,6 +155,7 @@ then
echo " -r, --beehave-r PATH path to beehave.R"
echo " -b, --peanut-bed PATH output BED file"
echo " -n, --input-graph-names TSV input graph name file: first row is the name of the original input file, second row is the display name in the PNG"
echo " -v, --graphaligner-vg-mode run GraphAligner with -x vg (default is -x dbg)"
echo " [splitfa]"
echo " -l, --seq-length N length of the splits"
echo " -s, --step N step size between splits"
Expand Down Expand Up @@ -334,7 +343,7 @@ do
-g "$gfa" \
-f "$graph_aligner_fasta_input" \
-a "$prefix_pgge"."$n"."$gfa_base".gaf \
-x vg \
-x "$alignment_mode" \
-t "$threads" \
2> >(tee -a "$log_file")
("$timer" -f "$fmt" cut -f 2,3,4,16 "$prefix_pgge"."$n"."$gfa_base".gaf \
Expand Down

0 comments on commit 8c162e1

Please sign in to comment.