From acadcdee7e04d88d2b8982b7f59ee096434f0507 Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Mon, 26 Nov 2018 16:25:06 -0800 Subject: [PATCH 01/14] bug fix for spp (when nth==1) --- src/encode_spp.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/encode_spp.py b/src/encode_spp.py index fc32420c..e2af1b11 100755 --- a/src/encode_spp.py +++ b/src/encode_spp.py @@ -46,6 +46,7 @@ def spp(ta, ctl_ta, fraglen, cap_num_peak, nth, out_dir): basename_prefix = '{}_x_{}'.format(basename_ta, basename_ctl_ta) if len(basename_prefix) > 200: # UNIX cannot have filename > 255 basename_prefix = '{}_x_control'.format(basename_ta) + nth_param = '-p={}'.format(nth) if nth>1 else '' prefix = os.path.join(out_dir, basename_prefix) rpeak = '{}.{}.regionPeak.gz'.format( prefix, @@ -54,7 +55,7 @@ def spp(ta, ctl_ta, fraglen, cap_num_peak, nth, out_dir): rpeak_tmp_gz = '{}.tmp.gz'.format(rpeak) cmd0 = 'Rscript --max-ppsize=500000 $(which run_spp.R) -c={} -i={} ' - cmd0 += '-npeak={} -odir={} -speak={} -savr={} -rf -p={}' + cmd0 += '-npeak={} -odir={} -speak={} -savr={} -rf {}' cmd0 = cmd0.format( ta, ctl_ta, @@ -62,7 +63,7 @@ def spp(ta, ctl_ta, fraglen, cap_num_peak, nth, out_dir): os.path.abspath(out_dir), fraglen, rpeak_tmp, - nth) + nth_param) run_shell_cmd(cmd0) # if we have scientific representation of chr coord. 
then convert it to int From 0e9024dce03b17969ac9ac57484f19919487c017 Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Mon, 26 Nov 2018 16:35:21 -0800 Subject: [PATCH 02/14] support for PBS cluster --- backends/backend.conf | 58 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/backends/backend.conf b/backends/backend.conf index 874607c7..87828e85 100644 --- a/backends/backend.conf +++ b/backends/backend.conf @@ -4,6 +4,64 @@ backend { default = "Local" providers { + pbs { + actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" + config { + script-epilogue = "sleep 30 && sync" + concurrent-job-limit = 50 + runtime-attributes = """ + Int cpu = 1 + Int? gpu + Int? time + Int? memory_mb + """ + submit = """ + qsub \ + -N ${job_name} \ + -o ${out} \ + -e ${err} \ + ${"-lselect=1:ncpus=" + cpu + ":mem=" + memory_mb/1024 + "gb"} \ + ${"-lwalltime=" + time + ":0:0"} \ + ${if gpu>1 then "-lngpus=" + gpu else ""} \ + -V \ + ${script} + """ + kill = "qdel ${job_id}" + check-alive = "qstat ${job_id}" + job-id-regex = "(\\d+).+" + } + } + + pbs_singularity { + actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" + config { + script-epilogue = "sleep 30 && sync" + concurrent-job-limit = 50 + runtime-attributes = """ + String sge_pe = "shm" + Int cpu = 1 + Int? gpu + Int? time + Int? memory_mb + String singularity_container + String? 
singularity_bindpath + """ + submit = """ + ls ${singularity_container} $(echo ${singularity_bindpath} | tr , ' ') 1>/dev/null && (echo "chmod u+x ${script} && SINGULARITY_BINDPATH=$(echo ${cwd} | sed 's/cromwell-executions/\n/g' | head -n1),${singularity_bindpath} singularity exec --home ${cwd} ${if defined(gpu) then '--nv' else ''} ${singularity_container} ${script}" | qsub \ + -N ${job_name} \ + -o ${out} \ + -e ${err} \ + ${"-lselect=1:ncpus=" + cpu + ":mem=" + memory_mb/1024 + "gb"} \ + ${"-lwalltime=" + time + ":0:0"} \ + ${if gpu>1 then "-lngpus=" + gpu else ""} \ + -V) + """ + kill = "qdel ${job_id}" + check-alive = "qstat ${job_id}" + job-id-regex = "(\\d+)" + } + } + + slurm_singularity { actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" config { From d12f78310747118e9555bbc78136b920c385bd26 Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Mon, 26 Nov 2018 16:36:48 -0800 Subject: [PATCH 03/14] remove unnecessary SGE vars in PBS backend --- backends/backend.conf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backends/backend.conf b/backends/backend.conf index 87828e85..93a785d7 100644 --- a/backends/backend.conf +++ b/backends/backend.conf @@ -30,7 +30,7 @@ backend { check-alive = "qstat ${job_id}" job-id-regex = "(\\d+).+" } - } + } pbs_singularity { actor-factory = "cromwell.backend.impl.sfs.config.ConfigBackendLifecycleActorFactory" @@ -38,7 +38,6 @@ backend { script-epilogue = "sleep 30 && sync" concurrent-job-limit = 50 runtime-attributes = """ - String sge_pe = "shm" Int cpu = 1 Int? gpu Int? 
time From 7785a010ec91cbbc51043075b6ef4a9f5c67315e Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Mon, 26 Nov 2018 16:45:53 -0800 Subject: [PATCH 04/14] workaround for cromwell/WDL stdout() issue: write on a temporary file instead --- chip.wdl | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/chip.wdl b/chip.wdl index 18665006..11232846 100755 --- a/chip.wdl +++ b/chip.wdl @@ -1518,10 +1518,10 @@ task qc_report { task read_genome_tsv { File genome_tsv command { - cat ${genome_tsv} + cat ${genome_tsv} > 'tmp.tsv' } output { - Map[String,String] genome = read_map(stdout()) + Map[String,String] genome = read_map('tmp.tsv') } runtime { cpu : 1 @@ -1536,16 +1536,17 @@ task rounded_mean { command <<< python <>> output { - Int rounded_mean = read_int(stdout()) + Int rounded_mean = read_int('tmp.txt') } runtime { cpu : 1 From 0daee12deafa7703f388253be6d203f3a77c94a2 Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Mon, 26 Nov 2018 17:01:04 -0800 Subject: [PATCH 05/14] update doc for approx duration of test pipeline runs (1->6 hours) --- docs/tutorial_dx_cli.md | 2 +- docs/tutorial_dx_web.md | 2 +- docs/tutorial_google.md | 2 +- docs/tutorial_local_conda.md | 2 +- docs/tutorial_local_docker.md | 2 +- docs/tutorial_local_singularity.md | 2 +- docs/tutorial_scg.md | 4 ++-- docs/tutorial_sge.md | 4 ++-- docs/tutorial_sherlock.md | 4 ++-- docs/tutorial_slurm.md | 4 ++-- 10 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/tutorial_dx_cli.md b/docs/tutorial_dx_cli.md index 0a4aa239..512ac5b4 100644 --- a/docs/tutorial_dx_cli.md +++ b/docs/tutorial_dx_cli.md @@ -58,6 +58,6 @@ This document describes instruction for the item 1). 12. Click on "Run as Analysis..." and you will be automatically redirected to the "Monitor" tab. -13. It will take about an hour. You will be able to find all outputs on your output folder. Final QC report (`qc.html`)/JSON (`qc.json`) will be found on it. +13. It will take about 6 hours. 
You will be able to find all outputs on your output folder. Final QC report (`qc.html`)/JSON (`qc.json`) will be found on it. 14. See full specification for [input JSON file](input.md). diff --git a/docs/tutorial_dx_web.md b/docs/tutorial_dx_web.md index 0c47711e..8b798d4c 100644 --- a/docs/tutorial_dx_web.md +++ b/docs/tutorial_dx_web.md @@ -31,7 +31,7 @@ This document describes instruction for the item 2). 10. Click on "Run as Analysis..." and you will be automatically redirected to the "Monitor" tab. -11. It will take about an hour. You will be able to find all outputs on your output folder. Final QC report (`qc.html`)/JSON (`qc.json`) will be found on it. +11. It will take about 6 hours. You will be able to find all outputs on your output folder. Final QC report (`qc.html`)/JSON (`qc.json`) will be found on it. 11. See full specification for [input JSON file](input.md). diff --git a/docs/tutorial_google.md b/docs/tutorial_google.md index a0c15d52..330be3ef 100644 --- a/docs/tutorial_google.md +++ b/docs/tutorial_google.md @@ -51,7 +51,7 @@ All test samples and genome data are shared on our public Google Cloud buckets. $ java -jar -Dconfig.file=backends/backend.conf -Dbackend.default=google -Dbackend.providers.google.config.project=${PROJECT} -Dbackend.providers.google.config.root=${BUCKET} cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/docker.json ``` -11. It will take about an hour. You will be able to find all outputs on your Google Cloud bucket. Final QC report/JSON will be written on `gs://[YOUR_BUCKET_NAME]/ENCSR936XTK_subsampled/chip/[SOME_HASH_STRING]/call-qc_report/execution/glob*/qc.html` or `qc.json`. See [output directory structure](output.md) for details. +11. It will take about 6 hours. You will be able to find all outputs on your Google Cloud bucket. Final QC report/JSON will be written on `gs://[YOUR_BUCKET_NAME]/ENCSR936XTK_subsampled/chip/[SOME_HASH_STRING]/call-qc_report/execution/glob*/qc.html` or `qc.json`. 
See [output directory structure](output.md) for details. 12. See full specification for [input JSON file](input.md). diff --git a/docs/tutorial_local_conda.md b/docs/tutorial_local_conda.md index d4f1a11a..2a8d137b 100644 --- a/docs/tutorial_local_conda.md +++ b/docs/tutorial_local_conda.md @@ -40,6 +40,6 @@ Tutorial for general UNIX computers without docker $ java -jar -Dconfig.file=backends/backend.conf cromwell-34.jar run chip.wdl -i ${INPUT} ``` -8. It will take about an hour. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +8. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 9. See full specification for [input JSON file](input.md). diff --git a/docs/tutorial_local_docker.md b/docs/tutorial_local_docker.md index cd1c89e9..1752bf1f 100644 --- a/docs/tutorial_local_docker.md +++ b/docs/tutorial_local_docker.md @@ -31,6 +31,6 @@ Tutorial for general UNIX computers with docker $ java -jar -Dconfig.file=backends/backend.conf cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/docker.json ``` -6. It will take about an hour. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +6. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 7. See full specification for [input JSON file](input.md). 
diff --git a/docs/tutorial_local_singularity.md b/docs/tutorial_local_singularity.md index 9e1f0229..3c779854 100644 --- a/docs/tutorial_local_singularity.md +++ b/docs/tutorial_local_singularity.md @@ -41,7 +41,7 @@ Tutorial for general UNIX computers with singularity $ java -jar -Xmx1G -Dconfig.file=backends/backend.conf -Dbackend.default=singularity cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/singularity.json ``` -8. It will take about an hour. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +8. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 9. See full specification for [input JSON file](input.md). diff --git a/docs/tutorial_scg.md b/docs/tutorial_scg.md index bdf08583..7c43208a 100644 --- a/docs/tutorial_scg.md +++ b/docs/tutorial_scg.md @@ -48,7 +48,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt $ java -jar -Xmx1G -Dconfig.file=backends/backend.conf -Dbackend.default=slurm cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/scg.json ``` -8. It will take about an hour. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +8. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 9. See full specification for [input JSON file](input.md). @@ -66,7 +66,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt $ java -jar -Xmx1G -Dconfig.file=backends/backend.conf -Dbackend.default=slurm_singularity cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/scg.json ``` -7. It will take about an hour. 
You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +7. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 8. See full specification for [input JSON file](input.md). diff --git a/docs/tutorial_sge.md b/docs/tutorial_sge.md index c356bd6e..db9744f7 100644 --- a/docs/tutorial_sge.md +++ b/docs/tutorial_sge.md @@ -54,7 +54,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt $ java -jar -Xmx1G -Dconfig.file=backends/backend.conf -Dbackend.default=sge cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/sge.json ``` -9. It will take about an hour. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +9. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 10. See full specification for [input JSON file](input.md). @@ -76,7 +76,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt $ java -jar -Xmx1G -Dconfig.file=backends/backend.conf -Dbackend.default=sge_singularity cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/sge.json ``` -9. It will take about an hour. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +9. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 10. See full specification for [input JSON file](input.md). 
diff --git a/docs/tutorial_sherlock.md b/docs/tutorial_sherlock.md index e625467f..0da2bfa6 100644 --- a/docs/tutorial_sherlock.md +++ b/docs/tutorial_sherlock.md @@ -48,7 +48,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt $ java -jar -Xmx1G -Dconfig.file=backends/backend.conf -Dbackend.default=slurm -Dbackend.providers.slurm.config.concurrent-job-limit=1 cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/sherlock.json ``` -8. It will take about an hour. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +8. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 9. See full specification for [input JSON file](input.md). @@ -73,7 +73,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt $ java -jar -Xmx1G -Dconfig.file=backends/backend.conf -Dbackend.default=slurm_singularity -Dbackend.providers.slurm_singularity.config.concurrent-job-limit=1 cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/sherlock.json ``` -8. It will take about an hour. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +8. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 9. See full specification for [input JSON file](input.md). 
diff --git a/docs/tutorial_slurm.md b/docs/tutorial_slurm.md index b33f1425..e85f96d0 100644 --- a/docs/tutorial_slurm.md +++ b/docs/tutorial_slurm.md @@ -54,7 +54,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt $ java -jar -Xmx1G -Dconfig.file=backends/backend.conf -Dbackend.default=slurm cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/slurm.json ``` -9. It will take about an hour. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +9. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 10. See full specification for [input JSON file](input.md). @@ -77,7 +77,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt $ java -jar -Xmx1G -Dconfig.file=backends/backend.conf -Dbackend.default=slurm_singularity cromwell-34.jar run chip.wdl -i ${INPUT} -o workflow_opts/slurm.json ``` -9. It will take about an hour. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. +9. It will take about 6 hours. You will be able to find all outputs on `cromwell-executions/chip/[RANDOM_HASH_STRING]/`. See [output directory structure](output.md) for details. 10. See full specification for [input JSON file](input.md). 
From e28bd889c4a0c09b1b55f4625e29bb2035cc3734 Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Mon, 26 Nov 2018 17:26:24 -0800 Subject: [PATCH 06/14] fix for LC_COLLATE=C issue in peak_to_bigbed, peak_to_hammock --- src/encode_common_genomic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/encode_common_genomic.py b/src/encode_common_genomic.py index 66923fcd..dd8d55f9 100755 --- a/src/encode_common_genomic.py +++ b/src/encode_common_genomic.py @@ -220,7 +220,7 @@ def peak_to_hammock(peak, out_dir): run_shell_cmd(cmd) cmd2 = 'touch {}'.format(hammock_gz_tbi) else: - cmd = "zcat -f {} | sed '/^\(chr\)/!d' | sort -k1,1V -k2,2n > {}" + cmd = "zcat -f {} | sed '/^\(chr\)/!d' | LC_COLLATE=C sort -k1,1V -k2,2n > {}" cmd = cmd.format(peak, hammock_tmp) run_shell_cmd(cmd) @@ -346,7 +346,7 @@ def peak_to_bigbed(peak, peak_type, chrsz, keep_irregular_chr, out_dir): else: cmd1 = "cat {} > {}".format(chrsz, chrsz_tmp) run_shell_cmd(cmd1) - cmd2 = "zcat -f {} | sort -k1,1 -k2,2n > {}".format(peak, bigbed_tmp) + cmd2 = "zcat -f {} | LC_COLLATE=C sort -k1,1 -k2,2n > {}".format(peak, bigbed_tmp) run_shell_cmd(cmd2) cmd3 = "bedClip {} {} {}".format(bigbed_tmp, chrsz_tmp, bigbed_tmp2) run_shell_cmd(cmd3) From a84b1fe2258ce8ba461448a6806ac39886c7c70f Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Mon, 26 Nov 2018 17:39:01 -0800 Subject: [PATCH 07/14] update picard (2.10.6->2.18.16) --- conda/requirements.txt | 2 +- docker_image/Dockerfile | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/requirements.txt b/conda/requirements.txt index 75855b7e..1344f0dc 100644 --- a/conda/requirements.txt +++ b/conda/requirements.txt @@ -3,7 +3,7 @@ samtools ==1.2 htslib ==1.4 # 1.5 in bioconda needed libbz2.so.1.0 bedtools ==2.26.0 #2.22 # 2.21.0 -picard ==2.10.6 +picard ==2.18.16 #==2.10.6 ucsc-fetchchromsizes ucsc-wigtobigwig ucsc-bedgraphtobigwig diff --git a/docker_image/Dockerfile b/docker_image/Dockerfile index be1edc65..92a8338e 100644 
--- a/docker_image/Dockerfile +++ b/docker_image/Dockerfile @@ -85,8 +85,8 @@ RUN git clone --branch 1.2 --single-branch https://github.com/samtools/samtools. RUN git clone --branch v2.26.0 --single-branch https://github.com/arq5x/bedtools2.git && \ cd bedtools2 && make && make install && cd ../ && rm -rf bedtools2* -# Install Picard 2.10.6 -RUN wget https://github.com/broadinstitute/picard/releases/download/2.10.6/picard.jar && chmod +x picard.jar +# Install Picard 2.18.16 +RUN wget https://github.com/broadinstitute/picard/releases/download/2.18.16/picard.jar && chmod +x picard.jar # Install sambamba 0.6.6 RUN wget https://github.com/lomereiter/sambamba/releases/download/v0.6.6/sambamba_v0.6.6_linux.tar.bz2 && tar -xvjf sambamba_v0.6.6_linux.tar.bz2 && mv sambamba_v0.6.6 sambamba && rm -rf sambamba_* From a9bd795b50dd153d997937a761fc064a6dc22e0e Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Mon, 26 Nov 2018 17:50:16 -0800 Subject: [PATCH 08/14] version: v1.1.2->v1.1.3 --- docs/dev.md | 14 ++++++++++++-- docs/tutorial_dx_web.md | 20 ++++++++++---------- docs/tutorial_local_singularity.md | 4 ++-- docs/tutorial_scg.md | 4 ++-- docs/tutorial_sge.md | 2 +- docs/tutorial_sherlock.md | 4 ++-- docs/tutorial_slurm.md | 4 ++-- test/test_task/test.sh | 2 +- test/test_workflow/test_chip.sh | 2 +- test/test_workflow/test_chip_local.sh | 2 +- workflow_opts/docker.json | 2 +- workflow_opts/scg.json | 2 +- workflow_opts/sge.json | 2 +- workflow_opts/sherlock.json | 2 +- workflow_opts/singularity.json | 2 +- workflow_opts/slurm.json | 2 +- 16 files changed, 40 insertions(+), 30 deletions(-) diff --git a/docs/dev.md b/docs/dev.md index e980bb1e..6bf36cea 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -1,6 +1,16 @@ Dev === +## Command line for version change +``` +PREV_VER=v1.1.2 +NEW_VER=v1.1.3 +for f in $(grep -rl ${PREV_VER} --include=*.{json,md,sh}) +do + sed -i "s/${PREV_VER}/${NEW_VER}/g" ${f} +done +``` + ## Building templates on DX for each genome Make sure that you have 
[`dxWDL-0.77.jar`](https://github.com/dnanexus/dxWDL/releases/download/0.77/dxWDL-0.77.jar) on your `$HOME`. Install [DNANexus Platform SDK](https://wiki.dnanexus.com/downloads) with `pip install dxpy`. Log-in on DNANexus with `dx login` and choose "ENCODE Uniform Processing Pipelines" (name of our official DNANexus project for pipelines). @@ -9,7 +19,7 @@ Run the following command line locally to build out DX workflows for this pipeli ``` # version -VER=v1.1.2 +VER=v1.1.3 # general java -jar ~/dxWDL-0.77.jar compile chip.wdl -project "ENCODE Uniform Processing Pipelines" -extras workflow_opts/docker.json -f -folder /ChIP-seq2/workflows/$VER/general -defaults examples/dx/template_general.json @@ -42,7 +52,7 @@ java -jar ~/dxWDL-0.77.jar compile chip.wdl -project "ENCODE Uniform Processing ## DX Azure ``` # version -VER=v1.1.2 +VER=v1.1.3 # general java -jar ~/dxWDL-0.77.jar compile chip.wdl -project "ENCODE Uniform Processing Pipelines Azure" -extras workflow_opts/docker.json -f -folder /ChIP-seq2/workflows/$VER/general -defaults examples/dx_azure/template_general.json diff --git a/docs/tutorial_dx_web.md b/docs/tutorial_dx_web.md index 8b798d4c..01c41300 100644 --- a/docs/tutorial_dx_web.md +++ b/docs/tutorial_dx_web.md @@ -41,16 +41,16 @@ This document describes instruction for the item 2). 1. DNANexus allows only one copy of a workflow per project. The example workflow in the previous section is pre-built for the subsampled test sample [ENCSR936XTK](https://www.encodeproject.org/experiments/ENCSR936XTK/) with all parameters defined already. 2. Copy one of the following workflows according to the platform you have chosen for your project (AWS or Azure). -* [AWS general](https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows/v1.1.2/general) without pre-defined reference genome. -* [AWS hg38](https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows/v1.1.2/hg38) with pre-defined hg38 reference genome. 
-* [AWS hg19](https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows/v1.1.2/hg19) with pre-defined hg38 reference genome. -* [AWS mm10](https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows/v1.1.2/mm10) with pre-defined mm10 reference genome. -* [AWS mm9](https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows/v1.1.2/mm9) with pre-defined mm9 reference genome. -* [Azure general](https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows/v1.1.2/general) without pre-defined reference genome. -* [Azure hg38](https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows/v1.1.2/hg38) with pre-defined hg38 reference genome. -* [Azure hg19](https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows/v1.1.2/hg19) with pre-defined hg38 reference genome. -* [Azure mm10](https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows/v1.1.2/mm10) with pre-defined mm10 reference genome. -* [Azure mm9](https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows/v1.1.2/mm9) with pre-defined mm9 reference genome. +* [AWS general](https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows/v1.1.3/general) without pre-defined reference genome. +* [AWS hg38](https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows/v1.1.3/hg38) with pre-defined hg38 reference genome. +* [AWS hg19](https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows/v1.1.3/hg19) with pre-defined hg19 reference genome. +* [AWS mm10](https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows/v1.1.3/mm10) with pre-defined mm10 reference genome. 
+* [AWS mm9](https://platform.dnanexus.com/projects/BKpvFg00VBPV975PgJ6Q03v6/data/ChIP-seq2/workflows/v1.1.3/mm9) with pre-defined mm9 reference genome. +* [Azure general](https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows/v1.1.3/general) without pre-defined reference genome. +* [Azure hg38](https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows/v1.1.3/hg38) with pre-defined hg38 reference genome. +* [Azure hg19](https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows/v1.1.3/hg19) with pre-defined hg19 reference genome. +* [Azure mm10](https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows/v1.1.3/mm10) with pre-defined mm10 reference genome. +* [Azure mm9](https://platform.dnanexus.com/projects/F6K911Q9xyfgJ36JFzv03Z5J/data/ChIP-seq2/workflows/v1.1.3/mm9) with pre-defined mm9 reference genome. 3. Click on the DX workflow `chip`. diff --git a/docs/tutorial_local_singularity.md b/docs/tutorial_local_singularity.md index 3c779854..09e3daf3 100644 --- a/docs/tutorial_local_singularity.md +++ b/docs/tutorial_local_singularity.md @@ -32,7 +32,7 @@ Tutorial for general UNIX computers with singularity 6. Pull a singularity container for the pipeline. This will pull pipeline's docker container first and build a singularity one on `~/.singularity`. ``` - $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.2.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1.2 + $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.3.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1.3 ``` 7. Run a pipeline for the test sample. 
@@ -49,7 +49,7 @@ Tutorial for general UNIX computers with singularity ``` { "default_runtime_attributes" : { - "singularity_container" : "~/.singularity/atac-seq-pipeline-v1.1.2.simg", + "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.3.simg", "singularity_bindpath" : "/your/,YOUR_OWN_DATA_DIR1,YOUR_OWN_DATA_DIR2,..." } } diff --git a/docs/tutorial_scg.md b/docs/tutorial_scg.md index 7c43208a..4fd13687 100644 --- a/docs/tutorial_scg.md +++ b/docs/tutorial_scg.md @@ -56,7 +56,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt 5. Pull a singularity container for the pipeline. This will pull pipeline's docker container first and build a singularity one on `~/.singularity`. ``` - $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.2.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1.2 + $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.3.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1.3 ``` 6. Run a pipeline for a SUBSAMPLED paired-end sample of [ENCSR936XTK](https://www.encodeproject.org/experiments/ENCSR936XTK/). @@ -74,7 +74,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt ``` { "default_runtime_attributes" : { - "singularity_container" : "~/.singularity/atac-seq-pipeline-v1.1.2.simg", + "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.3.simg", "singularity_bindpath" : "/scratch/users,/srv/gsfs0,/your/,YOUR_OWN_DATA_DIR1,YOUR_OWN_DATA_DIR1,..." } } diff --git a/docs/tutorial_sge.md b/docs/tutorial_sge.md index db9744f7..35f0e3b0 100644 --- a/docs/tutorial_sge.md +++ b/docs/tutorial_sge.md @@ -67,7 +67,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt 7. 
Pull a singularity container for the pipeline. This will pull pipeline's docker container first and build a singularity one on `~/.singularity`. ``` - $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.2.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1 + $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.3.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1.3 ``` 8. Run a pipeline for the test sample. diff --git a/docs/tutorial_sherlock.md b/docs/tutorial_sherlock.md index 0da2bfa6..6920f3d7 100644 --- a/docs/tutorial_sherlock.md +++ b/docs/tutorial_sherlock.md @@ -62,7 +62,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt 6. Pull a singularity container for the pipeline. This will pull pipeline's docker container first and build a singularity one on `~/.singularity`. Stanford Sherlock does not allow building a container on login nodes. Wait until you get a command prompt after `sdev`. 
``` $ sdev # sherlock cluster does not allow building a container on login node - $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.2.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1.2 + $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.3.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1.3 $ exit # exit from an interactive node ``` @@ -81,7 +81,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt ``` { "default_runtime_attributes" : { - "singularity_container" : "~/.singularity/atac-seq-pipeline-v1.1.2.simg", + "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.3.simg", "singularity_bindpath" : "/scratch,/oak/stanford,/your/,YOUR_OWN_DATA_DIR1,YOUR_OWN_DATA_DIR1,..." } } diff --git a/docs/tutorial_slurm.md b/docs/tutorial_slurm.md index e85f96d0..16d79467 100644 --- a/docs/tutorial_slurm.md +++ b/docs/tutorial_slurm.md @@ -67,7 +67,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt 7. Pull a singularity container for the pipeline. This will pull pipeline's docker container first and build a singularity one on `~/.singularity`. ``` - $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.2.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1.2 + $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.3.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1.3 ``` 8. Run a pipeline for the test sample. 
@@ -85,7 +85,7 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt ``` { "default_runtime_attributes" : { - "singularity_container" : "~/.singularity/atac-seq-pipeline-v1.1.2.simg", + "singularity_container" : "~/.singularity/atac-seq-pipeline-v1.1.3.simg", "singularity_bindpath" : "/your/,YOUR_OWN_DATA_DIR1,YOUR_OWN_DATA_DIR2,..." } } diff --git a/test/test_task/test.sh b/test/test_task/test.sh index ad5cb1a2..2079d66b 100755 --- a/test/test_task/test.sh +++ b/test/test_task/test.sh @@ -12,7 +12,7 @@ INPUT=$2 if [ $# -gt 2 ]; then DOCKER_IMAGE=$3 else - DOCKER_IMAGE=quay.io/encode-dcc/chip-seq-pipeline:v1.1.2 + DOCKER_IMAGE=quay.io/encode-dcc/chip-seq-pipeline:v1.1.3 fi if [ -f "cromwell-34.jar" ]; then echo "Skip downloading cromwell." diff --git a/test/test_workflow/test_chip.sh b/test/test_workflow/test_chip.sh index 7c8d8002..88206851 100755 --- a/test/test_workflow/test_chip.sh +++ b/test/test_workflow/test_chip.sh @@ -11,7 +11,7 @@ fi if [ $# -gt 1 ]; then DOCKER_IMAGE=$2 else - DOCKER_IMAGE=quay.io/encode-dcc/chip-seq-pipeline:v1.1.2 + DOCKER_IMAGE=quay.io/encode-dcc/chip-seq-pipeline:v1.1.3 fi INPUT=$1 PREFIX=$(basename $INPUT .json) diff --git a/test/test_workflow/test_chip_local.sh b/test/test_workflow/test_chip_local.sh index 31e8b903..77da9e2b 100755 --- a/test/test_workflow/test_chip_local.sh +++ b/test/test_workflow/test_chip_local.sh @@ -8,7 +8,7 @@ fi if [ $# -gt 2 ]; then DOCKER_IMAGE=$3 else - DOCKER_IMAGE=quay.io/encode-dcc/chip-seq-pipeline:v1.1.2 + DOCKER_IMAGE=quay.io/encode-dcc/chip-seq-pipeline:v1.1.3 fi INPUT=$1 GCLOUD_SERVICE_ACCOUNT_SECRET_JSON_FILE=$2 diff --git a/workflow_opts/docker.json b/workflow_opts/docker.json index 2a825478..8f33b05f 100644 --- a/workflow_opts/docker.json +++ b/workflow_opts/docker.json @@ -1,6 +1,6 @@ { "default_runtime_attributes" : { - "docker" : "quay.io/encode-dcc/chip-seq-pipeline:v1.1.2", + "docker" : "quay.io/encode-dcc/chip-seq-pipeline:v1.1.3", "zones": "us-west1-a 
us-west1-b us-west1-c us-central1-c us-central1-b", "failOnStderr" : false, "continueOnReturnCode" : 0, diff --git a/workflow_opts/scg.json b/workflow_opts/scg.json index 0bc5ab9e..e1da7f1f 100644 --- a/workflow_opts/scg.json +++ b/workflow_opts/scg.json @@ -1,7 +1,7 @@ { "default_runtime_attributes" : { "slurm_account" : "YOUR_SLURM_ACCOUNT", - "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.2.simg", + "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.3.simg", "singularity_bindpath" : "/reference/ENCODE,/scratch,/srv/gsfs0" } } diff --git a/workflow_opts/sge.json b/workflow_opts/sge.json index 801d68c9..f5a70640 100644 --- a/workflow_opts/sge.json +++ b/workflow_opts/sge.json @@ -1,6 +1,6 @@ { "default_runtime_attributes" : { "sge_pe" : "shm", - "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.2.simg" + "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.3.simg" } } diff --git a/workflow_opts/sherlock.json b/workflow_opts/sherlock.json index da40efc4..28a59e61 100644 --- a/workflow_opts/sherlock.json +++ b/workflow_opts/sherlock.json @@ -1,7 +1,7 @@ { "default_runtime_attributes" : { "slurm_partition" : "normal", - "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.2.simg", + "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.3.simg", "singularity_bindpath" : "/scratch,/lscratch,/oak/stanford,/home/groups/cherry/encode" } } diff --git a/workflow_opts/singularity.json b/workflow_opts/singularity.json index dd2d93cc..5c3165e5 100644 --- a/workflow_opts/singularity.json +++ b/workflow_opts/singularity.json @@ -1,5 +1,5 @@ { "default_runtime_attributes" : { - "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.2.simg" + "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.3.simg" } } diff --git a/workflow_opts/slurm.json b/workflow_opts/slurm.json index 5837d609..bc26163b 100644 --- a/workflow_opts/slurm.json +++ b/workflow_opts/slurm.json @@ -2,6 +2,6 @@ 
"default_runtime_attributes" : { "slurm_partition" : "YOUR_SLURM_PARTITION", "slurm_account" : "YOUR_SLURM_ACCOUNT", - "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.2.simg" + "singularity_container" : "~/.singularity/chip-seq-pipeline-v1.1.3.simg" } } From 8a0eb7a31ac5b25fd611d92365bef146312b51c1 Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Mon, 26 Nov 2018 18:16:00 -0800 Subject: [PATCH 09/14] added a version-up script in dev.md --- .circleci/config.yml | 2 +- docs/dev.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.circleci/config.yml b/.circleci/config.yml index 2207bb03..59698076 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -51,7 +51,7 @@ jobs: name: build image command: | source ${BASH_ENV} - export DOCKER_CACHE_TAG=v1.1.2 + export DOCKER_CACHE_TAG=v1.1.3 echo "pulling ${DOCKER_CACHE_TAG}!" docker pull quay.io/encode-dcc/chip-seq-pipeline:${DOCKER_CACHE_TAG} docker login -u=${QUAY_ROBOT_USER} -p=${QUAY_ROBOT_USER_TOKEN} quay.io diff --git a/docs/dev.md b/docs/dev.md index 6bf36cea..c4d044bb 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -5,7 +5,7 @@ Dev ``` PREV_VER=v1.1.2 NEW_VER=v1.1.3 -for f in $(grep -rl ${PREV_VER} --include=*.{json,md,sh}) +for f in $(grep -rl ${PREV_VER} --include=*.{json,md,sh,yml}) do sed -i "s/${PREV_VER}/${NEW_VER}/g" ${f} done From 0bb725a89215e7a1bbb1344250ab0aa697fdd8f7 Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Mon, 26 Nov 2018 22:04:22 -0800 Subject: [PATCH 10/14] revert upgrading picard (2.10.6->2.18->2.10.6) --- conda/requirements.txt | 2 +- docker_image/Dockerfile | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/requirements.txt b/conda/requirements.txt index 1344f0dc..75855b7e 100644 --- a/conda/requirements.txt +++ b/conda/requirements.txt @@ -3,7 +3,7 @@ samtools ==1.2 htslib ==1.4 # 1.5 in bioconda needed libbz2.so.1.0 bedtools ==2.26.0 #2.22 # 2.21.0 -picard ==2.18.16 #==2.10.6 +picard ==2.10.6 ucsc-fetchchromsizes 
ucsc-wigtobigwig ucsc-bedgraphtobigwig diff --git a/docker_image/Dockerfile b/docker_image/Dockerfile index 92a8338e..be1edc65 100644 --- a/docker_image/Dockerfile +++ b/docker_image/Dockerfile @@ -85,8 +85,8 @@ RUN git clone --branch 1.2 --single-branch https://github.com/samtools/samtools. RUN git clone --branch v2.26.0 --single-branch https://github.com/arq5x/bedtools2.git && \ cd bedtools2 && make && make install && cd ../ && rm -rf bedtools2* -# Install Picard 2.18.16 -RUN wget https://github.com/broadinstitute/picard/releases/download/2.18.16/picard.jar && chmod +x picard.jar +# Install Picard 2.10.6 +RUN wget https://github.com/broadinstitute/picard/releases/download/2.10.6/picard.jar && chmod +x picard.jar # Install sambamba 0.6.6 RUN wget https://github.com/lomereiter/sambamba/releases/download/v0.6.6/sambamba_v0.6.6_linux.tar.bz2 && tar -xvjf sambamba_v0.6.6_linux.tar.bz2 && mv sambamba_v0.6.6 sambamba && rm -rf sambamba_* From 8fd52bce80b3090b9008df801d4a9a9aabb110f1 Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Tue, 27 Nov 2018 14:53:05 -0800 Subject: [PATCH 11/14] fix picard issue (using too many threads for GC) --- src/encode_filter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/encode_filter.py b/src/encode_filter.py index ee2c2e9a..5ad1101e 100755 --- a/src/encode_filter.py +++ b/src/encode_filter.py @@ -164,7 +164,7 @@ def mark_dup_picard(bam, out_dir): # shared by both se and pe dupmark_bam = '{}.dupmark.bam'.format(prefix) dup_qc = '{}.dup.qc'.format(prefix) - cmd = 'java -Xmx4G -jar ' + cmd = 'java -Xmx4G -XX:ParallelGCThreads=1 -jar ' cmd += locate_picard() cmd += ' MarkDuplicates ' # cmd = 'picard MarkDuplicates ' From 67f3678b20a3e5534534cf23d59ba99b3fc4c53b Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Thu, 29 Nov 2018 13:46:30 -0800 Subject: [PATCH 12/14] update doc for SCG (login node does not allow building singularity container) --- docs/tutorial_scg.md | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/docs/tutorial_scg.md b/docs/tutorial_scg.md index 4fd13687..089e530a 100644 --- a/docs/tutorial_scg.md +++ b/docs/tutorial_scg.md @@ -56,7 +56,9 @@ Our pipeline supports both [Conda](https://conda.io/docs/) and [Singularity](htt 5. Pull a singularity container for the pipeline. This will pull pipeline's docker container first and build a singularity one on `~/.singularity`. ``` + $ sdev # SCG cluster does not allow building a container on login node $ mkdir -p ~/.singularity && cd ~/.singularity && SINGULARITY_CACHEDIR=~/.singularity SINGULARITY_PULLFOLDER=~/.singularity singularity pull --name chip-seq-pipeline-v1.1.3.simg -F docker://quay.io/encode-dcc/chip-seq-pipeline:v1.1.3 + $ exit ``` 6. Run a pipeline for a SUBSAMPLED paired-end sample of [ENCSR936XTK](https://www.encodeproject.org/experiments/ENCSR936XTK/). From f8f2e88b4562617756630466a68ba42d16aba6d1 Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Thu, 29 Nov 2018 17:03:16 -0800 Subject: [PATCH 13/14] fix for picard SEGFAULT bug (when using Intel compressing/decompressing algorithm) --- src/encode_filter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/encode_filter.py b/src/encode_filter.py index 5ad1101e..c2aaf8ec 100755 --- a/src/encode_filter.py +++ b/src/encode_filter.py @@ -170,6 +170,7 @@ def mark_dup_picard(bam, out_dir): # shared by both se and pe # cmd = 'picard MarkDuplicates ' cmd += 'INPUT={} OUTPUT={} ' cmd += 'METRICS_FILE={} VALIDATION_STRINGENCY=LENIENT ' + cmd += 'USE_JDK_DEFLATER=TRUE USE_JDK_INFLATER=TRUE ' cmd += 'ASSUME_SORTED=true REMOVE_DUPLICATES=false' cmd = cmd.format( bam, From 1337e499f8e22923e167f0f369a767396c114ec2 Mon Sep 17 00:00:00 2001 From: Jin Lee Date: Fri, 30 Nov 2018 14:16:46 -0800 Subject: [PATCH 14/14] remove Jenkinsfile --- Jenkinsfile | 149 ---------------------------------------------------- 1 file changed, 149 deletions(-) delete mode 100644 Jenkinsfile diff --git a/Jenkinsfile b/Jenkinsfile deleted file mode 100644 index eef378e7..00000000 --- 
a/Jenkinsfile +++ /dev/null @@ -1,149 +0,0 @@ -pipeline { - agent none - environment { - QUAY_USER = credentials('quay-robot') - QUAY_PASS = credentials('quay-robot-token') - } - stages { - stage('Tag Non-master') { - agent {label 'master-builder'} - when { not {branch 'master'}} - steps { - // the tag gets built here, and can be referenced in the other stages - script { - TAG = sh([script: "echo quay.io/encode-dcc/chip-seq-pipeline:${env.BRANCH_NAME}_${env.BUILD_NUMBER}", returnStdout: true]).trim() - } - echo "On non-master" - } - - } - - stage('Tag Master') { - agent {label 'master-builder'} - when { branch 'master'} - steps { - // the tag gets built here, and can be referenced in the other stages - script { - TAG = sh([script: "echo quay.io/encode-dcc/chip-seq-pipeline:latest", returnStdout: true]).trim() - } - echo "On non-master" - } - } - - stage('Build-nonmaster') { - agent {label 'slave-w-docker-cromwell-60GB-ebs'} - when { not { branch 'master' } } - steps { - echo "the tag is $TAG" - echo "going to build a docker image now.." - slackSend (color: '#7CFC00', message: "started job: ${env.JOB_NAME}, build number ${env.BUILD_NUMBER} on branch: ${env.BRANCH_NAME}.") - slackSend "The images will be tagged as $TAG" - - // pull the cache template image (the image is going to stay pretty much the same so it is no need to be dynamic) - sh "docker pull quay.io/encode-dcc/chip-seq-pipeline:v1.1.2" - sh "docker login -u=${QUAY_USER} -p=${QUAY_PASS} quay.io" - sh "docker build --cache-from quay.io/encode-dcc/chip-seq-pipeline:v1.1.2 -f docker_image/Dockerfile -t chip-seq-pipeline ." - sh "docker tag chip-seq-pipeline $TAG" - sh "docker push $TAG" - sh "docker logout" - } - } - - stage('Build-master') { - agent {label 'slave-w-docker-cromwell-60GB-ebs'} - when { branch 'master'} - steps { - echo "going to build a docker image now.." 
- slackSend (color: '#7CFC00', message: "started job: ${env.JOB_NAME}, build number ${env.BUILD_NUMBER} on branch: ${env.BRANCH_NAME}.") - slackSend "The images will be tagged as quay.io/encode-dcc/chip-seq-pipeline:latest" - // pull the cache template image (the image is going to stay pretty much the same so it is no need to be dynamic) - sh "docker pull quay.io/encode-dcc/chip-seq-pipeline:v1.1.2" - sh "docker login -u=${QUAY_USER} -p=${QUAY_PASS} quay.io" - sh "docker build --cache-from quay.io/encode-dcc/chip-seq-pipeline:v1.1.2 -f docker_image/Dockerfile -t chip-seq-pipeline ." - sh "docker tag chip-seq-pipeline quay.io/encode-dcc/chip-seq-pipeline:latest" - sh "docker push quay.io/encode-dcc/chip-seq-pipeline:latest" - sh "docker logout" - } - } - - stage('Run-Task-Level-Tests-Non-Master'){ - agent {label 'slave-w-docker-cromwell-60GB-ebs'} - steps{ - sh "cd test/test_task && rm -rf chip-seq-pipeline-test-data && git clone https://github.com/ENCODE-DCC/chip-seq-pipeline-test-data" - sh """cd test/test_task - ./test.sh test_bam2ta.wdl test_bam2ta.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_bam2ta.result.json - ./test.sh test_bwa.wdl test_bwa.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_bwa.result.json - ./test.sh test_choose_ctl.wdl test_choose_ctl.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_choose_ctl.result.json - ./test.sh test_filter.wdl test_filter.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_filter.result.json - ./test.sh test_fingerprint.wdl test_fingerprint.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < 
test_fingerprint.result.json - ./test.sh test_idr.wdl test_idr.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_idr.result.json - ./test.sh test_macs2.wdl test_macs2.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_macs2.result.json - ./test.sh test_merge_fastq.wdl test_merge_fastq.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_merge_fastq.result.json - ./test.sh test_overlap.wdl test_overlap.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_overlap.result.json - ./test.sh test_pool_ta.wdl test_pool_ta.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_pool_ta.result.json - ./test.sh test_reproducibility.wdl test_reproducibility.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_reproducibility.result.json - ./test.sh test_spp.wdl test_spp.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_spp.result.json - ./test.sh test_spr.wdl test_spr.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_spr.result.json - ./test.sh test_trim_fastq.wdl test_trim_fastq.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_trim_fastq.result.json - ./test.sh test_xcor.wdl test_xcor.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'match_overall']))" < test_xcor.result.json - """ - } - } - - 
stage('Run-Workflow-Level-Quick-Tests'){ - agent {label 'master-builder'} - when {not {branch 'master'}} - steps { - echo "running subsampled/chr19_only/paired_end workflow level tests when there is an event on master branch" - //sh """cd test/test_workflow - // ./test_chip.sh ENCSR936XTK_subsampled_chr19_only.json $TAG - // python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'outputs'][u'chip.qc_report.qc_json_match']))" < ENCSR936XTK_subsampled_chr19_only.result.json - // """ - } - } - - stage('Run-Workflow-Level-Full-Tests'){ - agent {label 'slave-w-docker-cromwell-60GB-ebs'} - when { branch 'master'} - steps { - echo "running subsampled/paired_end workflow level tests when there is an event on master branch" - sh """ - ./test_chip.sh ENCSR936XTK_subsampled.json $TAG - python -c "import sys; import json; data=json.loads(sys.stdin.read()); sys.exit(int(not data[u'outputs'][u'chip.qc_report.qc_json_match']))" < ENCSR936XTK_subsampled.result.json - """ - } - } - } - - post { - success { - echo "Post build actions that run on success" - slackSend "Job ${env.JOB_NAME}, build number ${env.BUILD_NUMBER} on branch ${env.BRANCH_NAME} finished with" - slackSend (color: '#7cfc00', message: "SUCCESS") - slackSend "For details, visit ${env.BUILD_URL}" - } - failure { - echo "Post build actions that run on failure" - slackSend "Job ${env.JOB_NAME}, build number ${env.BUILD_NUMBER} on branch ${env.BRANCH_NAME} finished with" - slackSend (color: '#FF0000', message: "FAILURE") - slackSend "For details, visit ${env.BUILD_URL}" - - } - - } -}