From b49b1758083c3d94115af23e9ba40ffe17692200 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Tue, 24 Mar 2020 13:41:55 +0000 Subject: [PATCH 01/15] working --- workflow/Snakefile | 2 + workflow/envs/gridss.yaml | 12 + workflow/rules/gridss.smk | 446 ++++++++++++++++++++++++++++++++++++ workflow/rules/trimming.smk | 2 +- workflow/rules/utils.smk | 4 +- 5 files changed, 463 insertions(+), 3 deletions(-) create mode 100644 workflow/envs/gridss.yaml create mode 100644 workflow/rules/gridss.smk diff --git a/workflow/Snakefile b/workflow/Snakefile index c2fa71386..e907e42ef 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -22,6 +22,7 @@ include: "rules/tmb.smk" include: "rules/vega.smk" include: "rules/utils.smk" include: "rules/oncoprint.smk" +include: "rules/gridss.smk" groups = samples["group"].unique() @@ -32,6 +33,7 @@ if is_activated("oncoprint/stratify"): rule all: input: + expand("results/gridss_vcf/{group}.vcf", group=groups), get_final_output(), get_tmb_targets(), expand("results/plots/oncoprint/{batch}.{event}.pdf", diff --git a/workflow/envs/gridss.yaml b/workflow/envs/gridss.yaml new file mode 100644 index 000000000..3f406d952 --- /dev/null +++ b/workflow/envs/gridss.yaml @@ -0,0 +1,12 @@ +channels: + - conda-forge + - bioconda + - r +dependencies: + - samtools=1.1.0 + - ldc=1.13 + - sambamba=0.7.1 + - openjdk=11.0.1 + - r=3.6.0 + - bwa=0.7.17 + - gridss=2.8 diff --git a/workflow/rules/gridss.smk b/workflow/rules/gridss.smk new file mode 100644 index 000000000..2582f491b --- /dev/null +++ b/workflow/rules/gridss.smk @@ -0,0 +1,446 @@ +#configfile: "config/config.yaml" +#include: "rules/common.smk" + +#reference = "../strling/ref_bwa/GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set.fna" + +jvm_args = f"-Dreference_fasta=results/refs/genome.fasta -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=true -Dsamjdk.buffer_size=4194304" + +group_names = samples["group"].unique() +sample_names = samples.sample_name.values + +sample_constraint = "|".join(sample_names) +group_constraint = "|".join(group_names) + +# rule all: +# input: +# expand("results/gridss_vcf/group.{group}.vcf", group=group_names) + + +rule GridssCollectMetrics: + input: + #bam="{sample.bam}" + bam="results/recal/{sample}.sorted.bam" + output: + insert_size_metrics="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_metrics", + histogram="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_histogram.pdf" + params: + tmp_dir="tmp/{sample}.sorted.bam.gridss.working", + prefix="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam", + maxcoverage=50000, + metricsrecords=10000000, + picardoptions="" + log: + "log/collect_gridss_metrics.{sample}.log" + conda: + "../envs/gridss.yaml" + wildcard_constraints: + sample=sample_constraint + shell: + """ +gridss gridss.analysis.CollectGridssMetrics \ +{jvm_args} \ +TMP_DIR={params.tmp_dir} \ +ASSUME_SORTED=true \ +I={input.bam} \ +O={params.prefix} \ +THRESHOLD_COVERAGE={params.maxcoverage} \ +FILE_EXTENSION=null \ +GRIDSS_PROGRAM=null \ +PROGRAM=null \ +PROGRAM=CollectInsertSizeMetrics \ +STOP_AFTER={params.metricsrecords} \ +{params.picardoptions} 2> {log} + """ + +#tmp/EPF-BUR-012-013.bam.gridss.working/tmp.EPF-BUR-012-013.bam.insert_size_metrics + +rule GridssCollectMetricsAndExtractSVReads: + threads: + 50 + input: + bam="results/recal/{sample}.sorted.bam", + insert_size_metrics="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_metrics", + output: + sv_metrics="tmp/{sample}.sorted.bam.gridss.working/{sample}.bam.sv_metrics", + namedsorted_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.namedsorted.bam", + params: + dir="tmp/{sample}.sorted.bam.gridss.working", + prefix="tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam", + tmp_sort="tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.namedsort", + maxcoverage=50000, + picardoptions="", + conda: + "../envs/gridss.yaml" + wildcard_constraints: + sample=sample_constraint + shell: + """ +gridss gridss.CollectGridssMetricsAndExtractSVReads \ +{jvm_args} \ +TMP_DIR={params.dir} \ +ASSUME_SORTED=true \ +I={input.bam} \ +O={params.prefix} \ +THRESHOLD_COVERAGE={params.maxcoverage} \ +FILE_EXTENSION=null \ +GRIDSS_PROGRAM=null \ +GRIDSS_PROGRAM=CollectCigarMetrics \ +GRIDSS_PROGRAM=CollectMapqMetrics \ +GRIDSS_PROGRAM=CollectTagMetrics \ +GRIDSS_PROGRAM=CollectIdsvMetrics \ +GRIDSS_PROGRAM=ReportThresholdCoverage \ +PROGRAM=null \ +PROGRAM=CollectInsertSizeMetrics \ +SV_OUTPUT=/dev/stdout \ +COMPRESSION_LEVEL=0 \ +METRICS_OUTPUT={output.sv_metrics} \ +INSERT_SIZE_METRICS={input.insert_size_metrics} \ +UNMAPPED_READS=false \ +MIN_CLIP_LENGTH=5 \ +INCLUDE_DUPLICATES=true \ +{params.picardoptions} \ +| samtools sort \ +-n \ +-m 100G \ +-T {params.tmp_sort} \ +-Obam \ +-o {output.namedsorted_bam} \ +-@ {threads} \ +/dev/stdin + """ + + +rule GridssComputeSamTags: + threads: + 3 + input: + ref="results/refs/genome.fasta", + idx=rules.bwa_index.output, + namedsorted_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.namedsorted.bam", + output: + coordinate_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.coordinate.bam" + params: + working_dir="tmp", + tmp_sort="tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.coordinate-tmp", + dir="tmp/{sample}.sorted.bam.gridss.working", + picardoptions="", + conda: + "../envs/gridss.yaml" + wildcard_constraints: + sample=sample_constraint + shell: + """ +gridss gridss.ComputeSamTags \ +{jvm_args} \ +TMP_DIR={params.dir} \ +WORKING_DIR="{params.working_dir}" \ +REFERENCE_SEQUENCE={input.ref} \ +COMPRESSION_LEVEL=0 \ +I={input.namedsorted_bam} \ +O=/dev/stdout \ +RECALCULATE_SA_SUPPLEMENTARY=true \ +SOFTEN_HARD_CLIPS=true \ +FIX_MATE_INFORMATION=true \ +FIX_DUPLICATE_FLAG=true \ +FIX_SA=true \ +FIX_MISSING_HARD_CLIP=true \ +TAGS=null \ +TAGS=NM \ +TAGS=SA \ +TAGS=R2 \ +TAGS=Q2 \ +TAGS=MC \ +TAGS=MQ \ +ASSUME_SORTED=true \ +{params.picardoptions} \ +| samtools sort \ +-m 100G \ +-T {params.tmp_sort} \ +-Obam \ +-o {output.coordinate_bam} \ +-@ {threads} \ +/dev/stdin + """ + + +rule GridssSoftClipsToSplitReads: + threads: + 100 + input: + ref="results/refs/genome.fasta", + idx=rules.bwa_index.output, + coordinate_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.coordinate.bam" + output: + primary_sv="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.primary.sv.bam", + supp_sv="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.supp.sv.bam", + params: + working_dir="tmp", + picardoptions="", + conda: + "../envs/gridss.yaml" + wildcard_constraints: + sample=sample_constraint + shell: + """ +gridss gridss.SoftClipsToSplitReads \ +{jvm_args} \ +-Xmx20G \ +-Dsamjdk.create_index=false \ +-Dgridss.gridss.output_to_temp_file=true \ +TMP_DIR={params.working_dir} \ +WORKING_DIR={params.working_dir} \ +REFERENCE_SEQUENCE={input.ref} \ +I={input.coordinate_bam} \ +O={output.primary_sv} \ +OUTPUT_UNORDERED_RECORDS={output.supp_sv} \ +WORKER_THREADS={threads} \ +{params.picardoptions} + """ + + +rule GridssSortSv: + threads: + 64 + input: + supp_sv="{x}.sc2sr.supp.sv.bam" + output: + supp_sv="{x}.sc2sr.suppsorted.sv.bam" + params: + tmp_sort="{x}.suppsorted.sv-tmp", + conda: + "../envs/gridss.yaml" + shell: + "samtools sort -m 100G -@ {threads} -T {params.tmp_sort} -Obam -o {output.supp_sv} {input.supp_sv}" + + +rule GridssMergeSupported: + threads: + 64 + input: + primary_sv="{p}/tmp.{x}.bam.sc2sr.primary.sv.bam", + supp_sv="{p}/tmp.{x}.bam.sc2sr.suppsorted.sv.bam", + output: + merged="{p}/{x}.bam.sv.bam" + conda: + "../envs/gridss.yaml" + # wildcard_constraints: + # x=sample_constraint + "|" + group_constraint + shell: + "samtools merge -@ {threads} {output.merged} {input.primary_sv} {input.supp_sv}" + + +rule GridssAssembleBreakends: + threads: + 64 + input: + ref="results/refs/genome.fasta", + idx=rules.bwa_index.output, + bams=lambda wc: expand("results/recal/{sample}.sorted.{ending}", sample=get_group_samples(wc), ending=["bam", "bam.bai"]), + svs=lambda wc: expand("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv.{ending}", sample=get_group_samples(wc), ending=["bam", "bam.bai"]) + output: + assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam" + conda: + "../envs/gridss.yaml" + params: + jobindex="0", + jobnodes="1", + working_dir="tmp", + picardoptions="", + input_args=lambda wc: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wc))) + wildcard_constraints: + group=group_constraint + shell: + """ +gridss gridss.AssembleBreakends \ +-Dgridss.gridss.output_to_temp_file=true \ +{jvm_args} \ +-Xmx100g \ +JOB_INDEX={params.jobindex} \ +JOB_NODES={params.jobnodes} \ +TMP_DIR={params.working_dir} \ +WORKING_DIR={params.working_dir} \ +REFERENCE_SEQUENCE={input.ref} \ +WORKER_THREADS={threads} \ +O={output.assembly} \ +{params.input_args} \ +{params.picardoptions} \ + """ + + +rule GridssCollectMetricsGroup: + input: + assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam" + output: + idsv_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.idsv_metrics", + mapq_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.mapq_metrics", + tag_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.tag_metrics", + params: + prefix="tmp/group.{group}.bam.gridss.working/group.{group}.bam", + working_dir="tmp/group.{group}.bam.gridss.working", + maxcoverage=50000, + picardoptions="", + conda: + "../envs/gridss.yaml" + wildcard_constraints: + group=group_constraint + shell: + """ +gridss gridss.analysis.CollectGridssMetrics \ +{jvm_args} \ +I={input.assembly} \ +O={params.prefix} \ +THRESHOLD_COVERAGE={params.maxcoverage} \ +TMP_DIR={params.working_dir} \ +FILE_EXTENSION=null \ +GRIDSS_PROGRAM=null \ +GRIDSS_PROGRAM=CollectCigarMetrics \ +GRIDSS_PROGRAM=CollectMapqMetrics \ +GRIDSS_PROGRAM=CollectTagMetrics \ +GRIDSS_PROGRAM=CollectIdsvMetrics \ +GRIDSS_PROGRAM=ReportThresholdCoverage \ +PROGRAM=null \ +PROGRAM=CollectAlignmentSummaryMetrics \ +{params.picardoptions} + """ + + +rule GridssSoftClipsToSplitReadsAssembly: + threads: + 64 + input: + ref="results/refs/genome.fasta", + idx=rules.bwa_index.output, + assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", + idsv_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.idsv_metrics", + mapq_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.mapq_metrics", + tag_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.tag_metrics", + output: + assembly_primary_sv="tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.primary.sv.bam", + assembly_supp_sv="tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.supp.sv.bam", + params: + working_dir="tmp", + picardoptions="", + conda: + "../envs/gridss.yaml" + wildcard_constraints: + group=group_constraint + shell: + """ +gridss gridss.SoftClipsToSplitReads \ +{jvm_args} \ +-Xmx50G \ +-Dgridss.async.buffersize=16 \ +-Dsamjdk.create_index=false \ +-Dgridss.gridss.output_to_temp_file=true \ +TMP_DIR={params.working_dir} \ +WORKING_DIR={params.working_dir} \ +REFERENCE_SEQUENCE={input.ref} \ +WORKER_THREADS={threads} \ +I={input.assembly} \ +O={output.assembly_primary_sv} \ +OUTPUT_UNORDERED_RECORDS={output.assembly_supp_sv} \ +REALIGN_ENTIRE_READ=true \ +{params.picardoptions} + """ + + +rule GridssIdentifyVariants: + threads: + 64 + input: + bams=lambda wc: expand("results/recal/{sample}.sorted.bam", sample=get_group_samples(wc)), + assembly_sv="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam", + assembly_sv_index="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam.bai", + assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", + ref="results/refs/genome.fasta", + idx=rules.bwa_index.output, + output: + unallocated="tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf" + params: + input_args=lambda wc: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wc))), + working_dir="tmp", + picardoptions="" + conda: + "../envs/gridss.yaml" + wildcard_constraints: + group=group_constraint + shell: + """ +gridss gridss.IdentifyVariants \ +-Dgridss.output_to_temp_file=true \ +{jvm_args} \ +-Xmx50g \ +TMP_DIR={params.working_dir} \ +WORKING_DIR={params.working_dir} \ +REFERENCE_SEQUENCE={input.ref} \ +WORKER_THREADS={threads} \ +{params.input_args} \ +ASSEMBLY={input.assembly} \ +OUTPUT_VCF={output.unallocated} \ + """ + +rule GridssAnnotateVariants: + threads: + 7 + input: + bams=lambda wc: expand("results/recal/{sample}.sorted.bam", sample=get_group_samples(wc)), + assembly_sv="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam", + assembly_sv_index="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam.bai", + unallocated="tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf", + ref="results/refs/genome.fasta", + idx=rules.bwa_index.output, + assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", + output: + allocated="tmp/group.{group}.vcf.gridss.working/group.{group}.allocated.vcf", + params: + input_args=lambda wc: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wc))), + working_dir="tmp", + picardoptions="" + conda: + "../envs/gridss.yaml" + wildcard_constraints: + group=group_constraint + shell: """ +gridss gridss.AnnotateVariants \ +-Dgridss.output_to_temp_file=true \ +{jvm_args} \ +-Xmx50g \ +TMP_DIR={params.working_dir} \ +WORKING_DIR={params.working_dir} \ +REFERENCE_SEQUENCE={input.ref} \ +WORKER_THREADS={threads} \ +{params.input_args} \ +ASSEMBLY={input.assembly} \ +INPUT_VCF={input.unallocated} \ +OUTPUT_VCF={output.allocated} \ +{params.picardoptions} + """ + +rule GridssAnnotateUntemplatedSequence: + input: + allocated="tmp/group.{group}.vcf.gridss.working/group.{group}.allocated.vcf", + ref="results/refs/genome.fasta", + idx=rules.bwa_index.output, + output: + vcf="results/gridss_vcf/{group}.vcf" + params: + working_dir="tmp", + picardoptions="" + conda: + "../envs/gridss.yaml" + threads: + 100 + shell: + """ +gridss gridss.AnnotateUntemplatedSequence \ +-Dgridss.output_to_temp_file=true \ +-Xmx4g \ +{jvm_args} \ +TMP_DIR={params.working_dir} \ +WORKING_DIR={params.working_dir} \ +REFERENCE_SEQUENCE={input.ref} \ +WORKER_THREADS={threads} \ +INPUT={input.allocated} \ +OUTPUT={output.vcf} \ +{params.picardoptions} + """ diff --git a/workflow/rules/trimming.smk b/workflow/rules/trimming.smk index b50528927..1f672538e 100644 --- a/workflow/rules/trimming.smk +++ b/workflow/rules/trimming.smk @@ -42,7 +42,7 @@ rule cutadapt_pipe: output: pipe('pipe/cutadapt/{sample}-{unit}.{fq}.{ending}') wildcard_constraints: - dataset="fastq|fastq.gz" + ending="fastq|fastq.gz" threads: 0 shell: "cat {input} > {output}" diff --git a/workflow/rules/utils.smk b/workflow/rules/utils.smk index 2d03198ca..b82dacb83 100644 --- a/workflow/rules/utils.smk +++ b/workflow/rules/utils.smk @@ -11,8 +11,8 @@ rule bcf_index: rule bam_index: input: - "{prefix}.sorted.bam" + "{prefix}.bam" output: - "{prefix}.sorted.bam.bai" + "{prefix}.bam.bai" wrapper: "0.39.0/bio/samtools/index" \ No newline at end of file From 7c9f6c93646ec3a320d801c64dbb943d29b365cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Tue, 24 Mar 2020 13:49:52 +0000 Subject: [PATCH 02/15] rename wc to wildcards --- workflow/rules/gridss.smk | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/workflow/rules/gridss.smk b/workflow/rules/gridss.smk index 2582f491b..7c800b5d0 100644 --- a/workflow/rules/gridss.smk +++ b/workflow/rules/gridss.smk @@ -235,8 +235,8 @@ rule GridssAssembleBreakends: input: ref="results/refs/genome.fasta", idx=rules.bwa_index.output, - bams=lambda wc: expand("results/recal/{sample}.sorted.{ending}", sample=get_group_samples(wc), ending=["bam", "bam.bai"]), - svs=lambda wc: expand("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv.{ending}", sample=get_group_samples(wc), ending=["bam", "bam.bai"]) + bams=lambda wildcards: expand("results/recal/{sample}.sorted.{ending}", sample=get_group_samples(wildcards), ending=["bam", "bam.bai"]), + svs=lambda wildcards: expand("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv.{ending}", sample=get_group_samples(wildcards), ending=["bam", "bam.bai"]) output: assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam" conda: @@ -246,7 +246,7 @@ rule GridssAssembleBreakends: jobnodes="1", working_dir="tmp", picardoptions="", - input_args=lambda wc: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wc))) + input_args=lambda wildcards: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards))) wildcard_constraints: group=group_constraint shell: @@ -348,7 +348,7 @@ rule GridssIdentifyVariants: threads: 64 input: - bams=lambda wc: expand("results/recal/{sample}.sorted.bam", sample=get_group_samples(wc)), + bams=lambda wildcards: expand("results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards)), assembly_sv="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam", assembly_sv_index="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam.bai", assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", @@ -357,7 +357,7 @@ rule GridssIdentifyVariants: output: unallocated="tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf" params: - input_args=lambda wc: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wc))), + input_args=lambda wildcards: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards))), working_dir="tmp", picardoptions="" conda: @@ -383,7 +383,7 @@ rule GridssAnnotateVariants: threads: 7 input: - bams=lambda wc: expand("results/recal/{sample}.sorted.bam", sample=get_group_samples(wc)), + bams=lambda wildcards: expand("results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards)), assembly_sv="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam", assembly_sv_index="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam.bai", unallocated="tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf", @@ -393,7 +393,7 @@ rule GridssAnnotateVariants: output: allocated="tmp/group.{group}.vcf.gridss.working/group.{group}.allocated.vcf", params: - input_args=lambda wc: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wc))), + input_args=lambda wildcards: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards))), working_dir="tmp", picardoptions="" conda: From c36a039a9262329b4896eb8cfd611a659df458ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Tue, 24 Mar 2020 13:59:28 +0000 Subject: [PATCH 03/15] reformatting --- workflow/rules/gridss.smk | 70 +++++++++++++++++++-------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/workflow/rules/gridss.smk b/workflow/rules/gridss.smk index 7c800b5d0..e9dcec9f5 100644 --- a/workflow/rules/gridss.smk +++ b/workflow/rules/gridss.smk @@ -23,14 +23,14 @@ rule GridssCollectMetrics: output: insert_size_metrics="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_metrics", histogram="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_histogram.pdf" + log: + "log/collect_gridss_metrics.{sample}.log" params: tmp_dir="tmp/{sample}.sorted.bam.gridss.working", prefix="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam", maxcoverage=50000, metricsrecords=10000000, picardoptions="" - log: - "log/collect_gridss_metrics.{sample}.log" conda: "../envs/gridss.yaml" wildcard_constraints: @@ -55,8 +55,6 @@ STOP_AFTER={params.metricsrecords} \ #tmp/EPF-BUR-012-013.bam.gridss.working/tmp.EPF-BUR-012-013.bam.insert_size_metrics rule GridssCollectMetricsAndExtractSVReads: - threads: - 50 input: bam="results/recal/{sample}.sorted.bam", insert_size_metrics="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_metrics", @@ -73,6 +71,8 @@ rule GridssCollectMetricsAndExtractSVReads: "../envs/gridss.yaml" wildcard_constraints: sample=sample_constraint + threads: + 50 shell: """ gridss gridss.CollectGridssMetricsAndExtractSVReads \ @@ -111,14 +111,14 @@ INCLUDE_DUPLICATES=true \ rule GridssComputeSamTags: - threads: - 3 input: ref="results/refs/genome.fasta", idx=rules.bwa_index.output, namedsorted_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.namedsorted.bam", output: coordinate_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.coordinate.bam" + wildcard_constraints: + sample=sample_constrain params: working_dir="tmp", tmp_sort="tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.coordinate-tmp", @@ -126,8 +126,8 @@ rule GridssComputeSamTags: picardoptions="", conda: "../envs/gridss.yaml" - wildcard_constraints: - sample=sample_constraint + threads: + 3 shell: """ gridss gridss.ComputeSamTags \ @@ -164,8 +164,6 @@ ASSUME_SORTED=true \ rule GridssSoftClipsToSplitReads: - threads: - 100 input: ref="results/refs/genome.fasta", idx=rules.bwa_index.output, @@ -173,13 +171,15 @@ rule GridssSoftClipsToSplitReads: output: primary_sv="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.primary.sv.bam", supp_sv="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.supp.sv.bam", + wildcard_constraints: + sample=sample_constraint params: working_dir="tmp", picardoptions="", conda: "../envs/gridss.yaml" - wildcard_constraints: - sample=sample_constraint + threads: + 100 shell: """ gridss gridss.SoftClipsToSplitReads \ @@ -199,8 +199,6 @@ WORKER_THREADS={threads} \ rule GridssSortSv: - threads: - 64 input: supp_sv="{x}.sc2sr.supp.sv.bam" output: @@ -209,13 +207,15 @@ rule GridssSortSv: tmp_sort="{x}.suppsorted.sv-tmp", conda: "../envs/gridss.yaml" + threads: + 100 shell: "samtools sort -m 100G -@ {threads} -T {params.tmp_sort} -Obam -o {output.supp_sv} {input.supp_sv}" rule GridssMergeSupported: threads: - 64 + 100 input: primary_sv="{p}/tmp.{x}.bam.sc2sr.primary.sv.bam", supp_sv="{p}/tmp.{x}.bam.sc2sr.suppsorted.sv.bam", @@ -230,8 +230,6 @@ rule GridssMergeSupported: rule GridssAssembleBreakends: - threads: - 64 input: ref="results/refs/genome.fasta", idx=rules.bwa_index.output, @@ -239,16 +237,18 @@ rule GridssAssembleBreakends: svs=lambda wildcards: expand("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv.{ending}", sample=get_group_samples(wildcards), ending=["bam", "bam.bai"]) output: assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam" - conda: - "../envs/gridss.yaml" + wildcard_constraints: + group=group_constraint params: jobindex="0", jobnodes="1", working_dir="tmp", picardoptions="", input_args=lambda wildcards: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards))) - wildcard_constraints: - group=group_constraint + conda: + "../envs/gridss.yaml" + threads: + 100 shell: """ gridss gridss.AssembleBreakends \ @@ -274,6 +274,8 @@ rule GridssCollectMetricsGroup: idsv_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.idsv_metrics", mapq_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.mapq_metrics", tag_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.tag_metrics", + wildcard_constraints: + group=group_constraint params: prefix="tmp/group.{group}.bam.gridss.working/group.{group}.bam", working_dir="tmp/group.{group}.bam.gridss.working", @@ -281,8 +283,6 @@ rule GridssCollectMetricsGroup: picardoptions="", conda: "../envs/gridss.yaml" - wildcard_constraints: - group=group_constraint shell: """ gridss gridss.analysis.CollectGridssMetrics \ @@ -305,8 +305,6 @@ PROGRAM=CollectAlignmentSummaryMetrics \ rule GridssSoftClipsToSplitReadsAssembly: - threads: - 64 input: ref="results/refs/genome.fasta", idx=rules.bwa_index.output, @@ -317,13 +315,15 @@ rule GridssSoftClipsToSplitReadsAssembly: output: assembly_primary_sv="tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.primary.sv.bam", assembly_supp_sv="tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.supp.sv.bam", + wildcard_constraints: + group=group_constraint params: working_dir="tmp", picardoptions="", conda: "../envs/gridss.yaml" - wildcard_constraints: - group=group_constraint + threads: + 100 shell: """ gridss gridss.SoftClipsToSplitReads \ @@ -345,8 +345,6 @@ REALIGN_ENTIRE_READ=true \ rule GridssIdentifyVariants: - threads: - 64 input: bams=lambda wildcards: expand("results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards)), assembly_sv="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam", @@ -356,14 +354,16 @@ rule GridssIdentifyVariants: idx=rules.bwa_index.output, output: unallocated="tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf" + wildcard_constraints: + group=group_constraint params: input_args=lambda wildcards: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards))), working_dir="tmp", picardoptions="" conda: "../envs/gridss.yaml" - wildcard_constraints: - group=group_constraint + threads: + 100 shell: """ gridss gridss.IdentifyVariants \ @@ -380,8 +380,6 @@ OUTPUT_VCF={output.unallocated} \ """ rule GridssAnnotateVariants: - threads: - 7 input: bams=lambda wildcards: expand("results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards)), assembly_sv="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam", @@ -392,14 +390,16 @@ rule GridssAnnotateVariants: assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", output: allocated="tmp/group.{group}.vcf.gridss.working/group.{group}.allocated.vcf", + wildcard_constraints: + group=group_constraint params: input_args=lambda wildcards: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards))), working_dir="tmp", picardoptions="" conda: "../envs/gridss.yaml" - wildcard_constraints: - group=group_constraint + threads: + 7 shell: """ gridss gridss.AnnotateVariants \ -Dgridss.output_to_temp_file=true \ From d94f67b1145ba61c237496ea4c2211e6f912d0f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Tue, 24 Mar 2020 14:11:50 +0000 Subject: [PATCH 04/15] fix --- workflow/rules/gridss.smk | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/workflow/rules/gridss.smk b/workflow/rules/gridss.smk index e9dcec9f5..4b2b8a919 100644 --- a/workflow/rules/gridss.smk +++ b/workflow/rules/gridss.smk @@ -24,7 +24,7 @@ rule GridssCollectMetrics: insert_size_metrics="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_metrics", histogram="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_histogram.pdf" log: - "log/collect_gridss_metrics.{sample}.log" + "log/gridss/collect_metrics/{sample}.log" params: tmp_dir="tmp/{sample}.sorted.bam.gridss.working", prefix="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam", @@ -61,6 +61,8 @@ rule GridssCollectMetricsAndExtractSVReads: output: sv_metrics="tmp/{sample}.sorted.bam.gridss.working/{sample}.bam.sv_metrics", namedsorted_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.namedsorted.bam", + log: + "log/gridss/collect_metrics_and_extract_sv_reads/{sample}.log" params: dir="tmp/{sample}.sorted.bam.gridss.working", prefix="tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam", @@ -106,7 +108,7 @@ INCLUDE_DUPLICATES=true \ -Obam \ -o {output.namedsorted_bam} \ -@ {threads} \ -/dev/stdin +/dev/stdin 2> {log} """ @@ -118,7 +120,7 @@ rule GridssComputeSamTags: output: coordinate_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.coordinate.bam" wildcard_constraints: - sample=sample_constrain + sample=sample_constraint params: working_dir="tmp", tmp_sort="tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.coordinate-tmp", From 90d063107fa1af0664f77802e4ff7e84add89d3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Tue, 24 Mar 2020 14:41:06 +0000 Subject: [PATCH 05/15] logs --- workflow/rules/gridss.smk | 59 ++++++++++++++++++++++----------------- 1 file changed, 33 insertions(+), 26 deletions(-) diff --git a/workflow/rules/gridss.smk b/workflow/rules/gridss.smk index 4b2b8a919..d3d91fd7c 100644 --- a/workflow/rules/gridss.smk +++ b/workflow/rules/gridss.smk @@ -35,8 +35,7 @@ rule GridssCollectMetrics: "../envs/gridss.yaml" wildcard_constraints: sample=sample_constraint - shell: - """ + shell: """ gridss gridss.analysis.CollectGridssMetrics \ {jvm_args} \ TMP_DIR={params.tmp_dir} \ @@ -75,8 +74,7 @@ rule GridssCollectMetricsAndExtractSVReads: sample=sample_constraint threads: 50 - shell: - """ + shell: """ gridss gridss.CollectGridssMetricsAndExtractSVReads \ {jvm_args} \ TMP_DIR={params.dir} \ @@ -119,6 +117,8 @@ rule GridssComputeSamTags: namedsorted_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.namedsorted.bam", output: coordinate_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.coordinate.bam" + log: + "log/gridss/compute_sam_tags/{sample}.log" wildcard_constraints: sample=sample_constraint params: @@ -130,8 +130,7 @@ rule GridssComputeSamTags: "../envs/gridss.yaml" threads: 3 - shell: - """ + shell: """ gridss gridss.ComputeSamTags \ {jvm_args} \ TMP_DIR={params.dir} \ @@ -161,7 +160,7 @@ ASSUME_SORTED=true \ -Obam \ -o {output.coordinate_bam} \ -@ {threads} \ -/dev/stdin +/dev/stdin 2> {log} """ @@ -173,6 +172,8 @@ rule GridssSoftClipsToSplitReads: output: primary_sv="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.primary.sv.bam", supp_sv="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.supp.sv.bam", + log: + "log/gridss/soft_clips_to_split_reads/{sample}.log" wildcard_constraints: sample=sample_constraint params: @@ -182,8 +183,7 @@ rule GridssSoftClipsToSplitReads: "../envs/gridss.yaml" threads: 100 - shell: - """ + shell: """ gridss gridss.SoftClipsToSplitReads \ {jvm_args} \ -Xmx20G \ @@ -196,7 +196,7 @@ I={input.coordinate_bam} \ O={output.primary_sv} \ OUTPUT_UNORDERED_RECORDS={output.supp_sv} \ WORKER_THREADS={threads} \ -{params.picardoptions} +{params.picardoptions} 2> {log} """ @@ -239,6 +239,8 @@ rule GridssAssembleBreakends: svs=lambda wildcards: expand("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv.{ending}", sample=get_group_samples(wildcards), ending=["bam", "bam.bai"]) output: assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam" + log: + "log/gridss/assemble_breakends/{group}.log" wildcard_constraints: group=group_constraint params: @@ -251,8 +253,7 @@ rule GridssAssembleBreakends: "../envs/gridss.yaml" threads: 100 - shell: - """ + shell: """ gridss gridss.AssembleBreakends \ -Dgridss.gridss.output_to_temp_file=true \ {jvm_args} \ @@ -265,7 +266,7 @@ REFERENCE_SEQUENCE={input.ref} \ WORKER_THREADS={threads} \ O={output.assembly} \ {params.input_args} \ -{params.picardoptions} \ +{params.picardoptions} 2> {log} """ @@ -276,6 +277,8 @@ rule GridssCollectMetricsGroup: idsv_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.idsv_metrics", mapq_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.mapq_metrics", tag_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.tag_metrics", + log: + "log/gridss/collect_metrics_group/{group}.log" wildcard_constraints: group=group_constraint params: @@ -285,8 +288,7 @@ rule GridssCollectMetricsGroup: picardoptions="", conda: "../envs/gridss.yaml" - shell: - """ + shell: """ gridss gridss.analysis.CollectGridssMetrics \ {jvm_args} \ I={input.assembly} \ @@ -302,7 +304,7 @@ GRIDSS_PROGRAM=CollectIdsvMetrics \ GRIDSS_PROGRAM=ReportThresholdCoverage \ PROGRAM=null \ PROGRAM=CollectAlignmentSummaryMetrics \ -{params.picardoptions} +{params.picardoptions} 2> {log} """ @@ -317,6 +319,8 @@ rule GridssSoftClipsToSplitReadsAssembly: output: assembly_primary_sv="tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.primary.sv.bam", assembly_supp_sv="tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.supp.sv.bam", + log: + "log/gridss/soft_clips_to_split_reads_assembly/{group}.log" wildcard_constraints: group=group_constraint params: @@ -326,8 +330,7 @@ rule GridssSoftClipsToSplitReadsAssembly: "../envs/gridss.yaml" threads: 100 - shell: - """ + shell: """ gridss gridss.SoftClipsToSplitReads \ {jvm_args} \ -Xmx50G \ @@ -342,7 +345,7 @@ I={input.assembly} \ O={output.assembly_primary_sv} \ OUTPUT_UNORDERED_RECORDS={output.assembly_supp_sv} \ REALIGN_ENTIRE_READ=true \ -{params.picardoptions} +{params.picardoptions} >2 {log} """ @@ -356,6 +359,8 @@ rule GridssIdentifyVariants: idx=rules.bwa_index.output, output: unallocated="tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf" + log: + "log/gridss/indentify_variants/{group}.log" wildcard_constraints: group=group_constraint params: @@ -366,8 +371,7 @@ rule GridssIdentifyVariants: "../envs/gridss.yaml" threads: 100 - shell: - """ + shell: """ gridss gridss.IdentifyVariants \ -Dgridss.output_to_temp_file=true \ {jvm_args} \ @@ -378,7 +382,7 @@ REFERENCE_SEQUENCE={input.ref} \ WORKER_THREADS={threads} \ {params.input_args} \ ASSEMBLY={input.assembly} \ -OUTPUT_VCF={output.unallocated} \ +OUTPUT_VCF={output.unallocated} 2> {log} """ rule GridssAnnotateVariants: @@ -392,6 +396,8 @@ rule GridssAnnotateVariants: assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", output: allocated="tmp/group.{group}.vcf.gridss.working/group.{group}.allocated.vcf", + log: + "log/gridss/annotate_variants/{group}.log" wildcard_constraints: group=group_constraint params: @@ -415,7 +421,7 @@ WORKER_THREADS={threads} \ ASSEMBLY={input.assembly} \ INPUT_VCF={input.unallocated} \ OUTPUT_VCF={output.allocated} \ -{params.picardoptions} +{params.picardoptions} 2> {log} """ rule GridssAnnotateUntemplatedSequence: @@ -425,6 +431,8 @@ rule GridssAnnotateUntemplatedSequence: idx=rules.bwa_index.output, output: vcf="results/gridss_vcf/{group}.vcf" + log: + "log/gridss/annotate_untemplated_sequences/{group}.log" params: working_dir="tmp", picardoptions="" @@ -432,8 +440,7 @@ rule GridssAnnotateUntemplatedSequence: "../envs/gridss.yaml" threads: 100 - shell: - """ + shell: """ gridss gridss.AnnotateUntemplatedSequence \ -Dgridss.output_to_temp_file=true \ -Xmx4g \ @@ -444,5 +451,5 @@ REFERENCE_SEQUENCE={input.ref} \ WORKER_THREADS={threads} \ INPUT={input.allocated} \ OUTPUT={output.vcf} \ -{params.picardoptions} +{params.picardoptions} 2> {log} """ From 2d6422a61ab76f926e861a38d61dc5e385c8d234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Tue, 24 Mar 2020 15:39:34 +0000 Subject: [PATCH 06/15] finish logs --- workflow/Snakefile | 10 +++++----- workflow/rules/gridss.smk | 40 +++++++++++++++++++-------------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index e907e42ef..0de7911e0 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -34,8 +34,8 @@ if is_activated("oncoprint/stratify"): rule all: input: expand("results/gridss_vcf/{group}.vcf", group=groups), - get_final_output(), - get_tmb_targets(), - expand("results/plots/oncoprint/{batch}.{event}.pdf", - event=config["calling"]["fdr-control"]["events"], - batch=batches) + #get_final_output(), + #get_tmb_targets(), + # expand("results/plots/oncoprint/{batch}.{event}.pdf", + # event=config["calling"]["fdr-control"]["events"], + # batch=batches) diff --git a/workflow/rules/gridss.smk b/workflow/rules/gridss.smk index d3d91fd7c..e8657dc60 100644 --- a/workflow/rules/gridss.smk +++ b/workflow/rules/gridss.smk @@ -36,7 +36,7 @@ rule GridssCollectMetrics: wildcard_constraints: sample=sample_constraint shell: """ -gridss gridss.analysis.CollectGridssMetrics \ +(gridss gridss.analysis.CollectGridssMetrics \ {jvm_args} \ TMP_DIR={params.tmp_dir} \ ASSUME_SORTED=true \ @@ -48,7 +48,7 @@ GRIDSS_PROGRAM=null \ PROGRAM=null \ PROGRAM=CollectInsertSizeMetrics \ STOP_AFTER={params.metricsrecords} \ -{params.picardoptions} 2> {log} +{params.picardoptions}) > {log} 2>&1 """ #tmp/EPF-BUR-012-013.bam.gridss.working/tmp.EPF-BUR-012-013.bam.insert_size_metrics @@ -75,7 +75,7 @@ rule GridssCollectMetricsAndExtractSVReads: threads: 50 shell: """ -gridss gridss.CollectGridssMetricsAndExtractSVReads \ +(gridss gridss.CollectGridssMetricsAndExtractSVReads \ {jvm_args} \ TMP_DIR={params.dir} \ ASSUME_SORTED=true \ @@ -106,7 +106,7 @@ INCLUDE_DUPLICATES=true \ -Obam \ -o {output.namedsorted_bam} \ -@ {threads} \ -/dev/stdin 2> {log} +/dev/stdin) > {log} 2>&1 """ @@ -131,7 +131,7 @@ rule GridssComputeSamTags: threads: 3 shell: """ -gridss gridss.ComputeSamTags \ +(gridss gridss.ComputeSamTags \ {jvm_args} \ TMP_DIR={params.dir} \ WORKING_DIR="{params.working_dir}" \ @@ -160,7 +160,7 @@ ASSUME_SORTED=true \ -Obam \ -o {output.coordinate_bam} \ -@ {threads} \ -/dev/stdin 2> {log} +/dev/stdin) > {log} 2>&1 """ @@ -184,7 +184,7 @@ rule GridssSoftClipsToSplitReads: threads: 100 shell: """ -gridss gridss.SoftClipsToSplitReads \ +(gridss gridss.SoftClipsToSplitReads \ {jvm_args} \ -Xmx20G \ -Dsamjdk.create_index=false \ @@ -196,7 +196,7 @@ I={input.coordinate_bam} \ O={output.primary_sv} \ OUTPUT_UNORDERED_RECORDS={output.supp_sv} \ WORKER_THREADS={threads} \ -{params.picardoptions} 2> {log} +{params.picardoptions}) > {log} 2>&1 """ @@ -254,7 +254,7 @@ rule GridssAssembleBreakends: threads: 100 shell: """ -gridss gridss.AssembleBreakends \ +(gridss gridss.AssembleBreakends \ -Dgridss.gridss.output_to_temp_file=true \ {jvm_args} \ -Xmx100g \ @@ -266,7 +266,7 @@ REFERENCE_SEQUENCE={input.ref} \ WORKER_THREADS={threads} \ O={output.assembly} \ {params.input_args} \ -{params.picardoptions} 2> {log} +{params.picardoptions}) > {log} 2>&1 """ @@ -289,7 +289,7 @@ rule GridssCollectMetricsGroup: conda: "../envs/gridss.yaml" shell: """ -gridss gridss.analysis.CollectGridssMetrics \ +(gridss gridss.analysis.CollectGridssMetrics \ {jvm_args} \ I={input.assembly} \ O={params.prefix} \ @@ -304,7 +304,7 @@ GRIDSS_PROGRAM=CollectIdsvMetrics \ GRIDSS_PROGRAM=ReportThresholdCoverage \ PROGRAM=null \ PROGRAM=CollectAlignmentSummaryMetrics \ -{params.picardoptions} 2> {log} +{params.picardoptions}) > {log} 2>&1 """ @@ -331,7 +331,7 @@ rule GridssSoftClipsToSplitReadsAssembly: threads: 100 shell: """ -gridss gridss.SoftClipsToSplitReads \ +(gridss gridss.SoftClipsToSplitReads \ {jvm_args} \ -Xmx50G \ -Dgridss.async.buffersize=16 \ @@ -345,7 +345,7 @@ I={input.assembly} \ O={output.assembly_primary_sv} \ OUTPUT_UNORDERED_RECORDS={output.assembly_supp_sv} \ REALIGN_ENTIRE_READ=true \ -{params.picardoptions} >2 {log} +{params.picardoptions}) > {log} 2>&1 """ @@ -372,7 +372,7 @@ rule GridssIdentifyVariants: threads: 100 shell: """ -gridss gridss.IdentifyVariants \ +(gridss gridss.IdentifyVariants \ -Dgridss.output_to_temp_file=true \ {jvm_args} \ -Xmx50g \ @@ -382,7 +382,7 @@ REFERENCE_SEQUENCE={input.ref} \ WORKER_THREADS={threads} \ {params.input_args} \ ASSEMBLY={input.assembly} \ -OUTPUT_VCF={output.unallocated} 2> {log} +OUTPUT_VCF={output.unallocated}) > {log} 2>&1 """ rule GridssAnnotateVariants: @@ -409,7 +409,7 @@ rule GridssAnnotateVariants: threads: 7 shell: """ -gridss gridss.AnnotateVariants \ +(gridss gridss.AnnotateVariants \ -Dgridss.output_to_temp_file=true \ {jvm_args} \ -Xmx50g \ @@ -421,7 +421,7 @@ WORKER_THREADS={threads} \ ASSEMBLY={input.assembly} \ INPUT_VCF={input.unallocated} \ OUTPUT_VCF={output.allocated} \ -{params.picardoptions} 2> {log} +{params.picardoptions}) > {log} 2>&1 """ rule GridssAnnotateUntemplatedSequence: @@ -441,7 +441,7 @@ rule GridssAnnotateUntemplatedSequence: threads: 100 shell: """ -gridss gridss.AnnotateUntemplatedSequence \ +(gridss gridss.AnnotateUntemplatedSequence \ -Dgridss.output_to_temp_file=true \ -Xmx4g \ {jvm_args} \ @@ -451,5 +451,5 @@ REFERENCE_SEQUENCE={input.ref} \ WORKER_THREADS={threads} \ INPUT={input.allocated} \ OUTPUT={output.vcf} \ -{params.picardoptions} 2> {log} +{params.picardoptions}) > {log} 2>&1 """ From 479a6abbabcd19bfae7fe0d061ee1ee74df2429c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Tue, 24 Mar 2020 16:53:08 +0000 Subject: [PATCH 07/15] more output annotations --- workflow/rules/gridss.smk | 49 +++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/workflow/rules/gridss.smk b/workflow/rules/gridss.smk index e8657dc60..4d9298bed 100644 --- a/workflow/rules/gridss.smk +++ b/workflow/rules/gridss.smk @@ -21,13 +21,13 @@ rule GridssCollectMetrics: #bam="{sample.bam}" bam="results/recal/{sample}.sorted.bam" output: - insert_size_metrics="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_metrics", - histogram="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_histogram.pdf" + insert_size_metrics=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_metrics"), + histogram=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_histogram.pdf") log: "log/gridss/collect_metrics/{sample}.log" params: - tmp_dir="tmp/{sample}.sorted.bam.gridss.working", - prefix="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam", + tmp_dir=temp("tmp/{sample}.sorted.bam.gridss.working"), + prefix=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam"), maxcoverage=50000, metricsrecords=10000000, picardoptions="" @@ -58,14 +58,15 @@ rule GridssCollectMetricsAndExtractSVReads: bam="results/recal/{sample}.sorted.bam", insert_size_metrics="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_metrics", output: - sv_metrics="tmp/{sample}.sorted.bam.gridss.working/{sample}.bam.sv_metrics", - namedsorted_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.namedsorted.bam", + sv_metrics=temp("tmp/{sample}.sorted.bam.gridss.working/{sample}.bam.sv_metrics"), + namedsorted_bam=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.namedsorted.bam"), + metrics=temp(multiext("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam", ".cigar_metrics", ".coverage.blacklist.bed", ".idsv_metrics", ".insert_size_histogram.pdf", ".insert_size_metrics", ".mapq_metrics", ".tag_metrics")), log: "log/gridss/collect_metrics_and_extract_sv_reads/{sample}.log" params: - dir="tmp/{sample}.sorted.bam.gridss.working", - prefix="tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam", - tmp_sort="tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.namedsort", + dir=temp("tmp/{sample}.sorted.bam.gridss.working"), + prefix=temp("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam"), + tmp_sort=temp("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.namedsort"), maxcoverage=50000, picardoptions="", conda: @@ -116,15 +117,15 @@ rule GridssComputeSamTags: idx=rules.bwa_index.output, namedsorted_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.namedsorted.bam", output: - coordinate_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.coordinate.bam" + coordinate_bam=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.coordinate.bam") log: "log/gridss/compute_sam_tags/{sample}.log" wildcard_constraints: sample=sample_constraint params: working_dir="tmp", - tmp_sort="tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.coordinate-tmp", - dir="tmp/{sample}.sorted.bam.gridss.working", + tmp_sort=temp("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.coordinate-tmp"), + dir=temp("tmp/{sample}.sorted.bam.gridss.working"), picardoptions="", conda: "../envs/gridss.yaml" @@ -170,8 +171,8 @@ rule GridssSoftClipsToSplitReads: idx=rules.bwa_index.output, coordinate_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.coordinate.bam" output: - primary_sv="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.primary.sv.bam", - supp_sv="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.supp.sv.bam", + primary_sv=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.primary.sv.bam"), + supp_sv=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.supp.sv.bam"), log: "log/gridss/soft_clips_to_split_reads/{sample}.log" wildcard_constraints: @@ -204,7 +205,7 @@ rule GridssSortSv: input: supp_sv="{x}.sc2sr.supp.sv.bam" output: - supp_sv="{x}.sc2sr.suppsorted.sv.bam" + supp_sv=temp("{x}.sc2sr.suppsorted.sv.bam") params: tmp_sort="{x}.suppsorted.sv-tmp", conda: @@ -222,7 +223,7 @@ rule GridssMergeSupported: primary_sv="{p}/tmp.{x}.bam.sc2sr.primary.sv.bam", supp_sv="{p}/tmp.{x}.bam.sc2sr.suppsorted.sv.bam", output: - merged="{p}/{x}.bam.sv.bam" + merged=temp("{p}/{x}.bam.sv.bam") conda: "../envs/gridss.yaml" # wildcard_constraints: @@ -274,9 +275,9 @@ rule GridssCollectMetricsGroup: input: assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam" output: - idsv_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.idsv_metrics", - mapq_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.mapq_metrics", - tag_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.tag_metrics", + idsv_metrics=temp("tmp/group.{group}.bam.gridss.working/group.{group}.bam.idsv_metrics"), + mapq_metrics=temp("tmp/group.{group}.bam.gridss.working/group.{group}.bam.mapq_metrics"), + tag_metrics=temp("tmp/group.{group}.bam.gridss.working/group.{group}.bam.tag_metrics"), log: "log/gridss/collect_metrics_group/{group}.log" wildcard_constraints: @@ -317,8 +318,8 @@ rule GridssSoftClipsToSplitReadsAssembly: mapq_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.mapq_metrics", tag_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.tag_metrics", output: - assembly_primary_sv="tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.primary.sv.bam", - assembly_supp_sv="tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.supp.sv.bam", + assembly_primary_sv=temp("tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.primary.sv.bam"), + assembly_supp_sv=temp("tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.supp.sv.bam"), log: "log/gridss/soft_clips_to_split_reads_assembly/{group}.log" wildcard_constraints: @@ -356,9 +357,10 @@ rule GridssIdentifyVariants: assembly_sv_index="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam.bai", assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", ref="results/refs/genome.fasta", + svs=lambda wildcards: " ".join(expand("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv.bam", sample=get_group_samples(wildcards))), idx=rules.bwa_index.output, output: - unallocated="tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf" + unallocated=temp("tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf") log: "log/gridss/indentify_variants/{group}.log" wildcard_constraints: @@ -393,9 +395,10 @@ rule GridssAnnotateVariants: unallocated="tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf", ref="results/refs/genome.fasta", idx=rules.bwa_index.output, + svs=lambda wildcards: " ".join(expand("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv.bam", sample=get_group_samples(wildcards))), assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", output: - allocated="tmp/group.{group}.vcf.gridss.working/group.{group}.allocated.vcf", + allocated=temp("tmp/group.{group}.vcf.gridss.working/group.{group}.allocated.vcf"), log: "log/gridss/annotate_variants/{group}.log" wildcard_constraints: From 10a44d9d2c29aed5ef2c0e7da4c1df6139d436d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Tue, 24 Mar 2020 16:55:34 +0000 Subject: [PATCH 08/15] author --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 49a12e6e6..fbbfa05c6 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ This workflow detects genomic variants with [Delly](https://github.com/dellytool * Felix Mölder (@FelixMoelder) * Johannes Köster (@johanneskoester) +* Christopher Schröder (@christopher-schroeder) ## Usage From e22b8f636a1a580417e4d083d6c26f28fa84ba57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Wed, 25 Mar 2020 11:57:30 +0000 Subject: [PATCH 09/15] config --- .test/config/config.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.test/config/config.yaml b/.test/config/config.yaml index 1c306ab37..3b71b7972 100644 --- a/.test/config/config.yaml +++ b/.test/config/config.yaml @@ -36,6 +36,8 @@ calling: activate: true freebayes: activate: true + gridss: + activate: true # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config/scenario.yaml # See http://snpeff.sourceforge.net/SnpSift.html#filter From 36ef08c5a73c5d2d13af34556f2ba77bba27cc26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Wed, 25 Mar 2020 12:02:15 +0000 Subject: [PATCH 10/15] idk --- config/config.yaml | 2 ++ workflow/Snakefile | 12 ++++++------ workflow/rules/common.smk | 8 ++++++++ 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/config/config.yaml b/config/config.yaml index c23fdbdc1..a5acbefd4 100644 --- a/config/config.yaml +++ b/config/config.yaml @@ -46,6 +46,8 @@ calling: activate: true freebayes: activate: true + gridss: + activate: true # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config/scenario.yaml filter: diff --git a/workflow/Snakefile b/workflow/Snakefile index 0de7911e0..f280a6696 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -33,9 +33,9 @@ if is_activated("oncoprint/stratify"): rule all: input: - expand("results/gridss_vcf/{group}.vcf", group=groups), - #get_final_output(), - #get_tmb_targets(), - # expand("results/plots/oncoprint/{batch}.{event}.pdf", - # event=config["calling"]["fdr-control"]["events"], - # batch=batches) + get_gridss_targets(), + get_final_output(), + get_tmb_targets(), + expand("results/plots/oncoprint/{batch}.{event}.pdf", + event=config["calling"]["fdr-control"]["events"], + batch=batches) diff --git a/workflow/rules/common.smk b/workflow/rules/common.smk index 83aebecd0..6b0af87a7 100644 --- a/workflow/rules/common.smk +++ b/workflow/rules/common.smk @@ -94,6 +94,14 @@ def get_tmb_targets(): return [] +def get_gridss_targets(): + if is_activated("calling/gridss"): + return expand("results/gridss_vcf/{group}.vcf", + group=groups) + else: + return [] + + def get_annotated_bcf(wildcards, group=None): if group is None: group = wildcards.group From 3ad2ccb74cc3e2f550da86d16d01260383322512 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Wed, 25 Mar 2020 12:24:32 +0000 Subject: [PATCH 11/15] trimming line break error? --- .test/config/config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.test/config/config.yaml b/.test/config/config.yaml index 600a5eb7b..ff4331f24 100644 --- a/.test/config/config.yaml +++ b/.test/config/config.yaml @@ -37,7 +37,7 @@ calling: freebayes: activate: true gridss: - activate: true + activate: false # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config/scenario.yaml # See http://snpeff.sourceforge.net/SnpSift.html#filter From 9c76874d7a9361e65a1e2c67aa007d13951dcbb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Wed, 25 Mar 2020 12:24:52 +0000 Subject: [PATCH 12/15] trimming line break error? --- workflow/rules/trimming.smk | 5 ----- 1 file changed, 5 deletions(-) diff --git a/workflow/rules/trimming.smk b/workflow/rules/trimming.smk index e0cc8f433..caf28a605 100644 --- a/workflow/rules/trimming.smk +++ b/workflow/rules/trimming.smk @@ -16,13 +16,8 @@ rule cutadapt_pipe: log: "logs/pipe-fastqs/{sample}-{unit}.{fq}.{ext}" wildcard_constraints: -<<<<<<< HEAD - ending="fastq|fastq.gz" - threads: 0 -======= ext=r"fastq|fastq\.gz" threads: 0 # this does not need CPU ->>>>>>> master shell: "cat {input} > {output} 2> {log}" From 914482095abb33702fd8d436c4ec2fe6e26bc5bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Wed, 25 Mar 2020 16:18:31 +0000 Subject: [PATCH 13/15] i screwed up at merging index rule --- workflow/rules/gridss.smk | 2 +- workflow/rules/utils.smk | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/workflow/rules/gridss.smk b/workflow/rules/gridss.smk index 4d9298bed..be3050b79 100644 --- a/workflow/rules/gridss.smk +++ b/workflow/rules/gridss.smk @@ -58,7 +58,7 @@ rule GridssCollectMetricsAndExtractSVReads: bam="results/recal/{sample}.sorted.bam", insert_size_metrics="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_metrics", output: - sv_metrics=temp("tmp/{sample}.sorted.bam.gridss.working/{sample}.bam.sv_metrics"), + sv_metrics=temp("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv_metrics"), namedsorted_bam=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.namedsorted.bam"), metrics=temp(multiext("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam", ".cigar_metrics", ".coverage.blacklist.bed", ".idsv_metrics", ".insert_size_histogram.pdf", ".insert_size_metrics", ".mapq_metrics", ".tag_metrics")), log: diff --git a/workflow/rules/utils.smk b/workflow/rules/utils.smk index 7067e764f..fff939b9b 100644 --- a/workflow/rules/utils.smk +++ b/workflow/rules/utils.smk @@ -15,7 +15,7 @@ rule bam_index: input: "{prefix}.bam" output: - "{prefix}.sorted.bam.bai" + "{prefix}.bam.bai" log: "logs/bam-index/{prefix}.log" wrapper: From 27075aa3063ca4f7955075d54bdfdba56362e208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Wed, 25 Mar 2020 16:21:00 +0000 Subject: [PATCH 14/15] cleaning code --- workflow/rules/gridss.smk | 40 +-------------------------------------- 1 file changed, 1 insertion(+), 39 deletions(-) diff --git a/workflow/rules/gridss.smk b/workflow/rules/gridss.smk index be3050b79..cc3930caf 100644 --- a/workflow/rules/gridss.smk +++ b/workflow/rules/gridss.smk @@ -1,24 +1,7 @@ -#configfile: "config/config.yaml" -#include: "rules/common.smk" - -#reference = "../strling/ref_bwa/GCA_000001405.15_GRCh38_no_alt_plus_hs38d1_analysis_set.fna" - jvm_args = f"-Dreference_fasta=results/refs/genome.fasta -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=true -Dsamjdk.buffer_size=4194304" -group_names = samples["group"].unique() -sample_names = samples.sample_name.values - -sample_constraint = "|".join(sample_names) -group_constraint = "|".join(group_names) - -# rule all: -# input: -# expand("results/gridss_vcf/group.{group}.vcf", group=group_names) - - rule GridssCollectMetrics: input: - #bam="{sample.bam}" bam="results/recal/{sample}.sorted.bam" output: insert_size_metrics=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.insert_size_metrics"), @@ -33,8 +16,6 @@ rule GridssCollectMetrics: picardoptions="" conda: "../envs/gridss.yaml" - wildcard_constraints: - sample=sample_constraint shell: """ (gridss gridss.analysis.CollectGridssMetrics \ {jvm_args} \ @@ -51,7 +32,6 @@ STOP_AFTER={params.metricsrecords} \ {params.picardoptions}) > {log} 2>&1 """ -#tmp/EPF-BUR-012-013.bam.gridss.working/tmp.EPF-BUR-012-013.bam.insert_size_metrics rule GridssCollectMetricsAndExtractSVReads: input: @@ -71,8 +51,6 @@ rule GridssCollectMetricsAndExtractSVReads: picardoptions="", conda: "../envs/gridss.yaml" - wildcard_constraints: - sample=sample_constraint threads: 50 shell: """ @@ -120,8 +98,6 @@ rule GridssComputeSamTags: coordinate_bam=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.coordinate.bam") log: "log/gridss/compute_sam_tags/{sample}.log" - wildcard_constraints: - sample=sample_constraint params: working_dir="tmp", tmp_sort=temp("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.coordinate-tmp"), @@ -175,8 +151,6 @@ rule GridssSoftClipsToSplitReads: supp_sv=temp("tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.sc2sr.supp.sv.bam"), log: "log/gridss/soft_clips_to_split_reads/{sample}.log" - wildcard_constraints: - sample=sample_constraint params: working_dir="tmp", picardoptions="", @@ -226,8 +200,6 @@ rule GridssMergeSupported: merged=temp("{p}/{x}.bam.sv.bam") conda: "../envs/gridss.yaml" - # wildcard_constraints: - # x=sample_constraint + "|" + group_constraint shell: "samtools merge -@ {threads} {output.merged} {input.primary_sv} {input.supp_sv}" @@ -242,8 +214,6 @@ rule GridssAssembleBreakends: assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam" log: "log/gridss/assemble_breakends/{group}.log" - wildcard_constraints: - group=group_constraint params: jobindex="0", jobnodes="1", @@ -280,8 +250,6 @@ rule GridssCollectMetricsGroup: tag_metrics=temp("tmp/group.{group}.bam.gridss.working/group.{group}.bam.tag_metrics"), log: "log/gridss/collect_metrics_group/{group}.log" - wildcard_constraints: - group=group_constraint params: prefix="tmp/group.{group}.bam.gridss.working/group.{group}.bam", working_dir="tmp/group.{group}.bam.gridss.working", @@ -322,8 +290,6 @@ rule GridssSoftClipsToSplitReadsAssembly: assembly_supp_sv=temp("tmp/group.{group}.bam.gridss.working/tmp.group.{group}.bam.sc2sr.supp.sv.bam"), log: "log/gridss/soft_clips_to_split_reads_assembly/{group}.log" - wildcard_constraints: - group=group_constraint params: working_dir="tmp", picardoptions="", @@ -363,8 +329,6 @@ rule GridssIdentifyVariants: unallocated=temp("tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf") log: "log/gridss/indentify_variants/{group}.log" - wildcard_constraints: - group=group_constraint params: input_args=lambda wildcards: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards))), working_dir="tmp", @@ -401,8 +365,6 @@ rule GridssAnnotateVariants: allocated=temp("tmp/group.{group}.vcf.gridss.working/group.{group}.allocated.vcf"), log: "log/gridss/annotate_variants/{group}.log" - wildcard_constraints: - group=group_constraint params: input_args=lambda wildcards: " ".join(expand("INPUT=results/recal/{sample}.sorted.bam", sample=get_group_samples(wildcards))), working_dir="tmp", @@ -455,4 +417,4 @@ WORKER_THREADS={threads} \ INPUT={input.allocated} \ OUTPUT={output.vcf} \ {params.picardoptions}) > {log} 2>&1 - """ + """ \ No newline at end of file From 55187c7755e0ecf608fd32862a9cf2bd273ec08b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christopher=20Schr=C3=B6der?= Date: Wed, 25 Mar 2020 16:59:58 +0000 Subject: [PATCH 15/15] adjusted genome path --- .test/config/config.yaml | 2 +- workflow/rules/gridss.smk | 19 +++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.test/config/config.yaml b/.test/config/config.yaml index ff4331f24..600a5eb7b 100644 --- a/.test/config/config.yaml +++ b/.test/config/config.yaml @@ -37,7 +37,7 @@ calling: freebayes: activate: true gridss: - activate: false + activate: true # See https://varlociraptor.github.io/docs/calling/#generic-variant-calling scenario: config/scenario.yaml # See http://snpeff.sourceforge.net/SnpSift.html#filter diff --git a/workflow/rules/gridss.smk b/workflow/rules/gridss.smk index cc3930caf..57a41b890 100644 --- a/workflow/rules/gridss.smk +++ b/workflow/rules/gridss.smk @@ -1,4 +1,4 @@ -jvm_args = f"-Dreference_fasta=results/refs/genome.fasta -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=true -Dsamjdk.buffer_size=4194304" +jvm_args = f"-Dreference_fasta=resources/genome.fasta -Dsamjdk.use_async_io_read_samtools=true -Dsamjdk.use_async_io_write_samtools=true -Dsamjdk.use_async_io_write_tribble=true -Dsamjdk.buffer_size=4194304" rule GridssCollectMetrics: input: @@ -32,7 +32,6 @@ STOP_AFTER={params.metricsrecords} \ {params.picardoptions}) > {log} 2>&1 """ - rule GridssCollectMetricsAndExtractSVReads: input: bam="results/recal/{sample}.sorted.bam", @@ -91,7 +90,7 @@ INCLUDE_DUPLICATES=true \ rule GridssComputeSamTags: input: - ref="results/refs/genome.fasta", + ref="resources/genome.fasta", idx=rules.bwa_index.output, namedsorted_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.namedsorted.bam", output: @@ -143,7 +142,7 @@ ASSUME_SORTED=true \ rule GridssSoftClipsToSplitReads: input: - ref="results/refs/genome.fasta", + ref="resources/genome.fasta", idx=rules.bwa_index.output, coordinate_bam="tmp/{sample}.sorted.bam.gridss.working/tmp.{sample}.sorted.bam.coordinate.bam" output: @@ -206,7 +205,7 @@ rule GridssMergeSupported: rule GridssAssembleBreakends: input: - ref="results/refs/genome.fasta", + ref="resources/genome.fasta", idx=rules.bwa_index.output, bams=lambda wildcards: expand("results/recal/{sample}.sorted.{ending}", sample=get_group_samples(wildcards), ending=["bam", "bam.bai"]), svs=lambda wildcards: expand("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv.{ending}", sample=get_group_samples(wildcards), ending=["bam", "bam.bai"]) @@ -279,7 +278,7 @@ PROGRAM=CollectAlignmentSummaryMetrics \ rule GridssSoftClipsToSplitReadsAssembly: input: - ref="results/refs/genome.fasta", + ref="resources/genome.fasta", idx=rules.bwa_index.output, assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", idsv_metrics="tmp/group.{group}.bam.gridss.working/group.{group}.bam.idsv_metrics", @@ -322,7 +321,7 @@ rule GridssIdentifyVariants: assembly_sv="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam", assembly_sv_index="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam.bai", assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", - ref="results/refs/genome.fasta", + ref="resources/genome.fasta", svs=lambda wildcards: " ".join(expand("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv.bam", sample=get_group_samples(wildcards))), idx=rules.bwa_index.output, output: @@ -357,7 +356,7 @@ rule GridssAnnotateVariants: assembly_sv="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam", assembly_sv_index="tmp/group.{group}.bam.gridss.working/group.{group}.bam.sv.bam.bai", unallocated="tmp/group.{group}.vcf.gridss.working/group.{group}.unallocated.vcf", - ref="results/refs/genome.fasta", + ref="resources/genome.fasta", idx=rules.bwa_index.output, svs=lambda wildcards: " ".join(expand("tmp/{sample}.sorted.bam.gridss.working/{sample}.sorted.bam.sv.bam", sample=get_group_samples(wildcards))), assembly="tmp/group.{group}.bam.gridss.working/group.{group}.bam", @@ -392,7 +391,7 @@ OUTPUT_VCF={output.allocated} \ rule GridssAnnotateUntemplatedSequence: input: allocated="tmp/group.{group}.vcf.gridss.working/group.{group}.allocated.vcf", - ref="results/refs/genome.fasta", + ref="resources/genome.fasta", idx=rules.bwa_index.output, output: vcf="results/gridss_vcf/{group}.vcf" @@ -417,4 +416,4 @@ WORKER_THREADS={threads} \ INPUT={input.allocated} \ OUTPUT={output.vcf} \ {params.picardoptions}) > {log} 2>&1 - """ \ No newline at end of file + """