-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #158 from uclahs-cds/sfitz-combine-gvcfs
Use GVCFs for genotyping - run time/CPU hours substantially reduced (0.52)
- Loading branch information
Showing
11 changed files
with
191 additions
and
114 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,4 +5,4 @@ maintainers: "Boutros Lab Infrastructure <[email protected] | |
languages: ["Nextflow", "Docker"] | ||
dependencies: ["Java", "Nextflow", "Docker"] | ||
references: "https://uclahs-cds.atlassian.net/wiki/spaces/BOUTROSLAB/pages/3189620/Guide+to+Nextflow" | ||
tools: ["Picard:2.26.10", "GATK:3.7.0", "GATK:4.2.4.1"] | ||
tools: ["Picard:2.26.10", "GATK:3.7.0", "GATK:4.5.0.0"] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
include { generate_standard_filename } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' | ||
|
||
/* | ||
Nextflow module for merging GVCFs for joint genotyping with GATK | ||
Runs GATK CombineGVCFs over every GVCF staged for one interval_id and emits | ||
the combined GVCF together with its .tbi index on the combined_gvcf channel, | ||
forwarding interval_path and interval_id unchanged. | ||
*/ | ||
process run_CombineGVCFs_GATK { | ||
container params.docker_image_gatk | ||
// Copy files matching '*g.vcf.gz*' (the combined GVCF and anything sharing that | ||
// prefix, such as its index) to the intermediate directory; publishing is | ||
// active only when params.save_intermediate_files is set. | ||
publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", | ||
mode: "copy", | ||
enabled: params.save_intermediate_files, | ||
pattern: '*g.vcf.gz*' | ||
// Copy the .command.* files into the process-log directory, under a | ||
// sub-directory named <last process name component>-<interval_id>; each file | ||
// name gets a "log" prefix (e.g. .command.sh becomes log.command.sh). | ||
publishDir path: "${params.log_output_dir}/process-log", | ||
pattern: ".command.*", | ||
mode: "copy", | ||
saveAs: { "${task.process.replace(':', '/')}/${task.process.split(':')[-1]}-${interval_id}/log${file(it).getName()}" } | ||
|
||
// reference_fasta_fai and reference_fasta_dict are never referenced in the | ||
// script; presumably they are staged so GATK can find them next to the FASTA | ||
// (TODO confirm). interval_path is not passed to CombineGVCFs here; it is only | ||
// forwarded through the output tuple. | ||
input: | ||
path(reference_fasta) | ||
path(reference_fasta_fai) | ||
path(reference_fasta_dict) | ||
tuple path(gvcfs), path(gvcf_indices), path(interval_path), val(interval_id) | ||
|
||
// The "${output_filename}.tbi" entry is expected to be the index requested by | ||
// --create-output-variant-index in the command below. | ||
output: | ||
path(".command.*") | ||
tuple path(output_filename), path("${output_filename}.tbi"), path(interval_path), val(interval_id), emit: combined_gvcf | ||
|
||
script: | ||
// Output name comes from the shared generate_standard_filename helper, given | ||
// "GATK-<params.gatk_version>", the dataset and patient IDs, and | ||
// "<interval_id>.g.vcf.gz" as the additional_information entry. | ||
output_filename = generate_standard_filename( | ||
"GATK-${params.gatk_version}", | ||
params.dataset_id, | ||
params.patient_id, | ||
[ | ||
'additional_information': "${interval_id}.g.vcf.gz" | ||
] | ||
) | ||
// One single-quoted --variant argument per staged GVCF, joined with spaces. | ||
// NOTE(review): if only one GVCF is staged, Nextflow may hand over a bare path | ||
// instead of a list - confirm that collect{} still yields one argument then. | ||
gvcf_input_str = gvcfs.collect{ "--variant '${it}'" }.join(' ') | ||
// The JVM heap (-Xmx) is the task memory minus params.gatk_command_mem_diff, | ||
// expressed in megabytes. | ||
""" | ||
set -euo pipefail | ||
gatk --java-options "-Xmx${(task.memory - params.gatk_command_mem_diff).getMega()}m" \ | ||
CombineGVCFs \ | ||
--reference ${reference_fasta} \ | ||
${gvcf_input_str} \ | ||
--output ${output_filename} \ | ||
--create-output-variant-index true \ | ||
--verbosity INFO | ||
""" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
include { generate_standard_filename } from '../external/pipeline-Nextflow-module/modules/common/generate_standardized_filename/main.nf' | ||
|
||
/* | ||
Nextflow module for joint genotyping merged GVCFs with GATK | ||
Runs GATK GenotypeGVCFs on one combined GVCF, restricted to the supplied | ||
interval file, and emits the resulting VCF with its .tbi index on the vcfs | ||
channel. | ||
*/ | ||
process run_GenotypeGVCFs_GATK { | ||
container params.docker_image_gatk | ||
// Copy files matching '*.vcf*' to the intermediate directory; publishing is | ||
// active only when params.save_intermediate_files is set. | ||
publishDir path: "${params.output_dir_base}/intermediate/${task.process.replace(':', '/')}", | ||
mode: "copy", | ||
enabled: params.save_intermediate_files, | ||
pattern: '*.vcf*' | ||
|
||
// Copy the .command.* files into the process-log directory, under a | ||
// sub-directory named <last process name component>-<interval_id>; each file | ||
// name gets a "log" prefix (e.g. .command.sh becomes log.command.sh). | ||
publishDir path: "${params.log_output_dir}/process-log", | ||
pattern: ".command.*", | ||
mode: "copy", | ||
saveAs: { "${task.process.replace(':', '/')}/${task.process.split(':')[-1]}-${interval_id}/log${file(it).getName()}" } | ||
|
||
// reference_fasta_fai, reference_fasta_dict, dbsnp_bundle_index and | ||
// combined_gvcf_index are never referenced in the script; presumably they are | ||
// staged so GATK can find them beside the files they index (TODO confirm). | ||
input: | ||
path(reference_fasta) | ||
path(reference_fasta_fai) | ||
path(reference_fasta_dict) | ||
path(dbsnp_bundle) | ||
path(dbsnp_bundle_index) | ||
tuple path(combined_gvcf), path(combined_gvcf_index), path(interval_path), val(interval_id) | ||
|
||
// NOTE(review): the command below does not pass --create-output-variant-index, | ||
// so the "${output_filename}.tbi" output relies on GATK writing the index by | ||
// default - confirm. | ||
output: | ||
path(".command.*") | ||
tuple path(output_filename), path("${output_filename}.tbi"), emit: vcfs | ||
|
||
script: | ||
// Output name comes from the shared generate_standard_filename helper, given | ||
// "GATK-<params.gatk_version>", the dataset and patient IDs, and | ||
// "<interval_id>.vcf.gz" as the additional_information entry. | ||
output_filename = generate_standard_filename( | ||
"GATK-${params.gatk_version}", | ||
params.dataset_id, | ||
params.patient_id, | ||
[ | ||
'additional_information': "${interval_id}.vcf.gz" | ||
] | ||
) | ||
// Genotyping is always limited to the staged interval file. | ||
interval_str = "--intervals ${interval_path}" | ||
// A fixed padding of 100 is added only when params.is_targeted is truthy; | ||
// otherwise the placeholder expands to an empty string. | ||
interval_padding = params.is_targeted ? "--interval-padding 100" : "" | ||
// The JVM heap (-Xmx) is the task memory minus params.gatk_command_mem_diff, | ||
// expressed in megabytes. The calling confidence threshold is hard-coded to 50. | ||
""" | ||
set -euo pipefail | ||
gatk --java-options "-Xmx${(task.memory - params.gatk_command_mem_diff).getMega()}m" \ | ||
GenotypeGVCFs \ | ||
--variant ${combined_gvcf} \ | ||
--reference ${reference_fasta} \ | ||
--verbosity INFO \ | ||
--output ${output_filename} \ | ||
--dbsnp ${dbsnp_bundle} \ | ||
--standard-min-confidence-threshold-for-calling 50 \ | ||
${interval_str} \ | ||
${interval_padding} | ||
""" | ||
} |
Oops, something went wrong.