diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f81269..4e6d94f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,8 +8,11 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm --- ## [Unreleased] +### Added +- Add workflow for genotyping from GVCFs ### Changed - Standardize description +- Update GATK to 4.5.0.0 --- diff --git a/config/F16.config b/config/F16.config index bca8986..1fb00e0 100644 --- a/config/F16.config +++ b/config/F16.config @@ -11,7 +11,7 @@ process { cpus = 1 memory = 1.GB } - withName: run_HaplotypeCallerVCF_GATK { + withName: run_HaplotypeCallerGVCF_GATK { cpus = 2 memory = 4.GB retry_strategy { @@ -21,7 +21,17 @@ process { } } } - withName: run_HaplotypeCallerGVCF_GATK { + withName: run_CombineGVCFs_GATK { + cpus = 2 + memory = 4.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: run_GenotypeGVCFs_GATK { cpus = 2 memory = 4.GB retry_strategy { diff --git a/config/F32.config b/config/F32.config index bca8986..1fb00e0 100644 --- a/config/F32.config +++ b/config/F32.config @@ -11,7 +11,7 @@ process { cpus = 1 memory = 1.GB } - withName: run_HaplotypeCallerVCF_GATK { + withName: run_HaplotypeCallerGVCF_GATK { cpus = 2 memory = 4.GB retry_strategy { @@ -21,7 +21,17 @@ process { } } } - withName: run_HaplotypeCallerGVCF_GATK { + withName: run_CombineGVCFs_GATK { + cpus = 2 + memory = 4.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: run_GenotypeGVCFs_GATK { cpus = 2 memory = 4.GB retry_strategy { diff --git a/config/F72.config b/config/F72.config index 406d896..b16f3db 100644 --- a/config/F72.config +++ b/config/F72.config @@ -11,7 +11,7 @@ process { cpus = 1 memory = 1.GB } - withName: run_HaplotypeCallerVCF_GATK { + withName: run_HaplotypeCallerGVCF_GATK { cpus = 3 memory = 7.GB retry_strategy { @@ -21,9 +21,19 @@ process { } } } - withName: run_HaplotypeCallerGVCF_GATK { - cpus = 3 - memory = 7.GB + withName: run_CombineGVCFs_GATK { + cpus = 2 + memory = 4.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: run_GenotypeGVCFs_GATK { + cpus = 2 + memory = 4.GB retry_strategy { memory { strategy = 'exponential' diff --git a/config/M64.config b/config/M64.config index 406d896..b16f3db 100644 --- a/config/M64.config +++ b/config/M64.config @@ -11,7 +11,7 @@ process { cpus = 1 memory = 1.GB } - withName: run_HaplotypeCallerVCF_GATK { + withName: run_HaplotypeCallerGVCF_GATK { cpus = 3 memory = 7.GB retry_strategy { @@ -21,9 +21,19 @@ process { } } } - withName: run_HaplotypeCallerGVCF_GATK { - cpus = 3 - memory = 7.GB + withName: run_CombineGVCFs_GATK { + cpus = 2 + memory = 4.GB + retry_strategy { + memory { + strategy = 'exponential' + operand = 2 + } + } + } + withName: run_GenotypeGVCFs_GATK { + cpus = 2 + memory = 4.GB retry_strategy { memory { strategy = 'exponential' diff --git a/config/default.config b/config/default.config index bf96ba3..806d508 100644 --- a/config/default.config +++ b/config/default.config @@ -16,7 +16,7 @@ params { docker_container_registry = "ghcr.io/uclahs-cds" - gatk_version = "4.2.4.1" + gatk_version = "4.5.0.0" picard_version = "2.26.10" pipeval_version = "4.0.0-rc.2" gatkfilter_version = "v1.0.0" diff --git a/main.nf b/main.nf index 2fd315a..33e0937 100644 --- a/main.nf +++ b/main.nf @@ -25,7 +25,7 @@ Current Configuration: bundle_omni_1000g_2p5_vcf_gz: ${params.bundle_omni_1000g_2p5_vcf_gz} bundle_phase1_1000g_snps_high_conf_vcf_gz: ${params.bundle_phase1_1000g_snps_high_conf_vcf_gz} - - output: + - output: output: ${params.output_dir} output_dir_base: ${params.output_dir_base} log_output_dir: ${params.log_output_dir} @@ -58,9 +58,10 @@ include { extract_GenomeIntervals } from './external/pipeline-Nextflow-module/mo ] ) include { - run_HaplotypeCallerVCF_GATK run_HaplotypeCallerGVCF_GATK } from './module/haplotypecaller.nf' +include { run_CombineGVCFs_GATK } from './module/combine-gvcfs.nf' +include { run_GenotypeGVCFs_GATK } from './module/genotype-gvcfs.nf' include { run_MergeVcfs_Picard as run_MergeVcfs_Picard_VCF run_MergeVcfs_Picard as run_MergeVcfs_Picard_GVCF @@ -147,51 +148,60 @@ workflow { /** * Haplotype calling */ - input_ch_collected_files.combine(input_ch_intervals) + + input_ch_samples_with_index.combine(input_ch_intervals) .map{ it -> [ - it[0].bams, - it[0].indices, + it[0].id, + it[0].path, + it[0].index, it[1].interval_path, it[1].interval_id ] } - .set{ input_ch_haplotypecallervcf } + .set{ input_ch_haplotypecallergvcf } - run_HaplotypeCallerVCF_GATK( + run_HaplotypeCallerGVCF_GATK( params.reference_fasta, "${params.reference_fasta}.fai", "${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict", params.bundle_v0_dbsnp138_vcf_gz, "${params.bundle_v0_dbsnp138_vcf_gz}.tbi", - input_ch_haplotypecallervcf + input_ch_haplotypecallergvcf ) - input_ch_samples_with_index.combine(input_ch_intervals) + run_HaplotypeCallerGVCF_GATK.out.gvcfs + .groupTuple(by: 4) // Group by interval ID .map{ it -> [ - it[0].id, - it[0].path, - it[0].index, - it[1].interval_path, - it[1].interval_id + it[1].flatten(), // GVCFs + it[2].flatten(), // Indices + it[3][0], // Interval path + it[4] // Interval ID ] } - .set{ input_ch_haplotypecallergvcf } + .set { input_ch_combine_gvcfs } - run_HaplotypeCallerGVCF_GATK( + run_CombineGVCFs_GATK( + params.reference_fasta, + "${params.reference_fasta}.fai", + "${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict", + input_ch_combine_gvcfs + ) + + run_GenotypeGVCFs_GATK( params.reference_fasta, "${params.reference_fasta}.fai", "${file(params.reference_fasta).parent}/${file(params.reference_fasta).baseName}.dict", params.bundle_v0_dbsnp138_vcf_gz, "${params.bundle_v0_dbsnp138_vcf_gz}.tbi", - input_ch_haplotypecallergvcf + run_CombineGVCFs_GATK.out.combined_gvcf ) /** * Merge VCFs */ - run_HaplotypeCallerVCF_GATK.out.vcfs + run_GenotypeGVCFs_GATK.out.vcfs .reduce( ['vcfs': [], 'indices': []] ){ a, b -> a.vcfs.add(b[0]); a.indices.add(b[1]); diff --git a/metadata.yaml b/metadata.yaml index a771bf0..771552b 100644 --- a/metadata.yaml +++ b/metadata.yaml @@ -5,4 +5,4 @@ maintainers: "Boutros Lab Infrastructure