From 3f1e61be1fbe87fcd8ca3ecdcc47c7d9e4001ba5 Mon Sep 17 00:00:00 2001 From: Yash Patel Date: Tue, 19 Dec 2023 14:32:48 -0800 Subject: [PATCH 1/9] Add example YAMLs for using recalibration tables --- ...brate-BAM-multiple-input-with-recal-table.yaml | 15 +++++++++++++++ ...te-BAM-normal-only-input-with-recal-table.yaml | 8 ++++++++ ...librate-BAM-paired-input-with-recal-table.yaml | 11 +++++++++++ ...ate-BAM-tumor-only-input-with-recal-table.yaml | 8 ++++++++ 4 files changed, 42 insertions(+) create mode 100644 input/recalibrate-BAM-multiple-input-with-recal-table.yaml create mode 100644 input/recalibrate-BAM-normal-only-input-with-recal-table.yaml create mode 100644 input/recalibrate-BAM-paired-input-with-recal-table.yaml create mode 100644 input/recalibrate-BAM-tumor-only-input-with-recal-table.yaml diff --git a/input/recalibrate-BAM-multiple-input-with-recal-table.yaml b/input/recalibrate-BAM-multiple-input-with-recal-table.yaml new file mode 100644 index 00000000..fb55cdd3 --- /dev/null +++ b/input/recalibrate-BAM-multiple-input-with-recal-table.yaml @@ -0,0 +1,15 @@ +--- +patient_id: "patient_id" +input: + BAM: + normal: + - "/absolute/path/to/BAM" + - "/absolute/path/to/BAM" + tumor: + - "/abosolute/path/to/BAM" + - "/absolute/path/to/BAM" + recalibration_tables: + - "/absolute/path/to/recalibration/table/sample1" + - "/absolute/path/to/recalibration/table/sample2" + - "/absolute/path/to/recalibration/table/sample3" + - "/absolute/path/to/recalibration/table/sample4" diff --git a/input/recalibrate-BAM-normal-only-input-with-recal-table.yaml b/input/recalibrate-BAM-normal-only-input-with-recal-table.yaml new file mode 100644 index 00000000..966edce2 --- /dev/null +++ b/input/recalibrate-BAM-normal-only-input-with-recal-table.yaml @@ -0,0 +1,8 @@ +--- +patient_id: "patient_id" +input: + BAM: + normal: + - "/absolute/path/to/BAM" + recalibration_tables: + - "/absolute/path/to/recalibration/table" diff --git 
a/input/recalibrate-BAM-paired-input-with-recal-table.yaml b/input/recalibrate-BAM-paired-input-with-recal-table.yaml new file mode 100644 index 00000000..1ccbf386 --- /dev/null +++ b/input/recalibrate-BAM-paired-input-with-recal-table.yaml @@ -0,0 +1,11 @@ +--- +patient_id: "patient_id" +input: + BAM: + normal: + - "/absolute/path/to/BAM" + tumor: + - "/absolute/path/to/BAM" + recalibration_tables: + - "/absolute/path/to/recalibration/table/sample1" + - "/absolute/path/to/recalibration/table/sample2" diff --git a/input/recalibrate-BAM-tumor-only-input-with-recal-table.yaml b/input/recalibrate-BAM-tumor-only-input-with-recal-table.yaml new file mode 100644 index 00000000..63385613 --- /dev/null +++ b/input/recalibrate-BAM-tumor-only-input-with-recal-table.yaml @@ -0,0 +1,8 @@ +--- +patient_id: "patient_id" +input: + BAM: + tumor: + - "/absolute/path/to/BAM" + recalibration_tables: + - "/absolute/path/to/recalibration/table" From 8f5da045c7d45d06d5b1870bdda12efaebb9ec8b Mon Sep 17 00:00:00 2001 From: Yash Patel Date: Tue, 19 Dec 2023 14:33:20 -0800 Subject: [PATCH 2/9] Add recalibration tables input to schema --- config/schema.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/config/schema.yaml b/config/schema.yaml index e27afa33..098c7d6d 100644 --- a/config/schema.yaml +++ b/config/schema.yaml @@ -126,3 +126,8 @@ input: type: 'BAMEntryList' required: false help: 'Input tumor BAMs' + recalibration_tables: + type: 'RecalibrationTableList' + required: false + allow_empty: false + help: 'List of any available recalibration tables' From 8a551e6c971fc2a2a87bb83d724a9cf6009bc98d Mon Sep 17 00:00:00 2001 From: Yash Patel Date: Tue, 19 Dec 2023 14:34:01 -0800 Subject: [PATCH 3/9] Add type for list of recalibration tables --- config/custom_schema_types.config | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/config/custom_schema_types.config b/config/custom_schema_types.config index 3f14322f..91e348af 100644 --- 
a/config/custom_schema_types.config +++ b/config/custom_schema_types.config @@ -3,7 +3,8 @@ */ custom_schema_types { allowed_input_types = [ - 'BAM' + 'BAM', + 'recalibration_tables' ] allowed_bam_types = [ 'normal', @@ -131,7 +132,7 @@ custom_schema_types { /** * Check if proper BAM entry list */ - check_bam_list = { Map options, String name, Map properties -> + check_readable_file_list = { Map options, String name, Map properties -> custom_schema_types.check_if_list(options[name], name) for (item in options[name]) { schema.check_path(item, 'r') @@ -162,7 +163,8 @@ custom_schema_types { types = [ 'InputNamespace': custom_schema_types.check_input_namespace, 'InputBAMNamespace': custom_schema_types.check_bam_namespace, - 'BAMEntryList': custom_schema_types.check_bam_list, + 'BAMEntryList': custom_schema_types.check_readable_file_list, + 'RecalibrationTableList': custom_schema_types.check_readable_file_list, 'AlignerTool': custom_schema_types.check_aligner, 'ResourceUpdateNamespace': custom_schema_types.check_resource_update_namespace, 'ResourceUpdateList': custom_schema_types.check_resource_update_list From bcd3cebf721d4f0158841af132caba219881c313 Mon Sep 17 00:00:00 2001 From: Yash Patel Date: Tue, 19 Dec 2023 14:34:33 -0800 Subject: [PATCH 4/9] Add handler for recalibration tables and use modularized set_env function --- config/methods.config | 57 +++++++------------------------------------ 1 file changed, 9 insertions(+), 48 deletions(-) diff --git a/config/methods.config b/config/methods.config index 4ccc3205..3d075ceb 100644 --- a/config/methods.config +++ b/config/methods.config @@ -54,54 +54,6 @@ methods { params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.patient_id}/${params.docker_image_gatk.split("/")[1].replace(':', '-').toUpperCase()}" } - /** - * Check the permissions and existence of workDir. - * If it doesn't exist, recursively find first existing directory and check write permission. 
- * If it exists, check write permission. - */ - check_workdir_permissions = { dir -> - dir_file = new File(dir) - if (dir_file.exists()) { - if (dir_file.canWrite()) { - return true - } else { - throw new Exception(" ### ERROR ### The input directory params.work_dir: ${dir} is not writeable. Please verify and try again.") - } - } else { - while (!dir_file.exists()) { - dir_file = dir_file.getParentFile() - } - - if (dir_file.canWrite()) { - return true - } else { - throw new Exception(" ### ERROR ### The input directory params.work_dir: ${dir} cannot be created. The closest existing parent directory ${dir_file.toString()} is not writable. Please verify permissions or change the input parameter.") - } - } - } - - set_env = { - if (params.ucla_cds) { - /** - * By default, if the /scratch directory exists, set it as the Nextflow working directory - * If config file specified work_dir, set it as the Nextflow working directory - * - * WARNING: changing this directory can lead to high server latency and - * potential disk space limitations. Change with caution! The 'workDir' - * in Nextflow determines the location of intermediate and temporary files. - */ - params.work_dir = (params.containsKey('work_dir') && params.work_dir) ? params.work_dir : '/scratch' - if (methods.check_workdir_permissions(params.work_dir)) { - workDir = params.work_dir - } - } else { - // If work_dir was specified as a param and exists or can be created, set workDir. 
Otherwise, let Nextflow's default behavior dictate workDir - if (params.containsKey('work_dir') && params.work_dir && methods.check_workdir_permissions(params.work_dir)) { - workDir = params.work_dir - } - } - } - set_pipeline_logs = { trace.enabled = true trace.file = "${params.log_output_dir}/nextflow-log/trace.txt" @@ -189,6 +141,14 @@ methods { } } + set_recal_tables = { + params.use_recal_tables = params.input.containsKey('recalibration_tables') + + if (!params.use_recal_tables) { + params.input['recalibration_tables'] = ["${params.work_dir}/NO_FILE.grp"] + } + } + setup = { methods.set_env() schema.load_custom_types("${projectDir}/config/custom_schema_types.config") @@ -204,5 +164,6 @@ methods { retry.setup_retry() methods.setup_docker_cpus() methods.verify_input_deletion() + methods.set_recal_tables() } } From b184cfb1c36140b8373dbb37bfe94ba5d4b6b769 Mon Sep 17 00:00:00 2001 From: Yash Patel Date: Tue, 19 Dec 2023 14:47:30 -0800 Subject: [PATCH 5/9] Add recal tables to log info --- main.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/main.nf b/main.nf index 98dd1172..b836e778 100644 --- a/main.nf +++ b/main.nf @@ -23,6 +23,7 @@ Current Configuration: bundle_v0_dbsnp138_vcf_gz: ${params.bundle_v0_dbsnp138_vcf_gz} bundle_contest_hapmap_3p3_vcf_gz: ${params.bundle_contest_hapmap_3p3_vcf_gz} intervals: ${(params.is_targeted) ?: 'WGS'} + recalibration_tables: ${params.input.recalibration_tables} - output: output: ${params.output_dir} From 95b148d3b0a6e15a947c6af15397248f9aed95c7 Mon Sep 17 00:00:00 2001 From: Yash Patel Date: Tue, 19 Dec 2023 14:48:35 -0800 Subject: [PATCH 6/9] Add check to skip recalibration table generation if table exists for sample --- module/base-recalibration.nf | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/module/base-recalibration.nf b/module/base-recalibration.nf index 83a341a9..e6d97eeb 100644 --- a/module/base-recalibration.nf +++ b/module/base-recalibration.nf @@ -56,6 +56,7 @@ 
process run_BaseRecalibrator_GATK { path(bundle_v0_dbsnp138_vcf_gz) path(bundle_v0_dbsnp138_vcf_gz_tbi) path(intervals) + path(recal_tables) tuple path(indelrealigned_bams), path(indelrealigned_bams_bai), val(sample_id) output: @@ -67,18 +68,21 @@ process run_BaseRecalibrator_GATK { targeted_options = params.is_targeted ? "--intervals ${intervals} --interval-padding 100" : "" """ set -euo pipefail - gatk --java-options "-Xmx${(task.memory - params.gatk_command_mem_diff).getMega()}m -DGATK_STACKTRACE_ON_USER_EXCEPTION=true -Djava.io.tmpdir=${workDir}" \ - BaseRecalibrator \ - ${all_ir_bams} \ - --reference ${reference_fasta} \ - --verbosity INFO \ - --known-sites ${bundle_mills_and_1000g_gold_standards_vcf_gz} \ - --known-sites ${bundle_known_indels_vcf_gz} \ - --known-sites ${bundle_v0_dbsnp138_vcf_gz} \ - --output ${sample_id}_recalibration_table.grp \ - ${targeted_options} \ - --read-filter SampleReadFilter \ - --sample ${sample_id} + if [ ! -f ${sample_id}_recalibration_table.grp ] + then + gatk --java-options "-Xmx${(task.memory - params.gatk_command_mem_diff).getMega()}m -DGATK_STACKTRACE_ON_USER_EXCEPTION=true -Djava.io.tmpdir=${workDir}" \ + BaseRecalibrator \ + ${all_ir_bams} \ + --reference ${reference_fasta} \ + --verbosity INFO \ + --known-sites ${bundle_mills_and_1000g_gold_standards_vcf_gz} \ + --known-sites ${bundle_known_indels_vcf_gz} \ + --known-sites ${bundle_v0_dbsnp138_vcf_gz} \ + --output ${sample_id}_recalibration_table.grp \ + ${targeted_options} \ + --read-filter SampleReadFilter \ + --sample ${sample_id} + fi """ } @@ -195,6 +199,7 @@ workflow recalibrate_base { params.bundle_v0_dbsnp138_vcf_gz, "${params.bundle_v0_dbsnp138_vcf_gz}.tbi", base_recalibrator_intervals, + params.input.recalibration_tables, input_ch_base_recalibrator ) From 4a1ee89dedd6ac32295610714fece2f189578a32 Mon Sep 17 00:00:00 2001 From: Yash Patel Date: Tue, 19 Dec 2023 14:49:46 -0800 Subject: [PATCH 7/9] Update CHANGELOG --- CHANGELOG.md | 3 +++ 1 file changed, 3 
insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae327323..21bba92a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm - Add NFTest case - Add new flow diagram to README - Add additional details to Pipeline Steps section of README +- Add option to provide base recalibration tables for any subset of samples to skip `BaseRecalibrator` +### [Changed] +- Use modularized `set_env` function --- From b83b02284d70d4dc4536f5fcab44cd8a24bb8c68 Mon Sep 17 00:00:00 2001 From: Yash Patel Date: Tue, 19 Dec 2023 14:53:47 -0800 Subject: [PATCH 8/9] Add recalibration table input option to README --- README.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/README.md b/README.md index c58a74aa..a859fa44 100644 --- a/README.md +++ b/README.md @@ -92,7 +92,10 @@ Generate sha512 checksum for final BAM and BAI files. | patient_id | string | Patient ID (will be standardized according to data storage structure in the near future) | | normal_BAM | path | Set to absolute path to normal BAM | | tumor_BAM | path | Set to absolute path to tumour BAM | +| recalibration_table | path | (Optional) List of absolute paths to existing recalibration tables for any subset of samples; a table is reused only when its file name is `<sample_id>_recalibration_table.grp` | + +Input without pre-existing recalibration table(s): ``` --- patient_id: "patient_id" @@ -104,7 +107,23 @@ input: tumor: - "/absolute/path/to/BAM" - "/absolute/path/to/BAM" +``` +Input with existing recalibration table(s): +``` +--- +patient_id: "patient_id" +input: + BAM: + normal: + - "/absolute/path/to/BAM" + - "/absolute/path/to/BAM" + tumor: + - "/absolute/path/to/BAM" + - "/absolute/path/to/BAM" + recalibration_tables: + - "/absolute/path/to/recalibration/table1" + - "/absolute/path/to/recalibration/table2" ``` For normal-only or tumour-only samples, exclude the fields for the other state. 
From a772dae32854c142238cc00b4472f766a902f650 Mon Sep 17 00:00:00 2001 From: Yash Patel Date: Wed, 20 Dec 2023 08:41:30 -0800 Subject: [PATCH 9/9] Use singular form for recalibration_table --- README.md | 2 +- config/custom_schema_types.config | 2 +- config/methods.config | 4 ++-- config/schema.yaml | 2 +- input/recalibrate-BAM-multiple-input-with-recal-table.yaml | 2 +- input/recalibrate-BAM-normal-only-input-with-recal-table.yaml | 2 +- input/recalibrate-BAM-paired-input-with-recal-table.yaml | 2 +- input/recalibrate-BAM-tumor-only-input-with-recal-table.yaml | 2 +- main.nf | 2 +- module/base-recalibration.nf | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index a859fa44..462005d0 100644 --- a/README.md +++ b/README.md @@ -121,7 +121,7 @@ input: tumor: - "/absolute/path/to/BAM" - "/absolute/path/to/BAM" - recalibration_tables: + recalibration_table: - "/absolute/path/to/recalibration/table1" - "/absolute/path/to/recalibration/table2" ``` diff --git a/config/custom_schema_types.config b/config/custom_schema_types.config index 91e348af..24c4f888 100644 --- a/config/custom_schema_types.config +++ b/config/custom_schema_types.config @@ -4,7 +4,7 @@ custom_schema_types { allowed_input_types = [ 'BAM', - 'recalibration_tables' + 'recalibration_table' ] allowed_bam_types = [ 'normal', diff --git a/config/methods.config b/config/methods.config index 3d075ceb..7e17cb2c 100644 --- a/config/methods.config +++ b/config/methods.config @@ -142,10 +142,10 @@ methods { } set_recal_tables = { - params.use_recal_tables = params.input.containsKey('recalibration_tables') + params.use_recal_tables = params.input.containsKey('recalibration_table') if (!params.use_recal_tables) { - params.input['recalibration_tables'] = ["${params.work_dir}/NO_FILE.grp"] + params.input['recalibration_table'] = ["${params.work_dir}/NO_FILE.grp"] } } diff --git a/config/schema.yaml b/config/schema.yaml index 098c7d6d..0449f0da 100644 --- a/config/schema.yaml 
+++ b/config/schema.yaml @@ -126,7 +126,7 @@ input: type: 'BAMEntryList' required: false help: 'Input tumor BAMs' - recalibration_tables: + recalibration_table: type: 'RecalibrationTableList' required: false allow_empty: false diff --git a/input/recalibrate-BAM-multiple-input-with-recal-table.yaml b/input/recalibrate-BAM-multiple-input-with-recal-table.yaml index fb55cdd3..4cb9f1fc 100644 --- a/input/recalibrate-BAM-multiple-input-with-recal-table.yaml +++ b/input/recalibrate-BAM-multiple-input-with-recal-table.yaml @@ -8,7 +8,7 @@ input: tumor: - "/abosolute/path/to/BAM" - "/absolute/path/to/BAM" - recalibration_tables: + recalibration_table: - "/absolute/path/to/recalibration/table/sample1" - "/absolute/path/to/recalibration/table/sample2" - "/absolute/path/to/recalibration/table/sample3" diff --git a/input/recalibrate-BAM-normal-only-input-with-recal-table.yaml b/input/recalibrate-BAM-normal-only-input-with-recal-table.yaml index 966edce2..2cf61092 100644 --- a/input/recalibrate-BAM-normal-only-input-with-recal-table.yaml +++ b/input/recalibrate-BAM-normal-only-input-with-recal-table.yaml @@ -4,5 +4,5 @@ input: BAM: normal: - "/absolute/path/to/BAM" - recalibration_tables: + recalibration_table: - "/absolute/path/to/recalibration/table" diff --git a/input/recalibrate-BAM-paired-input-with-recal-table.yaml b/input/recalibrate-BAM-paired-input-with-recal-table.yaml index 1ccbf386..f1b382f3 100644 --- a/input/recalibrate-BAM-paired-input-with-recal-table.yaml +++ b/input/recalibrate-BAM-paired-input-with-recal-table.yaml @@ -6,6 +6,6 @@ input: - "/absolute/path/to/BAM" tumor: - "/absolute/path/to/BAM" - recalibration_tables: + recalibration_table: - "/absolute/path/to/recalibration/table/sample1" - "/absolute/path/to/recalibration/table/sample2" diff --git a/input/recalibrate-BAM-tumor-only-input-with-recal-table.yaml b/input/recalibrate-BAM-tumor-only-input-with-recal-table.yaml index 63385613..81536d5f 100644 --- 
a/input/recalibrate-BAM-tumor-only-input-with-recal-table.yaml +++ b/input/recalibrate-BAM-tumor-only-input-with-recal-table.yaml @@ -4,5 +4,5 @@ input: BAM: tumor: - "/absolute/path/to/BAM" - recalibration_tables: + recalibration_table: - "/absolute/path/to/recalibration/table" diff --git a/main.nf b/main.nf index b836e778..82935d78 100644 --- a/main.nf +++ b/main.nf @@ -23,7 +23,7 @@ Current Configuration: bundle_v0_dbsnp138_vcf_gz: ${params.bundle_v0_dbsnp138_vcf_gz} bundle_contest_hapmap_3p3_vcf_gz: ${params.bundle_contest_hapmap_3p3_vcf_gz} intervals: ${(params.is_targeted) ?: 'WGS'} - recalibration_tables: ${params.input.recalibration_tables} + Recalibration tables: ${params.input.recalibration_table} - output: output: ${params.output_dir} diff --git a/module/base-recalibration.nf b/module/base-recalibration.nf index e6d97eeb..7a092f86 100644 --- a/module/base-recalibration.nf +++ b/module/base-recalibration.nf @@ -199,7 +199,7 @@ workflow recalibrate_base { params.bundle_v0_dbsnp138_vcf_gz, "${params.bundle_v0_dbsnp138_vcf_gz}.tbi", base_recalibrator_intervals, - params.input.recalibration_tables, + params.input.recalibration_table, input_ch_base_recalibrator )