Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow input of recalibration tables #45

Merged
merged 9 commits into from
Dec 21, 2023
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
- Add NFTest case
- Add new flow diagram to README
- Add additional details to Pipeline Steps section of README
- Add option to provide base recalibration tables for any subset of samples to skip `BaseRecalibrator`
### [Changed]
- Use modularized `set_env` function

---

Expand Down
19 changes: 19 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@ Generate sha512 checksum for final BAM and BAI files.
| patient_id | string | Patient ID (will be standardized according to data storage structure in the near future) |
| normal_BAM | path | Set to absolute path to normal BAM |
| tumor_BAM | path | Set to absolute path to tumour BAM |
| recalibration_table | path | (Optional) Absolute path to recalibration table |


Input without pre-existing recalibration table(s):
```
---
patient_id: "patient_id"
Expand All @@ -104,7 +107,23 @@ input:
tumor:
- "/absolute/path/to/BAM"
- "/absolute/path/to/BAM"
```

Input with existing recalibration table(s):
```
---
patient_id: "patient_id"
input:
BAM:
normal:
- "/absolute/path/to/BAM"
- "/absolute/path/to/BAM"
tumor:
- "/absolute/path/to/BAM"
- "/absolute/path/to/BAM"
recalibration_tables:
tyamaguchi-ucla marked this conversation as resolved.
Show resolved Hide resolved
- "/absolute/path/to/recalibration/table1"
- "/absolute/path/to/recalibration/table2"
```

For normal-only or tumour-only samples, exclude the fields for the other state.
Expand Down
8 changes: 5 additions & 3 deletions config/custom_schema_types.config
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
*/
custom_schema_types {
allowed_input_types = [
'BAM'
'BAM',
'recalibration_tables'
]
allowed_bam_types = [
'normal',
Expand Down Expand Up @@ -131,7 +132,7 @@ custom_schema_types {
/**
* Check that the entry is a list of readable file paths
*/
check_bam_list = { Map options, String name, Map properties ->
check_readable_file_list = { Map options, String name, Map properties ->
custom_schema_types.check_if_list(options[name], name)
for (item in options[name]) {
schema.check_path(item, 'r')
Expand Down Expand Up @@ -162,7 +163,8 @@ custom_schema_types {
types = [
'InputNamespace': custom_schema_types.check_input_namespace,
'InputBAMNamespace': custom_schema_types.check_bam_namespace,
'BAMEntryList': custom_schema_types.check_bam_list,
'BAMEntryList': custom_schema_types.check_readable_file_list,
'RecalibrationTableList': custom_schema_types.check_readable_file_list,
'AlignerTool': custom_schema_types.check_aligner,
'ResourceUpdateNamespace': custom_schema_types.check_resource_update_namespace,
'ResourceUpdateList': custom_schema_types.check_resource_update_list
Expand Down
57 changes: 9 additions & 48 deletions config/methods.config
Original file line number Diff line number Diff line change
Expand Up @@ -54,54 +54,6 @@ methods {
params.output_dir_base = "${params.output_dir}/${manifest.name}-${manifest.version}/${params.patient_id}/${params.docker_image_gatk.split("/")[1].replace(':', '-').toUpperCase()}"
}

/**
* Check the permissions and existence of workDir.
* If it doesn't exist, walk up the parent chain to the first existing directory and check write permission.
* If it exists, check write permission.
*
* @param dir path of the intended working directory
* @return true when `dir` (or, if `dir` does not exist, its closest existing ancestor) is writable
* @throws Exception when the directory that was checked is not writable
*/
check_workdir_permissions = { dir ->
dir_file = new File(dir)
if (dir_file.exists()) {
if (dir_file.canWrite()) {
return true
} else {
throw new Exception(" ### ERROR ### The input directory params.work_dir: ${dir} is not writeable. Please verify and try again.")
}
} else {
// Climb towards the filesystem root until an existing ancestor is found.
// NOTE(review): File.getParentFile() returns null when a path has no parent (e.g. a relative,
// single-segment path); this loop does not guard against that - confirm callers pass absolute paths.
while (!dir_file.exists()) {
dir_file = dir_file.getParentFile()
}

// The directory can only be created later if its closest existing ancestor is writable.
if (dir_file.canWrite()) {
return true
} else {
throw new Exception(" ### ERROR ### The input directory params.work_dir: ${dir} cannot be created. The closest existing parent directory ${dir_file.toString()} is not writable. Please verify permissions or change the input parameter.")
}
}
}

/**
* Choose the Nextflow working directory (`workDir`) from `params.work_dir`.
* With `params.ucla_cds` set, `params.work_dir` falls back to '/scratch' when it is missing or falsy.
* Otherwise `workDir` is only overridden when `params.work_dir` was provided.
* In both branches the directory must pass `methods.check_workdir_permissions`, which throws on failure.
*/
set_env = {
if (params.ucla_cds) {
/**
* By default, if the /scratch directory exists, set it as the Nextflow working directory
* If config file specified work_dir, set it as the Nextflow working directory
*
* WARNING: changing this directory can lead to high server latency and
* potential disk space limitations. Change with caution! The 'workDir'
* in Nextflow determines the location of intermediate and temporary files.
*/
params.work_dir = (params.containsKey('work_dir') && params.work_dir) ? params.work_dir : '/scratch'
if (methods.check_workdir_permissions(params.work_dir)) {
workDir = params.work_dir
}
} else {
// If work_dir was specified as a param and exists or can be created, set workDir. Otherwise, let Nextflow's default behavior dictate workDir
// Note: params.work_dir itself is left untouched in this branch, so it may remain unset.
if (params.containsKey('work_dir') && params.work_dir && methods.check_workdir_permissions(params.work_dir)) {
workDir = params.work_dir
}
}
}

set_pipeline_logs = {
trace.enabled = true
trace.file = "${params.log_output_dir}/nextflow-log/trace.txt"
Expand Down Expand Up @@ -189,6 +141,14 @@ methods {
}
}

/**
* Record whether the user supplied `input.recalibration_tables` and, when they did not,
* substitute a single placeholder path so that `params.input.recalibration_tables` is always a list.
*
* Sets `params.use_recal_tables` to true only when the `recalibration_tables` key is present in `params.input`.
*/
set_recal_tables = {
params.use_recal_tables = params.input.containsKey('recalibration_tables')

if (!params.use_recal_tables) {
// Placeholder entry used when no tables were provided.
// NOTE(review): this interpolates params.work_dir; if that parameter is unset at this point the
// path would begin with "null/" - confirm set_env always populates params.work_dir.
params.input['recalibration_tables'] = ["${params.work_dir}/NO_FILE.grp"]
}
}

setup = {
methods.set_env()
schema.load_custom_types("${projectDir}/config/custom_schema_types.config")
Expand All @@ -204,5 +164,6 @@ methods {
retry.setup_retry()
methods.setup_docker_cpus()
methods.verify_input_deletion()
methods.set_recal_tables()
}
}
5 changes: 5 additions & 0 deletions config/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,8 @@ input:
type: 'BAMEntryList'
required: false
help: 'Input tumor BAMs'
recalibration_tables:
type: 'RecalibrationTableList'
required: false
allow_empty: false
help: 'List of any available recalibration tables'
15 changes: 15 additions & 0 deletions input/recalibrate-BAM-multiple-input-with-recal-table.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
---
patient_id: "patient_id"
input:
BAM:
normal:
- "/absolute/path/to/BAM"
- "/absolute/path/to/BAM"
tumor:
- "/absolute/path/to/BAM"
- "/absolute/path/to/BAM"
recalibration_tables:
- "/absolute/path/to/recalibration/table/sample1"
- "/absolute/path/to/recalibration/table/sample2"
- "/absolute/path/to/recalibration/table/sample3"
- "/absolute/path/to/recalibration/table/sample4"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
patient_id: "patient_id"
input:
BAM:
normal:
- "/absolute/path/to/BAM"
recalibration_tables:
- "/absolute/path/to/recalibration/table"
11 changes: 11 additions & 0 deletions input/recalibrate-BAM-paired-input-with-recal-table.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
patient_id: "patient_id"
input:
BAM:
normal:
- "/absolute/path/to/BAM"
tumor:
- "/absolute/path/to/BAM"
recalibration_tables:
- "/absolute/path/to/recalibration/table/sample1"
- "/absolute/path/to/recalibration/table/sample2"
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
patient_id: "patient_id"
input:
BAM:
tumor:
- "/absolute/path/to/BAM"
recalibration_tables:
- "/absolute/path/to/recalibration/table"
1 change: 1 addition & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ Current Configuration:
bundle_v0_dbsnp138_vcf_gz: ${params.bundle_v0_dbsnp138_vcf_gz}
bundle_contest_hapmap_3p3_vcf_gz: ${params.bundle_contest_hapmap_3p3_vcf_gz}
intervals: ${(params.is_targeted) ?: 'WGS'}
recalibration_tables: ${params.input.recalibration_tables}

- output:
output: ${params.output_dir}
Expand Down
29 changes: 17 additions & 12 deletions module/base-recalibration.nf
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ process run_BaseRecalibrator_GATK {
path(bundle_v0_dbsnp138_vcf_gz)
path(bundle_v0_dbsnp138_vcf_gz_tbi)
path(intervals)
path(recal_tables)
tuple path(indelrealigned_bams), path(indelrealigned_bams_bai), val(sample_id)

output:
Expand All @@ -67,18 +68,21 @@ process run_BaseRecalibrator_GATK {
targeted_options = params.is_targeted ? "--intervals ${intervals} --interval-padding 100" : ""
"""
set -euo pipefail
gatk --java-options "-Xmx${(task.memory - params.gatk_command_mem_diff).getMega()}m -DGATK_STACKTRACE_ON_USER_EXCEPTION=true -Djava.io.tmpdir=${workDir}" \
BaseRecalibrator \
${all_ir_bams} \
--reference ${reference_fasta} \
--verbosity INFO \
--known-sites ${bundle_mills_and_1000g_gold_standards_vcf_gz} \
--known-sites ${bundle_known_indels_vcf_gz} \
--known-sites ${bundle_v0_dbsnp138_vcf_gz} \
--output ${sample_id}_recalibration_table.grp \
${targeted_options} \
--read-filter SampleReadFilter \
--sample ${sample_id}
if [ ! -f ${sample_id}_recalibration_table.grp ]
then
gatk --java-options "-Xmx${(task.memory - params.gatk_command_mem_diff).getMega()}m -DGATK_STACKTRACE_ON_USER_EXCEPTION=true -Djava.io.tmpdir=${workDir}" \
BaseRecalibrator \
${all_ir_bams} \
--reference ${reference_fasta} \
--verbosity INFO \
--known-sites ${bundle_mills_and_1000g_gold_standards_vcf_gz} \
--known-sites ${bundle_known_indels_vcf_gz} \
--known-sites ${bundle_v0_dbsnp138_vcf_gz} \
--output ${sample_id}_recalibration_table.grp \
${targeted_options} \
--read-filter SampleReadFilter \
--sample ${sample_id}
fi
"""
}

Expand Down Expand Up @@ -195,6 +199,7 @@ workflow recalibrate_base {
params.bundle_v0_dbsnp138_vcf_gz,
"${params.bundle_v0_dbsnp138_vcf_gz}.tbi",
base_recalibrator_intervals,
params.input.recalibration_tables,
input_ch_base_recalibrator
)

Expand Down
Loading