Skip to content

Commit

Permalink
Updated SRUtils.BamToFq and SRBamToFq with cram support and bette…
Browse files Browse the repository at this point in the history
…r memory / cpu settings (#479)

- Made finalization optional in `SRBamToFq`.
- Now `BamToFq` sorts and converts in 2 separate commands.
- Now `BamToFq` utilizes all available processors.
- `BamToFq` cpu and ram now 8 cores and 16gb respectively.
  • Loading branch information
jonn-smith authored Dec 11, 2024
1 parent 812f3a9 commit b7104df
Showing 2 changed files with 54 additions and 14 deletions.
34 changes: 25 additions & 9 deletions wdl/pipelines/TechAgnostic/Utility/SRBamToFq.wdl
Original file line number Diff line number Diff line change
@@ -6,22 +6,38 @@ import "../../../tasks/Utility/Finalize.wdl" as FF
# Converts the input alignment file to FASTQ by calling SRUtils.BamToFq and,
# when `gcs_out_root_dir` is supplied, copies the three resulting FASTQ files
# under `<gcs_out_root_dir>/SRBamToFq/<participant_name>`.
#
# NOTE(review): this listing looks like a rendered diff with the +/- markers
# removed, so some statements appear twice (e.g. the two `gcs_out_root_dir`
# declarations and the two `BamToFq` calls) — presumably the before/after
# versions of the same line. Confirm against the actual file.
workflow SRBamToFq {
input {
File bam
File? bam_index

# Optional reference files; forwarded unchanged to BamToFq.
# NOTE(review): presumably required for CRAM input — confirm.
File? reference_fasta
File? reference_fasta_index
File? reference_dict

# Used as the BamToFq output prefix and as the final output subdirectory name.
String participant_name

String gcs_out_root_dir
String? gcs_out_root_dir
}

String outdir = sub(gcs_out_root_dir, "/$", "") + "/SRBamToFq/~{participant_name}"
# Run the conversion, passing through the optional index and reference inputs.
call SRUtils.BamToFq {
input:
bam = bam,
bam_index = bam_index,
reference_fasta = reference_fasta,
reference_fasta_index = reference_fasta_index,
reference_dict = reference_dict,
prefix = participant_name
}

call SRUtils.BamToFq { input: bam = bam, prefix = participant_name }
# Finalization runs only when an output root directory was provided.
if (defined(gcs_out_root_dir)) {
# Strip a single trailing slash from the root before appending the per-participant path.
String outdir = sub(select_first([gcs_out_root_dir]), "/$", "") + "/SRBamToFq/~{participant_name}"

call FF.FinalizeToFile as FinalizeFqEnd1 { input: outdir = outdir, file = BamToFq.fq_end1 }
call FF.FinalizeToFile as FinalizeFqEnd2 { input: outdir = outdir, file = BamToFq.fq_end2 }
call FF.FinalizeToFile as FinalizeFqUnpaired { input: outdir = outdir, file = BamToFq.fq_unpaired }
call FF.FinalizeToFile as FinalizeFqEnd1 { input: outdir = outdir, file = BamToFq.fq_end1 }
call FF.FinalizeToFile as FinalizeFqEnd2 { input: outdir = outdir, file = BamToFq.fq_end2 }
call FF.FinalizeToFile as FinalizeFqUnpaired { input: outdir = outdir, file = BamToFq.fq_unpaired }
}

output {
File fq_end1 = FinalizeFqEnd1.gcs_path
File fq_end2 = FinalizeFqEnd2.gcs_path
File fq_unpaired = FinalizeFqUnpaired.gcs_path
# The select_first forms use the finalized path when it is defined and
# otherwise fall back to the file produced directly by BamToFq.
File fq_end1 = select_first([FinalizeFqEnd1.gcs_path, BamToFq.fq_end1])
File fq_end2 = select_first([FinalizeFqEnd2.gcs_path, BamToFq.fq_end2])
File fq_unpaired = select_first([FinalizeFqUnpaired.gcs_path, BamToFq.fq_unpaired])
}
}
34 changes: 29 additions & 5 deletions wdl/tasks/Utility/SRUtils.wdl
Original file line number Diff line number Diff line change
@@ -5,22 +5,46 @@ import "../../structs/Structs.wdl"
task BamToFq {
input {
File bam
File? bam_index

File? reference_fasta
File? reference_fasta_index
File? reference_dict

String prefix = "out"

RuntimeAttr? runtime_attr_override
}
Int disk_size = 1 + 4*ceil(size(bam, "GB"))
String ref_arg = if defined(reference_fasta) then " --reference " else ""
Int disk_size = 10 + 20*ceil(size(bam, "GB"))
command <<<
# Make sure we use all our processors:
np=$(cat /proc/cpuinfo | grep ^processor | tail -n1 | awk '{print $NF+1}')
if [[ ${np} -gt 2 ]] ; then
np=$((np-1))
fi
set -euxo pipefail
samtools sort -n ~{bam} | samtools bam2fq \
# Have samtools sort use all but one of our processors:
# NOTE: the `-@` option specifies ADDITIONAL threads, not the total number of threads.
samtools sort -@$((np-1)) -n ~{ref_arg} ~{reference_fasta} ~{bam} -O bam -o tmp.bam
# Have samtools bam2fq use all but one of our processors:
# NOTE: the `-@` option specifies ADDITIONAL threads, not the total number of threads.
samtools bam2fq -@$((np-1)) \
-n \
-s /dev/null \
-c 2 \
~{ref_arg} ~{reference_fasta} \
-1 ~{prefix}.end1.fq.gz \
-2 ~{prefix}.end2.fq.gz \
-0 ~{prefix}.unpaired.fq.gz
-0 ~{prefix}.unpaired.fq.gz \
tmp.bam
>>>
output {
@@ -31,8 +55,8 @@ task BamToFq {

#########################
RuntimeAttr default_attr = object {
cpu_cores: 4,
mem_gb: 32,
cpu_cores: 8,
mem_gb: 16,
disk_gb: disk_size,
boot_disk_gb: 25,
preemptible_tries: 1,

0 comments on commit b7104df

Please sign in to comment.