Skip to content

Commit

Permalink
Merge pull request #428 from broadinstitute/tws_cleanup
Browse files Browse the repository at this point in the history
repo cleanup
  • Loading branch information
tedsharpe authored Oct 25, 2023
2 parents f0abcc4 + b05f242 commit e0c802c
Show file tree
Hide file tree
Showing 44 changed files with 3,684 additions and 3,497 deletions.
179 changes: 168 additions & 11 deletions wdl/deprecated/ONT10x.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ workflow ONT10x {
Int splint_num = a.left
File fq = a.right

# call Utils.FastaToSam as FastaToSam { input: fasta = C3POa.consensus }
# call FastaToSam { input: fasta = C3POa.consensus }
# call AnnotateAdapters { input: bam = FastaToSam.output_bam }
String rg_consensus = "@RG\\tID:~{SID}.consensus~{splint_num}\\tSM:~{SM}\\tPL:~{PL}\\tPU:~{PU}\\tDT:~{DT}"
Expand All @@ -82,7 +82,7 @@ workflow ONT10x {
map_preset = "splice"
}

#call Utils.CountFastaRecords as CountConsensusReadsInPartition { input: fasta = fq }
#call CountFastaRecords as CountConsensusReadsInPartition { input: fasta = fq }
}

# File align_subreads_bam = AlignSubreads.aligned_bam
Expand All @@ -97,17 +97,17 @@ workflow ONT10x {
call CountNumPasses as CountNumPasses3 { input: fastq = C3POa.subreads3 }
call CountNumPasses as CountNumPasses4 { input: fastq = C3POa.subreads4 }

call Utils.CountFastqRecords as CountSubreadsInPartition1 { input: fastq = C3POa.subreads1 }
call Utils.CountFastqRecords as CountSubreadsInPartition2 { input: fastq = C3POa.subreads2 }
call Utils.CountFastqRecords as CountSubreadsInPartition3 { input: fastq = C3POa.subreads3 }
call Utils.CountFastqRecords as CountSubreadsInPartition4 { input: fastq = C3POa.subreads4 }
call CountFastqRecords as CountSubreadsInPartition1 { input: fastq = C3POa.subreads1 }
call CountFastqRecords as CountSubreadsInPartition2 { input: fastq = C3POa.subreads2 }
call CountFastqRecords as CountSubreadsInPartition3 { input: fastq = C3POa.subreads3 }
call CountFastqRecords as CountSubreadsInPartition4 { input: fastq = C3POa.subreads4 }

# call Utils.CountFastqRecords as CountAnnotatedReadsInPartition { input: fastq = AnnotateAdapters.annotated_fq }
# call CountFastqRecords as CountAnnotatedReadsInPartition { input: fastq = AnnotateAdapters.annotated_fq }
call Utils.CountFastaRecords as CountConsensusReadsInPartition1 { input: fasta = C3POa.consensus1 }
call Utils.CountFastaRecords as CountConsensusReadsInPartition2 { input: fasta = C3POa.consensus2 }
call Utils.CountFastaRecords as CountConsensusReadsInPartition3 { input: fasta = C3POa.consensus3 }
call Utils.CountFastaRecords as CountConsensusReadsInPartition4 { input: fasta = C3POa.consensus4 }
call CountFastaRecords as CountConsensusReadsInPartition1 { input: fasta = C3POa.consensus1 }
call CountFastaRecords as CountConsensusReadsInPartition2 { input: fasta = C3POa.consensus2 }
call CountFastaRecords as CountConsensusReadsInPartition3 { input: fasta = C3POa.consensus3 }
call CountFastaRecords as CountConsensusReadsInPartition4 { input: fasta = C3POa.consensus4 }
}

call Utils.Sum as CountNoSplintReadsInRun { input: ints = C3POa.no_splint_reads }
Expand Down Expand Up @@ -365,3 +365,160 @@ task CountNumPasses {
docker: select_first([runtime_attr.docker, default_attr.docker])
}
}
task FastaToSam {

    meta {
        description: "Convert a fasta file to a sam file"
    }

    parameter_meta {
        fasta: "The fasta file"
        runtime_attr_override: "Override the default runtime attributes"
    }

    input {
        File fasta

        RuntimeAttr? runtime_attr_override
    }

    # Disk request: 3.25x the FASTA size in GiB (rounded up) plus a fixed 20 GiB.
    Float scale_factor = 3.25
    Int scratch_gb = ceil(scale_factor * size(fasta, "GiB")) + 20

    # Baseline resources; any field set in runtime_attr_override takes precedence
    # through the per-field select_first calls in the runtime section.
    RuntimeAttr defaults = object {
        cpu_cores: 1,
        mem_gb: 1,
        disk_gb: scratch_gb,
        boot_disk_gb: 10,
        preemptible_tries: 2,
        max_retries: 1,
        docker: "us.gcr.io/broad-dsp-lrma/lr-align:0.1.28"
    }
    RuntimeAttr resolved = select_first([runtime_attr_override, defaults])

    # The conversion is delegated entirely to the script shipped in the image.
    command <<<
        python /usr/local/bin/prepare_run.py ~{fasta}
    >>>

    runtime {
        cpu: select_first([resolved.cpu_cores, defaults.cpu_cores])
        memory: select_first([resolved.mem_gb, defaults.mem_gb]) + " GiB"
        disks: "local-disk " + select_first([resolved.disk_gb, defaults.disk_gb]) + " HDD"
        bootDiskSizeGb: select_first([resolved.boot_disk_gb, defaults.boot_disk_gb])
        preemptible: select_first([resolved.preemptible_tries, defaults.preemptible_tries])
        maxRetries: select_first([resolved.max_retries, defaults.max_retries])
        docker: select_first([resolved.docker, defaults.docker])
    }

    output {
        # Collected from the working directory; presumably written by
        # prepare_run.py -- confirm against the script in the image.
        File output_bam = "unmapped.bam"
    }
}
task CountFastqRecords {

    meta {
        description: "Count the number of records in a fastq file"
    }

    parameter_meta {
        fastq: "The fastq file"
        runtime_attr_override: "Override the default runtime attributes"
    }

    input {
        File fastq

        RuntimeAttr? runtime_attr_override
    }

    # Disk request: twice the input size in GiB (rounded up) plus 1 GiB.
    Int disk_size = 1 + ceil(2 * size(fastq, "GiB"))

    command <<<
        set -euxo pipefail

        FILE="~{fastq}"

        # A record is counted as four lines, so the count is (line count) / 4.
        # Plain and gzip-compressed inputs are told apart by file extension.
        if [[ "$FILE" =~ \.(fastq|fq)$ ]]; then
            awk 'END { print NR / 4 }' "$FILE"
        elif [[ "$FILE" =~ \.(fastq|fq)\.gz$ ]]; then
            zcat "$FILE" | awk 'END { print NR / 4 }'
        else
            # Without this branch nothing is printed, the command still exits 0,
            # and the failure only surfaces later as an opaque read_int error.
            echo "Unsupported extension for '$FILE': expected .fastq, .fq, .fastq.gz or .fq.gz" >&2
            exit 1
        fi
    >>>

    output {
        # The only thing written to stdout is the count (set -x traces go to stderr).
        Int num_records = read_int(stdout())
    }

    #########################
    RuntimeAttr default_attr = object {
        cpu_cores: 1,
        mem_gb: 1,
        disk_gb: disk_size,
        boot_disk_gb: 10,
        preemptible_tries: 2,
        max_retries: 1,
        docker: "us.gcr.io/broad-dsp-lrma/lr-align:0.1.28"
    }
    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
    runtime {
        cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
        memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
        disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
        bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
        preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
        maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
        docker: select_first([runtime_attr.docker, default_attr.docker])
    }
}
task CountFastaRecords {

    meta {
        description: "Count the number of records in a fasta file"
    }

    parameter_meta {
        fasta: "The fasta file"
        runtime_attr_override: "Override the default runtime attributes"
    }

    input {
        File fasta

        RuntimeAttr? runtime_attr_override
    }

    # Disk request: twice the input size rounded up to whole GiB, plus 1 GiB.
    Int disk_size = 1 + 2*ceil(size(fasta, "GiB"))

    command <<<
        set -euo pipefail

        # Count header lines, i.e. lines that begin with '>'.
        # grep -c prints 0 and exits 1 when nothing matches; that is a valid
        # result, so only exit status 1 is tolerated. Any other non-zero status
        # (e.g. 2 for an unreadable file) still fails the task instead of being
        # masked by an unconditional `exit 0`.
        grep -c '^>' "~{fasta}" || [[ $? -eq 1 ]]
    >>>

    output {
        Int num_records = read_int(stdout())
    }

    #########################
    RuntimeAttr default_attr = object {
        cpu_cores: 1,
        mem_gb: 1,
        disk_gb: disk_size,
        boot_disk_gb: 10,
        preemptible_tries: 2,
        max_retries: 1,
        docker: "us.gcr.io/broad-dsp-lrma/lr-align:0.1.28"
    }
    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
    runtime {
        cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
        memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
        disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
        bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
        preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
        maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
        docker: select_first([runtime_attr.docker, default_attr.docker])
    }
}
117 changes: 114 additions & 3 deletions wdl/deprecated/PBCCS10x.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -115,22 +115,22 @@ workflow PBCCS10x {
File annotated_bam = select_first([ MergeAllAnnotated.merged_bam, MergeAnnotated.merged_bam[0] ])
File annotated_bai = select_first([ MergeAllAnnotated.merged_bai, MergeAnnotated.merged_bai[0] ])

call Utils.GrepCountBamRecords as GrepAnnotatedReadsWithCBC {
call GrepCountBamRecords as GrepAnnotatedReadsWithCBC {
input:
bam = annotated_bam,
prefix = "num_annotated_with_cbc",
regex = "CB:Z:[ACGT]"
}

call Utils.GrepCountBamRecords as GrepAnnotatedReadsWithCBCAndUniqueAlignment {
call GrepCountBamRecords as GrepAnnotatedReadsWithCBCAndUniqueAlignment {
input:
bam = annotated_bam,
samfilter = "-F 0x100",
prefix = "num_annotated_with_cbc_and_unique_alignment",
regex = "CB:Z:[ACGT]"
}

call Utils.BamToTable { input: bam = annotated_bam, prefix = "reads_aligned_annotated.table" }
call BamToTable { input: bam = annotated_bam, prefix = "reads_aligned_annotated.table" }

# compute alignment metrics
call AM.AlignedMetrics as PerSampleMetrics {
Expand Down Expand Up @@ -171,3 +171,114 @@ workflow PBCCS10x {
outdir = outdir + "/alignments"
}
}

task GrepCountBamRecords {

    meta {
        description : "Count the number of records in a bam file that match a given regex."
    }

    parameter_meta {
        bam: "BAM file to be filtered."
        samfilter: "[Optional] Extra arguments to pass into samtools view."
        regex: "Regex to match against the bam file."
        invert: "[Optional] Invert the regex match."
        prefix: "[Optional] Prefix string to name the output file (Default: sum)."
        runtime_attr_override: "Override the default runtime attributes."
    }

    input {
        File bam
        String samfilter = ""
        String regex
        Boolean invert = false
        String prefix = "sum"

        RuntimeAttr? runtime_attr_override
    }

    # Disk request: twice the BAM size in GiB (rounded up) plus 1 GiB.
    Int disk_size = 1 + ceil(2 * size(bam, "GiB"))

    # -c counts matching lines; -vc counts the lines that do NOT match.
    String arg = if invert then "-vc" else "-c"

    command <<<
        set -euxo pipefail

        # samfilter is intentionally left unquoted: it may carry several
        # space-separated samtools arguments (e.g. "-F 0x100").
        #
        # The regex is single-quoted so the shell cannot glob-expand bracket
        # expressions such as [ACGT], and passed via -e so a pattern starting
        # with '-' is not mistaken for an option.
        #
        # grep -c prints 0 and exits 1 when nothing matches. Under
        # `set -e -o pipefail` that would fail the task, although a count of
        # zero is a legitimate answer, so exit status 1 (and only 1) is accepted.
        samtools view ~{samfilter} "~{bam}" \
            | { grep ~{arg} -e '~{regex}' || [[ $? -eq 1 ]]; } \
            > "~{prefix}.txt"
    >>>

    output {
        Int num_records = read_int("~{prefix}.txt")
        File num_records_file = "~{prefix}.txt"
    }

    #########################
    RuntimeAttr default_attr = object {
        cpu_cores: 1,
        mem_gb: 1,
        disk_gb: disk_size,
        boot_disk_gb: 10,
        preemptible_tries: 2,
        max_retries: 1,
        docker: "us.gcr.io/broad-dsp-lrma/lr-align:0.1.28"
    }
    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
    runtime {
        cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
        memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
        disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
        bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
        preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
        maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
        docker: select_first([runtime_attr.docker, default_attr.docker])
    }
}
task BamToTable {

    meta {
        description : "Convert a BAM file to a table txt file."
    }

    parameter_meta {
        bam: "BAM file to be converted."
        prefix: "Prefix for the output table."
        runtime_attr_override: "Override the default runtime attributes."
    }

    input {
        File bam
        String prefix

        RuntimeAttr? runtime_attr_override
    }

    # Disk request: twice the BAM size rounded up to whole GB, plus 1.
    Int disk_size = 1 + 2*ceil(size(bam, "GB"))

    command <<<
        # Without -e / pipefail the pipeline's status is gzip's alone, so a
        # samtools or perl failure would silently yield a truncated or empty table.
        set -euxo pipefail

        # One tab-separated row per alignment record:
        #   whitespace-split fields 1-5 of the record, the length of field 10,
        #   the captured values of the NM, AS, ZA, ZU, CR and CB tags (empty
        #   when a tag does not match), field 2 (the flag) again, and then the
        #   flag decoded bit by bit: 0x1 paired/unpaired, 0x4 unmapped/mapped,
        #   0x10 rev/fwd, 0x100 secondary/primary,
        #   0x800 supplementary/non_supplementary.
        samtools view "~{bam}" \
            | perl -n -e '($nm) = $_ =~ /NM:i:(\d+)/; ($as) = $_ =~ /AS:i:(\d+)/; ($za) = $_ =~ /ZA:Z:(\w+|\.)/; ($zu) = $_ =~ /ZU:Z:(\w+|\.)/; ($cr) = $_ =~ /CR:Z:(\w+|\.)/; ($cb) = $_ =~ /CB:Z:(\w+|\.)/; @a = split(/\s+/); print join("\t", $a[0], $a[1], $a[2], $a[3], $a[4], length($a[9]), $nm, $as, $za, $zu, $cr, $cb, $a[1], ($a[1] & 0x1 ? "paired" : "unpaired"), ($a[1] & 0x4 ? "unmapped" : "mapped"), ($a[1] & 0x10 ? "rev" : "fwd"), ($a[1] & 0x100 ? "secondary" : "primary"), ($a[1] & 0x800 ? "supplementary" : "non_supplementary")) . "\n"' \
            | gzip > "~{prefix}.txt.gz"
    >>>

    output {
        File table = "~{prefix}.txt.gz"
    }

    #########################
    RuntimeAttr default_attr = object {
        cpu_cores: 1,
        mem_gb: 1,
        disk_gb: disk_size,
        boot_disk_gb: 10,
        preemptible_tries: 2,
        max_retries: 1,
        docker: "us.gcr.io/broad-dsp-lrma/lr-align:0.1.28"
    }
    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
    runtime {
        cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
        memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
        disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
        bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
        preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
        maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
        docker: select_first([runtime_attr.docker, default_attr.docker])
    }
}
5 changes: 3 additions & 2 deletions wdl/deprecated/PBCCSDemultiplexWholeGenome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ version 1.0
## and figures are produced along the way.
##########################################################################################
import "tasks/Utility/PBUtils.wdl" as DeprecatedPB
import "../tasks/Utility/PBUtils.wdl" as PB
import "../tasks/Utility/Utils.wdl" as Utils
import "../tasks/Alignment/AlignReads.wdl" as AR
Expand Down Expand Up @@ -91,8 +92,8 @@ workflow PBCCSDemultiplexWholeGenome {
call PB.MakeSummarizedDemultiplexingReport as SummarizedDemuxReportPNG { input: report = Demultiplex.report }
call PB.MakeDetailedDemultiplexingReport as DetailedDemuxReportPNG { input: report = Demultiplex.report, type="png" }
call PB.MakeDetailedDemultiplexingReport as DetailedDemuxReportPDF { input: report = Demultiplex.report, type="pdf" }
call PB.MakePerBarcodeDemultiplexingReports as PerBarcodeDetailedDemuxReportPNG { input: report = Demultiplex.report, type="png" }
call PB.MakePerBarcodeDemultiplexingReports as PerBarcodeDetailedDemuxReportPDF { input: report = Demultiplex.report, type="pdf" }
call DeprecatedPB.MakePerBarcodeDemultiplexingReports as PerBarcodeDetailedDemuxReportPNG { input: report = Demultiplex.report, type="png" }
call DeprecatedPB.MakePerBarcodeDemultiplexingReports as PerBarcodeDetailedDemuxReportPDF { input: report = Demultiplex.report, type="pdf" }

# scatter on each demultiplexed BAM file
scatter (demux_bam in Demultiplex.demux_bams) {
Expand Down
5 changes: 3 additions & 2 deletions wdl/deprecated/PBCLRDemultiplexWholeGenome.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ version 1.0
## produced along the way.
##########################################################################################
import "tasks/Utility/PBUtils.wdl" as DeprecatedPB
import "../tasks/Utility/PBUtils.wdl" as PB
import "../tasks/Utility/Utils.wdl" as Utils
import "../tasks/Alignment/AlignReads.wdl" as AR
Expand Down Expand Up @@ -67,8 +68,8 @@ workflow PBCLRDemultiplexWholeGenome {
call PB.MakeSummarizedDemultiplexingReport as SummarizedDemuxReportPNG { input: report = Demultiplex.report }
call PB.MakeDetailedDemultiplexingReport as DetailedDemuxReportPNG { input: report = Demultiplex.report, type="png" }
call PB.MakeDetailedDemultiplexingReport as DetailedDemuxReportPDF { input: report = Demultiplex.report, type="pdf" }
call PB.MakePerBarcodeDemultiplexingReports as PerBarcodeDetailedDemuxReportPNG { input: report = Demultiplex.report, type="png" }
call PB.MakePerBarcodeDemultiplexingReports as PerBarcodeDetailedDemuxReportPDF { input: report = Demultiplex.report, type="pdf" }
call DeprecatedPB.MakePerBarcodeDemultiplexingReports as PerBarcodeDetailedDemuxReportPNG { input: report = Demultiplex.report, type="png" }
call DeprecatedPB.MakePerBarcodeDemultiplexingReports as PerBarcodeDetailedDemuxReportPDF { input: report = Demultiplex.report, type="pdf" }

# scatter on each demultiplexed BAM file
scatter (demux_bam in Demultiplex.demux_bams) {
Expand Down
Loading

0 comments on commit e0c802c

Please sign in to comment.