Skip to content

Commit

Permalink
Merge pull request #2 from mr-c/flexi_runtime
Browse files Browse the repository at this point in the history
Use biocontainers, when possible
  • Loading branch information
cmball1 authored Aug 31, 2018
2 parents fdd1199 + e578e5f commit 578066e
Show file tree
Hide file tree
Showing 18 changed files with 367 additions and 183 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
doc: |
Compare 2 input BAM files and report results.
Exit 0 if success.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
doc: |
Calculates the MD5 hash of the input file and compares it to the input MD5 hash.
If hashes match: Exit 0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
doc: |
Calculate the MD5 hash for the input file.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
doc: |
Compare 2 input BAM files using [BamUtil diff](https://genome.sph.umich.edu/wiki/BamUtil:_diff)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: ExpressionTool

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
cwlVersion: v1.0
class: CommandLineTool
baseCommand: []
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
doc: |
Extract all files from archive.tar and filter through gzip

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
doc: |
A workflow to verify the proper execution of [TOPMed RNA-seq Workflow](https://github.com/heliumdatacommons/cwl_workflows/blob/master/topmed-workflows/TOPMed_RNAseq_pipeline/rnaseq_pipeline_fastq.cwl)

Expand All @@ -20,29 +21,23 @@ inputs:
type: File[]
prefix_str:
type: string
threads:
type: int
memory:
type: int
rsem_ref_dir_tar:
type: File
max_frag_len:
type: int
estimate_rspd:
type: string
type: boolean
is_stranded:
type: string
type: boolean
paired_end:
type: string
type: boolean
genes_gtf:
type: File
genome_fasta:
type: File
secondaryFiles:
- .fai
- ^.dict
java_path:
type: string
rnaseqc_flags:
type: string[]
# gatk_flags:
Expand Down Expand Up @@ -72,8 +67,6 @@ inputs:
# type: string
# hash_exon_counts:
# type: string
hash_count_metrics:
type: string
# hash_count_outputs:
# type: string
checker_star_output_bam:
Expand Down Expand Up @@ -157,16 +150,13 @@ steps:
star_index: untar_star_index/untarred_dir
fastqs: fastqs
prefix_str: prefix_str
threads: threads
memory: memory
rsem_ref_dir: untar_rsem_reference/untarred_dir
max_frag_len: max_frag_len
estimate_rspd: estimate_rspd
is_stranded: is_stranded
paired_end: paired_end
genes_gtf: genes_gtf
genome_fasta: genome_fasta
java_path: java_path
rnaseqc_flags: rnaseqc_flags
# gatk_flags: gatk_flags
out:
Expand Down Expand Up @@ -310,7 +300,7 @@ steps:
# out: [out_hash_string]

$namespaces:
s: https://schema.org/
s: http://schema.org/

$schemas:
- http://dublincore.org/2012/06/14/dcterms.rdf
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
doc: |
A workflow to verify the proper execution of [TOPMed RNA-seq Workflow](https://github.com/heliumdatacommons/cwl_workflows/blob/master/topmed-workflows/TOPMed_RNAseq_pipeline/rnaseq_pipeline_fastq.cwl)

Expand All @@ -18,29 +19,23 @@ inputs:
type: File[]
prefix_str:
type: string
threads:
type: int
memory:
type: int
rsem_ref_dir:
type: Directory
max_frag_len:
type: int
estimate_rspd:
type: string
type: boolean
is_stranded:
type: string
type: boolean
paired_end:
type: string
type: boolean
genes_gtf:
type: File
genome_fasta:
type: File
secondaryFiles:
- .fai
- ^.dict
java_path:
type: string
rnaseqc_flags:
type: string[]
# gatk_flags:
Expand Down Expand Up @@ -70,8 +65,6 @@ inputs:
# type: string
# hash_exon_counts:
# type: string
hash_count_metrics:
type: string
# hash_count_outputs:
# type: string
checker_star_output_bam:
Expand Down Expand Up @@ -143,16 +136,13 @@ steps:
star_index: star_index
fastqs: fastqs
prefix_str: prefix_str
threads: threads
memory: memory
rsem_ref_dir: rsem_ref_dir
max_frag_len: max_frag_len
estimate_rspd: estimate_rspd
is_stranded: is_stranded
paired_end: paired_end
genes_gtf: genes_gtf
genome_fasta: genome_fasta
java_path: java_path
rnaseqc_flags: rnaseqc_flags
# gatk_flags: gatk_flags
out:
Expand Down Expand Up @@ -296,7 +286,7 @@ steps:
# out: [out_hash_string]

$namespaces:
s: https://schema.org/
s: http://schema.org/

$schemas:
- http://dublincore.org/2012/06/14/dcterms.rdf
Expand Down
16 changes: 8 additions & 8 deletions topmed-workflows/TOPMed_RNAseq_pipeline/indexbam.cwl
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
#!/usr/bin/env cwl-runner
doc: |
A wrapper for running `samtools index <bam>`.

cwlVersion: v1.0
class: CommandLineTool
id: "run-index-bam"
label: "run-index-bam"
baseCommand: ["samtools", "index"]
baseCommand: [ samtools, index ]

requirements:
- class: InlineJavascriptRequirement
- class: DockerRequirement
dockerPull: heliumdatacommons/topmed-rnaseq:latest
- class: InitialWorkDirRequirement
listing:
- $(inputs.input_bam)
DockerRequirement:
dockerPull: quay.io/biocontainers/samtools:1.8--4
InitialWorkDirRequirement:
listing:
- $(inputs.input_bam)

inputs:
input_bam:
type: File
inputBinding:
position: 1
valueFrom: $(self.basename)

outputs:
bam_index:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ rsem_ref_dir:
class: Directory
location: /rsem_ref/
max_frag_len: 1000
estimate_rspd: "true"
is_stranded: "true"
paired_end: "true"
estimate_rspd: true
is_stranded: true
paired_end: true
genes_gtf: {
class: File,
path: gencode.v26.annotation.withTranscriptID.gtf
Expand All @@ -22,9 +22,6 @@ genome_fasta: {
class: File,
path: Homo_sapiens_assembly38_noALT_noHLA_noDecoy_ERCC.fasta
}
java_path: /usr/lib/jvm/java-1.7.0-openjdk-amd64/bin/java
memory: 8
rnaseqc_flags: ["noDoC", "strictMode"]
# gatk_flags: []
prefix_str: "LC_C13_cRNA"
threads: 4
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ transcriptome_bam: {
}
prefix_str: "LC_C13_cRNA"
max_frag_len: 1000
estimate_rspd: "true"
is_stranded: "true"
paired_end: "true"
estimate_rspd: true
is_stranded: true
paired_end: true
threads: 4
40 changes: 24 additions & 16 deletions topmed-workflows/TOPMed_RNAseq_pipeline/markduplicates.cwl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
doc: |
A CWL wrapper for [run_MarkDuplicates.py](https://github.com/broadinstitute/gtex-pipeline/blob/master/rnaseq/src/run_MarkDuplicates.py)

Expand All @@ -9,35 +10,42 @@ doc: |

cwlVersion: v1.0
class: CommandLineTool
id: "run-MarkDuplicates"
label: "run-MarkDuplicates"
baseCommand: ["python3", "-u", "/src/run_MarkDuplicates.py"]
baseCommand: [java]

requirements:
- class: DockerRequirement
dockerPull: heliumdatacommons/topmed-rnaseq:latest
requirements: # turn back into a hint when the biocontainer has its classpath
# updated
EnvVarRequirement:
envDef:
CLASSPATH: /usr/local/share/picard-2.9.2-2/picard.jar
DockerRequirement:
dockerPull: quay.io/biocontainers/picard:2.9.2--2

inputs:
input_bam:
type: File
inputBinding:
position: 1
prefix_str:
type: string
inputBinding:
position: 2
memory:
type: int
inputBinding:
position: 3
prefix: --memory

# Command line passed to `java` (the baseCommand) to run Picard MarkDuplicates.
# The entries are emitted in the order listed, so the JVM option must stay
# first, followed by the main class, the Picard tool name and its KEY=VALUE
# options.
arguments:
# JVM heap limit taken from the RAM the runner allots to this step;
# `separate: false` glues prefix and value together (-Xmx<ram>M).
- prefix: -Xmx
valueFrom: $(runtime.ram)M
separate: false
# Picard entry point; found through the CLASSPATH set by EnvVarRequirement.
- picard.cmdline.PicardCommandLine
- MarkDuplicates
- I=$(inputs.input_bam.path)
# Output file names below must stay in sync with the `glob` patterns of the
# bam_file and metrics outputs.
- O=$(runtime.outdir)/$(inputs.input_bam.nameroot).md.bam
- M=$(runtime.outdir)/$(inputs.prefix_str).marked_dup_metrics.txt
- ASSUME_SORT_ORDER=coordinate
- OPTICAL_DUPLICATE_PIXEL_DISTANCE=100

outputs:
bam_file:
type: File
outputBinding:
glob: "*.md.bam"
glob: $(inputs.input_bam.nameroot).md.bam
metrics:
type: File
outputBinding:
glob: "*.marked_dup_metrics.txt"
glob: $(inputs.prefix_str).marked_dup_metrics.txt

18 changes: 7 additions & 11 deletions topmed-workflows/TOPMed_RNAseq_pipeline/rna_seqc.cwl
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#!/usr/bin/env cwl-runner
doc: |
A CWL wrapper for [run_rnaseqc.py](https://github.com/heliumdatacommons/cwl_workflows/blob/master/topmed-workflows/TOPMed_RNAseq_pipeline/src/run_rnaseqc.py) duplicated from [run_rnaseqc.py](https://github.com/broadinstitute/gtex-pipeline/blob/master/rnaseq/src/run_rnaseqc.py) with minor modifications.

Expand All @@ -9,12 +10,12 @@ doc: |

cwlVersion: v1.0
class: CommandLineTool
id: "run-seqc"
label: "run-seqc"
# run_rnaseqc.py is not an executable file in the docker container.
baseCommand: ["python3", "/src/run_rnaseqc.py"]

requirements:
InlineJavascriptRequirement: {}
DockerRequirement:
dockerPull: heliumdatacommons/topmed-rnaseq:latest

Expand All @@ -40,16 +41,6 @@ inputs:
type: string
inputBinding:
position: 4
java_path:
type: string
inputBinding:
position: 5
prefix: --java
memory:
type: int
inputBinding:
position: 6
prefix: --memory
rnaseqc_flags:
type:
type: array
Expand All @@ -70,6 +61,11 @@ inputs:
# position: 8
# prefix: --gatk_flags

# Fixed options appended to the run_rnaseqc.py command line. They replace the
# former `memory` and `java_path` inputs so callers no longer supply them.
arguments:
  # runtime.ram is expressed in MiB, while --memory is a whole number of GB.
  # Round down and never go below 1, so an allocation that is small or not a
  # multiple of 1024 MiB cannot produce a fractional or zero value
  # (NOTE(review): presumably the script forwards this to a JVM heap flag,
  # which only accepts integers - confirm against run_rnaseqc.py).
  - prefix: --memory
    valueFrom: $(Math.max(1, Math.floor(runtime.ram / 1024)))
  # JVM binary to use; same path that the removed java_path input carried in
  # the example job file.
  - prefix: --java
    valueFrom: /usr/lib/jvm/java-1.7.0-openjdk-amd64/bin/java
outputs:
gene_rpkm:
type: File
Expand Down
Loading

0 comments on commit 578066e

Please sign in to comment.