Skip to content

Commit

Permalink
Merge branch 'localdata' into master
Browse files Browse the repository at this point in the history
  • Loading branch information
mpoelchau authored Sep 21, 2021
2 parents 0b58c25 + 0a856c7 commit 79a9357
Show file tree
Hide file tree
Showing 11 changed files with 45 additions and 399 deletions.
30 changes: 22 additions & 8 deletions final-workflow-short.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,46 +38,60 @@ scientific_name: [
Saccharomyces, #genus
eubayanus #species
]
#Genomic fasta name and URL
#Genomic FASTA name and URL. The URL is recorded for metadata only; if no URL is available, enter 'NA'.
genome_fasta_name: [
GCF_001298625.1_SEUB3.0_genomic.fna
]
url_genomic_fasta: [
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_genomic.fna.gz
]
#The file at the path below must already be unzipped (an uncompressed file, not the .gz archive).
path_genomic_fasta:
class: File
path: /app/data/GCF_001298625.1_SEUB3.0_genomic.fna
#Protein fasta name(s) and url(s)
protein_fasta_name: [
GCF_001298625.1_SEUB3.0_translated_cds.faa
]
url_protein_fasta: [
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_translated_cds.faa.gz
]
path_protein_fasta:
class: File
path: /app/data/GCF_001298625.1_SEUB3.0_translated_cds.faa
#Transcript fasta name and URL
transcript_fasta_name: [
GCF_001298625.1_SEUB3.0_rna_from_genomic.fna
]
url_transcript_fasta: [
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_rna_from_genomic.fna.gz
]
path_transcript_fasta:
class: File
path: /app/data/GCF_001298625.1_SEUB3.0_rna_from_genomic.fna
#CDS fasta name and URL
cds_fasta_name: [
GCF_001298625.1_SEUB3.0_cds_from_genomic.fna
]
url_cds_fasta: [
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_cds_from_genomic.fna.gz
]
path_cds_fasta:
class: File
path: /app/data/GCF_001298625.1_SEUB3.0_cds_from_genomic.fna
#GFF name and URL
url_genomic_gff: [
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_genomic.gff.gz
]
path_genomic_gff:
class: File
path: /app/data/GCF_001298625.1_SEUB3.0_genomic.gff

gff_name: [
GCF_001298625.1_SEUB3.0_genomic.gff
]
gff_release_number: 100
#-------------------------------------------------------------------------------
url_md5checksums: [
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/md5checksums.txt
url_genomic_gff: [
https://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/001/298/625/GCF_001298625.1_SEUB3.0/GCF_001298625.1_SEUB3.0_genomic.gff.gz
]
gff_release_number: 100

#-------------------------------------------------------------------------------
deepPATH_genomic_fasta: [
scaffold #the containing directory for the assembly. Don't change this unless you have to.
Expand Down
81 changes: 20 additions & 61 deletions final-workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -13,72 +13,36 @@ inputs:
hmmerdb_Path_stage: string[]
tree: string[]
scientific_name: string[]
url_md5checksums: string[]
deepPATH_genomic_fasta: string[]
url_genomic_fasta: string[]
path_genomic_fasta: File
deepPATH_analyses: string[]
url_genomic_gff: string[]
path_genomic_gff: File
gff_release_number: int
url_protein_fasta: string[]
url_transcript_fasta: string[]
url_cds_fasta: string[]
path_protein_fasta: File
path_transcript_fasta: File
path_cds_fasta: File
deepPATH_apollo2_data: string[]
deepPATH_bigwig: string[]
organization: string
accession: string
link_to_publication: string
url_genomic_fasta: string[]
url_genomic_gff: string[]
url_protein_fasta: string[]
url_transcript_fasta: string[]
url_cds_fasta: string[]

steps:
#step1
download:
run: flow_download/workflow.cwl
in:
url_md5checksums: url_md5checksums
url_genomic_fasta: url_genomic_fasta
url_genomic_gff: url_genomic_gff
url_protein_fasta: url_protein_fasta
url_transcript_fasta: url_transcript_fasta
url_cds_fasta: url_cds_fasta
out:
[OUT_md5checksums, #'*.txt'
OUT_genomic_fasta, #'*.gz'
OUT_genomic_gff, #'*.gz'
OUT_protein_fasta, #'*.gz'
OUT_transcript_fasta, #'*.gz'
OUT_cds_fasta] #'*.gz'
#step2
md5checksums:
run: flow_md5checksums/workflow.cwl
in:
in_md5checksums: download/OUT_md5checksums
in_genomic_fasta: download/OUT_genomic_fasta
in_genomic_gff: download/OUT_genomic_gff
in_protein_fasta: download/OUT_protein_fasta
in_transcript_fasta: download/OUT_transcript_fasta
in_cds_fasta: download/OUT_cds_fasta
out:
[
OUT_extract, #'*.txt2', extracted from *.txt
OUT_check, #'*.log', log file for execution of md5sum -c
OUT_genomic_fasta, #'*.fa, '*.fna', '*.faa'
OUT_genomic_gff, #'*.gff', '*.gff3'
OUT_protein_fasta,
OUT_transcript_fasta,
OUT_cds_fasta
]

#verify:
#fasta_diff,gff3_QC......
#step3
apollo2_data_processing:
run: flow_apollo2_data_processing/processing/workflow.cwl
in:
tree: tree
scientific_name: scientific_name
gff_release_number: gff_release_number
url_genomic_gff: url_genomic_gff
in_fasta: md5checksums/OUT_genomic_fasta
in_gff: md5checksums/OUT_genomic_gff
in_fasta: path_genomic_fasta
in_gff: path_genomic_gff
out:
[OUT_2bi,
OUT_seq,
Expand All @@ -90,7 +54,7 @@ steps:
OUT_trackList_json,
OUT_trackList_json_bak,
]
#step4
#step2
create_assembly_readme:
run: flow_create_readme/readme-assembly-workflow.cwl
in:
Expand All @@ -101,7 +65,7 @@ steps:
accession: accession
link_to_publication: link_to_publication
out: [readme_file]
#step5
#step3
create_genePrediction_readme:
run: flow_create_readme/readme-genePrediction-workflow.cwl
in:
Expand All @@ -113,25 +77,22 @@ steps:
url_transcript_fasta: url_transcript_fasta
link_to_publication: link_to_publication
out: [readme_file]
#step6
#step4
dispatch:
run: flow_dispatch/workflow.cwl
in:
PATH: PATH
tree: tree
deepPATH_genomic_fasta: deepPATH_genomic_fasta
in_genomic_fasta: md5checksums/OUT_genomic_fasta
in_genomic_fasta: path_genomic_fasta
deepPATH_analyses: deepPATH_analyses
in_genomic_gff: md5checksums/OUT_genomic_gff
in_genomic_gff: path_genomic_gff
#
in_protein_fasta: md5checksums/OUT_protein_fasta
in_transcript_fasta: md5checksums/OUT_transcript_fasta
in_cds_fasta: md5checksums/OUT_cds_fasta
in_protein_fasta: path_protein_fasta
in_transcript_fasta: path_transcript_fasta
in_cds_fasta: path_cds_fasta
in_assembly_readme: create_assembly_readme/readme_file
in_genePrediction_readme: create_genePrediction_readme/readme_file
in_md5checksums: download/OUT_md5checksums
in_extract: md5checksums/OUT_extract
in_check: md5checksums/OUT_check
#
deepPATH_apollo2_data: deepPATH_apollo2_data
deepPATH_bigwig: deepPATH_bigwig
Expand All @@ -147,6 +108,4 @@ steps:
out:
[out_dummy]



outputs: []
3 changes: 1 addition & 2 deletions flow_dispatch/2other_species/workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ inputs:
in_genomic_fasta: File
deepPATH_analyses: string[]
in_genomic_gff: File
in_protein_fasta: File[]
in_protein_fasta: File
in_transcript_fasta: File
in_cds_fasta: File
in_assembly_readme: File
Expand Down Expand Up @@ -55,7 +55,6 @@ steps:
out: []
cp_protein_fasta:
run: cp_file_4_annotation.cwl
scatter: in_data
in:
PATH: PATH
deepPATH_1: deepPATH_genomic_fasta
Expand Down
27 changes: 0 additions & 27 deletions flow_dispatch/2working_files/cp_file.cwl

This file was deleted.

41 changes: 0 additions & 41 deletions flow_dispatch/2working_files/workflow.cwl

This file was deleted.

18 changes: 2 additions & 16 deletions flow_dispatch/workflow.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,11 @@ inputs:
deepPATH_analyses: string[]
in_genomic_gff: File
#
in_protein_fasta: File[]
in_protein_fasta: File
in_transcript_fasta: File
in_cds_fasta: File
in_assembly_readme: File
in_genePrediction_readme: File
in_md5checksums: File
in_extract: File
in_check: File
#
in_2bi: File
#
Expand All @@ -46,7 +43,7 @@ steps:
tree: tree
deepPATH_genomic_fasta: deepPATH_genomic_fasta
deepPATH_analyses: deepPATH_analyses
out: [out_dummy]
out: [out_dummy]
#To other_species
2other_species:
run: 2other_species/workflow.cwl
Expand Down Expand Up @@ -76,17 +73,6 @@ steps:
in_gaps_bigwig: in_gaps_bigwig
in_gc_bigwig: in_gc_bigwig
out: []
#To working_files
2working_files:
run: 2working_files/workflow.cwl
in:
in_dummy: setup_folder/out_dummy
PATH: PATH
tree: tree
in_md5checksums: in_md5checksums
in_extract: in_extract
in_check: in_check
out: []
#To blat/db/
2blat:
run: 2blat/workflow.cwl
Expand Down
36 changes: 0 additions & 36 deletions flow_md5checksums/check_md5checksums.cwl

This file was deleted.

Loading

0 comments on commit 79a9357

Please sign in to comment.