Skip to content

Commit

Permalink
fixes after review
Browse files Browse the repository at this point in the history
  • Loading branch information
KateSakharova committed Nov 8, 2024
1 parent 3948397 commit 380ad87
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 25 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/unit_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ jobs:
- name: Install dependencies
run: |
pip install -r requirements-test.txt
pip install --upgrade numpy pandas
pip install --upgrade pandas==1.4.0
- name: Unit tests
run: |
# TODO, improve the pythonpath handling
Expand Down
15 changes: 7 additions & 8 deletions bin/parse_viral_pred.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,14 +222,13 @@ def parse_virus_sorter2(sorter_files, vs_cutoff):
prophages = dict()

final_boundary_file, final_score_file, final_combined_fa_file = "", "", ""
print('SORTER',sorter_files)
for i in sorter_files:
if "final-viral-boundary.tsv" in i:
final_boundary_file = i
elif "final-viral-score.tsv" in i:
final_score_file = i
elif "final-viral-combined.fa" in i:
final_combined_fa_file = i
for sorter_results_file in sorter_files:
if "final-viral-boundary.tsv" in sorter_results_file:
final_boundary_file = sorter_results_file
elif "final-viral-score.tsv" in sorter_results_file:
final_score_file = sorter_results_file
elif "final-viral-combined.fa" in sorter_results_file:
final_combined_fa_file = sorter_results_file
else:
print('ERROR: The result files of VirSorter2 are incomplete. The code expects the files final-viral-{boundary,score}.tsv and final-viral-combined.fa.', file=sys.stderr)
return high_confidence, low_confidence, prophages
Expand Down
3 changes: 1 addition & 2 deletions modules/local/utils.nf
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
process CONCATENATE_FILES {
process CONCATENATE_VIRSORTER2_FILES {
tag "${meta.id}"
label "process_medium"

Expand All @@ -15,6 +15,5 @@ process CONCATENATE_FILES {
grep 'seqname' inputs/\${first_file} > header.tsv || true
cat inputs/* | grep -v 'seqname' > without_header.${output_name}
cat header.tsv without_header.${output_name} > ${output_name}
rm without_header.${output_name}
"""
}
7 changes: 4 additions & 3 deletions modules/local/virsorter2/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ process VIRSORTER2 {
container 'quay.io/microbiome-informatics/virsorter:2.2.4'

input:
tuple val(meta), file(fasta), val(contig_number)
tuple val(meta), file(fasta), val(number_of_contigs)
path(database)

when:
contig_number.toInteger() > 0
number_of_contigs.toInteger() > 0

output:
tuple val(meta), path("*.final-viral-score.tsv"), emit: score_tsv
Expand All @@ -18,7 +18,8 @@ process VIRSORTER2 {
script:
def args = task.ext.args ?: ''
"""
# speed up hmmsearch
# Settings to speed up hmmsearch
# TODO: this needs to be tested; it doesn't seem to speed things up, so we decided to chunk the FASTA instead
#virsorter config --set HMMSEARCH_THREADS=4
#virsorter config --set FAA_BP_PER_SPLIT=50000
Expand Down
13 changes: 13 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
},
"output": {
"type": "string",
"format": "directory-path",
"default": "results",
"description": "name of the result folder"
}
Expand All @@ -85,10 +86,12 @@
"properties": {
"virsorter": {
"type": "string",
"format": "directory-path",
"description": "a virsorter database provided as 'virsorter/virsorter-data'"
},
"virsorter2": {
"type": "string",
"format": "directory-path",
"description": "a virsorter2 database"
},
"virfinder": {
Expand Down Expand Up @@ -243,52 +246,62 @@
"properties": {
"assemblydir": {
"type": "string",
"format": "directory-path",
"default": "00-assembly",
"description": "output directory for assembly step",
"fa_icon": "far fa-file-code"
},
"virusdir": {
"type": "string",
"format": "directory-path",
"default": "01-viruses",
"description": "output directory for detected viruses step"
},
"prodigaldir": {
"type": "string",
"format": "directory-path",
"default": "02-prodigal",
"description": "output directory for prodigal step"
},
"phanotatedir": {
"type": "string",
"format": "directory-path",
"default": "02-phanotate",
"description": "output directory for phanotate step"
},
"hmmerdir": {
"type": "string",
"format": "directory-path",
"default": "03-hmmer",
"description": "output directory for hmmer step"
},
"blastdir": {
"type": "string",
"format": "directory-path",
"default": "04-blast",
"description": "output directory for blast step"
},
"plotdir": {
"type": "string",
"format": "directory-path",
"default": "05-plots",
"description": "output directory for plots"
},
"taxdir": {
"type": "string",
"format": "directory-path",
"default": "06-taxonomy",
"description": "output directory for taxonomy results"
},
"checkvdir": {
"type": "string",
"format": "directory-path",
"default": "07-checkv",
"description": "output directory for checkV step"
},
"finaldir": {
"type": "string",
"format": "directory-path",
"default": "08-final",
"description": "final output directory"
}
Expand Down
20 changes: 10 additions & 10 deletions subworkflows/local/detect.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
* Run virus detection tools and parse the predictions according to defined filters.
*/

include { VIRSORTER } from '../../modules/local/virsorter'
include { VIRSORTER2 } from '../../modules/local/virsorter2'
include { VIRFINDER } from '../../modules/local/virfinder'
include { PPRMETA } from '../../modules/local/pprmeta'
include { PARSE } from '../../modules/local/parse'
include { CONCATENATE_FILES as CONCATENATE_FILES_SCORE } from '../../modules/local/utils'
include { CONCATENATE_FILES as CONCATENATE_FILES_BOUNDARY } from '../../modules/local/utils'
include { CONCATENATE_FILES as CONCATENATE_FILES_FA } from '../../modules/local/utils'
include { VIRSORTER } from '../../modules/local/virsorter'
include { VIRSORTER2 } from '../../modules/local/virsorter2'
include { VIRFINDER } from '../../modules/local/virfinder'
include { PPRMETA } from '../../modules/local/pprmeta'
include { PARSE } from '../../modules/local/parse'
include { CONCATENATE_VIRSORTER2_FILES as CONCATENATE_FILES_SCORE } from '../../modules/local/utils'
include { CONCATENATE_VIRSORTER2_FILES as CONCATENATE_FILES_BOUNDARY } from '../../modules/local/utils'
include { CONCATENATE_VIRSORTER2_FILES as CONCATENATE_FILES_FA } from '../../modules/local/utils'

workflow DETECT {

Expand Down Expand Up @@ -38,9 +38,9 @@ workflow DETECT {
virsorter_output = VIRSORTER.out
}
else {
// chunk fasta by 10Mb
// chunk fasta by 500 MB
chunked_ch = length_filtered_ch.flatMap{ meta, fasta, value ->
def chunks = fasta.splitFasta(file: true, size: 10.MB);
def chunks = fasta.splitFasta(file: true, size: 500.MB);
chunks.collect{ chunk ->
return tuple(meta, chunk, value);
}
Expand Down
2 changes: 1 addition & 1 deletion tests/test_parse_viral_preds.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ def test_virsorter_precedence(self):
shutil.rmtree(test_dir)

def test_virsorter2_precedence(self):
"""VirSorter2 results take precedence over the other tools
"""VirSorter2 results should take precedence over the other tools
"""
pprmeta_path = self._build_path("/virsorter_precedence/pprmeta.csv")
vf_path = self._build_path("/virsorter_precedence/virfinder.txt")
Expand Down

0 comments on commit 380ad87

Please sign in to comment.