diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index cf84350..30892a0 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -28,7 +28,7 @@ jobs: - name: Install dependencies run: | pip install -r requirements-test.txt - pip install --upgrade numpy pandas + pip install --upgrade pandas==1.4.0 - name: Unit tests run: | # TODO, improve the pythonpath handling diff --git a/bin/parse_viral_pred.py b/bin/parse_viral_pred.py index 4eeac3e..99aa17e 100755 --- a/bin/parse_viral_pred.py +++ b/bin/parse_viral_pred.py @@ -222,14 +222,13 @@ def parse_virus_sorter2(sorter_files, vs_cutoff): prophages = dict() final_boundary_file, final_score_file, final_combined_fa_file = "", "", "" - print('SORTER',sorter_files) - for i in sorter_files: - if "final-viral-boundary.tsv" in i: - final_boundary_file = i - elif "final-viral-score.tsv" in i: - final_score_file = i - elif "final-viral-combined.fa" in i: - final_combined_fa_file = i + for sorter_results_file in sorter_files: + if "final-viral-boundary.tsv" in sorter_results_file: + final_boundary_file = sorter_results_file + elif "final-viral-score.tsv" in sorter_results_file: + final_score_file = sorter_results_file + elif "final-viral-combined.fa" in sorter_results_file: + final_combined_fa_file = sorter_results_file else: print('ERROR: The result files of VirSorter2 are incomplete. The code expects the files final-viral-{boundary,score}.tsv and final-viral-combined.fa.', file=sys.stderr) return high_confidence, low_confidence, prophages diff --git a/modules/local/utils.nf b/modules/local/utils.nf index 5e60cf9..e4f06db 100644 --- a/modules/local/utils.nf +++ b/modules/local/utils.nf @@ -1,4 +1,4 @@ -process CONCATENATE_FILES { +process CONCATENATE_VIRSORTER2_FILES { tag "${meta.id}" label "process_medium" @@ -15,6 +15,5 @@ process CONCATENATE_FILES { grep 'seqname' inputs/\${first_file} > header.tsv || true cat inputs/* | grep -v 'seqname' > without_header.${output_name} cat header.tsv without_header.${output_name} > ${output_name} - rm without_header.${output_name} """ } \ No newline at end of file diff --git a/modules/local/virsorter2/main.nf b/modules/local/virsorter2/main.nf index 2fed902..cf130b0 100644 --- a/modules/local/virsorter2/main.nf +++ b/modules/local/virsorter2/main.nf @@ -4,11 +4,11 @@ process VIRSORTER2 { container 'quay.io/microbiome-informatics/virsorter:2.2.4' input: - tuple val(meta), file(fasta), val(contig_number) + tuple val(meta), file(fasta), val(number_of_contigs) path(database) when: - contig_number.toInteger() > 0 + number_of_contigs.toInteger() > 0 output: tuple val(meta), path("*.final-viral-score.tsv"), emit: score_tsv @@ -18,7 +18,8 @@ process VIRSORTER2 { script: def args = task.ext.args ?: '' """ - # speed up hmmsearch + # Settings to speed up hmmsearch + # TODO: this needs to be tested, it doesn't seem to speed up so we decided to chunk the fasta instead #virsorter config --set HMMSEARCH_THREADS=4 #virsorter config --set FAA_BP_PER_SPLIT=50000 diff --git a/nextflow_schema.json b/nextflow_schema.json index 09b4e7d..69a8c7f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -72,6 +72,7 @@ }, "output": { "type": "string", + "format": "directory-path", "default": "results", "description": "name of the result folder" } @@ -85,10 +86,12 @@ "properties": { "virsorter": { "type": "string", + "format": "directory-path", "description": "a virsorter database provided as 'virsorter/virsorter-data'" }, "virsorter2": { "type": "string", + "format": "directory-path", "description": "a virsorter2 database" }, "virfinder": { @@ -243,52 +246,62 @@ "properties": { "assemblydir": { "type": "string", + "format": "directory-path", "default": "00-assembly", "description": "output directory for assembly step", "fa_icon": "far fa-file-code" }, "virusdir": { "type": "string", + "format": "directory-path", "default": "01-viruses", "description": "output directory for detected viruses step" }, "prodigaldir": { "type": "string", + "format": "directory-path", "default": "02-prodigal", "description": "output directory for prodigal step" }, "phanotatedir": { "type": "string", + "format": "directory-path", "default": "02-phanotate", "description": "output directory for phanotate step" }, "hmmerdir": { "type": "string", + "format": "directory-path", "default": "03-hmmer", "description": "output directory for hmmer step" }, "blastdir": { "type": "string", + "format": "directory-path", "default": "04-blast", "description": "output directory for blast step" }, "plotdir": { "type": "string", + "format": "directory-path", "default": "05-plots", "description": "output directory for plots" }, "taxdir": { "type": "string", + "format": "directory-path", "default": "06-taxonomy", "description": "output directory for taxonomy results" }, "checkvdir": { "type": "string", + "format": "directory-path", "default": "07-checkv", "description": "output directory for checkV step" }, "finaldir": { "type": "string", + "format": "directory-path", "default": "08-final", "description": "final output directory" } diff --git a/subworkflows/local/detect.nf b/subworkflows/local/detect.nf index 16eda9d..67868e4 100644 --- a/subworkflows/local/detect.nf +++ b/subworkflows/local/detect.nf @@ -2,14 +2,14 @@ * Run virus detection tools and parse the predictions according to defined filters. */ -include { VIRSORTER } from '../../modules/local/virsorter' -include { VIRSORTER2 } from '../../modules/local/virsorter2' -include { VIRFINDER } from '../../modules/local/virfinder' -include { PPRMETA } from '../../modules/local/pprmeta' -include { PARSE } from '../../modules/local/parse' -include { CONCATENATE_FILES as CONCATENATE_FILES_SCORE } from '../../modules/local/utils' -include { CONCATENATE_FILES as CONCATENATE_FILES_BOUNDARY } from '../../modules/local/utils' -include { CONCATENATE_FILES as CONCATENATE_FILES_FA } from '../../modules/local/utils' +include { VIRSORTER } from '../../modules/local/virsorter' +include { VIRSORTER2 } from '../../modules/local/virsorter2' +include { VIRFINDER } from '../../modules/local/virfinder' +include { PPRMETA } from '../../modules/local/pprmeta' +include { PARSE } from '../../modules/local/parse' +include { CONCATENATE_VIRSORTER2_FILES as CONCATENATE_FILES_SCORE } from '../../modules/local/utils' +include { CONCATENATE_VIRSORTER2_FILES as CONCATENATE_FILES_BOUNDARY } from '../../modules/local/utils' +include { CONCATENATE_VIRSORTER2_FILES as CONCATENATE_FILES_FA } from '../../modules/local/utils' workflow DETECT { @@ -38,9 +38,9 @@ workflow DETECT { virsorter_output = VIRSORTER.out } else { - // chunk fasta by 10Mb + // chunk fasta by 500Mb chunked_ch = length_filtered_ch.flatMap{ meta, fasta, value -> - def chunks = fasta.splitFasta(file: true, size: 10.MB); + def chunks = fasta.splitFasta(file: true, size: 500.MB); chunks.collect{ chunk -> return tuple(meta, chunk, value); } diff --git a/tests/test_parse_viral_preds.py b/tests/test_parse_viral_preds.py index 74875dc..124f8c8 100644 --- a/tests/test_parse_viral_preds.py +++ b/tests/test_parse_viral_preds.py @@ -228,7 +228,7 @@ def test_virsorter_precedence(self): shutil.rmtree(test_dir) def test_virsorter2_precedence(self): - """VirSorter2 results take precedence over the other tools + """VirSorter2 results should take precedence over the other tools """ pprmeta_path = self._build_path("/virsorter_precedence/pprmeta.csv") vf_path = self._build_path("/virsorter_precedence/virfinder.txt")