From 2a6c0623c1c3e41a345bd1f0e995190b3c2705ac Mon Sep 17 00:00:00 2001 From: Ge94 Date: Mon, 2 Sep 2024 15:44:30 +0100 Subject: [PATCH 01/33] Addition of long-reads pre-assembly qcs --- bin/check_raw_quality.py | 22 + conf/modules.config | 55 +- conf/test.config | 2 + modules.json | 32 +- modules/local/fetchtool_reads.nf | 19 +- modules/local/raw_read_quality_check.nf | 24 + modules/nf-core/canu/environment.yml | 6 + modules/nf-core/canu/main.nf | 50 ++ modules/nf-core/canu/meta.yml | 79 +++ modules/nf-core/fastp/main.nf | 13 +- modules/nf-core/fastp/meta.yml | 6 +- modules/nf-core/flye/environment.yml | 6 + modules/nf-core/flye/main.nf | 68 +++ modules/nf-core/flye/meta.yml | 68 +++ modules/nf-core/flye/tests/main.nf.test | 258 ++++++++++ modules/nf-core/flye/tests/main.nf.test.snap | 80 +++ modules/nf-core/flye/tests/nextflow.config | 4 + modules/nf-core/flye/tests/tags.yml | 2 + modules/nf-core/medaka/environment.yml | 6 + modules/nf-core/medaka/main.nf | 40 ++ modules/nf-core/medaka/meta.yml | 45 ++ modules/nf-core/medaka/tests/main.nf.test | 33 ++ .../nf-core/medaka/tests/main.nf.test.snap | 33 ++ modules/nf-core/medaka/tests/tags.yml | 2 + .../nf-core/minimap2/align/environment.yml | 11 + modules/nf-core/minimap2/align/main.nf | 81 +++ modules/nf-core/minimap2/align/meta.yml | 84 ++++ .../nf-core/minimap2/align/tests/main.nf.test | 441 ++++++++++++++++ .../minimap2/align/tests/main.nf.test.snap | 476 ++++++++++++++++++ modules/nf-core/minimap2/align/tests/tags.yml | 2 + modules/nf-core/porechop/abi/environment.yml | 9 + modules/nf-core/porechop/abi/main.nf | 50 ++ modules/nf-core/porechop/abi/meta.yml | 48 ++ .../nf-core/porechop/abi/tests/main.nf.test | 59 +++ .../porechop/abi/tests/main.nf.test.snap | 94 ++++ modules/nf-core/porechop/abi/tests/tags.yml | 2 + modules/nf-core/racon/environment.yml | 6 + modules/nf-core/racon/main.nf | 38 ++ modules/nf-core/racon/meta.yml | 51 ++ nextflow.config | 19 + nextflow_schema.json | 38 +- subworkflows/local/long_reads_qc.nf | 91 ++++ subworkflows/local/ont_hq.nf | 16 + subworkflows/local/ont_lq.nf | 18 + subworkflows/local/pacbio_hifi.nf | 3 + subworkflows/local/pacbio_lq.nf | 14 + subworkflows/local/reads_qc.nf | 1 + tests/samplesheet/test_minION_SRR10303629.csv | 2 + workflows/longreadassembler.nf | 244 +++++++++ 49 files changed, 2836 insertions(+), 15 deletions(-) create mode 100755 bin/check_raw_quality.py create mode 100644 modules/local/raw_read_quality_check.nf create mode 100644 modules/nf-core/canu/environment.yml create mode 100644 modules/nf-core/canu/main.nf create mode 100644 modules/nf-core/canu/meta.yml create mode 100644 modules/nf-core/flye/environment.yml create mode 100644 modules/nf-core/flye/main.nf create mode 100644 modules/nf-core/flye/meta.yml create mode 100644 modules/nf-core/flye/tests/main.nf.test create mode 100644 modules/nf-core/flye/tests/main.nf.test.snap create mode 100644 modules/nf-core/flye/tests/nextflow.config create mode 100644 modules/nf-core/flye/tests/tags.yml create mode 100644 modules/nf-core/medaka/environment.yml create mode 100644 modules/nf-core/medaka/main.nf create mode 100644 modules/nf-core/medaka/meta.yml create mode 100644 modules/nf-core/medaka/tests/main.nf.test create mode 100644 modules/nf-core/medaka/tests/main.nf.test.snap create mode 100644 modules/nf-core/medaka/tests/tags.yml create mode 100644 modules/nf-core/minimap2/align/environment.yml create mode 100644 modules/nf-core/minimap2/align/main.nf create mode 100644 modules/nf-core/minimap2/align/meta.yml create mode 100644 
modules/nf-core/minimap2/align/tests/main.nf.test create mode 100644 modules/nf-core/minimap2/align/tests/main.nf.test.snap create mode 100644 modules/nf-core/minimap2/align/tests/tags.yml create mode 100644 modules/nf-core/porechop/abi/environment.yml create mode 100644 modules/nf-core/porechop/abi/main.nf create mode 100644 modules/nf-core/porechop/abi/meta.yml create mode 100644 modules/nf-core/porechop/abi/tests/main.nf.test create mode 100644 modules/nf-core/porechop/abi/tests/main.nf.test.snap create mode 100644 modules/nf-core/porechop/abi/tests/tags.yml create mode 100644 modules/nf-core/racon/environment.yml create mode 100644 modules/nf-core/racon/main.nf create mode 100644 modules/nf-core/racon/meta.yml create mode 100644 subworkflows/local/long_reads_qc.nf create mode 100644 subworkflows/local/ont_hq.nf create mode 100644 subworkflows/local/ont_lq.nf create mode 100644 subworkflows/local/pacbio_hifi.nf create mode 100644 subworkflows/local/pacbio_lq.nf create mode 100644 tests/samplesheet/test_minION_SRR10303629.csv create mode 100644 workflows/longreadassembler.nf diff --git a/bin/check_raw_quality.py b/bin/check_raw_quality.py new file mode 100755 index 0000000..9a9dc5b --- /dev/null +++ b/bin/check_raw_quality.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 + +import json +import argparse + +parser = argparse.ArgumentParser(description="Evaluate run quality from fastp output") +parser.add_argument('--json','-j',help='Fastp json output',required=True) + +argv = parser.parse_args() + +fastp_out = argv.json +data = json.load(open(fastp_out)) + +q20_bases = float(data['read1_before_filtering']['q20_bases']) +total_bases = float(data['read1_before_filtering']['total_bases']) +q20_percentage = q20_bases/total_bases*100 + +quality = "low" +if q20_percentage >= 80: + quality = "high" + +print(quality) \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index 367222c..8cf286e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -20,7 +20,7 @@ process { ext.args = params.private_study ? 
"--private" : "" } - withName: 'FASTP' { + withName: 'FASTP*' { cpus = { check_max( 6 * task.attempt, 'cpus' ) } memory = { check_max( 36.GB * task.attempt, 'memory' ) } time = { check_max( 8.h * task.attempt, 'time' ) } @@ -50,6 +50,16 @@ process { ] } + withName: 'FASTP_LR' { + ext.args = [ + '--average_qual', + '10', + '--length_required', + "${params.min_read_length}", + '--disable_adapter_trimming' + ].join(' ').trim() + } + withName: 'FASTQC' { cpus = { check_max( 6 * task.attempt, 'cpus' ) } memory = { check_max( 36.GB * task.attempt, 'memory' ) } @@ -89,13 +99,54 @@ process { ext.prefix = "decontaminated" } - withName: 'HUMAN_PHIX_DECONTAMINATION' { + withName: 'HUMAN*_DECONTAMINATION' { memory = { check_max( 64.GB * task.attempt, 'memory' ) } } withName: 'HOST_DECONTAMINATION' { memory = { check_max( 24.GB * task.attempt, 'memory' ) } } + + withName: 'CANU*' { + cpus = { check_max( 4 , 'cpus' ) } + memory = { check_max( 3.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + + ext.args = [ + '-trim', + '-corrected', + 'corMinCoverage=0', + 'stopOnLowCoverage=0', + 'minInputCoverage=0', + 'maxInputCoverage=10000', + 'corOutCoverage=all', + 'corMhapSensitivity=high', + 'corMaxEvidenceCoverageLocal=10', + 'corMaxEvidenceCoverageGlobal=10', + 'oeaMemory=10', + 'redMemory=10', + 'batMemory=10', + ].join(' ').trim() + } + + withName: 'CANU_ONT' { + ext.args2 = [ + 'correctedErrorRate=0.16', + ].join(' ').trim() + } + + withName: 'CANU_PACBIO' { + ext.args2 = [ + 'correctedErrorRate=0.105', + ].join(' ').trim() + } + + withName: 'PORECHOP_ONT' { + cpus = { check_max( 1 , 'cpus' ) } + memory = { check_max( 6.GB * task.attempt, 'memory' ) } + time = { check_max( 4.h * task.attempt, 'time' ) } + } + /* --------- */ /* Assembly */ diff --git a/conf/test.config b/conf/test.config index 9e95f65..421e7f7 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,6 +22,8 @@ profiles { blast_reference_genomes_folder = "tests/human_phix/blast" human_phix_blast_index_name = "human_phix" human_phix_bwamem2_index_name = "human_phix" + human_blast_index_name = "human" + human_bwamem2_index_name = "human" } } } diff --git a/modules.json b/modules.json index 54f81f3..f510e07 100644 --- a/modules.json +++ b/modules.json @@ -32,6 +32,11 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "canu": { + "branch": "master", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"] + }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "82024cf6325d2ee194e7f056d841ecad2f6856e9", @@ -39,7 +44,7 @@ }, "fastp": { "branch": "master", - "git_sha": "95cf5fe0194c7bf5cb0e3027a2eb7e7c89385080", + "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", "installed_by": ["modules"], "patch": "modules/nf-core/fastp/fastp.diff" }, @@ -49,6 +54,16 @@ "installed_by": ["modules"], "patch": "modules/nf-core/fastqc/fastqc.diff" }, + "flye": { + "branch": "master", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"] + }, + "medaka": { + "branch": "master", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"] + }, "megahit": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", @@ -60,17 +75,32 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "minimap2/align": { + "branch": "master", + "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", + "installed_by": ["modules"] + }, "multiqc": { 
"branch": "master", "git_sha": "314d742bdb357a1df5f9b88427b3b6ac78aa33f7", "installed_by": ["modules"] }, + "porechop/abi": { + "branch": "master", + "git_sha": "870f9af2eaf0000c94d74910d762cf153752af98", + "installed_by": ["modules"] + }, "quast": { "branch": "master", "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"], "patch": "modules/nf-core/quast/quast.diff" }, + "racon": { + "branch": "master", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"] + }, "samtools/idxstats": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", diff --git a/modules/local/fetchtool_reads.nf b/modules/local/fetchtool_reads.nf index 129e452..e62484a 100644 --- a/modules/local/fetchtool_reads.nf +++ b/modules/local/fetchtool_reads.nf @@ -3,17 +3,17 @@ process FETCHTOOL_READS { label 'process_single' - container "quay.io/microbiome-informatics/fetch-tool:v1.0.0rc" + container "quay.io/microbiome-informatics/fetch-tool:v1.0.2" input: tuple val(meta), val(study_accession), val(reads_accession) path fetchtool_config output: - tuple val(meta), path("download_folder/${study_accession}/raw/${reads_accession}*.fastq.gz"), env(library_strategy), env(library_layout), emit: reads + tuple val(meta), path("download_folder/${study_accession}/raw/${reads_accession}*.fastq.gz"), env(library_strategy), env(library_layout), env(platform), emit: reads // The '_mqc.' is for multiQC - tuple val(meta), path("download_folder/${study_accession}/${study_accession}.txt") , emit: metadata_tsv - path "versions.yml" , emit: versions + tuple val(meta), path("download_folder/${study_accession}/${study_accession}.txt") , emit: metadata_tsv + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -32,6 +32,15 @@ process FETCHTOOL_READS { library_strategy=\$(echo "\$(grep ${reads_accession} download_folder/${study_accession}/${study_accession}.txt | cut -f 7)" | tr '[:upper:]' '[:lower:]') library_layout=\$(echo "\$(grep ${reads_accession} download_folder/${study_accession}/${study_accession}.txt | cut -f 5)" | tr '[:upper:]' '[:lower:]') + export metadata_platform=\$(echo "\$(grep ${reads_accession} download_folder/${study_accession}/${study_accession}.txt | cut -f 8)" | tr '[:upper:]' '[:lower:]') + if [[ \$metadata_platform == "minion" || \$metadata_platform == "promethion" || \$metadata_platform == "gridion" ]]; then + platform="ont" + elif [[ \$metadata_platform == "pacbio rs" || \$metadata_platform == "pacbio rs ii" ]]; then + platform="pacbio" + else + platform="short" + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": fetch-tool: \$(fetch-read-tool --version) @@ -53,4 +62,4 @@ process FETCHTOOL_READS { fetch-tool: \$(fetch-read-tool --version) END_VERSIONS """ -} +} \ No newline at end of file diff --git a/modules/local/raw_read_quality_check.nf b/modules/local/raw_read_quality_check.nf new file mode 100644 index 0000000..01ea6f2 --- /dev/null +++ b/modules/local/raw_read_quality_check.nf @@ -0,0 +1,24 @@ +process RAW_READ_QUALITY_CHECK { + tag "$reads_accession" + label 'process_single' + + input: + tuple val(meta), path(fastp_json) + + output: + env(quality) , emit: quality + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + """ + quality=\$(check_raw_quality.py -j ${fastp_json}) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | sed 's/Python //g') + END_VERSIONS + """ +} diff --git 
a/modules/nf-core/canu/environment.yml b/modules/nf-core/canu/environment.yml new file mode 100644 index 0000000..7b601cb --- /dev/null +++ b/modules/nf-core/canu/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::canu=2.2 diff --git a/modules/nf-core/canu/main.nf b/modules/nf-core/canu/main.nf new file mode 100644 index 0000000..7c5deab --- /dev/null +++ b/modules/nf-core/canu/main.nf @@ -0,0 +1,50 @@ +process CANU { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/canu:2.2--ha47f30e_0': + 'biocontainers/canu:2.2--ha47f30e_0' }" + + input: + tuple val(meta), path(reads) + val mode + val genomesize + + output: + tuple val(meta), path("*.report") , emit: report + tuple val(meta), path("*.contigs.fasta.gz") , emit: assembly , optional: true + tuple val(meta), path("*.unassembled.fasta.gz") , emit: contigs , optional: true + tuple val(meta), path("*.correctedReads.fasta.gz") , emit: corrected_reads , optional: true + tuple val(meta), path("*.trimmedReads.fasta.gz") , emit: corrected_trimmed_reads , optional: true + tuple val(meta), path("*.contigs.layout") , emit: metadata , optional: true + tuple val(meta), path("*.contigs.layout.readToTig") , emit: contig_position , optional: true + tuple val(meta), path("*.contigs.layout.tigInfo") , emit: contig_info , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def valid_mode = ["-pacbio", "-nanopore", "-pacbio-hifi"] + if ( !valid_mode.contains(mode) ) { error "Unrecognised mode to run Canu. Options: ${valid_mode.join(', ')}" } + """ + canu \\ + -p ${prefix} \\ + $mode \\ + genomeSize=${genomesize} \\ + $args \\ + $args2 \\ + maxThreads=$task.cpus \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + canu: \$(echo \$(canu --version 2>&1) | sed 's/^.*canu //; s/Using.*\$//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/canu/meta.yml b/modules/nf-core/canu/meta.yml new file mode 100644 index 0000000..2feed43 --- /dev/null +++ b/modules/nf-core/canu/meta.yml @@ -0,0 +1,79 @@ +name: "canu" +description: Accurate assembly of segmental duplications, satellites, and allelic variants from high-fidelity long reads. +keywords: + - Assembly + - pacbio + - hifi + - nanopore +tools: + - "canu": + description: "Canu is a fork of the Celera Assembler designed for high-noise single-molecule sequencing." + homepage: "https://canu.readthedocs.io/en/latest/index.html#" + documentation: "https://canu.readthedocs.io/en/latest/tutorial.html" + tool_dev_url: "https://github.com/marbl/canu" + doi: "10.1101/gr.215087.116" + licence: "['GPL v2 and others']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:true ] + - reads: + type: file + description: fasta/fastq file + pattern: "*.{fasta,fastq}" + - mode: + type: value + description: Canu mode depending on the input data (source and error rate) + pattern: "-pacbio|-nanopore|-pacbio-hifi" + - genomesize: + type: value + description: An estimate of the size of the genome. 
Common suffices are allowed, for example, 3.7m or 2.8g + pattern: "[g|m|k]" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - report: + type: file + description: Most of the analysis reported during assembly + pattern: "*.report" + - assembly: + type: file + description: Everything which could be assembled and is the full assembly, including both unique, repetitive, and bubble elements. + pattern: "*.contigs.fasta" + - contigs: + type: file + description: Reads and low-coverage contigs which could not be incorporated into the primary assembly. + pattern: "*.unassembled.fasta" + - corrected_reads: + type: file + description: The reads after correction. + pattern: "*.correctedReads.fasta.gz" + - corrected_trimmed_reads: + type: file + description: The corrected reads after overlap based trimming + pattern: "*.trimmedReads.fasta.gz" + - metadata: + type: file + description: (undocumented) + pattern: "*.contigs.layout" + - contig_position: + type: file + description: The position of each read in a contig + pattern: "*.contigs.layout.readToTig" + - contig_info: + type: file + description: A list of the contigs, lengths, coverage, number of reads and other metadata. Essentially the same information provided in the FASTA header line. + pattern: "*.contigs.layout.tigInfo" +authors: + - "@scorreard" +maintainers: + - "@scorreard" diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 3d97ca9..7c51260 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -10,6 +10,7 @@ process FASTP { input: tuple val(meta), path(reads) path adapter_fasta + val discard_trimmed_pass val save_trimmed_fail val save_merged val trim_polyA @@ -32,8 +33,11 @@ process FASTP { def polyA = ( trim_polyA || meta.library_strategy == "metatranscriptomic" ) ? "--trim_poly_x" : '' def prefix = task.ext.prefix ?: "${meta.id}" def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" - def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ?: ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" ) + def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_2.fastp.fastq.gz" // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. if ( task.ext.args?.contains('--interleaved_in') ) { """ [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz @@ -62,6 +66,7 @@ process FASTP { fastp \\ --in1 ${prefix}.fastq.gz \\ + $out_fq1 \\ --out1 ${prefix}.fastp.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ @@ -85,8 +90,8 @@ process FASTP { fastp \\ --in1 ${prefix}_1.fastq.gz \\ --in2 ${prefix}_2.fastq.gz \\ - --out1 ${prefix}_1.fastp.fastq.gz \\ - --out2 ${prefix}_2.fastp.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $adapter_list \\ @@ -96,7 +101,7 @@ process FASTP { --thread $task.cpus \\ --detect_adapter_for_pe \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index c22a16a..8dfecc1 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -27,12 +27,16 @@ input: type: file description: File in FASTA format containing possible adapters to remove. pattern: "*.{fasta,fna,fas,fa}" + - discard_trimmed_pass: + type: boolean + description: Specify true to not write any reads that pass trimming thresholds. | + This can be used to use fastp for the output report only. - save_trimmed_fail: type: boolean description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` - save_merged: type: boolean - description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` output: - meta: type: map diff --git a/modules/nf-core/flye/environment.yml b/modules/nf-core/flye/environment.yml new file mode 100644 index 0000000..f5364d5 --- /dev/null +++ b/modules/nf-core/flye/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::flye=2.9 diff --git a/modules/nf-core/flye/main.nf b/modules/nf-core/flye/main.nf new file mode 100644 index 0000000..3d89218 --- /dev/null +++ b/modules/nf-core/flye/main.nf @@ -0,0 +1,68 @@ +process FLYE { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1' : + 'biocontainers/flye:2.9--py39h6935b12_1' }" + + input: + tuple val(meta), path(reads) + val mode + + output: + tuple val(meta), path("*.fasta.gz"), emit: fasta + tuple val(meta), path("*.gfa.gz") , emit: gfa + tuple val(meta), path("*.gv.gz") , emit: gv + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*.json") , emit: json + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def valid_mode = ["--pacbio-raw", "--pacbio-corr", "--pacbio-hifi", "--nano-raw", "--nano-corr", "--nano-hq"] + if ( !valid_mode.contains(mode) ) { error "Unrecognised mode to run Flye. Options: ${valid_mode.join(', ')}" } + """ + flye \\ + $mode \\ + $reads \\ + --out-dir . 
\\ + --threads \\ + $task.cpus \\ + $args + + gzip -c assembly.fasta > ${prefix}.assembly.fasta.gz + gzip -c assembly_graph.gfa > ${prefix}.assembly_graph.gfa.gz + gzip -c assembly_graph.gv > ${prefix}.assembly_graph.gv.gz + mv assembly_info.txt ${prefix}.assembly_info.txt + mv flye.log ${prefix}.flye.log + mv params.json ${prefix}.params.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + flye: \$( flye --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo stub | gzip -c > ${prefix}.assembly.fasta.gz + echo stub | gzip -c > ${prefix}.assembly_graph.gfa.gz + echo stub | gzip -c > ${prefix}.assembly_graph.gv.gz + echo contig_1 > ${prefix}.assembly_info.txt + echo stub > ${prefix}.flye.log + echo stub > ${prefix}.params.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + flye: \$( flye --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/flye/meta.yml b/modules/nf-core/flye/meta.yml new file mode 100644 index 0000000..5c3c816 --- /dev/null +++ b/modules/nf-core/flye/meta.yml @@ -0,0 +1,68 @@ +name: "flye" +description: De novo assembler for single molecule sequencing reads +keywords: + - assembly + - genome + - de novo + - genome assembler + - single molecule +tools: + - "flye": + description: "Fast and accurate de novo assembler for single molecule sequencing reads" + homepage: "https://github.com/fenderglass/Flye" + documentation: "https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md" + tool_dev_url: "https://github.com/fenderglass/Flye" + doi: "10.1038/s41592-020-00971-x" + licence: "['BSD-3-clause']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: Input reads from Oxford Nanopore or PacBio data in FASTA/FASTQ format. + pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fa,fq,fa.gz,fq.gz}" + - mode: + type: string + description: Flye mode depending on the input data (source and error rate) + pattern: "--pacbio-raw|--pacbio-corr|--pacbio-hifi|--nano-raw|--nano-corr|--nano-hq" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fasta: + type: file + description: Assembled FASTA file + pattern: "*.fasta.gz" + - gfa: + type: file + description: Repeat graph in gfa format + pattern: "*.gfa.gz" + - gv: + type: file + description: Repeat graph in gv format + pattern: "*.gv.gz" + - txt: + type: file + description: Extra information and statistics about resulting contigs + pattern: "*.txt" + - log: + type: file + description: Flye log file + pattern: "*.log" + - json: + type: file + description: Flye parameters + pattern: "*.json" +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/modules/nf-core/flye/tests/main.nf.test b/modules/nf-core/flye/tests/main.nf.test new file mode 100644 index 0000000..f06aa1b --- /dev/null +++ b/modules/nf-core/flye/tests/main.nf.test @@ -0,0 +1,258 @@ +// According to the issue https://github.com/fenderglass/Flye/issues/164 +// Some fluctuations are expected because of the heuristics +// Here we check the that test.assembly_info.txt contains at least one contig + +nextflow_process { + + name "Test Process FLYE" + script "../main.nf" + process "FLYE" + config "./nextflow.config" + tag "flye" + tag "modules" + tag "modules_nfcore" + + + test("flye_pacbio_raw") { + tag "flye_pacbio_raw" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--pacbio-raw" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + ) + } + + } + + + test("flye_pacbio_corr") { + tag "flye_pacbio_corr" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--pacbio-corr" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + + ) + } + + } + + test("flye_pacbio_hifi") { + tag "flye_pacbio_hifi" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--pacbio-hifi" + """ + } + } + + 
then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + + ) + } + + } + + test("flye_nano_raw") { + tag "flye_nano_raw" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--nano-raw" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + ) + } + + } + + test("flye_nano_corr") { + tag "flye_nano_corr" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--nano-corr" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + ) + } + + } + + + test("flye_nano_hq") { + tag "flye_nano_hq" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--nano-hq" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text 
=~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + ) + } + + } + + + +} diff --git a/modules/nf-core/flye/tests/main.nf.test.snap b/modules/nf-core/flye/tests/main.nf.test.snap new file mode 100644 index 0000000..a4aef73 --- /dev/null +++ b/modules/nf-core/flye/tests/main.nf.test.snap @@ -0,0 +1,80 @@ +{ + "flye_pacbio_raw": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:38:04.835173617" + }, + "flye_pacbio_hifi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T08:38:39.624137639" + }, + "flye_nano_raw": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:51:24.546896915" + }, + "flye_pacbio_corr": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T08:34:15.751344742" + }, + "flye_nano_corr": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:17:49.861781685" + }, + "flye_nano_hq": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:26:29.081427909" + } +} \ No newline at end of file diff --git a/modules/nf-core/flye/tests/nextflow.config b/modules/nf-core/flye/tests/nextflow.config new file mode 100644 index 0000000..40cf878 --- /dev/null +++ b/modules/nf-core/flye/tests/nextflow.config @@ -0,0 +1,4 @@ +// profile=docker with tests flye_pacbio_raw and flye_nano_raw need more memory that the default of 3.GB +process { + memory = 6.GB +} diff --git a/modules/nf-core/flye/tests/tags.yml b/modules/nf-core/flye/tests/tags.yml new file mode 100644 index 0000000..31103d1 --- /dev/null +++ b/modules/nf-core/flye/tests/tags.yml @@ -0,0 +1,2 @@ +flye: + - modules/nf-core/flye/** diff --git a/modules/nf-core/medaka/environment.yml b/modules/nf-core/medaka/environment.yml new file mode 100644 index 0000000..fea1532 --- /dev/null +++ b/modules/nf-core/medaka/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::medaka=1.4.4 diff --git a/modules/nf-core/medaka/main.nf b/modules/nf-core/medaka/main.nf new file mode 100644 index 0000000..e87c910 --- /dev/null +++ b/modules/nf-core/medaka/main.nf @@ -0,0 +1,40 @@ +process MEDAKA { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/medaka:1.4.4--py38h130def0_0' : + 'biocontainers/medaka:1.4.4--py38h130def0_0' }" + + input: + tuple val(meta), path(reads), path(assembly) + + output: + tuple val(meta), path("*.fa.gz"), emit: assembly + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + medaka_consensus \\ + -t $task.cpus \\ + $args \\ + -i $reads \\ + -d $assembly \\ + -o ./ + + mv consensus.fasta ${prefix}.fa + + gzip -n ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/medaka/meta.yml b/modules/nf-core/medaka/meta.yml new file mode 100644 index 0000000..9ed3589 --- /dev/null +++ b/modules/nf-core/medaka/meta.yml @@ -0,0 +1,45 @@ +name: medaka +description: A tool to create consensus sequences and variant calls from nanopore sequencing data +keywords: + - assembly + - polishing + - nanopore +tools: + - medaka: + description: Neural network sequence error correction. + homepage: https://nanoporetech.github.io/medaka/index.html + documentation: https://nanoporetech.github.io/medaka/index.html + tool_dev_url: https://github.com/nanoporetech/medaka + licence: ["Mozilla Public License 2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: List of input nanopore fasta/FastQ files + pattern: "*.{fasta,fa,fastq,fastq.gz,fq,fq.gz}" + - assembly: + type: file + description: Genome assembly + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - assembly: + type: file + description: Polished genome assembly + pattern: "*.fa.gz" +authors: + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/modules/nf-core/medaka/tests/main.nf.test b/modules/nf-core/medaka/tests/main.nf.test new file mode 100644 index 0000000..1c5c55f --- /dev/null +++ b/modules/nf-core/medaka/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process MEDAKA" + tag "modules_nfcore" + tag "modules" + tag "medaka" + script "../main.nf" + process "MEDAKA" + + test("Medaka") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/medaka/tests/main.nf.test.snap b/modules/nf-core/medaka/tests/main.nf.test.snap new file mode 100644 index 0000000..d3fcba2 --- /dev/null +++ b/modules/nf-core/medaka/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Medaka": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fa.gz:md5,f42303f1d6c2c79175faeb00e10b9a6e" + ] + ], + "1": [ + "versions.yml:md5,739bb00a08faba4029f9f5ab9c15275a" + ], + "assembly": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fa.gz:md5,f42303f1d6c2c79175faeb00e10b9a6e" + ] + ], + "versions": [ + "versions.yml:md5,739bb00a08faba4029f9f5ab9c15275a" + ] + } + ], + "timestamp": "2023-10-18T12:38:17.806031909" + } +} \ No newline at end of file diff --git a/modules/nf-core/medaka/tests/tags.yml b/modules/nf-core/medaka/tests/tags.yml new file mode 100644 index 0000000..dd9fb10 --- /dev/null +++ b/modules/nf-core/medaka/tests/tags.yml @@ -0,0 +1,2 @@ +medaka: + - modules/nf-core/medaka/** diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml new file mode 100644 index 0000000..41e8fe9 --- /dev/null +++ b/modules/nf-core/minimap2/align/environment.yml @@ -0,0 +1,11 @@ +name: minimap2_align + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::htslib=1.20 + - bioconda::minimap2=2.28 + - bioconda::samtools=1.20 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf new file mode 100644 index 0000000..cbfc5bf --- /dev/null +++ b/modules/nf-core/minimap2/align/main.nf @@ -0,0 +1,81 @@ +process MINIMAP2_ALIGN { + tag "$meta.id" + label 'process_high' + + // Note: the versions here need to match the versions used in the mulled container below and minimap2/index + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(reference) + val prefix2 + val bam_format + val bam_index_extension + val cigar_paf_format + val cigar_bam + + output: + tuple val(meta), path("*.minimap*") , optional: true, emit: filtered_fastq + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" + def map_mode = "${meta.platform}" ? "-x map-${meta.platform}" : '' + def bam_output = bam_format ? "-a | samtools fastq -f 4 | gzip > ${prefix}.${prefix2}.minimap.fastq.gz" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def query = bam_input ? "-" : reads + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + + """ + $samtools_reset_fastq \\ + minimap2 \\ + $args \\ + -t $task.cpus \\ + $map_mode \\ + $target \\ + $query \\ + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: c + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? "touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + + """ + touch $output_file + ${bam_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml new file mode 100644 index 0000000..8996f88 --- /dev/null +++ b/modules/nf-core/minimap2/align/meta.yml @@ -0,0 +1,84 @@ +name: minimap2_align +description: A versatile pairwise aligner for genomic and spliced nucleotide sequences +keywords: + - align + - fasta + - fastq + - genome + - paf + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - bam_format: + type: boolean + description: Specify that output should be in BAM format + - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - paf: + type: file + description: Alignment in PAF format + pattern: "*.paf" + - bam: + type: file + description: Alignment in BAM format + pattern: "*.bam" + - index: + type: file + description: BAM alignment index + pattern: "*.bam.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" + - "@fellen31" +maintainers: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" + - "@fellen31" diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test new file mode 100644 index 0000000..4072c17 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -0,0 +1,441 @@ +nextflow_process { + + name "Test Process MINIMAP2_ALIGN" + script "../main.nf" + process "MINIMAP2_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/align" + + test("sarscov2 - fastq, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") { + + when { + process { + """ + 
input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + 
input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ 
id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap new file mode 100644 index 0000000..12264a8 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -0,0 +1,476 @@ +{ + "sarscov2 - bam, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:03:00.827260362" + }, + "sarscov2 - bam, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:37.92353539" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:29:44.669021368" + }, + "sarscov2 - fastq, fasta, false, [], false, false - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + + ], + "index": [ + + ], + "paf": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:52.738781039" + }, + "sarscov2 - fastq, fasta, true, [], false, false - stub": { + "content": [ + 
{ + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:23.033808223" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "1bc392244f228bf52cf0b5a8f6a654c9", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:18.964586894" + }, + "sarscov2 - fastq, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "f194745c0ccfcb2a9c0aee094a08750", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:17:48.667488325" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "f194745c0ccfcb2a9c0aee094a08750", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:02.517416733" + }, + "sarscov2 - bam, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:02:49.64829488" + }, + "sarscov2 - bam, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:22.162291795" + }, + "sarscov2 - fastq, [], true, false, 
false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:ERR5069949.2151832\tLN:150", + "@SQ\tSN:ERR5069949.576388\tLN:77", + "@SQ\tSN:ERR5069949.501486\tLN:146", + "@SQ\tSN:ERR5069949.1331889\tLN:132", + "@SQ\tSN:ERR5069949.2161340\tLN:80", + "@SQ\tSN:ERR5069949.973930\tLN:79", + "@SQ\tSN:ERR5069949.2417063\tLN:150", + "@SQ\tSN:ERR5069949.376959\tLN:151", + "@SQ\tSN:ERR5069949.1088785\tLN:149", + "@SQ\tSN:ERR5069949.1066259\tLN:147", + "@SQ\tSN:ERR5069949.2832676\tLN:139", + "@SQ\tSN:ERR5069949.2953930\tLN:151", + "@SQ\tSN:ERR5069949.324865\tLN:151", + "@SQ\tSN:ERR5069949.2185111\tLN:150", + "@SQ\tSN:ERR5069949.937422\tLN:151", + "@SQ\tSN:ERR5069949.2431709\tLN:150", + "@SQ\tSN:ERR5069949.1246538\tLN:148", + "@SQ\tSN:ERR5069949.1189252\tLN:98", + "@SQ\tSN:ERR5069949.2216307\tLN:147", + "@SQ\tSN:ERR5069949.3273002\tLN:148", + "@SQ\tSN:ERR5069949.3277445\tLN:151", + "@SQ\tSN:ERR5069949.3022231\tLN:147", + "@SQ\tSN:ERR5069949.184542\tLN:151", + "@SQ\tSN:ERR5069949.540529\tLN:149", + "@SQ\tSN:ERR5069949.686090\tLN:150", + "@SQ\tSN:ERR5069949.2787556\tLN:106", + "@SQ\tSN:ERR5069949.2650879\tLN:150", + "@SQ\tSN:ERR5069949.2064910\tLN:149", + "@SQ\tSN:ERR5069949.2328704\tLN:150", + "@SQ\tSN:ERR5069949.1067032\tLN:150", + "@SQ\tSN:ERR5069949.3338256\tLN:151", + "@SQ\tSN:ERR5069949.1412839\tLN:147", + "@SQ\tSN:ERR5069949.1538968\tLN:150", + "@SQ\tSN:ERR5069949.147998\tLN:94", + "@SQ\tSN:ERR5069949.366975\tLN:106", + "@SQ\tSN:ERR5069949.1372331\tLN:151", + "@SQ\tSN:ERR5069949.1709367\tLN:129", + "@SQ\tSN:ERR5069949.2388984\tLN:150", + "@SQ\tSN:ERR5069949.1132353\tLN:150", + "@SQ\tSN:ERR5069949.1151736\tLN:151", + "@SQ\tSN:ERR5069949.479807\tLN:150", + "@SQ\tSN:ERR5069949.2176303\tLN:151", + "@SQ\tSN:ERR5069949.2772897\tLN:151", + "@SQ\tSN:ERR5069949.1020777\tLN:122", + "@SQ\tSN:ERR5069949.465452\tLN:151", + "@SQ\tSN:ERR5069949.1704586\tLN:149", + "@SQ\tSN:ERR5069949.1258508\tLN:151", + "@SQ\tSN:ERR5069949.986441\tLN:119", + "@SQ\tSN:ERR5069949.2674295\tLN:148", + "@SQ\tSN:ERR5069949.885966\tLN:79", + "@SQ\tSN:ERR5069949.2342766\tLN:151", + "@SQ\tSN:ERR5069949.3122970\tLN:127", + "@SQ\tSN:ERR5069949.3279513\tLN:72", + "@SQ\tSN:ERR5069949.309410\tLN:151", + "@SQ\tSN:ERR5069949.532979\tLN:149", + "@SQ\tSN:ERR5069949.2888794\tLN:151", + "@SQ\tSN:ERR5069949.2205229\tLN:150", + "@SQ\tSN:ERR5069949.786562\tLN:151", + "@SQ\tSN:ERR5069949.919671\tLN:151", + "@SQ\tSN:ERR5069949.1328186\tLN:151", + "@SQ\tSN:ERR5069949.870926\tLN:149", + "@SQ\tSN:ERR5069949.2257580\tLN:151", + "@SQ\tSN:ERR5069949.3249622\tLN:77", + "@SQ\tSN:ERR5069949.611123\tLN:125", + "@SQ\tSN:ERR5069949.651338\tLN:142", + "@SQ\tSN:ERR5069949.169513\tLN:92", + "@SQ\tSN:ERR5069949.155944\tLN:150", + "@SQ\tSN:ERR5069949.2033605\tLN:150", + "@SQ\tSN:ERR5069949.2730382\tLN:142", + "@SQ\tSN:ERR5069949.2125592\tLN:150", + "@SQ\tSN:ERR5069949.1062611\tLN:151", + "@SQ\tSN:ERR5069949.1778133\tLN:151", + "@SQ\tSN:ERR5069949.3057020\tLN:95", + "@SQ\tSN:ERR5069949.2972968\tLN:141", + "@SQ\tSN:ERR5069949.2734474\tLN:149", + "@SQ\tSN:ERR5069949.856527\tLN:151", + "@SQ\tSN:ERR5069949.2098070\tLN:151", + "@SQ\tSN:ERR5069949.1552198\tLN:150", + "@SQ\tSN:ERR5069949.2385514\tLN:150", + "@SQ\tSN:ERR5069949.2270078\tLN:151", + "@SQ\tSN:ERR5069949.114870\tLN:150", + "@SQ\tSN:ERR5069949.2668880\tLN:147", + "@SQ\tSN:ERR5069949.257821\tLN:139", + "@SQ\tSN:ERR5069949.2243023\tLN:150", + "@SQ\tSN:ERR5069949.2605155\tLN:146", + "@SQ\tSN:ERR5069949.1340552\tLN:151", + "@SQ\tSN:ERR5069949.1561137\tLN:150", + "@SQ\tSN:ERR5069949.2361683\tLN:149", + 
"@SQ\tSN:ERR5069949.2521353\tLN:150", + "@SQ\tSN:ERR5069949.1261808\tLN:149", + "@SQ\tSN:ERR5069949.2734873\tLN:98", + "@SQ\tSN:ERR5069949.3017828\tLN:107", + "@SQ\tSN:ERR5069949.573706\tLN:150", + "@SQ\tSN:ERR5069949.1980512\tLN:151", + "@SQ\tSN:ERR5069949.1014693\tLN:150", + "@SQ\tSN:ERR5069949.3184655\tLN:150", + "@SQ\tSN:ERR5069949.29668\tLN:89", + "@SQ\tSN:ERR5069949.3258358\tLN:151", + "@SQ\tSN:ERR5069949.1476386\tLN:151", + "@SQ\tSN:ERR5069949.2415814\tLN:150", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "16c1c651f8ec67383bcdee3c55aed94f", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:34.246998277" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml new file mode 100644 index 0000000..39dba37 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/align: + - "modules/nf-core/minimap2/align/**" diff --git a/modules/nf-core/porechop/abi/environment.yml b/modules/nf-core/porechop/abi/environment.yml new file mode 100644 index 0000000..4dd2eab --- /dev/null +++ b/modules/nf-core/porechop/abi/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: porechop_abi +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::porechop_abi=0.5.0 diff --git a/modules/nf-core/porechop/abi/main.nf b/modules/nf-core/porechop/abi/main.nf new file mode 100644 index 0000000..88ec5bd --- /dev/null +++ b/modules/nf-core/porechop/abi/main.nf @@ -0,0 +1,50 @@ +process PORECHOP_ABI { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/porechop_abi:0.5.0--py310h590eda1_0': + 'biocontainers/porechop_abi:0.5.0--py310h590eda1_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz") , emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi" + if ("$reads" == "${prefix}.fastq.gz") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + porechop_abi \\ + --input $reads \\ + --threads $task.cpus \\ + $args \\ + --output ${prefix}.fastq.gz \\ + | tee ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop_abi: \$( porechop_abi --version ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi" + """ + echo "" | gzip > ${prefix}.fastq.gz + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop_abi: \$( porechop_abi --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/porechop/abi/meta.yml b/modules/nf-core/porechop/abi/meta.yml new file mode 100644 index 0000000..a856ffb --- /dev/null +++ b/modules/nf-core/porechop/abi/meta.yml @@ -0,0 +1,48 @@ +name: "porechop_abi" +description: Extension of Porechop whose purpose is to process adapter sequences in ONT reads. +keywords: + - porechop_abi + - adapter + - nanopore +tools: + - "porechop_abi": + description: Extension of Porechop whose purpose is to process adapter sequences in ONT reads. + homepage: "https://github.com/bonsai-team/Porechop_ABI" + documentation: "https://github.com/bonsai-team/Porechop_ABI" + tool_dev_url: "https://github.com/bonsai-team/Porechop_ABI" + doi: "10.1101/2022.07.07.499093" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: fastq/fastq.gz file + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Adapter-trimmed fastq.gz file + pattern: "*.fastq.gz" + - log: + type: file + description: Log file containing stdout information + pattern: "*.log" +authors: + - "@sofstam" + - "LilyAnderssonLee" +maintainers: + - "@sofstam" + - "LilyAnderssonLee" diff --git a/modules/nf-core/porechop/abi/tests/main.nf.test b/modules/nf-core/porechop/abi/tests/main.nf.test new file mode 100644 index 0000000..b5a29f9 --- /dev/null +++ b/modules/nf-core/porechop/abi/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process PORECHOP_ABI" + script "../main.nf" + process "PORECHOP_ABI" + tag "modules" + tag "modules_nfcore" + tag "porechop" + tag "porechop/abi" + + test("sarscov2-nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads, + file(process.out.log.get(0).get(1)).readLines()[20..40], + process.out.versions).match() + } + ) + } + } + + test("sarscov2-nanopore - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/porechop/abi/tests/main.nf.test.snap b/modules/nf-core/porechop/abi/tests/main.nf.test.snap new file mode 100644 index 0000000..ad63f4e --- /dev/null +++ b/modules/nf-core/porechop/abi/tests/main.nf.test.snap @@ -0,0 +1,94 @@ +{ + 
"sarscov2-nanopore": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,886fdb859fb50e0dddd35007bcff043e" + ] + ], + [ + " Best \u001b[0m", + " read Best \u001b[0m", + " start read end\u001b[0m", + " \u001b[4mSet %ID %ID \u001b[0m", + " \u001b[32mSQK-NSK007 100.0 73.1\u001b[0m", + " Rapid 40.4 0.0", + " RBK004_upstream 77.5 0.0", + " SQK-MAP006 75.8 72.7", + " SQK-MAP006 short 65.5 66.7", + " PCR adapters 1 73.9 69.6", + " PCR adapters 2 80.0 72.7", + " PCR adapters 3 70.8 69.6", + " 1D^2 part 1 71.4 70.0", + " 1D^2 part 2 84.8 75.8", + " cDNA SSP 63.0 61.7", + " \u001b[32mBarcode 1 (reverse) 100.0 100.0\u001b[0m", + " Barcode 2 (reverse) 70.8 69.2", + " Barcode 3 (reverse) 76.0 70.4", + " Barcode 4 (reverse) 74.1 71.4", + " Barcode 5 (reverse) 77.8 80.8", + " Barcode 6 (reverse) 73.1 70.8" + ], + [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-29T13:50:49.318599" + }, + "sarscov2-nanopore - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.porechop_abi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ], + "log": [ + [ + { + "id": "test" + }, + "test.porechop_abi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-29T13:50:54.425389" + } +} \ No newline at end of file diff --git a/modules/nf-core/porechop/abi/tests/tags.yml b/modules/nf-core/porechop/abi/tests/tags.yml new file mode 100644 index 0000000..e19350c --- /dev/null +++ b/modules/nf-core/porechop/abi/tests/tags.yml @@ -0,0 +1,2 @@ +porechop/abi: + - "modules/nf-core/porechop/abi/**" diff --git a/modules/nf-core/racon/environment.yml b/modules/nf-core/racon/environment.yml new file mode 100644 index 0000000..e5cd0b8 --- /dev/null +++ b/modules/nf-core/racon/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::racon=1.4.20 diff --git a/modules/nf-core/racon/main.nf b/modules/nf-core/racon/main.nf new file mode 100644 index 0000000..de29e35 --- /dev/null +++ b/modules/nf-core/racon/main.nf @@ -0,0 +1,38 @@ +process RACON { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/racon:1.4.20--h9a82719_1' : + 'biocontainers/racon:1.4.20--h9a82719_1' }" + + input: + tuple val(meta), path(reads), path(assembly), path(paf) + + output: + tuple val(meta), path('*_assembly_consensus.fasta.gz') , emit: improved_assembly + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + racon -t "$task.cpus" \\ + "${reads}" \\ + "${paf}" \\ + $args \\ + "${assembly}" > \\ + ${prefix}_assembly_consensus.fasta + + gzip -n ${prefix}_assembly_consensus.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + racon: \$( racon --version 2>&1 | sed 's/^.*v//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/racon/meta.yml b/modules/nf-core/racon/meta.yml new file mode 100644 index 0000000..9698c0a --- /dev/null +++ b/modules/nf-core/racon/meta.yml @@ -0,0 +1,51 @@ +name: racon +description: Consensus module for raw de novo DNA assembly of long uncorrected reads +keywords: + - assembly + - pacbio + - nanopore + - polish +tools: + - racon: + description: Ultrafast consensus module for raw de novo genome assembly of long uncorrected reads. + homepage: https://github.com/lbcb-sci/racon + documentation: https://github.com/lbcb-sci/racon + tool_dev_url: https://github.com/lbcb-sci/racon + doi: 10.1101/gr.214270.116 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: List of input FastQ files. Racon expects single end reads + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" + - assembly: + type: file + description: Genome assembly to be improved + pattern: "*.{fasta,fa}" + - paf: + type: file + description: Alignment in PAF format + pattern: "*.paf" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - improved_assembly: + type: file + description: Improved genome assembly + pattern: "*_assembly_consensus.fasta.gz" +authors: + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/nextflow.config b/nextflow.config index 168873d..8791b13 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,7 @@ params { study_accession = null reads_accession = null private_study = false + min_read_length = 200 // For already fetched data samplesheet = null @@ -36,8 +37,13 @@ params { * for metaspades are prohibitively high, such as: * - Memory >1TB * - Runtime >3-4 days + * + * - flye: Use for any long-read assembly. 
assembler_config + * should be selected depending on input data (if ONT or + * pacbio, and if data quality is high or low) */ assembler = null + assembler_config = null // The pipeline will use the metadata from ENA (obtained by the fetch_tool) // As the metadata can be incorrect, we provide the following parameters to @@ -45,6 +51,7 @@ params { single_end = null library_layout = null library_strategy = null + platform = null // Reads QC filtering options filter_ratio_threshold = 0.9 @@ -53,6 +60,14 @@ params { // Reference genome reference_genome = null + /* + * Long-read assemblies won't require phiX, + * parameters should be defined as follows: + * remove_human = true + * human_blast_index_name = "human" + * human_bwamem2_index_name = "human" + * Need to integrate them + */ remove_human_phix = true human_phix_blast_index_name = "human_phix" human_phix_bwamem2_index_name = "human_phix" @@ -93,6 +108,7 @@ params { // Assembler versions spades_version = "3.15.5" megahit_version = "1.2.9" + flye_version = "2.9" } @@ -200,6 +216,9 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } + test { + includeConfig 'conf/test.config' + } codon_slurm { includeConfig 'conf/codon_slurm.config' } } diff --git a/nextflow_schema.json b/nextflow_schema.json index ebfb512..541ee4d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -40,14 +40,24 @@ "fa_icon": "far fa-address-card", "minLength": 3 }, + "min_read_length": { + "type": "integer", + "description": "Minimum read length for pre-assembly quality filtering", + "default": 200 + }, "private_study": { "type": "boolean", "description": "To use if the ENA study is private" }, "assembler": { "type": "string", - "enum": ["spades", "metaspades", "megahit"], - "description": "The short reads assembler" + "enum": ["spades", "metaspades", "megahit", "flye"], + "description": "The short or long reads assembler" + }, + "assembler_config": { + "type": "string", + "description": "Configuration to use flye with. 
Pick from nano-raw, nano-corr, nano-hq, pacbio-raw, pacbio-corr, pacbio-hifi", + "default": "" }, "single_end": { "type": "boolean", @@ -63,6 +73,15 @@ "description": "Force the library_layout value for the study / reads", "enum": ["single", "paired"] }, + "platform": { + "type": "string", + "description": "Force the instrument_platform value for the study / reads", + "default": "ont" + }, + "flye_version": { + "type": "string", + "default": "2.9" + }, "spades_version": { "type": "string", "default": "3.15.5" @@ -104,16 +123,31 @@ "description": "Remove human and phiX reads pre assembly, and contigs matching those genomes.", "default": true }, + "remove_human": { + "type": "boolean", + "description": "Remove human reads pre assembly, and contigs matching those genomes.", + "default": true + }, "human_phix_blast_index_name": { "type": "string", "description": "Combined Human and phiX BLAST db.", "default": "human_phix" }, + "human_blast_index_name": { + "type": "string", + "description": "Human BLAST db.", + "default": "human" + }, "human_phix_bwamem2_index_name": { "type": "string", "description": "Combined Human and phiX bwa-mem2 index.", "default": "human_phix" }, + "human_bwamem2_index_name": { + "type": "string", + "description": "Human bwa-mem2 index.", + "default": "human" + }, "min_contig_length": { "type": "integer", "default": 500, diff --git a/subworkflows/local/long_reads_qc.nf b/subworkflows/local/long_reads_qc.nf new file mode 100644 index 0000000..13635ba --- /dev/null +++ b/subworkflows/local/long_reads_qc.nf @@ -0,0 +1,91 @@ +include { FASTP_LR } from '../../modules/nf-core/fastp/main' +include { RAW_READ_QUALITY_CHECK } from '../../modules/local/raw_read_quality_check/' +include { MINIMAP2_ALIGN as HUMAN_DECONTAMINATION } from '../../modules/nf-core/minimap2/align/main' +include { MINIMAP2_ALIGN as HOST_DECONTAMINATION } from '../../modules/nf-core/minimap2/align/main' + +workflow LONG_READS_QC { + take: + reads // [ val(meta), path(reads) ] + host_reference_genome // [ val(meta2), path(reference_genome) ] + + main: + ch_versions = Channel.empty() + + FASTP_LR( + reads, + [], + false, + false, + false, + false + ) + + ch_versions = ch_versions.mix(FASTP.out.versions) + + RAW_READ_QUALITY_CHECK( + FASTP.out.json + ) + + decontaminated_reads = channel.empty() + + if ( params.remove_human ) { + + ch_bwamem2_human_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${params.human_blast_index_name}.fna", checkIfExists: true) + .collect().map { + files -> [ ["id": params.human_blast_index_name], files ] + } + + // TODO: can we change the way human/host are given via prefixes? 
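+        // The human reference channel above is collect()-ed into a value channel so that the
+        // same [ meta, fasta ] tuple can be reused for every incoming read set instead of being
+        // consumed after the first sample.
+        // The alignment below is used purely for decontamination: only reads that do NOT map to
+        // the reference are kept, and they are picked up further down via the filtered_fastq output.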
+ + HUMAN_DECONTAMINATION( + FASTP.out.reads, + ch_bwamem2_human_refs, + "human", + true, + "bai", + false, + true + ) + + ch_versions = ch_versions.mix(HUMAN_DECONTAMINATION.out.versions) + + decontaminated_reads = HUMAN_DECONTAMINATION.out.filtered_fastq + + } else { + decontaminated_reads = FASTP.out.reads + } + + if ( host_reference_genome != null ) { + + ch_bwamem2_host_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${host_reference_genome}*", checkIfExists: true) + .collect().map { + files -> [ ["id": host_reference_genome], files ] + } + + HOST_DECONTAMINATION( + decontaminated_reads, + ch_bwamem2_host_refs, + "host", + true, + "bai", + false, + true + ) + + ch_versions = ch_versions.mix(HOST_DECONTAMINATION.out.versions) + + decontaminated_reads = HOST_DECONTAMINATION.out.filtered_fastq + } + + final_reads = decontaminated_reads + .map{ meta, reads -> { + [ meta + [ + "quality": RAW_READ_QUALITY_CHECK.out.quality.val + ], reads ] + } + } + + emit: + qc_reads = final_reads + versions = ch_versions +} diff --git a/subworkflows/local/ont_hq.nf b/subworkflows/local/ont_hq.nf new file mode 100644 index 0000000..7255d24 --- /dev/null +++ b/subworkflows/local/ont_hq.nf @@ -0,0 +1,16 @@ +include { PORECHOP_ABI as PORECHOP_ONT } from '../../modules/nf-core/porechop/abi/main' + +workflow ONT_HQ { + take: + reads // [ val(meta), path(reads) ] + + main: + PORECHOP_ONT( + reads + ) + PORECHOP_ONT.out.reads.view() + + // temporary just to test the module + emit: + contigs = PORECHOP_ONT.out.reads +} diff --git a/subworkflows/local/ont_lq.nf b/subworkflows/local/ont_lq.nf new file mode 100644 index 0000000..6538c14 --- /dev/null +++ b/subworkflows/local/ont_lq.nf @@ -0,0 +1,18 @@ +include { CANU as CANU_ONT } from '../../modules/nf-core/canu/main' + +workflow ONT_LQ { + take: + reads // [ val(meta), path(reads) ] + + main: + CANU_ONT( + reads, + "-nanopore", + "5m" + ) + CANU_ONT.out.corrected_trimmed_reads.view() + + // temporary just to test the module + emit: + contigs = CANU_ONT.out.corrected_trimmed_reads +} diff --git a/subworkflows/local/pacbio_hifi.nf b/subworkflows/local/pacbio_hifi.nf new file mode 100644 index 0000000..491bf28 --- /dev/null +++ b/subworkflows/local/pacbio_hifi.nf @@ -0,0 +1,3 @@ +workflow PACBIO_HIFI { + +} \ No newline at end of file diff --git a/subworkflows/local/pacbio_lq.nf b/subworkflows/local/pacbio_lq.nf new file mode 100644 index 0000000..df49b01 --- /dev/null +++ b/subworkflows/local/pacbio_lq.nf @@ -0,0 +1,14 @@ +include { CANU as CANU_PACBIO } from '../../modules/nf-core/canu/main' + +workflow PACBIO_LQ { + take: + reads // [ val(meta), path(reads) ] + + main: + CANU_PACBIO( + reads, + "-pacbio", + "5m" + ) + CANU_PACBIO.out.corrected_reads.view() +} diff --git a/subworkflows/local/reads_qc.nf b/subworkflows/local/reads_qc.nf index a3e99af..4cbbbe6 100644 --- a/subworkflows/local/reads_qc.nf +++ b/subworkflows/local/reads_qc.nf @@ -16,6 +16,7 @@ workflow READS_QC { [], false, false, + false, false ) diff --git a/tests/samplesheet/test_minION_SRR10303629.csv b/tests/samplesheet/test_minION_SRR10303629.csv new file mode 100644 index 0000000..c6ac9e8 --- /dev/null +++ b/tests/samplesheet/test_minION_SRR10303629.csv @@ -0,0 +1,2 @@ +study_accession,reads_accession,fastq_1,library_layout,library_strategy,assembler,assembly_memory +SRP226117,SRR10303629,/home/germana/Desktop/EBI_root/Git/long-read-assembly/tests/test_reads/SRR10303629_1.fastq.gz,single,metagenomic,, \ No newline at end of file diff --git a/workflows/longreadassembler.nf 
b/workflows/longreadassembler.nf new file mode 100644 index 0000000..227e62d --- /dev/null +++ b/workflows/longreadassembler.nf @@ -0,0 +1,244 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + PRINT PARAMS SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { validateParameters; paramsSummaryLog; paramsSummaryMap; samplesheetToList } from 'plugin/nf-schema' + +def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) +def citation = '\n' + WorkflowMain.citation(workflow) + '\n' +def summary_params = paramsSummaryMap(workflow) + +// Print parameter summary log to screen +log.info logo + paramsSummaryLog(workflow) + citation + +validateParameters() + +if (params.help) { + log.info paramsHelp("nextflow run ebi-metagenomics/longreadsassembly --help") + exit 0 +} + + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() +ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// + +include { FETCHTOOL_READS } from '../modules/local/fetchtool_reads' +include { LONG_READS_QC } from '../subworkflows/local/long_reads_qc' +include { ONT_LQ } from '../subworkflows/local/ont_lq' +include { ONT_HQ } from '../subworkflows/local/ont_hq' +// include { PACBIO_LQ } from '../subworkflows/local/pacbio_lq' +// include { PACBIO_HIFI } from '../subworkflows/local/pacbio_hifi' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def multiqc_report = [] + +workflow LONGREADSASSEMBLY { + + ch_versions = Channel.empty() + longReads = Channel.empty() + fetch_tool_metadata = Channel.empty() + + if ( params.samplesheet ) { + + longReads = { study_accession, reads_accession, fq1, library_layout, library_strategy, assembler, assembler_config, assembly_memory -> + return tuple( + [ + "id": reads_accession, + "study_accession": study_accession, + "library_strategy": library_strategy, + "library_layout": library_layout, + "single_end": true, + "assembler": assembler ?: params.assembler, + "assembler_config": assembler_config ?: params.assembler_config, + 
"assembly_memory": assembly_memory ?: params.assembly_memory + ], + [fq1] + ) + } + + samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json")) + + fetch_reads_transformed = samplesheet.map(longReads) + + } else { + // TODO: remove when the fetch tools gets published on bioconda + fetch_tool_config = file("${projectDir}/assets/fetch_tool_anonymous.json", checkIfExists: true) + + if ( params.private_study ) { + fetch_tool_config = file("${projectDir}/assets/fetch_tool_credentials.json", checkIfExists: true) + } + + FETCHTOOL_READS( + [ [id: params.reads_accession], params.study_accession, params.reads_accession ], + fetch_tool_config + ) + + ch_versions = ch_versions.mix(FETCHTOOL_READS.out.versions) + + // Push the library strategy into the meta of the reads, this is to make it easier to handle downstream + fetch_reads_transformed = FETCHTOOL_READS.out.reads.map { meta, reads, library_strategy, library_layout, platform -> { + [ meta + [ + // -- The metadata will be overriden by the parameters -- // + "assembler": params.assembler, + "assembly_memory": params.assembly_memory, + "assembler_config": params.assembler_config, + "library_strategy": params.library_strategy ?: library_strategy, + "library_layout": params.library_layout ?: library_layout, + "single_end": params.single_end ?: library_layout == "single", + "platform": params.platform ?: platform + ], reads ] + } + } + + // Metadata for MultiQC + fetch_tool_metadata = FETCHTOOL_READS.out.metadata_tsv.map { it[1] }.collectFile( + name: 'fetch_tool_mqc.tsv', + newLine: true, + keepHeader: true, + skip: 1 + ) + } + + LONG_READS_QC ( + fetch_reads_transformed, + params.reference_genome + ) + ch_versions = ch_versions.mix(LONG_READS_QC.out.versions) + + /*********************************************************************************/ + /* Selecting the combination of adapter trimming, assembler, and post-processing */ + /*********************************************************************************/ + /* + The selection process ensures that: + - The user selected assembler configuration is always used (either from the samplesheet assembler column (with precedence) or the params.assembler) + - Low-quality ONT reads are trimmed with canu and assembled with flye --nano-corr/raw), unless specified otherwise. + - High-quality ONT reads are trimmed with porechob_abi and assembled with flye --nano-hq), unless specified otherwise. + - Low-quality pacbio reads are trimmed with canu and assembled with flye --pacbio-corr/raw), unless specified otherwise. + - High-quality pacbio reads are trimmed with HiFiAdapterFilt and assembled with flye --pacbio-hifi), unless specified otherwise. + Extra polishing steps are applied to low-quality reads. All subworkflows also apply post-assembly host decontamination. 
+ */ + + reads_assembler_config = LONG_READS_QC.out.qc_reads.map { meta, reads -> + if (meta.platform == "ont") { + if (params.assembler_config == "nano-raw" || meta.quality == "low") { + return [meta + ["assembler_config": "nano-raw"], reads] + } else if (params.assembler_config == "nano-hq" || meta.quality == "high") { + return [meta + ["assembler_config": "nano-hq"], reads] + } + } else if (meta.platform == "pacbio") { + if (params.assembler_config == "pacbio-raw" || meta.quality == "low") { + return [meta + ["assembler_config": "pacbio-raw"], reads] + } else if (params.assembler_config == "pacbio-hifi" || meta.quality == "high") { + return [meta + ["assembler_config": "pacbio-hifi"], reads] + } + } else { + error "Incompatible configuration" + } + } + + reads_assembler_config.branch { meta, reads -> + lq_ont: meta.assembler_config == "nano-raw" + hq_ont: meta.assembler_config == "pacbio-raw" + lq_pacbio: meta.assembler_config == "nano-hq" + hq_pacbio: meta.assembler_config == "pacbio-hifi" + }.set {subworkflow_platform_reads} + + ONT_LQ( + subworkflow_platform_reads.lq_ont + ) + + ONT_HQ( + subworkflow_platform_reads.hq_ont + ) + + // PACBIO_LQ( + // subworkflow_platform_reads.lq_pacbio.map { meta, reads -> [meta, reads] } + // ) + + // PACBIO_HIFI( + // subworkflow_platform_reads.hq_pacbio.map { meta, reads -> [meta, reads] } + // ) + + assembly = ONT_LQ.out.contigs.mix( ONT_HQ.out.contigs )//, PACBIO_LQ.out.contigs, PACBIO_HIFI.out.contigs ) + + /*************************************/ + /* Post-assembly: coverage and stats */ + /*************************************/ + + // + // MODULE: Run FastQC + // + // FASTQC ( + // INPUT_CHECK.out.reads + // ) + // ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + + // CUSTOM_DUMPSOFTWAREVERSIONS ( + // ch_versions.unique().collectFile(name: 'collated_versions.yml') + // ) + + // + // MODULE: MultiQC + // + // workflow_summary = WorkflowLongreadsassembly.paramsSummaryMultiqc(workflow, summary_params) + // ch_workflow_summary = Channel.value(workflow_summary) + + // methods_description = WorkflowLongreadsassembly.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + // ch_methods_description = Channel.value(methods_description) + + // ch_multiqc_files = Channel.empty() + // ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + // ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + // ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + // ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + + // MULTIQC ( + // ch_multiqc_files.collect(), + // ch_multiqc_config.toList(), + // ch_multiqc_custom_config.toList(), + // ch_multiqc_logo.toList() + // ) + // multiqc_report = MULTIQC.out.report.toList() +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ From 76e8d011d9caef7acdcef4b8b0a23df24102cc42 Mon Sep 17 00:00:00 2001 From: Ge94 Date: Tue, 3 Sep 2024 14:01:55 +0100 Subject: [PATCH 02/33] Adapt params to short/long reads workflows --- README.md | 17 +++-- conf/codon_slurm.config | 1 + conf/test.config | 4 +- modules/nf-core/quast/main.nf | 4 +- modules/nf-core/quast/quast.diff | 4 +- modules/nf-core/seqkit/seq/main.nf | 4 +- modules/nf-core/seqkit/seq/seqkit-seq.diff | 4 +- 
nextflow.config | 89 ++++++++++++---------- nextflow_schema.json | 24 +++--- subworkflows/local/assembly_qc.nf | 4 +- subworkflows/local/long_reads_qc.nf | 8 +- tests/main.nf.test | 7 ++ workflows/longreadassembler.nf | 32 ++++---- workflows/miassembler.nf | 14 ++-- 14 files changed, 115 insertions(+), 101 deletions(-) diff --git a/README.md b/README.md index f0809fd..64ad609 100644 --- a/README.md +++ b/README.md @@ -37,18 +37,21 @@ Input/output options --library_layout [string] Force the library_layout value for the study / reads (accepted: single, paired) --spades_version [string] null [default: 3.15.5] --megahit_version [string] null [default: 1.2.9] - --reference_genome [string] The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics + --flye_version [string] null [default: 2.9] + --host_reference_genome [string] The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics internal directory (accepted: chicken.fna, salmon.fna, cod.fna, pig.fna, cow.fna, mouse.fna, honeybee.fna, rainbow_trout.fna, ...) --blast_reference_genomes_folder [string] The folder with the reference genome blast indexes, defaults to the Microbiome Informatics internal directory. --bwamem2_reference_genomes_folder [string] The folder with the reference genome bwa-mem2 indexes, defaults to the Microbiome Informatics internal + + --reference_genomes_folder [string] The folder with reference genomes, defaults to the Microbiome Informatics internal directory. --remove_human_phix [boolean] Remove human and phiX reads pre assembly, and contigs matching those genomes. [default: true] --human_phix_blast_index_name [string] Combined Human and phiX BLAST db. [default: human_phix] --human_phix_bwamem2_index_name [string] Combined Human and phiX bwa-mem2 index. [default: human_phix] - --min_contig_length [integer] Minimum contig length filter. [default: 500] - --min_contig_length_metatranscriptomics [integer] Minimum contig length filter for metaT. [default: 200] + --short_reads_min_contig_length [integer] Minimum contig length filter. [default: 500] + --short_reads_min_contig_length_metat [integer] Minimum contig length filter for metaT. [default: 200] --assembly_memory [integer] Default memory allocated for the assembly process. [default: 100] --spades_only_assembler [boolean] Run SPAdes/metaSPAdes without the error correction step. [default: true] --outdir [string] The output directory where the results will be saved. You have to use absolute paths to storage on Cloud @@ -66,7 +69,7 @@ Example: nextflow run ebi-metagenomics/miassembler \ -profile codon_slurm \ --assembler metaspades \ - --reference_genome human \ + --host_reference_genome human \ --outdir testing_results \ --study_accession SRP002480 \ --reads_accession SRR1631361 @@ -182,15 +185,15 @@ Runs that fail QC checks are excluded from the assembly process. These runs are Example: ```csv -SRR6180434,filter_ratio_threshold_exceeded +SRR6180434,short_reads_filter_ratio_threshold_exceeded ``` ##### Runs exclusion messages | Exclusion Message | Description | | --------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `filter_ratio_threshold_exceeded` | The maximum fraction of reads that are allowed to be filtered out. 
If exceeded, it flags excessive filtering. The default value is 0.9, meaning that if more than 90% of the reads are filtered out, the threshold is considered exceeded, and the run is not assembled. | -| `low_reads_count_threshold` | The minimum number of reads required after filtering. If below, it flags a low read count, and the run is not assembled. | +| `short_reads_filter_ratio_threshold_exceeded` | The maximum fraction of reads that are allowed to be filtered out. If exceeded, it flags excessive filtering. The default value is 0.9, meaning that if more than 90% of the reads are filtered out, the threshold is considered exceeded, and the run is not assembled. | +| `short_reads_low_reads_count_threshold` | The minimum number of reads required after filtering. If below, it flags a low read count, and the run is not assembled. | #### Assembled Runs diff --git a/conf/codon_slurm.config b/conf/codon_slurm.config index 541a69d..7fb4789 100644 --- a/conf/codon_slurm.config +++ b/conf/codon_slurm.config @@ -1,4 +1,5 @@ params { + reference_genomes_folder = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/bwa-mem2/" bwamem2_reference_genomes_folder = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/bwa-mem2/" blast_reference_genomes_folder = "/nfs/production/rdf/metagenomics/pipelines/prod/assembly-pipeline/blast_dbs/" human_phix_blast_index_name = "human_phix" diff --git a/conf/test.config b/conf/test.config index 421e7f7..60db88e 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,12 +18,12 @@ profiles { max_memory = '6.GB' max_time = '6.h' + reference_genomes_folder = "tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "tests/human_phix/bwa2mem" blast_reference_genomes_folder = "tests/human_phix/blast" human_phix_blast_index_name = "human_phix" human_phix_bwamem2_index_name = "human_phix" - human_blast_index_name = "human" - human_bwamem2_index_name = "human" + human_fasta_prefix = "human" } } } diff --git a/modules/nf-core/quast/main.nf b/modules/nf-core/quast/main.nf index ce9befd..da16b9f 100644 --- a/modules/nf-core/quast/main.nf +++ b/modules/nf-core/quast/main.nf @@ -26,9 +26,9 @@ process QUAST { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def min_contig_len = "--min-contig ${params.min_contig_length}" + def min_contig_len = "--min-contig ${params.short_reads_min_contig_length}" if ( meta.library_strategy == "metatranscriptomics" ) { - min_contig_len = "--min-contig ${params.min_contig_length_metatranscriptomics}" + min_contig_len = "--min-contig ${params.short_reads_min_contig_length_metat}" } def features = gff ? "--features $gff" : '' def reference = fasta ? "-r $fasta" : '' diff --git a/modules/nf-core/quast/quast.diff b/modules/nf-core/quast/quast.diff index 7d48832..bfaf013 100644 --- a/modules/nf-core/quast/quast.diff +++ b/modules/nf-core/quast/quast.diff @@ -14,9 +14,9 @@ Changes in module 'nf-core/quast' script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" -+ def min_contig_len = "--min-contig ${params.min_contig_length}" ++ def min_contig_len = "--min-contig ${params.short_reads_min_contig_length}" + if ( meta.library_strategy == "metatranscriptomics" ) { -+ min_contig_len = "--min-contig ${params.min_contig_length_metatranscriptomics}" ++ min_contig_len = "--min-contig ${params.short_reads_min_contig_length_metat}" + } def features = gff ? "--features $gff" : '' def reference = fasta ? 
"-r $fasta" : '' diff --git a/modules/nf-core/seqkit/seq/main.nf b/modules/nf-core/seqkit/seq/main.nf index a6a05b7..4a1d0f3 100644 --- a/modules/nf-core/seqkit/seq/main.nf +++ b/modules/nf-core/seqkit/seq/main.nf @@ -19,9 +19,9 @@ process SEQKIT_SEQ { task.ext.when == null || task.ext.when script: - def min_len = params.min_contig_length + def min_len = params.short_reads_min_contig_length if ( meta.library_strategy == "metatranscriptomic" ) { - min_len = params.min_contig_length_metatranscriptomics + min_len = params.short_reads_min_contig_length_metat } def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' diff --git a/modules/nf-core/seqkit/seq/seqkit-seq.diff b/modules/nf-core/seqkit/seq/seqkit-seq.diff index 168ac0b..af070e2 100644 --- a/modules/nf-core/seqkit/seq/seqkit-seq.diff +++ b/modules/nf-core/seqkit/seq/seqkit-seq.diff @@ -5,9 +5,9 @@ Changes in module 'nf-core/seqkit/seq' task.ext.when == null || task.ext.when script: -+ def min_len = params.min_contig_length ++ def min_len = params.short_reads_min_contig_length + if ( meta.library_strategy == "metatranscriptomic" ) { -+ min_len = params.min_contig_length_metatranscriptomics ++ min_len = params.short_reads_min_contig_length_metat + } def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' diff --git a/nextflow.config b/nextflow.config index 8791b13..ad7e5d6 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,14 +17,51 @@ params { study_accession = null reads_accession = null private_study = false - min_read_length = 200 // For already fetched data samplesheet = null - /* - * Assembler options, by default the pipeline will pick - * - metaspades for pair-end - * - megahit for single-end + + // The pipeline will use the metadata from ENA (obtained by the fetch_tool) + // As the metadata can be incorrect, we provide the following parameters to + // "force" them + single_end = null + library_layout = null + library_strategy = null + platform = null + + // QC FILTERING + + // Short reads options + short_reads_filter_ratio_threshold = 0.9 + short_reads_low_reads_count_threshold = 1000 + + // Long reads options + long_read_min_read_length = 200 + + // Reference genome name (to select from list) + bwamem2_reference_genomes_folder = "" + blast_reference_genomes_folder = "" + host_reference_genome = null + + // Short-read sequences and assemblies are + // automatically polished from human and phix seqs + // Both blast and bwa indices are needed + remove_human_phix = true + human_phix_blast_index_name = "human_phix" + human_phix_bwamem2_index_name = "human_phix" + + // Long-read assemblies don't require phiX + // nor indices, just a fasta file + reference_genomes_folder = null + remove_human = true + human_fasta_prefix = "human" + + // ASSEMBLY + + /* By default the pipeline will pick + * - metaspades for paired-end short reads + * - megahit for single-end short reads + * - flye for long reads * * Setting --assembler will force the assembler * @@ -43,43 +80,13 @@ params { * pacbio, and if data quality is high or low) */ assembler = null - assembler_config = null - - // The pipeline will use the metadata from ENA (obtained by the fetch_tool) - // As the metadata can be incorrect, we provide the following parameters to - // "force" them - single_end = null - library_layout = null - library_strategy = null - platform = null - - // Reads QC filtering options - filter_ratio_threshold = 0.9 - low_reads_count_threshold = 1000 - - // Reference genome - reference_genome = null - - /* - * Long-read assemblies won't 
require phiX, - * parameters should be defined as follows: - * remove_human = true - * human_blast_index_name = "human" - * human_bwamem2_index_name = "human" - * Need to integrate them - */ - remove_human_phix = true - human_phix_blast_index_name = "human_phix" - human_phix_bwamem2_index_name = "human_phix" - - bwamem2_reference_genomes_folder = "" - blast_reference_genomes_folder = "" // Assembly options - spades_only_assembler = true - min_contig_length = 500 - min_contig_length_metatranscriptomics = 200 - assembly_memory = 100 + spades_only_assembler = true + short_reads_min_contig_length = 500 + short_reads_min_contig_length_metat = 200 + long_read_assembler_config = null + assembly_memory = 100 // MultiQC options multiqc_config = null @@ -108,7 +115,7 @@ params { // Assembler versions spades_version = "3.15.5" megahit_version = "1.2.9" - flye_version = "2.9" + flye_version = "2.9" } diff --git a/nextflow_schema.json b/nextflow_schema.json index 541ee4d..d592847 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -11,10 +11,6 @@ "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", "required": [ - "blast_reference_genomes_folder", - "bwamem2_reference_genomes_folder", - "human_phix_blast_index_name", - "human_phix_bwamem2_index_name", "outdir" ], "properties": { @@ -54,7 +50,7 @@ "enum": ["spades", "metaspades", "megahit", "flye"], "description": "The short or long reads assembler" }, - "assembler_config": { + "long_read_assembler_config": { "type": "string", "description": "Configuration to use flye with. Pick from nano-raw, nano-corr, nano-hq, pacbio-raw, pacbio-corr, pacbio-hifi", "default": "" @@ -90,7 +86,7 @@ "type": "string", "default": "1.2.9" }, - "reference_genome": { + "host_reference_genome": { "type": "string", "description": "The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics internal directory", "enum": [ @@ -143,20 +139,20 @@ "description": "Combined Human and phiX bwa-mem2 index.", "default": "human_phix" }, - "human_bwamem2_index_name": { + "human_fasta_prefix": { "type": "string", - "description": "Human bwa-mem2 index.", + "description": "Human prefix name.", "default": "human" }, - "min_contig_length": { + "short_reads_min_contig_length": { "type": "integer", "default": 500, - "description": "Minimum contig length filter." + "description": "Minimum contig length filter for short reads." }, - "min_contig_length_metatranscriptomics": { + "short_reads_min_contig_length_metat": { "type": "integer", "default": 200, - "description": "Minimum contig length filter for metaT." + "description": "Minimum contig length filter for short reads metaT." }, "assembly_memory": { "type": "integer", @@ -196,14 +192,14 @@ "description": "Set the thresholds for the reads QC/filtering steps. Reads that fail QC won't be assembled.", "help_text": "Use these options to define the quality control thresholds for your reads. You can specify the maximum allowed filtering ratio and the minimum acceptable read count. If the filtering ratio exceeds the set limit or the read count falls below the threshold, the reads will be flagged and excluded from further assembly. The information about those runs that failed are aggregated in the qc_failed_runs.csv file.", "properties": { - "filter_ratio_threshold": { + "short_reads_filter_ratio_threshold": { "type": "number", "description": "The maximum fraction of reads that are allowed to be filtered out. 
If exceeded, it flags excessive filtering. The default value is 0.9, meaning that if more than 90% of the reads are filtered out, the threshold is considered exceeded, and the run is not assembled.", "default": 0.9, "minimum": 0.0, "maximum": 1.0 }, - "low_reads_count_threshold": { + "short_reads_low_reads_count_threshold": { "type": "number", "description": "The minimum number of reads required after filtering. If below, it flags a low read count and the run is not assembled.", "default": 1000 diff --git a/subworkflows/local/assembly_qc.nf b/subworkflows/local/assembly_qc.nf index f5bfa7d..e96a475 100644 --- a/subworkflows/local/assembly_qc.nf +++ b/subworkflows/local/assembly_qc.nf @@ -22,13 +22,13 @@ workflow ASSEMBLY_QC { take: assembly // [ val(meta), path(assembly_fasta) ] - host_reference_genome // [ val(meta2), path(reference_genome) ] | meta2 contains the name of the reference genome + host_reference_genome // [ val(meta2), path(host_reference_genome) ] | meta2 contains the name of the reference genome main: ch_versions = Channel.empty() - /* Len filter using the parameter "min_contig_length" */ + /* Len filter using the parameter "short_reads_min_contig_length" */ SEQKIT_SEQ( assembly ) diff --git a/subworkflows/local/long_reads_qc.nf b/subworkflows/local/long_reads_qc.nf index 13635ba..5ef54dd 100644 --- a/subworkflows/local/long_reads_qc.nf +++ b/subworkflows/local/long_reads_qc.nf @@ -30,7 +30,7 @@ workflow LONG_READS_QC { if ( params.remove_human ) { - ch_bwamem2_human_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${params.human_blast_index_name}.fna", checkIfExists: true) + human_reference = Channel.fromPath( "${params.reference_genomes_folder}/${params.human_fasta_prefix}.fna", checkIfExists: true) .collect().map { files -> [ ["id": params.human_blast_index_name], files ] } @@ -39,7 +39,7 @@ workflow LONG_READS_QC { HUMAN_DECONTAMINATION( FASTP.out.reads, - ch_bwamem2_human_refs, + human_reference, "human", true, "bai", @@ -57,14 +57,14 @@ workflow LONG_READS_QC { if ( host_reference_genome != null ) { - ch_bwamem2_host_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${host_reference_genome}*", checkIfExists: true) + host_reference = Channel.fromPath( "${params.reference_genomes_folder}/${host_reference_genome}*", checkIfExists: true) .collect().map { files -> [ ["id": host_reference_genome], files ] } HOST_DECONTAMINATION( decontaminated_reads, - ch_bwamem2_host_refs, + host_reference, "host", true, "bai", diff --git a/tests/main.nf.test b/tests/main.nf.test index 06e3213..ed59d5b 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -11,6 +11,7 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "spades" + reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" samplesheet = "${projectDir}/tests/samplesheet/test.csv" @@ -35,6 +36,7 @@ nextflow_pipeline { params { outdir = "tests/results" + reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" study_accession = "SRP115494" @@ -63,6 +65,7 @@ nextflow_pipeline { assembler = "megahit" study_accession = "SRP115494" reads_accession = "SRR6180434" + reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" 
bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" } @@ -87,6 +90,7 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "metaspades" + reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" study_accession = "ERP012810" @@ -111,6 +115,7 @@ nextflow_pipeline { when { params { outdir = "tests/results" + reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" study_accession = "ERP012810" @@ -133,6 +138,7 @@ nextflow_pipeline { when { params { outdir = "tests/results" + reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" study_accession = "DRP007622" @@ -159,6 +165,7 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "spades" + reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" study_accession = "DRP007622" diff --git a/workflows/longreadassembler.nf b/workflows/longreadassembler.nf index 227e62d..7b39717 100644 --- a/workflows/longreadassembler.nf +++ b/workflows/longreadassembler.nf @@ -76,7 +76,7 @@ workflow LONGREADSASSEMBLY { if ( params.samplesheet ) { - longReads = { study_accession, reads_accession, fq1, library_layout, library_strategy, assembler, assembler_config, assembly_memory -> + longReads = { study_accession, reads_accession, fq1, library_layout, library_strategy, assembler, long_read_assembler_config, assembly_memory -> return tuple( [ "id": reads_accession, @@ -85,7 +85,7 @@ workflow LONGREADSASSEMBLY { "library_layout": library_layout, "single_end": true, "assembler": assembler ?: params.assembler, - "assembler_config": assembler_config ?: params.assembler_config, + "long_read_assembler_config": long_read_assembler_config ?: params.long_read_assembler_config, "assembly_memory": assembly_memory ?: params.assembly_memory ], [fq1] @@ -117,7 +117,7 @@ workflow LONGREADSASSEMBLY { // -- The metadata will be overriden by the parameters -- // "assembler": params.assembler, "assembly_memory": params.assembly_memory, - "assembler_config": params.assembler_config, + "long_read_assembler_config": params.long_read_assembler_config, "library_strategy": params.library_strategy ?: library_strategy, "library_layout": params.library_layout ?: library_layout, "single_end": params.single_end ?: library_layout == "single", @@ -137,7 +137,7 @@ workflow LONGREADSASSEMBLY { LONG_READS_QC ( fetch_reads_transformed, - params.reference_genome + params.host_reference_genome ) ch_versions = ch_versions.mix(LONG_READS_QC.out.versions) @@ -156,16 +156,16 @@ workflow LONGREADSASSEMBLY { reads_assembler_config = LONG_READS_QC.out.qc_reads.map { meta, reads -> if (meta.platform == "ont") { - if (params.assembler_config == "nano-raw" || meta.quality == "low") { - return [meta + ["assembler_config": "nano-raw"], reads] - } else if (params.assembler_config == "nano-hq" || meta.quality == "high") { - return [meta + ["assembler_config": 
"nano-hq"], reads] + if (params.long_read_assembler_config == "nano-raw" || meta.quality == "low") { + return [meta + ["long_read_assembler_config": "nano-raw"], reads] + } else if (params.long_read_assembler_config == "nano-hq" || meta.quality == "high") { + return [meta + ["long_read_assembler_config": "nano-hq"], reads] } } else if (meta.platform == "pacbio") { - if (params.assembler_config == "pacbio-raw" || meta.quality == "low") { - return [meta + ["assembler_config": "pacbio-raw"], reads] - } else if (params.assembler_config == "pacbio-hifi" || meta.quality == "high") { - return [meta + ["assembler_config": "pacbio-hifi"], reads] + if (params.long_read_assembler_config == "pacbio-raw" || meta.quality == "low") { + return [meta + ["long_read_assembler_config": "pacbio-raw"], reads] + } else if (params.long_read_assembler_config == "pacbio-hifi" || meta.quality == "high") { + return [meta + ["long_read_assembler_config": "pacbio-hifi"], reads] } } else { error "Incompatible configuration" @@ -173,10 +173,10 @@ workflow LONGREADSASSEMBLY { } reads_assembler_config.branch { meta, reads -> - lq_ont: meta.assembler_config == "nano-raw" - hq_ont: meta.assembler_config == "pacbio-raw" - lq_pacbio: meta.assembler_config == "nano-hq" - hq_pacbio: meta.assembler_config == "pacbio-hifi" + lq_ont: meta.long_read_assembler_config == "nano-raw" + hq_ont: meta.long_read_assembler_config == "pacbio-raw" + lq_pacbio: meta.long_read_assembler_config == "nano-hq" + hq_pacbio: meta.long_read_assembler_config == "pacbio-hifi" }.set {subworkflow_platform_reads} ONT_LQ( diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index 69a80d6..f4e62ec 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -182,7 +182,7 @@ workflow MIASSEMBLER { READS_QC( fetch_reads_transformed, - params.reference_genome + params.host_reference_genome ) FASTQC_AFTER ( @@ -199,8 +199,8 @@ workflow MIASSEMBLER { bf_total_reads = json_txt?.summary?.before_filtering?.total_reads ?: 0; af_total_reads = json_txt?.summary?.after_filtering?.total_reads ?: 0; reads_qc_meta = [ - "low_reads_count": af_total_reads <= params.low_reads_count_threshold, - "filter_ratio_threshold_exceeded": af_total_reads == 0 || ((af_total_reads / bf_total_reads) <= params.filter_ratio_threshold ) + "short_reads_low_reads_count": af_total_reads <= params.short_reads_low_reads_count_threshold, + "short_reads_filter_ratio_threshold_exceeded": af_total_reads == 0 || ((af_total_reads / bf_total_reads) <= params.short_reads_filter_ratio_threshold ) ] return [meta, reads_qc_meta] } @@ -210,7 +210,7 @@ workflow MIASSEMBLER { extended_reads_qc.branch { meta, reads, reads_qc_meta -> // Filter out failed reads // - qc_failed: reads_qc_meta.low_reads_count || reads_qc_meta.filter_ratio_threshold_exceeded + qc_failed: reads_qc_meta.low_reads_count || reads_qc_meta.short_reads_filter_ratio_threshold_exceeded megahit: meta.assembler == "megahit" xspades: ["metaspades", "spades"].contains(meta.assembler) }.set { qc_filtered_reads } @@ -239,7 +239,7 @@ workflow MIASSEMBLER { // Clean the assembly contigs // ASSEMBLY_QC( assembly, - params.reference_genome + params.host_reference_genome ) ch_versions = ch_versions.mix(ASSEMBLY_QC.out.versions) @@ -370,8 +370,8 @@ workflow MIASSEMBLER { if ( extended_meta.low_reads_count ) { return "${meta.id},low_reads_count" } - if ( extended_meta.filter_ratio_threshold_exceeded ) { - return "${meta.id},filter_ratio_threshold_exceeded" + if ( extended_meta.short_reads_filter_ratio_threshold_exceeded ) { + return 
"${meta.id},short_reads_filter_ratio_threshold_exceeded" } error "Unexpected. meta: ${meta}, extended_meta: ${extended_meta}" } From bf547ffcc4b97e56a6fc9d34aef4f8f4121e387e Mon Sep 17 00:00:00 2001 From: Ge94 Date: Tue, 3 Sep 2024 14:17:53 +0100 Subject: [PATCH 03/33] Debug missed parameters --- conf/modules.config | 2 +- nextflow.config | 10 +++++----- nextflow_schema.json | 4 ++-- workflows/longreadassembler.nf | 30 +++++++++++++++--------------- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 8cf286e..55eec1d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -55,7 +55,7 @@ process { '--average_qual', '10', '--length_required', - "${params.min_read_length}", + "${params.long_reads_min_read_length}", '--disable_adapter_trimming' ].join(' ').trim() } diff --git a/nextflow.config b/nextflow.config index ad7e5d6..9940d9a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -36,7 +36,7 @@ params { short_reads_low_reads_count_threshold = 1000 // Long reads options - long_read_min_read_length = 200 + long_reads_min_read_length = 200 // Reference genome name (to select from list) bwamem2_reference_genomes_folder = "" @@ -54,10 +54,10 @@ params { // nor indices, just a fasta file reference_genomes_folder = null remove_human = true - human_fasta_prefix = "human" + human_fasta_prefix = "human" // ASSEMBLY - + /* By default the pipeline will pick * - metaspades for paired-end short reads * - megahit for single-end short reads @@ -75,7 +75,7 @@ params { * - Memory >1TB * - Runtime >3-4 days * - * - flye: Use for any long-read assembly. assembler_config + * - flye: Use for any long-read assembly. long_reads_assembler_config * should be selected depending on input data (if ONT or * pacbio, and if data quality is high or low) */ @@ -85,7 +85,7 @@ params { spades_only_assembler = true short_reads_min_contig_length = 500 short_reads_min_contig_length_metat = 200 - long_read_assembler_config = null + long_reads_assembler_config = null assembly_memory = 100 // MultiQC options diff --git a/nextflow_schema.json b/nextflow_schema.json index d592847..2fbd699 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -36,7 +36,7 @@ "fa_icon": "far fa-address-card", "minLength": 3 }, - "min_read_length": { + "long_reads_min_read_length": { "type": "integer", "description": "Minimum read length for pre-assembly quality filtering", "default": 200 @@ -50,7 +50,7 @@ "enum": ["spades", "metaspades", "megahit", "flye"], "description": "The short or long reads assembler" }, - "long_read_assembler_config": { + "long_reads_assembler_config": { "type": "string", "description": "Configuration to use flye with. 
Pick from nano-raw, nano-corr, nano-hq, pacbio-raw, pacbio-corr, pacbio-hifi", "default": "" diff --git a/workflows/longreadassembler.nf b/workflows/longreadassembler.nf index 7b39717..dfd7bec 100644 --- a/workflows/longreadassembler.nf +++ b/workflows/longreadassembler.nf @@ -76,7 +76,7 @@ workflow LONGREADSASSEMBLY { if ( params.samplesheet ) { - longReads = { study_accession, reads_accession, fq1, library_layout, library_strategy, assembler, long_read_assembler_config, assembly_memory -> + longReads = { study_accession, reads_accession, fq1, library_layout, library_strategy, assembler, long_reads_assembler_config, assembly_memory -> return tuple( [ "id": reads_accession, @@ -85,7 +85,7 @@ workflow LONGREADSASSEMBLY { "library_layout": library_layout, "single_end": true, "assembler": assembler ?: params.assembler, - "long_read_assembler_config": long_read_assembler_config ?: params.long_read_assembler_config, + "long_reads_assembler_config": long_reads_assembler_config ?: params.long_reads_assembler_config, "assembly_memory": assembly_memory ?: params.assembly_memory ], [fq1] @@ -117,7 +117,7 @@ workflow LONGREADSASSEMBLY { // -- The metadata will be overriden by the parameters -- // "assembler": params.assembler, "assembly_memory": params.assembly_memory, - "long_read_assembler_config": params.long_read_assembler_config, + "long_reads_assembler_config": params.long_reads_assembler_config, "library_strategy": params.library_strategy ?: library_strategy, "library_layout": params.library_layout ?: library_layout, "single_end": params.single_end ?: library_layout == "single", @@ -156,16 +156,16 @@ workflow LONGREADSASSEMBLY { reads_assembler_config = LONG_READS_QC.out.qc_reads.map { meta, reads -> if (meta.platform == "ont") { - if (params.long_read_assembler_config == "nano-raw" || meta.quality == "low") { - return [meta + ["long_read_assembler_config": "nano-raw"], reads] - } else if (params.long_read_assembler_config == "nano-hq" || meta.quality == "high") { - return [meta + ["long_read_assembler_config": "nano-hq"], reads] + if (params.long_reads_assembler_config == "nano-raw" || meta.quality == "low") { + return [meta + ["long_reads_assembler_config": "nano-raw"], reads] + } else if (params.long_reads_assembler_config == "nano-hq" || meta.quality == "high") { + return [meta + ["long_reads_assembler_config": "nano-hq"], reads] } } else if (meta.platform == "pacbio") { - if (params.long_read_assembler_config == "pacbio-raw" || meta.quality == "low") { - return [meta + ["long_read_assembler_config": "pacbio-raw"], reads] - } else if (params.long_read_assembler_config == "pacbio-hifi" || meta.quality == "high") { - return [meta + ["long_read_assembler_config": "pacbio-hifi"], reads] + if (params.long_reads_assembler_config == "pacbio-raw" || meta.quality == "low") { + return [meta + ["long_reads_assembler_config": "pacbio-raw"], reads] + } else if (params.long_reads_assembler_config == "pacbio-hifi" || meta.quality == "high") { + return [meta + ["long_reads_assembler_config": "pacbio-hifi"], reads] } } else { error "Incompatible configuration" @@ -173,10 +173,10 @@ workflow LONGREADSASSEMBLY { } reads_assembler_config.branch { meta, reads -> - lq_ont: meta.long_read_assembler_config == "nano-raw" - hq_ont: meta.long_read_assembler_config == "pacbio-raw" - lq_pacbio: meta.long_read_assembler_config == "nano-hq" - hq_pacbio: meta.long_read_assembler_config == "pacbio-hifi" + lq_ont: meta.long_reads_assembler_config == "nano-raw" + hq_ont: meta.long_reads_assembler_config == "pacbio-raw" + 
lq_pacbio: meta.long_reads_assembler_config == "nano-hq" + hq_pacbio: meta.long_reads_assembler_config == "pacbio-hifi" }.set {subworkflow_platform_reads} ONT_LQ( From 7982339921c872ecee30354c3a0a0503c5049408 Mon Sep 17 00:00:00 2001 From: Ge94 Date: Tue, 3 Sep 2024 14:42:27 +0100 Subject: [PATCH 04/33] Remove extra parameters from tests --- tests/main.nf.test | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/main.nf.test b/tests/main.nf.test index ed59d5b..06e3213 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -11,7 +11,6 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "spades" - reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" samplesheet = "${projectDir}/tests/samplesheet/test.csv" @@ -36,7 +35,6 @@ nextflow_pipeline { params { outdir = "tests/results" - reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" study_accession = "SRP115494" @@ -65,7 +63,6 @@ nextflow_pipeline { assembler = "megahit" study_accession = "SRP115494" reads_accession = "SRR6180434" - reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" } @@ -90,7 +87,6 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "metaspades" - reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" study_accession = "ERP012810" @@ -115,7 +111,6 @@ nextflow_pipeline { when { params { outdir = "tests/results" - reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" study_accession = "ERP012810" @@ -138,7 +133,6 @@ nextflow_pipeline { when { params { outdir = "tests/results" - reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" study_accession = "DRP007622" @@ -165,7 +159,6 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "spades" - reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" study_accession = "DRP007622" From 70bef0228729f6533e6e0899626359930ef5e70a Mon Sep 17 00:00:00 2001 From: Ge94 Date: Tue, 3 Sep 2024 14:46:40 +0100 Subject: [PATCH 05/33] Remove extra parameter from test --- conf/test.config | 1 - 1 file changed, 1 deletion(-) diff --git a/conf/test.config b/conf/test.config index 60db88e..223443f 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,7 +18,6 @@ profiles { max_memory = '6.GB' max_time = '6.h' - reference_genomes_folder = "tests/human_phix/bwa2mem" bwamem2_reference_genomes_folder = "tests/human_phix/bwa2mem" blast_reference_genomes_folder = "tests/human_phix/blast" human_phix_blast_index_name = 
"human_phix" From 120d5fe8467d724d5b8bfe4a92db9b090ffa19b3 Mon Sep 17 00:00:00 2001 From: Ge94 Date: Tue, 3 Sep 2024 14:49:16 +0100 Subject: [PATCH 06/33] Add ref genome folder to schema --- nextflow_schema.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/nextflow_schema.json b/nextflow_schema.json index 2fbd699..d585895 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -104,6 +104,11 @@ "zebrafish.fna" ] }, + "reference_genomes_folder": { + "type": "string", + "description": "The folder with the reference genomes, defaults to the Microbiome Informatics internal directory.", + "format": "directory-path" + }, "blast_reference_genomes_folder": { "type": "string", "description": "The folder with the reference genome blast indexes, defaults to the Microbiome Informatics internal directory.", From 2c2faa9c14e7b949d8708217f96aaaf48c1decc9 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Tue, 3 Sep 2024 16:29:54 +0100 Subject: [PATCH 07/33] WIP - Restructure the pipeline to support Long Reads and Short Reads --- assets/schema_input.json | 6 + conf/modules.config | 4 +- conf/puthi.config | 30 +++ nextflow.config | 8 +- subworkflows/local/long_reads_qc.nf | 1 + ...ge.nf => short_reads_assembly_coverage.nf} | 2 +- ...embly_qc.nf => short_reads_assembly_qc.nf} | 2 +- .../local/{reads_qc.nf => short_reads_qc.nf} | 2 +- tests/samplesheet/test.csv | 8 +- ...adassembler.nf => long_reads_assembler.nf} | 142 ++++-------- workflows/miassembler.nf | 212 ++++++------------ workflows/short_reads_assembler.nf | 180 +++++++++++++++ 12 files changed, 347 insertions(+), 250 deletions(-) create mode 100644 conf/puthi.config rename subworkflows/local/{assembly_coverage.nf => short_reads_assembly_coverage.nf} (97%) rename subworkflows/local/{assembly_qc.nf => short_reads_assembly_qc.nf} (98%) rename subworkflows/local/{reads_qc.nf => short_reads_qc.nf} (98%) rename workflows/{longreadassembler.nf => long_reads_assembler.nf} (57%) create mode 100644 workflows/short_reads_assembler.nf diff --git a/assets/schema_input.json b/assets/schema_input.json index 84444d1..5da904b 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -47,6 +47,9 @@ "enum": ["metagenomic", "metatranscriptomic", "genomic", "transcriptomic", "other"], "errorMessage": "library strategy should be only value from list: 'metagenomic', 'metatranscriptomic', 'genomic', 'transcriptomic', 'other'" }, + "platform": { + "type": "string" + }, "assembler": { "type": "string", "enum": ["spades", "metaspades", "megahit"], @@ -57,6 +60,9 @@ "type": "integer", "default": null, "description": "Default memory (in GB) allocated for the assembly process for the run." 
+ }, + "assembler_config": { + "type": "string" } }, "required": ["study_accession", "reads_accession", "fastq_1", "library_layout", "library_strategy"] diff --git a/conf/modules.config b/conf/modules.config index 8cf286e..2668475 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -146,7 +146,7 @@ process { memory = { check_max( 6.GB * task.attempt, 'memory' ) } time = { check_max( 4.h * task.attempt, 'time' ) } } - + /* --------- */ /* Assembly */ @@ -383,6 +383,8 @@ process { return null; } def output_file = new File(filename); + println ("COSO") + println (meta) return "${study_reads_folder( meta )}/assembly/${meta.assembler}/${meta.assembler_version}/qc/multiqc/${output_file.name}"; } } diff --git a/conf/puthi.config b/conf/puthi.config new file mode 100644 index 0000000..a5c1e69 --- /dev/null +++ b/conf/puthi.config @@ -0,0 +1,30 @@ +params { + bwamem2_reference_genomes_folder = "/projappl/project_2010686/ebi/reference_dbs/bwamem2" + blast_reference_genomes_folder = "/projappl/project_2010686/ebi/reference_dbs/blast" + human_phix_blast_index_name = "human_phix" + human_phix_bwamem2_index_name = "human_phix" +} + +executor { + name = "slurm" + queueSize = 200 + queueGlobalStatus = true + submitRateLimit = "10 sec" + pollInterval = "10 sec" +} + +conda.enabled = false + +// If true, on a successful completion of a run all files in work directory are automatically deleted. +cleanup = true + +singularity { + enabled = true + autoMounts = true + cacheDir = "/projappl/project_2010686/ebi/singularity_cache" +} + +conda.enabled = false + +// If true, on a successful completion of a run all files in work directory are automatically deleted. +cleanup = true diff --git a/nextflow.config b/nextflow.config index 8791b13..41c103f 100644 --- a/nextflow.config +++ b/nextflow.config @@ -39,7 +39,7 @@ params { * - Runtime >3-4 days * * - flye: Use for any long-read assembly. 
assembler_config - * should be selected depending on input data (if ONT or + * should be selected depending on input data (if ONT or * pacbio, and if data quality is high or low) */ assembler = null @@ -61,7 +61,7 @@ params { reference_genome = null /* - * Long-read assemblies won't require phiX, + * Long-read assemblies won't require phiX, * parameters should be defined as follows: * remove_human = true * human_blast_index_name = "human" @@ -216,8 +216,8 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { - includeConfig 'conf/test.config' + test { + includeConfig 'conf/test.config' } codon_slurm { includeConfig 'conf/codon_slurm.config' } } diff --git a/subworkflows/local/long_reads_qc.nf b/subworkflows/local/long_reads_qc.nf index 13635ba..9d2f491 100644 --- a/subworkflows/local/long_reads_qc.nf +++ b/subworkflows/local/long_reads_qc.nf @@ -4,6 +4,7 @@ include { MINIMAP2_ALIGN as HUMAN_DECONTAMINATION } from '../../modules/nf-core/ include { MINIMAP2_ALIGN as HOST_DECONTAMINATION } from '../../modules/nf-core/minimap2/align/main' workflow LONG_READS_QC { + take: reads // [ val(meta), path(reads) ] host_reference_genome // [ val(meta2), path(reference_genome) ] diff --git a/subworkflows/local/assembly_coverage.nf b/subworkflows/local/short_reads_assembly_coverage.nf similarity index 97% rename from subworkflows/local/assembly_coverage.nf rename to subworkflows/local/short_reads_assembly_coverage.nf index cfd6698..194c8fc 100644 --- a/subworkflows/local/assembly_coverage.nf +++ b/subworkflows/local/short_reads_assembly_coverage.nf @@ -3,7 +3,7 @@ include { BWAMEM2_MEM as BWAMEM2_MEM_COVERAGE } from '../../modules/ebi-metagen include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/samtools/idxstats/main' include { METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS } from '../../modules/nf-core/metabat2/jgisummarizebamcontigdepths/main' -workflow ASSEMBLY_COVERAGE { +workflow SHORT_READS_ASSEMBLY_COVERAGE { take: assembly_reads // [ val(meta), path(assembly_fasta), path(reads) ] diff --git a/subworkflows/local/assembly_qc.nf b/subworkflows/local/short_reads_assembly_qc.nf similarity index 98% rename from subworkflows/local/assembly_qc.nf rename to subworkflows/local/short_reads_assembly_qc.nf index f5bfa7d..fc54f89 100644 --- a/subworkflows/local/assembly_qc.nf +++ b/subworkflows/local/short_reads_assembly_qc.nf @@ -18,7 +18,7 @@ process PUBLISH_CLEANED_CONTIGS { """ } -workflow ASSEMBLY_QC { +workflow SHORT_READS_ASSEMBLY_QC { take: assembly // [ val(meta), path(assembly_fasta) ] diff --git a/subworkflows/local/reads_qc.nf b/subworkflows/local/short_reads_qc.nf similarity index 98% rename from subworkflows/local/reads_qc.nf rename to subworkflows/local/short_reads_qc.nf index 4cbbbe6..7dbd198 100644 --- a/subworkflows/local/reads_qc.nf +++ b/subworkflows/local/short_reads_qc.nf @@ -2,7 +2,7 @@ include { FASTP } from '../../module include { BWAMEM2DECONTNOBAMS as HUMAN_PHIX_DECONTAMINATION } from '../../modules/ebi-metagenomics/bwamem2decontnobams/main' include { BWAMEM2DECONTNOBAMS as HOST_DECONTAMINATION } from '../../modules/ebi-metagenomics/bwamem2decontnobams/main' -workflow READS_QC { +workflow SHORT_READS_QC { take: reads // [ val(meta), path(reads) ] diff --git a/tests/samplesheet/test.csv b/tests/samplesheet/test.csv index b2c4b99..fab7d69 100644 --- a/tests/samplesheet/test.csv +++ b/tests/samplesheet/test.csv @@ -1,4 +1,4 @@ -study_accession,reads_accession,fastq_1,fastq_2,library_layout,library_strategy,assembler,assembly_memory 
-SRP115494,SRR6180434,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR6180434_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR6180434_2.fastq.gz,paired,metagenomic,, -SRP115494,SRR5949318,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_2.fastq.gz,paired,metagenomic,, -DRP007622,DRR280712,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/DRR280712.fastq.gz,,single,metatranscriptomic,megahit, +study_accession,reads_accession,fastq_1,fastq_2,library_layout,library_strategy,platform,assembler,assembly_memory,assembler_config +SRP115494,SRR6180434,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR6180434_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR6180434_2.fastq.gz,paired,metagenomic,,,,, +SRP115494,SRR5949318,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_2.fastq.gz,paired,metagenomic,,,,, +DRP007622,DRR280712,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/DRR280712.fastq.gz,,single,metatranscriptomic,megahit,,,, diff --git a/workflows/longreadassembler.nf b/workflows/long_reads_assembler.nf similarity index 57% rename from workflows/longreadassembler.nf rename to workflows/long_reads_assembler.nf index 227e62d..0c269ab 100644 --- a/workflows/longreadassembler.nf +++ b/workflows/long_reads_assembler.nf @@ -1,37 +1,3 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - PRINT PARAMS SUMMARY -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -include { validateParameters; paramsSummaryLog; paramsSummaryMap; samplesheetToList } from 'plugin/nf-schema' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -validateParameters() - -if (params.help) { - log.info paramsHelp("nextflow run ebi-metagenomics/longreadsassembly --help") - exit 0 -} - - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = Channel.fromPath("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? Channel.fromPath( params.multiqc_config, checkIfExists: true ) : Channel.empty() -ch_multiqc_logo = params.multiqc_logo ? Channel.fromPath( params.multiqc_logo, checkIfExists: true ) : Channel.empty() -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -44,6 +10,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil include { FETCHTOOL_READS } from '../modules/local/fetchtool_reads' include { LONG_READS_QC } from '../subworkflows/local/long_reads_qc' + include { ONT_LQ } from '../subworkflows/local/ont_lq' include { ONT_HQ } from '../subworkflows/local/ont_hq' // include { PACBIO_LQ } from '../subworkflows/local/pacbio_lq' @@ -66,79 +33,54 @@ include { ONT_HQ } from '../subworkflows/local/ont_hq' */ // Info required for completion email and summary -def multiqc_report = [] - -workflow LONGREADSASSEMBLY { - ch_versions = Channel.empty() - longReads = Channel.empty() - fetch_tool_metadata = Channel.empty() - - if ( params.samplesheet ) { - - longReads = { study_accession, reads_accession, fq1, library_layout, library_strategy, assembler, assembler_config, assembly_memory -> - return tuple( - [ - "id": reads_accession, - "study_accession": study_accession, - "library_strategy": library_strategy, - "library_layout": library_layout, - "single_end": true, - "assembler": assembler ?: params.assembler, - "assembler_config": assembler_config ?: params.assembler_config, - "assembly_memory": assembly_memory ?: params.assembly_memory - ], - [fq1] - ) - } +workflow LONGREADSASSEMBLER { - samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json")) + take: + reads // TODO - fetch_reads_transformed = samplesheet.map(longReads) + main: - } else { - // TODO: remove when the fetch tools gets published on bioconda - fetch_tool_config = file("${projectDir}/assets/fetch_tool_anonymous.json", checkIfExists: true) + LONG_READS_QC ( + reads, + params.reference_genome + ) - if ( params.private_study ) { - fetch_tool_config = file("${projectDir}/assets/fetch_tool_credentials.json", checkIfExists: true) - } + /*********************************************************************************/ + /* Selecting the combination of adapter trimming, assembler, and post-processing */ + /*********************************************************************************/ + /* + The selection process ensures that: + - The user selected assembler configuration is always used (either from the samplesheet assembler column (with precedence) or the params.assembler) + - Low-quality ONT reads are trimmed with canu and assembled with flye --nano-corr/raw), unless specified otherwise. + - High-quality ONT reads are trimmed with porechob_abi and assembled with flye --nano-hq), unless specified otherwise. + - Low-quality pacbio reads are trimmed with canu and assembled with flye --pacbio-corr/raw), unless specified otherwise. + - High-quality pacbio reads are trimmed with HiFiAdapterFilt and assembled with flye --pacbio-hifi), unless specified otherwise. + Extra polishing steps are applied to low-quality reads. All subworkflows also apply post-assembly host decontamination. 
+ */ - FETCHTOOL_READS( - [ [id: params.reads_accession], params.study_accession, params.reads_accession ], - fetch_tool_config - ) - - ch_versions = ch_versions.mix(FETCHTOOL_READS.out.versions) - - // Push the library strategy into the meta of the reads, this is to make it easier to handle downstream - fetch_reads_transformed = FETCHTOOL_READS.out.reads.map { meta, reads, library_strategy, library_layout, platform -> { - [ meta + [ - // -- The metadata will be overriden by the parameters -- // - "assembler": params.assembler, - "assembly_memory": params.assembly_memory, - "assembler_config": params.assembler_config, - "library_strategy": params.library_strategy ?: library_strategy, - "library_layout": params.library_layout ?: library_layout, - "single_end": params.single_end ?: library_layout == "single", - "platform": params.platform ?: platform - ], reads ] + reads_assembler_config = LONG_READS_QC.out.qc_reads.map { meta, reads -> + if (meta.platform == "ont") { + if (params.assembler_config == "nano-raw" || meta.quality == "low") { + return [meta + ["assembler_config": "nano-raw"], reads] + } else if (params.assembler_config == "nano-hq" || meta.quality == "high") { + return [meta + ["assembler_config": "nano-hq"], reads] + } + } else if (meta.platform == "pacbio") { + if (params.assembler_config == "pacbio-raw" || meta.quality == "low") { + return [meta + ["assembler_config": "pacbio-raw"], reads] + } else if (params.assembler_config == "pacbio-hifi" || meta.quality == "high") { + return [meta + ["assembler_config": "pacbio-hifi"], reads] } + } else { + error "Incompatible configuration" } - - // Metadata for MultiQC - fetch_tool_metadata = FETCHTOOL_READS.out.metadata_tsv.map { it[1] }.collectFile( - name: 'fetch_tool_mqc.tsv', - newLine: true, - keepHeader: true, - skip: 1 - ) } - LONG_READS_QC ( - fetch_reads_transformed, - params.reference_genome - ) + + ch_versions = Channel.empty() + + ch_versions = ch_versions.mix(LONG_READS_QC.out.versions) /*********************************************************************************/ @@ -151,7 +93,7 @@ workflow LONGREADSASSEMBLY { - High-quality ONT reads are trimmed with porechob_abi and assembled with flye --nano-hq), unless specified otherwise. - Low-quality pacbio reads are trimmed with canu and assembled with flye --pacbio-corr/raw), unless specified otherwise. - High-quality pacbio reads are trimmed with HiFiAdapterFilt and assembled with flye --pacbio-hifi), unless specified otherwise. - Extra polishing steps are applied to low-quality reads. All subworkflows also apply post-assembly host decontamination. + Extra polishing steps are applied to low-quality reads. All subworkflows also apply post-assembly host decontamination. 
*/ reads_assembler_config = LONG_READS_QC.out.qc_reads.map { meta, reads -> @@ -171,7 +113,7 @@ workflow LONGREADSASSEMBLY { error "Incompatible configuration" } } - + reads_assembler_config.branch { meta, reads -> lq_ont: meta.assembler_config == "nano-raw" hq_ont: meta.assembler_config == "pacbio-raw" @@ -200,7 +142,7 @@ workflow LONGREADSASSEMBLY { /*************************************/ /* Post-assembly: coverage and stats */ /*************************************/ - + // // MODULE: Run FastQC // diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index 69a80d6..5ef05de 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -23,7 +23,6 @@ if (params.help) { exit 0 } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES @@ -35,37 +34,40 @@ ch_multiqc_custom_config = params.multiqc_config ? file( params.multiqc_config ch_multiqc_logo = params.multiqc_logo ? file( params.multiqc_logo, checkIfExists: true ) : file("$projectDir/assets/mgnify_logo.png", checkIfExists: true) ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS + IMPORT NF-CORE MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// MODULE: Installed directly from nf-core/modules // -include { FETCHTOOL_READS } from '../modules/local/fetchtool_reads' -include { READS_QC } from '../subworkflows/local/reads_qc' -include { ASSEMBLY_QC } from '../subworkflows/local/assembly_qc' -include { ASSEMBLY_COVERAGE } from '../subworkflows/local/assembly_coverage' + +include { MULTIQC as MULTIQC_STUDY } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_RUN } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT NF-CORE MODULES/SUBWORKFLOWS + IMPORT THE MAIN ENTRY POINT WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // -// MODULE: Installed directly from nf-core/modules +// WORKFLOWS // -include { FASTQC as FASTQC_BEFORE } from '../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_AFTER } from '../modules/nf-core/fastqc/main' -include { MULTIQC as MULTIQC_STUDY } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_RUN } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { SPADES } from '../modules/nf-core/spades/main' -include { MEGAHIT } from '../modules/nf-core/megahit/main' -include { QUAST } from '../modules/nf-core/quast/main' +include { SHORT_READS_ASSEMBLER } from '../workflows/short_reads_assembler' +// include { LONG_READS_ASSEMBLER } from '../workflows/long_reads_assembler' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { FETCHTOOL_READS } from 
'../modules/local/fetchtool_reads' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -80,20 +82,21 @@ def multiqc_report = [] workflow MIASSEMBLER { ch_versions = Channel.empty() - fetch_tool_metadata = Channel.empty() if ( params.samplesheet ) { - groupReads = { study_accession, reads_accession, fq1, fq2, library_layout, library_strategy, assembler, assembly_memory -> + groupReads = { study_accession, reads_accession, fq1, fq2, library_layout, library_strategy, platform, assembler, assembly_memory, assembler_config -> if (fq2 == []) { return tuple(["id": reads_accession, "study_accession": study_accession, - "library_strategy": library_strategy, "library_layout": library_layout, + "library_strategy": library_strategy, + "platform": params.platform ?: platform, "single_end": true, "assembler": assembler ?: params.assembler, - "assembly_memory": assembly_memory ?: params.assembly_memory + "assembly_memory": assembly_memory ?: params.assembly_memory, + "assembler_config": params.assembler_config ], [fq1] ) @@ -104,7 +107,9 @@ workflow MIASSEMBLER { "library_layout": library_layout, "single_end": false, "assembler": assembler ?: params.assembler, - "assembly_memory": assembly_memory ?: params.assembly_memory + "assembly_memory": assembly_memory ?: params.assembly_memory, + "assembler_config": params.assembler_config, + "platform": params.platform ?: platform ], [fq1, fq2]) } @@ -112,7 +117,7 @@ workflow MIASSEMBLER { samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json")) - // [ study, sample, read1, [read2], library_layout, library_strategy, assembly_memory ] + // [ study, sample, read1, [read2], library_layout, library_strategy, platform, assembly_memory] fetch_reads_transformed = samplesheet.map(groupReads) } else { @@ -131,14 +136,16 @@ workflow MIASSEMBLER { ch_versions = ch_versions.mix(FETCHTOOL_READS.out.versions) // Push the library strategy into the meta of the reads, this is to make it easier to handle downstream - fetch_reads_transformed = FETCHTOOL_READS.out.reads.map { meta, reads, library_strategy, library_layout -> { + fetch_reads_transformed = FETCHTOOL_READS.out.reads.map { meta, reads, library_strategy, library_layout, platform -> { [ meta + [ // -- The metadata will be overriden by the parameters -- // "assembler": params.assembler, + "assembler_config": params.assembler_config, "assembly_memory": params.assembler_memory, "library_strategy": params.library_strategy ?: library_strategy, "library_layout": params.library_layout ?: library_layout, - "single_end": params.single_end ?: library_layout == "single" + "single_end": params.single_end ?: library_layout == "single", + "platform": params.platform ?: platform ], reads ] } } @@ -152,112 +159,41 @@ workflow MIASSEMBLER { ) } - /***************************/ - /* Selecting the assembler */ - /***************************/ - /* - The selection process ensures that: - - The user selected assembler is always used (either from the samplesheet assembler column (with precedesnse) or the params.assembler) - - Single-end reads are assembled with MEGAHIT, unless specified otherwise. - - Paired-end reads are assembled with MetaSPAdes, unless specified otherwise - - An error is raised if the assembler and read layout are incompatible (shouldn't happen...) 
- */ - fetch_reads_transformed = fetch_reads_transformed.map { meta, reads -> - def selected_assembler = meta.assembler; - if ( selected_assembler == "megahit" || ( meta.single_end && selected_assembler == null ) ) { - return [ meta + [assembler: "megahit", assembler_version: params.megahit_version], reads] - } else if ( ["metaspades", "spades"].contains(selected_assembler) || ( !meta.single_end && selected_assembler == null ) ) { - def xspades_assembler = selected_assembler ?: "metaspades" // Default to "metaspades" if the user didn't select one - return [ meta + [assembler: xspades_assembler, assembler_version: params.spades_version], reads] - } else { - error "Incompatible assembler and/or reads layout. We can't assembly data that is. Reads - single end value: ${meta.single_end}." - } - } - - FASTQC_BEFORE ( - fetch_reads_transformed - ) - - ch_versions = ch_versions.mix(FASTQC_BEFORE.out.versions) - - READS_QC( - fetch_reads_transformed, - params.reference_genome - ) - - FASTQC_AFTER ( - READS_QC.out.qc_reads - ) + /********************************************/ + /* Selecting the assembly pipeline flavour */ + /*******************************************/ - /******************************************/ - /* Reads that fail the following rules: */ - /* - Reads discarded by fastp > 90% (default value) */ - /* - Less than 1k reads */ - /******************************************/ - extended_qc = READS_QC.out.fastp_json.map { meta, json -> { - json_txt = new JsonSlurper().parseText(json.text) - bf_total_reads = json_txt?.summary?.before_filtering?.total_reads ?: 0; - af_total_reads = json_txt?.summary?.after_filtering?.total_reads ?: 0; - reads_qc_meta = [ - "low_reads_count": af_total_reads <= params.low_reads_count_threshold, - "filter_ratio_threshold_exceeded": af_total_reads == 0 || ((af_total_reads / bf_total_reads) <= params.filter_ratio_threshold ) - ] - return [meta, reads_qc_meta] + classified_reads = fetch_reads_transformed.map { meta, reads -> + // Long reads // + if ( ["ont", "pacbio"].contains( meta.platform ) ) { + return [ meta + [long_reads: true], reads] + // Short reads // + } else { + return [ meta + [short_reads: true], reads] } } - extended_reads_qc = READS_QC.out.qc_reads.join( extended_qc ) - - extended_reads_qc.branch { meta, reads, reads_qc_meta -> - // Filter out failed reads // - qc_failed: reads_qc_meta.low_reads_count || reads_qc_meta.filter_ratio_threshold_exceeded - megahit: meta.assembler == "megahit" - xspades: ["metaspades", "spades"].contains(meta.assembler) - }.set { qc_filtered_reads } + classified_reads.branch { meta, reads -> + short_reads: meta.short_reads + long_reads: meta.long_reads + }.set { reads_to_assemble } - ch_versions = ch_versions.mix(READS_QC.out.versions) + /***************************************/ + /* Assemble short reads and long reads */ + /***************************************/ - /*********************/ - /* Assembly */ - /********************/ - SPADES( - qc_filtered_reads.xspades.map { meta, reads, _ -> [meta, reads, [], []] }, - [], // yml input parameters, which we don't use - [] // hmm, not used + SHORT_READS_ASSEMBLER( + reads_to_assemble.short_reads ) - ch_versions = ch_versions.mix(SPADES.out.versions) + ch_versions.mix( SHORT_READS_ASSEMBLER.out.versions ) - MEGAHIT( - qc_filtered_reads.megahit.map { meta, reads, _ -> [meta, reads] } - ) + // TODO: enable once this is ready + // LONG_READS_ASSEMBLER( + // reads_to_assemble.out.long_reads + // ) - assembly = SPADES.out.contigs.mix( MEGAHIT.out.contigs ) - - ch_versions = 
ch_versions.mix(MEGAHIT.out.versions) - - // Clean the assembly contigs // - ASSEMBLY_QC( - assembly, - params.reference_genome - ) - - ch_versions = ch_versions.mix(ASSEMBLY_QC.out.versions) - - // Coverage // - ASSEMBLY_COVERAGE( - ASSEMBLY_QC.out.filtered_contigs.join( READS_QC.out.qc_reads, remainder: false ) - ) - - ch_versions = ch_versions.mix(ASSEMBLY_COVERAGE.out.versions) - - // Stats // - /* The QUAST module was modified to run metaQUAST instead */ - QUAST( - ASSEMBLY_QC.out.filtered_contigs, - [ [], [] ], // reference - [ [], [] ] // gff - ) + // ch_versions.mix( LONG_READS_ASSEMBLER.out.versions ) CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') @@ -307,12 +243,12 @@ workflow MIASSEMBLER { ch_multiqc_study_tools_files = Channel.empty() - ch_multiqc_study_tools_files = FASTQC_BEFORE.out.zip.map(meta_by_study) - .join( FASTQC_AFTER.out.zip.map(meta_by_study) ) - .join( ASSEMBLY_COVERAGE.out.samtools_idxstats.map(meta_by_study), remainder: true ) // the assembly step could fail - .join( QUAST.out.results.map(meta_by_study), remainder: true ) // the assembly step could fail + study_multiqc_files = SHORT_READS_ASSEMBLER.out.fastqc_before_zip.map(meta_by_study) + .join( SHORT_READS_ASSEMBLER.out.fastqc_after_zip.map(meta_by_study) ) + .join( SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats.map(meta_by_study), remainder: true ) // the assembly step could fail + .join( SHORT_READS_ASSEMBLER.out.quast_results.map(meta_by_study), remainder: true ) // the assembly step could fail - ch_multiqc_study_tools_files = ch_multiqc_study_tools_files.flatMap( combineFiles ).groupTuple() + ch_multiqc_study_tools_files = study_multiqc_files.flatMap( combineFiles ).groupTuple() // TODO: add the fetch tool log file MULTIQC_STUDY ( @@ -331,15 +267,13 @@ workflow MIASSEMBLER { [ meta.subMap("study_accession", "id", "assembler", "assembler_version"), result_artifact ] } - ch_multiqc_run_tools_files = Channel.empty() - - ch_multiqc_run_tools_files = FASTQC_BEFORE.out.zip.map(meta_by_run) - .join( FASTQC_AFTER.out.zip.map(meta_by_run) ) - .join( ASSEMBLY_COVERAGE.out.samtools_idxstats.map(meta_by_run), remainder: true ) // the assembly step could fail - .join( QUAST.out.results.map(meta_by_run), remainder: true ) // the assembly step could fail + run_multiqc_files = SHORT_READS_ASSEMBLER.out.fastqc_before_zip.map(meta_by_run) + .join( SHORT_READS_ASSEMBLER.out.fastqc_after_zip.map(meta_by_run) ) + .join( SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats.map(meta_by_run), remainder: true ) // the assembly step could fail + .join( SHORT_READS_ASSEMBLER.out.quast_results.map(meta_by_run), remainder: true ) // the assembly step could fail // Filter out the non-assembled runs // - ch_multiqc_run_tools_files = ch_multiqc_run_tools_files.filter { meta, fastqc_before, fastqc_after, assembly_coverage, quast -> { + ch_multiqc_run_tools_files = run_multiqc_files.filter { meta, fastqc_before, fastqc_after, assembly_coverage, quast -> { return assembly_coverage != null && quast != null } } .flatMap( combineFiles ).groupTuple() @@ -357,15 +291,17 @@ workflow MIASSEMBLER { /* End of execution reports */ /****************************/ - // Asssembled runs // - ASSEMBLY_COVERAGE.out.samtools_idxstats.map { + // TODO: we need to add LR end-of-run reports + + // Short reads asssembled runs // + SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats.map { meta, _ -> { return "${meta.id},${meta.assembler},${meta.assembler_version}" } 
}.collectFile(name: "assembled_runs.csv", storeDir: "${params.outdir}", newLine: true, cache: false) - // Reads QC failed // - qc_failed_entries = qc_filtered_reads.qc_failed.map { + // Short reads QC failed // + short_reads_qc_failed_entries = SHORT_READS_ASSEMBLER.out.qc_failed.map { meta, _, extended_meta -> { if ( extended_meta.low_reads_count ) { return "${meta.id},low_reads_count" @@ -377,7 +313,7 @@ workflow MIASSEMBLER { } } - qc_failed_entries.collectFile(name: "qc_failed_runs.csv", storeDir: "${params.outdir}", newLine: true, cache: false) + short_reads_qc_failed_entries.collectFile(name: "qc_failed_runs.csv", storeDir: "${params.outdir}", newLine: true, cache: false) } /* diff --git a/workflows/short_reads_assembler.nf b/workflows/short_reads_assembler.nf new file mode 100644 index 0000000..862652a --- /dev/null +++ b/workflows/short_reads_assembler.nf @@ -0,0 +1,180 @@ +import groovy.json.JsonSlurper + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? file( params.multiqc_config, checkIfExists: true ) : [] +ch_multiqc_logo = params.multiqc_logo ? file( params.multiqc_logo, checkIfExists: true ) : file("$projectDir/assets/mgnify_logo.png", checkIfExists: true) +ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// + +include { SHORT_READS_QC } from '../subworkflows/local/short_reads_qc' +include { SHORT_READS_ASSEMBLY_QC } from '../subworkflows/local/short_reads_assembly_qc' +include { SHORT_READS_ASSEMBLY_COVERAGE } from '../subworkflows/local/short_reads_assembly_coverage' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { FASTQC as FASTQC_BEFORE } from '../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_AFTER } from '../modules/nf-core/fastqc/main' +include { SPADES } from '../modules/nf-core/spades/main' +include { MEGAHIT } from '../modules/nf-core/megahit/main' +include { QUAST } from '../modules/nf-core/quast/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow SHORT_READS_ASSEMBLER { + + take: + reads // tuple(meta), path(reads) + + main: + + ch_versions = Channel.empty() + + /***************************/ + /* Selecting the assembler */ + /***************************/ + /* + The selection process ensures that: + - The user selected assembler is always used (either from the samplesheet assembler column (with precedesnse) or the params.assembler) + - Single-end reads are assembled with MEGAHIT, 
unless specified otherwise. + - Paired-end reads are assembled with MetaSPAdes, unless specified otherwise + - An error is raised if the assembler and read layout are incompatible (shouldn't happen...) + */ + reads_by_assembler = reads.map { meta, reads -> + def selected_assembler = meta.assembler; + if ( selected_assembler == "megahit" || ( meta.single_end && selected_assembler == null ) ) { + return [ meta + [assembler: "megahit", assembler_version: params.megahit_version], reads] + } else if ( ["metaspades", "spades"].contains(selected_assembler) || ( !meta.single_end && selected_assembler == null ) ) { + def xspades_assembler = selected_assembler ?: "metaspades" // Default to "metaspades" if the user didn't select one + return [ meta + [assembler: xspades_assembler, assembler_version: params.spades_version], reads] + } else { + error "Incompatible assembler and/or reads layout. We can't assembly data that is. Reads - single end value: ${meta.single_end}." + } + } + + FASTQC_BEFORE ( + reads_by_assembler + ) + + ch_versions = ch_versions.mix(FASTQC_BEFORE.out.versions) + + SHORT_READS_QC( + reads_by_assembler, + params.reference_genome + ) + + FASTQC_AFTER ( + SHORT_READS_QC.out.qc_reads + ) + + /******************************************/ + /* Reads that fail the following rules: */ + /* - Reads discarded by fastp > 90% (default value) */ + /* - Less than 1k reads */ + /******************************************/ + extended_qc = SHORT_READS_QC.out.fastp_json.map { meta, json -> { + json_txt = new JsonSlurper().parseText(json.text) + bf_total_reads = json_txt?.summary?.before_filtering?.total_reads ?: 0; + af_total_reads = json_txt?.summary?.after_filtering?.total_reads ?: 0; + reads_qc_meta = [ + "low_reads_count": af_total_reads <= params.low_reads_count_threshold, + "filter_ratio_threshold_exceeded": af_total_reads == 0 || ((af_total_reads / bf_total_reads) <= params.filter_ratio_threshold ) + ] + return [meta, reads_qc_meta] + } + } + + extended_reads_qc = SHORT_READS_QC.out.qc_reads.join( extended_qc ) + + extended_reads_qc.branch { meta, reads, reads_qc_meta -> + // Filter out failed reads // + qc_failed: reads_qc_meta.low_reads_count || reads_qc_meta.filter_ratio_threshold_exceeded + megahit: meta.assembler == "megahit" + xspades: ["metaspades", "spades"].contains(meta.assembler) + }.set { qc_filtered_reads } + + ch_versions = ch_versions.mix(SHORT_READS_QC.out.versions) + + /*********************/ + /* Assembly */ + /********************/ + SPADES( + qc_filtered_reads.xspades.map { meta, reads, _ -> [meta, reads, [], []] }, + [], // yml input parameters, which we don't use + [] // hmm, not used + ) + + ch_versions = ch_versions.mix(SPADES.out.versions) + + MEGAHIT( + qc_filtered_reads.megahit.map { meta, reads, _ -> [meta, reads] } + ) + + assembly = SPADES.out.contigs.mix( MEGAHIT.out.contigs ) + + ch_versions = ch_versions.mix(MEGAHIT.out.versions) + + // Clean the assembly contigs // + SHORT_READS_ASSEMBLY_QC( + assembly, + params.reference_genome + ) + + ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLY_QC.out.versions) + + // Coverage // + SHORT_READS_ASSEMBLY_COVERAGE( + SHORT_READS_ASSEMBLY_QC.out.filtered_contigs.join( SHORT_READS_QC.out.qc_reads, remainder: false ) + ) + + ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLY_COVERAGE.out.versions) + + // Stats // + /* The QUAST module was modified to run metaQUAST instead */ + QUAST( + SHORT_READS_ASSEMBLY_QC.out.filtered_contigs, + [ [], [] ], // reference + [ [], [] ] // gff + ) + + ch_versions = 
ch_versions.mix(QUAST.out.versions) + + emit: + fastqc_before_zip = FASTQC_BEFORE.out.zip // tuple(meta) + qc_failed = qc_filtered_reads.qc_failed // tuple(meta) + fastqc_after_zip = FASTQC_AFTER.out.zip // tuple(meta) + assembly_coverage_samtools_idxstats = SHORT_READS_ASSEMBLY_COVERAGE.out.samtools_idxstats // tuple(meta) + quast_results = QUAST.out.results // tuple(meta) + versions = ch_versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ From 0ef2e63c97cef217b46d9b3a372e4fcb2901e6d3 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Tue, 3 Sep 2024 16:32:13 +0100 Subject: [PATCH 08/33] Remove debug println statements --- conf/modules.config | 2 -- 1 file changed, 2 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 2668475..c7a3174 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -383,8 +383,6 @@ process { return null; } def output_file = new File(filename); - println ("COSO") - println (meta) return "${study_reads_folder( meta )}/assembly/${meta.assembler}/${meta.assembler_version}/qc/multiqc/${output_file.name}"; } } From 48aa4ee594865e2fbc3aa7d89185f16aa07d8958 Mon Sep 17 00:00:00 2001 From: Ekaterina Sakharova Date: Fri, 6 Sep 2024 13:44:39 +0100 Subject: [PATCH 09/33] add dbs links --- README.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f0809fd..10020a7 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ This pipeline is still in early development. It's mostly a direct port of the mi ## Usage > [!WARNING] -> It only runs in Codon using Slurm ATM. +> It only runs in EBI Codon cluster using Slurm ATM. Pipeline help: @@ -60,6 +60,12 @@ Generic options --multiqc_methods_description [string] Custom MultiQC yaml file containing HTML including a methods description. ``` +### Required DBs: +- `--reference_genome`: reference genome in FASTA format +- `--blast_reference_genomes_folder`: mandatory **human_phiX** is provided on [FTP](https://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/references/) +- `--bwamem2_reference_genomes_folder`: mandatory **human_phiX** is provided on [FTP](https://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/references/) + + Example: ```bash From ea24e1f902a24bea33eac951a7d4c5b3cb56263a Mon Sep 17 00:00:00 2001 From: Germana Baldi Date: Sun, 8 Sep 2024 11:46:42 +0100 Subject: [PATCH 10/33] Add db generation instructions to README --- README.md | 32 +++++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 10020a7..997b801 100644 --- a/README.md +++ b/README.md @@ -60,11 +60,6 @@ Generic options --multiqc_methods_description [string] Custom MultiQC yaml file containing HTML including a methods description. 
``` -### Required DBs: -- `--reference_genome`: reference genome in FASTA format -- `--blast_reference_genomes_folder`: mandatory **human_phiX** is provided on [FTP](https://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/references/) -- `--bwamem2_reference_genomes_folder`: mandatory **human_phiX** is provided on [FTP](https://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/references/) - Example: ```bash @@ -78,6 +73,33 @@ nextflow run ebi-metagenomics/miassembler \ --reads_accession SRR1631361 ``` +### Required DBs: +- `--reference_genome`: reference genome in FASTA format +- `--blast_reference_genomes_folder`: mandatory **human_phiX** is provided on [FTP](https://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/references/) +- `--bwamem2_reference_genomes_folder`: mandatory **human_phiX** is provided on [FTP](https://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/references/) + +Blast and bwa-mem2 reference databases can be generated for any reference genome to clean input sequences against. + +#### BWA-MEM2 +As explained in [bwa-mem2's README](https://github.com/bwa-mem2/bwa-mem2?tab=readme-ov-file#getting-started): +``` +# Use precompiled binaries (recommended) +curl -L https://github.com/bwa-mem2/bwa-mem2/releases/download/v2.2.1/bwa-mem2-2.2.1_x64-linux.tar.bz2 \ + | tar jxf - + +# Index your reference genome with +bwa-mem2-2.2.1_x64-linux/bwa-mem2 index ref.fa +``` + +This will generate multiple index files in a folder. The folder containing them is the one to use as `bwamem2_reference_genomes_folder`. + +#### BLAST +``` +makeblastdb -in ref.fa -dbtype nucl -out ref +``` + +As with bwa-mem2, numerous files will be generated in the same folder, which should be used for `blast_reference_genomes_folder`. + ### Samplesheet The samplesheet is a comma-separated file (.csv) with the following columns: From d63ce268bc2a0698e79de9947ba5f5858ef7d810 Mon Sep 17 00:00:00 2001 From: Sandy Rogers Date: Tue, 17 Sep 2024 11:10:57 +0100 Subject: [PATCH 11/33] Bugfix/assembler memory (#19) --- conf/modules.config | 4 ++++ nextflow.config | 2 ++ nextflow_schema.json | 18 +++++++++++++++++- tests/main.nf.test | 33 ++++++++++++++++++++++++++++++++- tests/samplesheet/test_mem.csv | 2 ++ workflows/miassembler.nf | 2 +- 6 files changed, 58 insertions(+), 3 deletions(-) create mode 100644 tests/samplesheet/test_mem.csv diff --git a/conf/modules.config b/conf/modules.config index 367222c..1e7016d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -109,6 +109,8 @@ process { // TODO: tweak this based on input ( using the biome maybe? ) time = { check_max( 168.h * task.attempt, 'time') } ext.args = params.spades_only_assembler ? "--only-assembler" : "" + errorStrategy = 'retry' + maxRetries = params.max_spades_retries publishDir = [ [ @@ -145,6 +147,8 @@ process { } cpus = { check_max( 12 * task.attempt, 'cpus' ) } time = { check_max( 16.h * task.attempt, 'time' ) } + errorStrategy = 'retry' + maxRetries = params.max_megahit_retries publishDir = [ [ diff --git a/nextflow.config b/nextflow.config index 168873d..47baab4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -89,6 +89,8 @@ params { max_memory = '1.TB' max_cpus = 32 max_time = '168.h' // 7 days + max_spades_retries = 3 + max_megahit_retries = 3 // Assembler versions spades_version = "3.15.5" diff --git a/nextflow_schema.json b/nextflow_schema.json index ebfb512..3619bc5 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -125,7 +125,7 @@ "description": "Minimum contig length filter for metaT."
}, "assembly_memory": { - "type": "integer", + "type": "number", "default": 100, "description": "Default memory allocated for the assembly process." }, @@ -208,6 +208,22 @@ "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", "hidden": true, "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + }, + "max_spades_retries": { + "type": "integer", + "description": "Maximum number of task attempt retries for (meta)spades assembly steps only.", + "default": 3, + "fa_icon": "fas fa-repeat", + "hidden": true, + "help_text": "Each retry will increase the memory by 50%. Use to limit how many times this increase-and-retry happens." + }, + "max_megahit_retries": { + "type": "integer", + "description": "Maximum number of task attempt retries for megahit assembly steps only.", + "default": 3, + "fa_icon": "fas fa-repeat", + "hidden": true, + "help_text": "Each retry will increase the memory by 50%. Use to limit how many times this increase-and-retry happens." } } }, diff --git a/tests/main.nf.test b/tests/main.nf.test index 06e3213..872a323 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -174,4 +174,35 @@ nextflow_pipeline { } } -} + + test("Samplesheet spades - retries") { + + tag "samplesheet" + tag "retries" + + when { + params { + outdir = "tests/results" + assembler = "spades" + bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" + blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + samplesheet = "${projectDir}/tests/samplesheet/test_mem.csv" + assembly_memory = 0.5 + // will will be [0.5GB, 0.75GB, 1.13GB, ...] which rounds down to [0, 0, 1, ...] so should definitely fail twice before succeeding. after a few trys. 
+ max_spades_retries = 5 + } + } + + then { + with(workflow) { + // eventual success: + assert success + assert trace.succeeded().count{ task -> task.name.contains("SPADES") } == 1 + + // but failed and therefore retried multiple times first: + assert trace.failed().count{ task -> task.name.contains("SPADES") } >= 2 + } + } + + } +} \ No newline at end of file diff --git a/tests/samplesheet/test_mem.csv b/tests/samplesheet/test_mem.csv new file mode 100644 index 0000000..bad87ae --- /dev/null +++ b/tests/samplesheet/test_mem.csv @@ -0,0 +1,2 @@ +study_accession,reads_accession,fastq_1,fastq_2,library_layout,library_strategy,assembler,assembly_memory +SRP115494,SRR5949318,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_2.fastq.gz,paired,metagenomic,, diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index 69a80d6..508470e 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -135,7 +135,7 @@ workflow MIASSEMBLER { [ meta + [ // -- The metadata will be overriden by the parameters -- // "assembler": params.assembler, - "assembly_memory": params.assembler_memory, + "assembly_memory": params.assembly_memory, "library_strategy": params.library_strategy ?: library_strategy, "library_layout": params.library_layout ?: library_layout, "single_end": params.single_end ?: library_layout == "single" From 1fc455fb370cc9df87a767aa85ec9b026cb1106f Mon Sep 17 00:00:00 2001 From: Ge94 Date: Fri, 25 Oct 2024 17:32:26 +0100 Subject: [PATCH 12/33] LR-SR miassembler merge working --- README.md | 4 ++-- nextflow.config | 2 +- nextflow_schema.json | 2 +- subworkflows/local/long_reads_qc.nf | 15 +++++++++------ subworkflows/local/short_reads_assembly_qc.nf | 2 +- subworkflows/local/short_reads_qc.nf | 2 +- workflows/long_reads_assembler.nf | 18 +++++++++--------- workflows/miassembler.nf | 13 ++++++------- workflows/short_reads_assembler.nf | 4 ++-- 9 files changed, 32 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index f0809fd..4e64e3f 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Input/output options --library_layout [string] Force the library_layout value for the study / reads (accepted: single, paired) --spades_version [string] null [default: 3.15.5] --megahit_version [string] null [default: 1.2.9] - --reference_genome [string] The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics + --host_reference_genome [string] The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics internal directory (accepted: chicken.fna, salmon.fna, cod.fna, pig.fna, cow.fna, mouse.fna, honeybee.fna, rainbow_trout.fna, ...) 
--blast_reference_genomes_folder [string] The folder with the reference genome blast indexes, defaults to the Microbiome Informatics internal @@ -66,7 +66,7 @@ Example: nextflow run ebi-metagenomics/miassembler \ -profile codon_slurm \ --assembler metaspades \ - --reference_genome human \ + --host_reference_genome human \ --outdir testing_results \ --study_accession SRP002480 \ --reads_accession SRR1631361 diff --git a/nextflow.config b/nextflow.config index 41c103f..6bab930 100644 --- a/nextflow.config +++ b/nextflow.config @@ -58,7 +58,7 @@ params { low_reads_count_threshold = 1000 // Reference genome - reference_genome = null + host_reference_genome = null /* * Long-read assemblies won't require phiX, diff --git a/nextflow_schema.json b/nextflow_schema.json index 541ee4d..5f24c7e 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -90,7 +90,7 @@ "type": "string", "default": "1.2.9" }, - "reference_genome": { + "host_reference_genome": { "type": "string", "description": "The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics internal directory", "enum": [ diff --git a/subworkflows/local/long_reads_qc.nf b/subworkflows/local/long_reads_qc.nf index 9d2f491..7ff199b 100644 --- a/subworkflows/local/long_reads_qc.nf +++ b/subworkflows/local/long_reads_qc.nf @@ -1,4 +1,4 @@ -include { FASTP_LR } from '../../modules/nf-core/fastp/main' +include { FASTP as FASTP_LR } from '../../modules/nf-core/fastp/main' include { RAW_READ_QUALITY_CHECK } from '../../modules/local/raw_read_quality_check/' include { MINIMAP2_ALIGN as HUMAN_DECONTAMINATION } from '../../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN as HOST_DECONTAMINATION } from '../../modules/nf-core/minimap2/align/main' @@ -21,15 +21,18 @@ workflow LONG_READS_QC { false ) - ch_versions = ch_versions.mix(FASTP.out.versions) + ch_versions = ch_versions.mix(FASTP_LR.out.versions) RAW_READ_QUALITY_CHECK( - FASTP.out.json + FASTP_LR.out.json ) decontaminated_reads = channel.empty() if ( params.remove_human ) { + // TODO: make this consistent with short_reads + // can we use the same flag, even if one has phix but not the other? + // Check file extensions too ch_bwamem2_human_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${params.human_blast_index_name}.fna", checkIfExists: true) .collect().map { @@ -39,7 +42,7 @@ workflow LONG_READS_QC { // TODO: can we change the way human/host are given via prefixes? 
HUMAN_DECONTAMINATION( - FASTP.out.reads, + FASTP_LR.out.reads, ch_bwamem2_human_refs, "human", true, @@ -53,12 +56,12 @@ workflow LONG_READS_QC { decontaminated_reads = HUMAN_DECONTAMINATION.out.filtered_fastq } else { - decontaminated_reads = FASTP.out.reads + decontaminated_reads = FASTP_LR.out.reads } if ( host_reference_genome != null ) { - ch_bwamem2_host_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${host_reference_genome}*", checkIfExists: true) + ch_bwamem2_host_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${host_reference_genome}", checkIfExists: true) .collect().map { files -> [ ["id": host_reference_genome], files ] } diff --git a/subworkflows/local/short_reads_assembly_qc.nf b/subworkflows/local/short_reads_assembly_qc.nf index fc54f89..563a215 100644 --- a/subworkflows/local/short_reads_assembly_qc.nf +++ b/subworkflows/local/short_reads_assembly_qc.nf @@ -22,7 +22,7 @@ workflow SHORT_READS_ASSEMBLY_QC { take: assembly // [ val(meta), path(assembly_fasta) ] - host_reference_genome // [ val(meta2), path(reference_genome) ] | meta2 contains the name of the reference genome + host_reference_genome // [ val(meta2), path(host_reference_genome) ] | meta2 contains the name of the reference genome main: diff --git a/subworkflows/local/short_reads_qc.nf b/subworkflows/local/short_reads_qc.nf index 7dbd198..1be9cfd 100644 --- a/subworkflows/local/short_reads_qc.nf +++ b/subworkflows/local/short_reads_qc.nf @@ -6,7 +6,7 @@ workflow SHORT_READS_QC { take: reads // [ val(meta), path(reads) ] - host_reference_genome // [ val(meta2), path(reference_genome) ] | meta2 contains the name of the reference genome + host_reference_genome // [ val(meta2), path(host_reference_genome) ] | meta2 contains the name of the reference genome main: ch_versions = Channel.empty() diff --git a/workflows/long_reads_assembler.nf b/workflows/long_reads_assembler.nf index 0c269ab..7cb9f13 100644 --- a/workflows/long_reads_assembler.nf +++ b/workflows/long_reads_assembler.nf @@ -34,17 +34,20 @@ include { ONT_HQ } from '../subworkflows/local/ont_hq' // Info required for completion email and summary -workflow LONGREADSASSEMBLER { +workflow LONG_READS_ASSEMBLER { take: - reads // TODO + reads // tuple(meta), path(reads) main: + ch_versions = Channel.empty() + LONG_READS_QC ( reads, - params.reference_genome + params.host_reference_genome ) + ch_versions = ch_versions.mix(LONG_READS_QC.out.versions) /*********************************************************************************/ /* Selecting the combination of adapter trimming, assembler, and post-processing */ @@ -77,12 +80,6 @@ workflow LONGREADSASSEMBLER { } } - - ch_versions = Channel.empty() - - - ch_versions = ch_versions.mix(LONG_READS_QC.out.versions) - /*********************************************************************************/ /* Selecting the combination of adapter trimming, assembler, and post-processing */ /*********************************************************************************/ @@ -177,6 +174,9 @@ workflow LONGREADSASSEMBLER { // ch_multiqc_logo.toList() // ) // multiqc_report = MULTIQC.out.report.toList() + + emit: + versions = ch_versions } /* diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index 5ef05de..d357681 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -59,7 +59,7 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsof // WORKFLOWS // include { SHORT_READS_ASSEMBLER } from '../workflows/short_reads_assembler' 
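Both QC subworkflows above locate their reference indexes with `Channel.fromPath("${params.bwamem2_reference_genomes_folder}/${host_reference_genome}*", checkIfExists: true)`. A rough standalone equivalent of that lookup, handy for sanity-checking a reference folder before launching a run (the folder path used in `__main__` is a placeholder, not a pipeline default):

```python
#!/usr/bin/env python3
"""Sketch of the reference lookup done by the QC subworkflows above.

Assumption: the reference folder contains files whose names start with the
chosen genome name (e.g. "chicken.fna" plus its index files); the path used
in __main__ is a placeholder only.
"""
from pathlib import Path


def resolve_reference(reference_folder, genome_name):
    """Return every file matching '<genome_name>*', like fromPath(..., checkIfExists: true)."""
    hits = sorted(Path(reference_folder).glob(f"{genome_name}*"))
    if not hits:
        # checkIfExists: true aborts the run in the same situation
        raise FileNotFoundError(f"no files matching '{genome_name}*' in {reference_folder}")
    return hits


if __name__ == "__main__":
    print(resolve_reference("/path/to/reference_genomes", "chicken.fna"))
```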
-// include { LONG_READS_ASSEMBLER } from '../workflows/long_reads_assembler' +include { LONG_READS_ASSEMBLER } from '../workflows/long_reads_assembler' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -141,7 +141,7 @@ workflow MIASSEMBLER { // -- The metadata will be overriden by the parameters -- // "assembler": params.assembler, "assembler_config": params.assembler_config, - "assembly_memory": params.assembler_memory, + "assembly_memory": params.assembly_memory, "library_strategy": params.library_strategy ?: library_strategy, "library_layout": params.library_layout ?: library_layout, "single_end": params.single_end ?: library_layout == "single", @@ -188,12 +188,11 @@ workflow MIASSEMBLER { ch_versions.mix( SHORT_READS_ASSEMBLER.out.versions ) - // TODO: enable once this is ready - // LONG_READS_ASSEMBLER( - // reads_to_assemble.out.long_reads - // ) + LONG_READS_ASSEMBLER( + reads_to_assemble.long_reads + ) - // ch_versions.mix( LONG_READS_ASSEMBLER.out.versions ) + ch_versions.mix( LONG_READS_ASSEMBLER.out.versions ) CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') diff --git a/workflows/short_reads_assembler.nf b/workflows/short_reads_assembler.nf index 862652a..744c4e9 100644 --- a/workflows/short_reads_assembler.nf +++ b/workflows/short_reads_assembler.nf @@ -85,7 +85,7 @@ workflow SHORT_READS_ASSEMBLER { SHORT_READS_QC( reads_by_assembler, - params.reference_genome + params.host_reference_genome ) FASTQC_AFTER ( @@ -142,7 +142,7 @@ workflow SHORT_READS_ASSEMBLER { // Clean the assembly contigs // SHORT_READS_ASSEMBLY_QC( assembly, - params.reference_genome + params.host_reference_genome ) ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLY_QC.out.versions) From 8fd885c0734b0f8ab63b44f670ced3bd8aa04645 Mon Sep 17 00:00:00 2001 From: Ge94 Date: Mon, 28 Oct 2024 13:16:34 +0000 Subject: [PATCH 13/33] Updated conflict params and flags after merging --- nextflow.config | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/nextflow.config b/nextflow.config index 12b1476..da8586b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -31,7 +31,7 @@ params { // QC FILTERING - // Short reads options + // Short reads QC filtering options short_reads_filter_ratio_threshold = 0.9 short_reads_low_reads_count_threshold = 1000 @@ -80,7 +80,6 @@ params { * pacbio, and if data quality is high or low) */ assembler = null - assembler_config = null // The pipeline will use the metadata from ENA (obtained by the fetch_tool) // As the metadata can be incorrect, we provide the following parameters to @@ -90,10 +89,6 @@ params { library_strategy = null platform = null - // Reads QC filtering options - filter_ratio_threshold = 0.9 - low_reads_count_threshold = 1000 - // Reference genome host_reference_genome = null From 123ffbc830d4533c994c19753247c13fd5a12a6f Mon Sep 17 00:00:00 2001 From: Ge94 Date: Fri, 1 Nov 2024 16:44:31 +0000 Subject: [PATCH 14/33] Fixed tests, refined variables, harmonised SR+LR --- conf/test.config | 8 ++- nextflow.config | 34 ++----------- nextflow_schema.json | 5 -- subworkflows/local/long_reads_qc.nf | 2 +- tests/human/human.fna | 79 +++++++++++++++++++++++++++++ tests/main.nf.test | 58 ++++++++++++++++----- tests/samplesheet/test.csv | 2 +- workflows/long_reads_assembler.nf | 12 ++--- workflows/miassembler.nf | 14 ++--- workflows/short_reads_assembler.nf | 4 +- 10 files changed, 146 insertions(+), 72 deletions(-) create mode 100644 tests/human/human.fna diff --git 
a/conf/test.config b/conf/test.config index 223443f..cde44ce 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,11 +18,9 @@ profiles { max_memory = '6.GB' max_time = '6.h' - bwamem2_reference_genomes_folder = "tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "tests/human_phix/blast" - human_phix_blast_index_name = "human_phix" - human_phix_bwamem2_index_name = "human_phix" - human_fasta_prefix = "human" + bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" + blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + reference_genomes_folder = "${projectDir}/tests/human/" } } } diff --git a/nextflow.config b/nextflow.config index da8586b..0846ccc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -38,9 +38,11 @@ params { // Long reads options long_reads_min_read_length = 200 - // Reference genome name (to select from list) + // Short reads reference databases (name to be selected from list) bwamem2_reference_genomes_folder = "" blast_reference_genomes_folder = "" + + // Long reads reference genome host_reference_genome = null // Short-read sequences and assemblies are @@ -52,7 +54,7 @@ params { // Long-read assemblies don't require phiX // nor indices, just a fasta file - reference_genomes_folder = null + reference_genomes_folder = "" remove_human = true human_fasta_prefix = "human" @@ -75,38 +77,12 @@ params { * - Memory >1TB * - Runtime >3-4 days * - * - flye: Use for any long-read assembly. assembler_config + * - flye: Use for any long-read assembly. long_reads_assembler_config * should be selected depending on input data (if ONT or * pacbio, and if data quality is high or low) */ assembler = null - // The pipeline will use the metadata from ENA (obtained by the fetch_tool) - // As the metadata can be incorrect, we provide the following parameters to - // "force" them - single_end = null - library_layout = null - library_strategy = null - platform = null - - // Reference genome - host_reference_genome = null - - /* - * Long-read assemblies won't require phiX, - * parameters should be defined as follows: - * remove_human = true - * human_blast_index_name = "human" - * human_bwamem2_index_name = "human" - * Need to integrate them - */ - remove_human_phix = true - human_phix_blast_index_name = "human_phix" - human_phix_bwamem2_index_name = "human_phix" - - bwamem2_reference_genomes_folder = "" - blast_reference_genomes_folder = "" - // Assembly options spades_only_assembler = true short_reads_min_contig_length = 500 diff --git a/nextflow_schema.json b/nextflow_schema.json index d585895..502976c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -134,11 +134,6 @@ "description": "Combined Human and phiX BLAST db.", "default": "human_phix" }, - "human_blast_index_name": { - "type": "string", - "description": "Human BLAST db.", - "default": "human" - }, "human_phix_bwamem2_index_name": { "type": "string", "description": "Combined Human and phiX bwa-mem2 index.", diff --git a/subworkflows/local/long_reads_qc.nf b/subworkflows/local/long_reads_qc.nf index af7a5c3..cbc4d58 100644 --- a/subworkflows/local/long_reads_qc.nf +++ b/subworkflows/local/long_reads_qc.nf @@ -36,7 +36,7 @@ workflow LONG_READS_QC { human_reference = Channel.fromPath( "${params.reference_genomes_folder}/${params.human_fasta_prefix}.fna", checkIfExists: true) .collect().map { - files -> [ ["id": params.human_blast_index_name], files ] + files -> [ ["id": params.human_fasta_prefix], files ] } // TODO: can we change the way human/host are given via 
prefixes? diff --git a/tests/human/human.fna b/tests/human/human.fna new file mode 100644 index 0000000..a4ebdb7 --- /dev/null +++ b/tests/human/human.fna @@ -0,0 +1,79 @@ +>NC_001422.1 Escherichia phage phiX174, complete genome +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG +TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCGGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT +GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA 
+TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA +AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA + diff --git a/tests/main.nf.test b/tests/main.nf.test index 06e3213..3f36f99 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -11,8 +11,7 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "spades" - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + samplesheet = "${projectDir}/tests/samplesheet/test.csv" } } @@ -27,6 +26,37 @@ nextflow_pipeline { } + test("Samplesheet - no assembled - reads filtered ") { + + tag "samplesheet" + + when { + params { + outdir = "tests/results" + assembler = "spades" + + short_reads_low_reads_count_threshold = 1000000 + + samplesheet = "${projectDir}/tests/samplesheet/test.csv" + } + } + + then { + with(workflow) { + assert success + assert trace.succeeded().count{ task -> task.name.contains("FASTQC_BEFORE") } == 3 + assert trace.succeeded().count{ task -> task.name.contains("FASTP") } == 3 + assert trace.succeeded().count{ task -> task.name.contains("HUMAN_PHIX_DECONTAMINATION") } == 3 + assert trace.succeeded().count{ task -> task.name.contains("FASTQC_AFTER") } == 3 + assert trace.succeeded().count{ task -> task.name.contains("CUSTOM_DUMPSOFTWAREVERSIONS") } == 1 + assert trace.succeeded().count{ task -> task.name.contains("MULTIQC_STUDY") } 
== 2 + assert trace.succeeded().count{ task -> task.name.contains("SPADES") } == 0 + assert trace.succeeded().count{ task -> task.name.contains("MEGAHIT") } == 0 + } + } + + } + test("metaSPAdes - paired end") { tag "ena-portal-api" @@ -35,8 +65,10 @@ nextflow_pipeline { params { outdir = "tests/results" - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + + // Force the assembly + short_reads_filter_ratio_threshold = 0.1 + study_accession = "SRP115494" reads_accession = "SRR6180434" } @@ -61,10 +93,12 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "megahit" + + // Force the assembly + short_reads_filter_ratio_threshold = 0.1 + study_accession = "SRP115494" reads_accession = "SRR6180434" - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" } } @@ -87,8 +121,7 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "metaspades" - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + study_accession = "ERP012810" reads_accession = "ERR1076564" } @@ -111,8 +144,7 @@ nextflow_pipeline { when { params { outdir = "tests/results" - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + study_accession = "ERP012810" reads_accession = "ERR1076564" } @@ -133,8 +165,7 @@ nextflow_pipeline { when { params { outdir = "tests/results" - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + study_accession = "DRP007622" reads_accession = "DRR280712" } @@ -159,8 +190,7 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "spades" - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + study_accession = "DRP007622" reads_accession = "DRR280712" } diff --git a/tests/samplesheet/test.csv b/tests/samplesheet/test.csv index fab7d69..8137806 100644 --- a/tests/samplesheet/test.csv +++ b/tests/samplesheet/test.csv @@ -1,4 +1,4 @@ study_accession,reads_accession,fastq_1,fastq_2,library_layout,library_strategy,platform,assembler,assembly_memory,assembler_config SRP115494,SRR6180434,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR6180434_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR6180434_2.fastq.gz,paired,metagenomic,,,,, SRP115494,SRR5949318,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_2.fastq.gz,paired,metagenomic,,,,, -DRP007622,DRR280712,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/DRR280712.fastq.gz,,single,metatranscriptomic,megahit,,,, +DRP007622,DRR280712,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/DRR280712.fastq.gz,,single,metatranscriptomic,,megahit,,, diff --git a/workflows/long_reads_assembler.nf b/workflows/long_reads_assembler.nf index de4b2d2..0148c01 100644 --- a/workflows/long_reads_assembler.nf +++ b/workflows/long_reads_assembler.nf @@ -64,15 +64,15 @@ workflow LONG_READS_ASSEMBLER { 
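The hunk body that follows maps sequencing platform and detected read quality onto a Flye preset via `reads_assembler_config`. The same decision table written out as a small standalone function, for clarity; the preset names are the four `assembler_config` values used in that hunk, and the "explicit config wins" precedence is a simplified reading of the Groovy conditionals, so treat this as a sketch rather than the canonical behaviour:

```python
#!/usr/bin/env python3
"""Sketch of the platform/quality -> Flye preset selection used below."""


def select_flye_preset(platform, quality, forced_config=None):
    """Mirror reads_assembler_config: an explicit config wins, otherwise quality decides."""
    presets = {
        ("ont", "low"): "nano-raw",
        ("ont", "high"): "nano-hq",
        ("pacbio", "low"): "pacbio-raw",
        ("pacbio", "high"): "pacbio-hifi",
    }
    if forced_config is not None:
        return forced_config
    if (platform, quality) not in presets:
        raise ValueError(f"unexpected platform/quality combination: {platform}/{quality}")
    return presets[(platform, quality)]


if __name__ == "__main__":
    print(select_flye_preset("ont", "high"))    # nano-hq
    print(select_flye_preset("pacbio", "low"))  # pacbio-raw
```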
reads_assembler_config = LONG_READS_QC.out.qc_reads.map { meta, reads -> if (meta.platform == "ont") { - if (params.assembler_config == "nano-raw" || meta.quality == "low") { + if (params.long_reads_assembler_config == "nano-raw" || meta.quality == "low") { return [meta + ["assembler_config": "nano-raw"], reads] - } else if (params.assembler_config == "nano-hq" || meta.quality == "high") { + } else if (params.long_reads_assembler_config == "nano-hq" || meta.quality == "high") { return [meta + ["assembler_config": "nano-hq"], reads] } } else if (meta.platform == "pacbio") { - if (params.assembler_config == "pacbio-raw" || meta.quality == "low") { + if (params.long_reads_assembler_config == "pacbio-raw" || meta.quality == "low") { return [meta + ["assembler_config": "pacbio-raw"], reads] - } else if (params.assembler_config == "pacbio-hifi" || meta.quality == "high") { + } else if (params.long_reads_assembler_config == "pacbio-hifi" || meta.quality == "high") { return [meta + ["assembler_config": "pacbio-hifi"], reads] } } else { @@ -148,10 +148,6 @@ workflow LONG_READS_ASSEMBLER { // ) // ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - // CUSTOM_DUMPSOFTWAREVERSIONS ( - // ch_versions.unique().collectFile(name: 'collated_versions.yml') - // ) - // // MODULE: MultiQC // diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index d17ea7c..7f029bf 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -96,7 +96,7 @@ workflow MIASSEMBLER { "single_end": true, "assembler": assembler ?: params.assembler, "assembly_memory": assembly_memory ?: params.assembly_memory, - "assembler_config": params.assembler_config + "assembler_config": params.long_reads_assembler_config ], [fq1] ) @@ -108,7 +108,7 @@ workflow MIASSEMBLER { "single_end": false, "assembler": assembler ?: params.assembler, "assembly_memory": assembly_memory ?: params.assembly_memory, - "assembler_config": params.assembler_config, + "assembler_config": params.long_reads_assembler_config, "platform": params.platform ?: platform ], [fq1, fq2]) @@ -140,7 +140,7 @@ workflow MIASSEMBLER { [ meta + [ // -- The metadata will be overriden by the parameters -- // "assembler": params.assembler, - "assembler_config": params.assembler_config, + "assembler_config": params.long_reads_assembler_config, "assembly_memory": params.assembly_memory, "library_strategy": params.library_strategy ?: library_strategy, "library_layout": params.library_layout ?: library_layout, @@ -186,13 +186,13 @@ workflow MIASSEMBLER { reads_to_assemble.short_reads ) - ch_versions.mix( SHORT_READS_ASSEMBLER.out.versions ) + ch_versions = ch_versions.mix( SHORT_READS_ASSEMBLER.out.versions ) LONG_READS_ASSEMBLER( reads_to_assemble.long_reads ) - ch_versions.mix( LONG_READS_ASSEMBLER.out.versions ) + ch_versions = ch_versions.mix( LONG_READS_ASSEMBLER.out.versions ) CUSTOM_DUMPSOFTWAREVERSIONS ( ch_versions.unique().collectFile(name: 'collated_versions.yml') @@ -305,8 +305,8 @@ workflow MIASSEMBLER { if ( extended_meta.low_reads_count ) { return "${meta.id},low_reads_count" } - if ( extended_meta.short_reads_filter_ratio_threshold_exceeded ) { - return "${meta.id},short_reads_filter_ratio_threshold_exceeded" + if ( extended_meta.filter_ratio_threshold_exceeded ) { + return "${meta.id},filter_ratio_threshold_exceeded" } error "Unexpected. 
meta: ${meta}, extended_meta: ${extended_meta}" } diff --git a/workflows/short_reads_assembler.nf b/workflows/short_reads_assembler.nf index 744c4e9..8ab6696 100644 --- a/workflows/short_reads_assembler.nf +++ b/workflows/short_reads_assembler.nf @@ -102,8 +102,8 @@ workflow SHORT_READS_ASSEMBLER { bf_total_reads = json_txt?.summary?.before_filtering?.total_reads ?: 0; af_total_reads = json_txt?.summary?.after_filtering?.total_reads ?: 0; reads_qc_meta = [ - "low_reads_count": af_total_reads <= params.low_reads_count_threshold, - "filter_ratio_threshold_exceeded": af_total_reads == 0 || ((af_total_reads / bf_total_reads) <= params.filter_ratio_threshold ) + "low_reads_count": af_total_reads <= params.short_reads_low_reads_count_threshold, + "filter_ratio_threshold_exceeded": af_total_reads == 0 || ((af_total_reads / bf_total_reads) <= params.short_reads_filter_ratio_threshold ) ] return [meta, reads_qc_meta] } From d95f0e5897c880e22ef3d80f36d8edd850d42cf3 Mon Sep 17 00:00:00 2001 From: Ge94 Date: Wed, 6 Nov 2024 12:14:26 +0000 Subject: [PATCH 15/33] Updated reference_genome param --- README.md | 4 ++-- nextflow.config | 2 +- nextflow_schema.json | 2 +- subworkflows/local/long_reads_qc.nf | 8 ++++---- subworkflows/local/short_reads_assembly_qc.nf | 8 ++++---- subworkflows/local/short_reads_qc.nf | 8 ++++---- workflows/long_reads_assembler.nf | 2 +- workflows/short_reads_assembler.nf | 4 ++-- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 64ad609..8414420 100644 --- a/README.md +++ b/README.md @@ -38,7 +38,7 @@ Input/output options --spades_version [string] null [default: 3.15.5] --megahit_version [string] null [default: 1.2.9] --flye_version [string] null [default: 2.9] - --host_reference_genome [string] The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics + --reference_genome [string] The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics internal directory (accepted: chicken.fna, salmon.fna, cod.fna, pig.fna, cow.fna, mouse.fna, honeybee.fna, rainbow_trout.fna, ...) 
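The `reads_qc_meta` map above is what later excludes a run from assembly (`low_reads_count`, `filter_ratio_threshold_exceeded`). The same check applied directly to a fastp JSON report as a throwaway script; the 1000-read and 0.1-ratio defaults are the values set elsewhere in this patch series, and the field names follow fastp's `summary.before_filtering` / `summary.after_filtering` sections:

```python
#!/usr/bin/env python3
"""Sketch of the pre-assembly exclusion rules used above."""
import json
import sys


def exclusion_reason(fastp_json_path, low_reads_count_threshold=1000, filter_ratio_threshold=0.1):
    """Return an exclusion message, or None if the run can be assembled."""
    with open(fastp_json_path) as handle:
        summary = json.load(handle).get("summary", {})

    before = summary.get("before_filtering", {}).get("total_reads", 0)
    after = summary.get("after_filtering", {}).get("total_reads", 0)

    if after <= low_reads_count_threshold:
        return "low_reads_count"
    if after == 0 or before == 0 or (after / before) <= filter_ratio_threshold:
        return "filter_ratio_threshold_exceeded"
    return None


if __name__ == "__main__":
    print(exclusion_reason(sys.argv[1]) or "ok")
```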
--blast_reference_genomes_folder [string] The folder with the reference genome blast indexes, defaults to the Microbiome Informatics internal @@ -69,7 +69,7 @@ Example: nextflow run ebi-metagenomics/miassembler \ -profile codon_slurm \ --assembler metaspades \ - --host_reference_genome human \ + --reference_genome human \ --outdir testing_results \ --study_accession SRP002480 \ --reads_accession SRR1631361 diff --git a/nextflow.config b/nextflow.config index 0846ccc..5af2985 100644 --- a/nextflow.config +++ b/nextflow.config @@ -43,7 +43,7 @@ params { blast_reference_genomes_folder = "" // Long reads reference genome - host_reference_genome = null + reference_genome = null // Short-read sequences and assemblies are // automatically polished from human and phix seqs diff --git a/nextflow_schema.json b/nextflow_schema.json index 502976c..33e7cca 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -86,7 +86,7 @@ "type": "string", "default": "1.2.9" }, - "host_reference_genome": { + "reference_genome": { "type": "string", "description": "The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics internal directory", "enum": [ diff --git a/subworkflows/local/long_reads_qc.nf b/subworkflows/local/long_reads_qc.nf index cbc4d58..45a3ab3 100644 --- a/subworkflows/local/long_reads_qc.nf +++ b/subworkflows/local/long_reads_qc.nf @@ -7,7 +7,7 @@ workflow LONG_READS_QC { take: reads // [ val(meta), path(reads) ] - host_reference_genome // [ val(meta2), path(reference_genome) ] + reference_genome // [ val(meta2), path(reference_genome) ] main: ch_versions = Channel.empty() @@ -59,11 +59,11 @@ workflow LONG_READS_QC { decontaminated_reads = FASTP_LR.out.reads } - if ( host_reference_genome != null ) { + if ( reference_genome != null ) { - host_reference = Channel.fromPath( "${params.reference_genomes_folder}/${host_reference_genome}*", checkIfExists: true) + host_reference = Channel.fromPath( "${params.reference_genomes_folder}/${reference_genome}*", checkIfExists: true) .collect().map { - files -> [ ["id": host_reference_genome], files ] + files -> [ ["id": reference_genome], files ] } HOST_DECONTAMINATION( diff --git a/subworkflows/local/short_reads_assembly_qc.nf b/subworkflows/local/short_reads_assembly_qc.nf index d085a04..5e273af 100644 --- a/subworkflows/local/short_reads_assembly_qc.nf +++ b/subworkflows/local/short_reads_assembly_qc.nf @@ -22,7 +22,7 @@ workflow SHORT_READS_ASSEMBLY_QC { take: assembly // [ val(meta), path(assembly_fasta) ] - host_reference_genome // [ val(meta2), path(host_reference_genome) ] | meta2 contains the name of the reference genome + reference_genome // [ val(meta2), path(reference_genome) ] | meta2 contains the name of the reference genome main: @@ -60,11 +60,11 @@ workflow SHORT_READS_ASSEMBLY_QC { ch_versions = ch_versions.mix(SEQKIT_GREP_HUMAN_PHIX.out.versions) } - if ( host_reference_genome != null ) { + if ( reference_genome != null ) { - ch_blast_host_refs = Channel.fromPath( "${params.blast_reference_genomes_folder}/${host_reference_genome}*", checkIfExists: true) + ch_blast_host_refs = Channel.fromPath( "${params.blast_reference_genomes_folder}/${reference_genome}*", checkIfExists: true) .collect().map { - files -> [ ["id": host_reference_genome], files ] + files -> [ ["id": reference_genome], files ] } BLAST_BLASTN_HOST( diff --git a/subworkflows/local/short_reads_qc.nf b/subworkflows/local/short_reads_qc.nf index 1be9cfd..5cbe55b 100644 --- a/subworkflows/local/short_reads_qc.nf +++ 
b/subworkflows/local/short_reads_qc.nf @@ -6,7 +6,7 @@ workflow SHORT_READS_QC { take: reads // [ val(meta), path(reads) ] - host_reference_genome // [ val(meta2), path(host_reference_genome) ] | meta2 contains the name of the reference genome + reference_genome // [ val(meta2), path(reference_genome) ] | meta2 contains the name of the reference genome main: ch_versions = Channel.empty() @@ -44,11 +44,11 @@ workflow SHORT_READS_QC { decontaminated_reads = FASTP.out.reads } - if ( host_reference_genome != null ) { + if ( reference_genome != null ) { - ch_bwamem2_host_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${host_reference_genome}*", checkIfExists: true) + ch_bwamem2_host_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${reference_genome}*", checkIfExists: true) .collect().map { - files -> [ ["id": host_reference_genome], files ] + files -> [ ["id": reference_genome], files ] } HOST_DECONTAMINATION( diff --git a/workflows/long_reads_assembler.nf b/workflows/long_reads_assembler.nf index 0148c01..092c850 100644 --- a/workflows/long_reads_assembler.nf +++ b/workflows/long_reads_assembler.nf @@ -45,7 +45,7 @@ workflow LONG_READS_ASSEMBLER { LONG_READS_QC ( reads, - params.host_reference_genome + params.reference_genome ) ch_versions = ch_versions.mix(LONG_READS_QC.out.versions) diff --git a/workflows/short_reads_assembler.nf b/workflows/short_reads_assembler.nf index 8ab6696..5b30d1c 100644 --- a/workflows/short_reads_assembler.nf +++ b/workflows/short_reads_assembler.nf @@ -85,7 +85,7 @@ workflow SHORT_READS_ASSEMBLER { SHORT_READS_QC( reads_by_assembler, - params.host_reference_genome + params.reference_genome ) FASTQC_AFTER ( @@ -142,7 +142,7 @@ workflow SHORT_READS_ASSEMBLER { // Clean the assembly contigs // SHORT_READS_ASSEMBLY_QC( assembly, - params.host_reference_genome + params.reference_genome ) ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLY_QC.out.versions) From 3cc1d79e13d9e61d9eeb5fd6c9034e7839bdae84 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Wed, 6 Nov 2024 15:24:45 +0000 Subject: [PATCH 16/33] Remove puthi.conf file that was pushed by accident. --- conf/puthi.config | 30 ------------------------------ 1 file changed, 30 deletions(-) delete mode 100644 conf/puthi.config diff --git a/conf/puthi.config b/conf/puthi.config deleted file mode 100644 index a5c1e69..0000000 --- a/conf/puthi.config +++ /dev/null @@ -1,30 +0,0 @@ -params { - bwamem2_reference_genomes_folder = "/projappl/project_2010686/ebi/reference_dbs/bwamem2" - blast_reference_genomes_folder = "/projappl/project_2010686/ebi/reference_dbs/blast" - human_phix_blast_index_name = "human_phix" - human_phix_bwamem2_index_name = "human_phix" -} - -executor { - name = "slurm" - queueSize = 200 - queueGlobalStatus = true - submitRateLimit = "10 sec" - pollInterval = "10 sec" -} - -conda.enabled = false - -// If true, on a successful completion of a run all files in work directory are automatically deleted. -cleanup = true - -singularity { - enabled = true - autoMounts = true - cacheDir = "/projappl/project_2010686/ebi/singularity_cache" -} - -conda.enabled = false - -// If true, on a successful completion of a run all files in work directory are automatically deleted. 
-cleanup = true From 2379cb758202c36d72e2f5a70c6521b0ac357860 Mon Sep 17 00:00:00 2001 From: Ge94 Date: Thu, 7 Nov 2024 12:50:30 +0000 Subject: [PATCH 17/33] Update raw_reads qc script with json parser --- modules/local/raw_read_quality_check.nf | 24 ------------------------ subworkflows/local/long_reads_qc.nf | 16 +++++++++++++++- 2 files changed, 15 insertions(+), 25 deletions(-) delete mode 100644 modules/local/raw_read_quality_check.nf diff --git a/modules/local/raw_read_quality_check.nf b/modules/local/raw_read_quality_check.nf deleted file mode 100644 index 01ea6f2..0000000 --- a/modules/local/raw_read_quality_check.nf +++ /dev/null @@ -1,24 +0,0 @@ -process RAW_READ_QUALITY_CHECK { - tag "$reads_accession" - label 'process_single' - - input: - tuple val(meta), path(fastp_json) - - output: - env(quality) , emit: quality - path "versions.yml", emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - """ - quality=\$(check_raw_quality.py -j ${fastp_json}) - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version 2>&1 | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/subworkflows/local/long_reads_qc.nf b/subworkflows/local/long_reads_qc.nf index 45a3ab3..431b340 100644 --- a/subworkflows/local/long_reads_qc.nf +++ b/subworkflows/local/long_reads_qc.nf @@ -1,5 +1,4 @@ include { FASTP as FASTP_LR } from '../../modules/nf-core/fastp/main' -include { RAW_READ_QUALITY_CHECK } from '../../modules/local/raw_read_quality_check/' include { MINIMAP2_ALIGN as HUMAN_DECONTAMINATION } from '../../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN as HOST_DECONTAMINATION } from '../../modules/nf-core/minimap2/align/main' @@ -23,6 +22,21 @@ workflow LONG_READS_QC { ch_versions = ch_versions.mix(FASTP_LR.out.versions) + quality_levels_ch = FASTP_LR.out.json.map { meta, json -> { + json_txt = new JsonSlurper().parseText(json.text) + q20bases = json_txt?.summary?.before_filtering?.q20_bases ?: 0; + total_bases = json_txt?.summary?.before_filtering?.total_bases ?: 0; + + q20_percentage = q20_bases / total_bases * 100 + + quality = [ + "high_quality": q20_percentage >= 80, + "low_quality": q20_percentage < 80, + ] + return [meta, quality] + } + } + RAW_READ_QUALITY_CHECK( FASTP_LR.out.json ) From d43a86203a0b3460a98d2c5e539b436081df05c5 Mon Sep 17 00:00:00 2001 From: Ge94 Date: Thu, 7 Nov 2024 14:22:09 +0000 Subject: [PATCH 18/33] Comment and update --platform --- README.md | 1 + modules/local/fetchtool_reads.nf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 8414420..1135e75 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ Input/output options --library_strategy [string] Force the library_strategy value for the study / reads (accepted: metagenomic, metatranscriptomic, genomic, transcriptomic, other) --library_layout [string] Force the library_layout value for the study / reads (accepted: single, paired) + --platform [string] Force the sequencing_platform value for the study / reads --spades_version [string] null [default: 3.15.5] --megahit_version [string] null [default: 1.2.9] --flye_version [string] null [default: 2.9] diff --git a/modules/local/fetchtool_reads.nf b/modules/local/fetchtool_reads.nf index e62484a..05baecb 100644 --- a/modules/local/fetchtool_reads.nf +++ b/modules/local/fetchtool_reads.nf @@ -38,7 +38,7 @@ process FETCHTOOL_READS { elif [[ \$metadata_platform == "pacbio rs" || \$metadata_platform == "pacbio rs ii" ]]; then platform="pacbio" else - 
platform="short" + platform=\$metadata_platform fi cat <<-END_VERSIONS > versions.yml From c2a2bb614a359a10ada67b210843bac7579a6e67 Mon Sep 17 00:00:00 2001 From: Ge94 Date: Thu, 7 Nov 2024 15:22:27 +0000 Subject: [PATCH 19/33] Minor refinements and documentation --- subworkflows/local/long_reads_qc.nf | 56 ++++++++++++----------------- subworkflows/local/ont_hq.nf | 2 +- subworkflows/local/ont_lq.nf | 2 +- workflows/long_reads_assembler.nf | 4 +-- 4 files changed, 27 insertions(+), 37 deletions(-) diff --git a/subworkflows/local/long_reads_qc.nf b/subworkflows/local/long_reads_qc.nf index 431b340..f3dc9ac 100644 --- a/subworkflows/local/long_reads_qc.nf +++ b/subworkflows/local/long_reads_qc.nf @@ -1,11 +1,11 @@ -include { FASTP as FASTP_LR } from '../../modules/nf-core/fastp/main' -include { MINIMAP2_ALIGN as HUMAN_DECONTAMINATION } from '../../modules/nf-core/minimap2/align/main' -include { MINIMAP2_ALIGN as HOST_DECONTAMINATION } from '../../modules/nf-core/minimap2/align/main' +include { FASTP as FASTP_LR } from '../../modules/nf-core/fastp/main' +include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_HUMAN } from '../../modules/nf-core/minimap2/align/main' +include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_HOST } from '../../modules/nf-core/minimap2/align/main' workflow LONG_READS_QC { take: - reads // [ val(meta), path(reads) ] + reads // [ val(meta), path(reads) ] reference_genome // [ val(meta2), path(reference_genome) ] main: @@ -13,11 +13,11 @@ workflow LONG_READS_QC { FASTP_LR( reads, - [], - false, - false, - false, - false + [], // no input adapters + false, // keep passing reads in the output + false, // omit trimmed reads in the output + false, // don't merge all reads in the output + false // don't trim for polyA ) ch_versions = ch_versions.mix(FASTP_LR.out.versions) @@ -37,9 +37,7 @@ workflow LONG_READS_QC { } } - RAW_READ_QUALITY_CHECK( - FASTP_LR.out.json - ) + // TODO: add filter if too many reads are removed decontaminated_reads = channel.empty() @@ -55,14 +53,14 @@ workflow LONG_READS_QC { // TODO: can we change the way human/host are given via prefixes? 
- HUMAN_DECONTAMINATION( + MINIMAP2_ALIGN_HUMAN( FASTP_LR.out.reads, human_reference, "human", - true, - "bai", - false, - true + true, // output bam format + "bai", // bam index extension + false, // no CIGAR in paf format + true // allow for long CIGAR ) ch_versions = ch_versions.mix(HUMAN_DECONTAMINATION.out.versions) @@ -80,30 +78,22 @@ workflow LONG_READS_QC { files -> [ ["id": reference_genome], files ] } - HOST_DECONTAMINATION( + MINIMAP2_ALIGN_HOST( decontaminated_reads, host_reference, "host", - true, - "bai", - false, - true + true, // output bam format + "bai", // bam index extension + false, // no CIGAR in paf format + true // allow for long CIGAR ) - ch_versions = ch_versions.mix(HOST_DECONTAMINATION.out.versions) + ch_versions = ch_versions.mix(MINIMAP2_ALIGN_HOST.out.versions) - decontaminated_reads = HOST_DECONTAMINATION.out.filtered_fastq + decontaminated_reads = MINIMAP2_ALIGN_HOST.out.filtered_fastq } - final_reads = decontaminated_reads - .map{ meta, reads -> { - [ meta + [ - "quality": RAW_READ_QUALITY_CHECK.out.quality.val - ], reads ] - } - } - emit: - qc_reads = final_reads + qc_reads = decontaminated_reads versions = ch_versions } diff --git a/subworkflows/local/ont_hq.nf b/subworkflows/local/ont_hq.nf index 7255d24..aa21574 100644 --- a/subworkflows/local/ont_hq.nf +++ b/subworkflows/local/ont_hq.nf @@ -1,4 +1,4 @@ -include { PORECHOP_ABI as PORECHOP_ONT } from '../../modules/nf-core/porechop/abi/main' +include { PORECHOP_ABI } from '../../modules/nf-core/porechop/abi/main' workflow ONT_HQ { take: diff --git a/subworkflows/local/ont_lq.nf b/subworkflows/local/ont_lq.nf index 6538c14..b37b063 100644 --- a/subworkflows/local/ont_lq.nf +++ b/subworkflows/local/ont_lq.nf @@ -1,4 +1,4 @@ -include { CANU as CANU_ONT } from '../../modules/nf-core/canu/main' +include { CANU } from '../../modules/nf-core/canu/main' workflow ONT_LQ { take: diff --git a/workflows/long_reads_assembler.nf b/workflows/long_reads_assembler.nf index 092c850..f796f54 100644 --- a/workflows/long_reads_assembler.nf +++ b/workflows/long_reads_assembler.nf @@ -13,8 +13,8 @@ include { LONG_READS_QC } from '../subworkflows/local/long_reads_qc' include { ONT_LQ } from '../subworkflows/local/ont_lq' include { ONT_HQ } from '../subworkflows/local/ont_hq' -// include { PACBIO_LQ } from '../subworkflows/local/pacbio_lq' -// include { PACBIO_HIFI } from '../subworkflows/local/pacbio_hifi' +include { PACBIO_LQ } from '../subworkflows/local/pacbio_lq' +include { PACBIO_HIFI } from '../subworkflows/local/pacbio_hifi' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From aa18aa0409c468f292f77f85e4ead20a0e8182d5 Mon Sep 17 00:00:00 2001 From: Ge94 Date: Wed, 13 Nov 2024 16:31:28 +0000 Subject: [PATCH 20/33] Fixed tests and configs --- bin/check_raw_quality.py | 22 ---------------------- conf/test.config | 8 ++++++++ modules/local/fetchtool_reads.nf | 2 +- subworkflows/local/long_reads_qc.nf | 6 ++++-- subworkflows/local/ont_hq.nf | 6 +++--- subworkflows/local/ont_lq.nf | 2 +- subworkflows/local/pacbio_lq.nf | 2 +- workflows/short_reads_assembler.nf | 11 +++-------- 8 files changed, 21 insertions(+), 38 deletions(-) delete mode 100755 bin/check_raw_quality.py diff --git a/bin/check_raw_quality.py b/bin/check_raw_quality.py deleted file mode 100755 index 9a9dc5b..0000000 --- a/bin/check_raw_quality.py +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env python3 - -import json -import argparse - -parser = argparse.ArgumentParser(description="Evaluate run quality from fastp 
output") -parser.add_argument('--json','-j',help='Fastp json output',required=True) - -argv = parser.parse_args() - -fastp_out = argv.json -data = json.load(open(fastp_out)) - -q20_bases = float(data['read1_before_filtering']['q20_bases']) -total_bases = float(data['read1_before_filtering']['total_bases']) -q20_percentage = q20_bases/total_bases*100 - -quality = "low" -if q20_percentage >= 80: - quality = "high" - -print(quality) \ No newline at end of file diff --git a/conf/test.config b/conf/test.config index cde44ce..2e734c2 100644 --- a/conf/test.config +++ b/conf/test.config @@ -21,6 +21,14 @@ profiles { bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" reference_genomes_folder = "${projectDir}/tests/human/" + + max_spades_retries = -1 + max_megahit_retries = -1 + } + + process { + errorStrategy = 'ignore' + maxRetries = 0 } } } diff --git a/modules/local/fetchtool_reads.nf b/modules/local/fetchtool_reads.nf index 05baecb..3dbeae1 100644 --- a/modules/local/fetchtool_reads.nf +++ b/modules/local/fetchtool_reads.nf @@ -38,7 +38,7 @@ process FETCHTOOL_READS { elif [[ \$metadata_platform == "pacbio rs" || \$metadata_platform == "pacbio rs ii" ]]; then platform="pacbio" else - platform=\$metadata_platform + platform="\$metadata_platform" fi cat <<-END_VERSIONS > versions.yml diff --git a/subworkflows/local/long_reads_qc.nf b/subworkflows/local/long_reads_qc.nf index f3dc9ac..da0b059 100644 --- a/subworkflows/local/long_reads_qc.nf +++ b/subworkflows/local/long_reads_qc.nf @@ -1,3 +1,5 @@ +import groovy.json.JsonSlurper + include { FASTP as FASTP_LR } from '../../modules/nf-core/fastp/main' include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_HUMAN } from '../../modules/nf-core/minimap2/align/main' include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_HOST } from '../../modules/nf-core/minimap2/align/main' @@ -63,9 +65,9 @@ workflow LONG_READS_QC { true // allow for long CIGAR ) - ch_versions = ch_versions.mix(HUMAN_DECONTAMINATION.out.versions) + ch_versions = ch_versions.mix(MINIMAP2_ALIGN_HUMAN.out.versions) - decontaminated_reads = HUMAN_DECONTAMINATION.out.filtered_fastq + decontaminated_reads = MINIMAP2_ALIGN_HUMAN.out.filtered_fastq } else { decontaminated_reads = FASTP_LR.out.reads diff --git a/subworkflows/local/ont_hq.nf b/subworkflows/local/ont_hq.nf index aa21574..4537d46 100644 --- a/subworkflows/local/ont_hq.nf +++ b/subworkflows/local/ont_hq.nf @@ -5,12 +5,12 @@ workflow ONT_HQ { reads // [ val(meta), path(reads) ] main: - PORECHOP_ONT( + PORECHOP_ABI( reads ) - PORECHOP_ONT.out.reads.view() + PORECHOP_ABI.out.reads.view() // temporary just to test the module emit: - contigs = PORECHOP_ONT.out.reads + contigs = PORECHOP_ABI.out.reads } diff --git a/subworkflows/local/ont_lq.nf b/subworkflows/local/ont_lq.nf index b37b063..d53db8c 100644 --- a/subworkflows/local/ont_lq.nf +++ b/subworkflows/local/ont_lq.nf @@ -1,4 +1,4 @@ -include { CANU } from '../../modules/nf-core/canu/main' +include { CANU as CANU_ONT } from '../../modules/nf-core/canu/main' workflow ONT_LQ { take: diff --git a/subworkflows/local/pacbio_lq.nf b/subworkflows/local/pacbio_lq.nf index df49b01..0db719d 100644 --- a/subworkflows/local/pacbio_lq.nf +++ b/subworkflows/local/pacbio_lq.nf @@ -1,4 +1,4 @@ -include { CANU as CANU_PACBIO } from '../../modules/nf-core/canu/main' +include { CANU as CANU_PACBIO } from '../../modules/nf-core/canu/main' workflow PACBIO_LQ { take: diff --git a/workflows/short_reads_assembler.nf 
b/workflows/short_reads_assembler.nf index 5b30d1c..f159e84 100644 --- a/workflows/short_reads_assembler.nf +++ b/workflows/short_reads_assembler.nf @@ -80,13 +80,13 @@ workflow SHORT_READS_ASSEMBLER { FASTQC_BEFORE ( reads_by_assembler ) - ch_versions = ch_versions.mix(FASTQC_BEFORE.out.versions) SHORT_READS_QC( reads_by_assembler, params.reference_genome ) + ch_versions = ch_versions.mix(SHORT_READS_QC.out.versions) FASTQC_AFTER ( SHORT_READS_QC.out.qc_reads @@ -118,8 +118,6 @@ workflow SHORT_READS_ASSEMBLER { xspades: ["metaspades", "spades"].contains(meta.assembler) }.set { qc_filtered_reads } - ch_versions = ch_versions.mix(SHORT_READS_QC.out.versions) - /*********************/ /* Assembly */ /********************/ @@ -128,23 +126,20 @@ workflow SHORT_READS_ASSEMBLER { [], // yml input parameters, which we don't use [] // hmm, not used ) - ch_versions = ch_versions.mix(SPADES.out.versions) MEGAHIT( qc_filtered_reads.megahit.map { meta, reads, _ -> [meta, reads] } ) - - assembly = SPADES.out.contigs.mix( MEGAHIT.out.contigs ) - ch_versions = ch_versions.mix(MEGAHIT.out.versions) + + assembly = SPADES.out.contigs.mix( MEGAHIT.out.contigs ) // Clean the assembly contigs // SHORT_READS_ASSEMBLY_QC( assembly, params.reference_genome ) - ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLY_QC.out.versions) // Coverage // From 9a3cf9f6230db9a453f06d72c8cb9fba9fe8bbc4 Mon Sep 17 00:00:00 2001 From: Jennifer Mattock Date: Wed, 20 Nov 2024 15:05:08 +0000 Subject: [PATCH 21/33] amended filter ratio threshold to 10% --- README.md | 2 +- nextflow.config | 2 +- workflows/short_reads_assembler.nf | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 2d15f73..037c9eb 100644 --- a/README.md +++ b/README.md @@ -221,7 +221,7 @@ SRR6180434,short_reads_filter_ratio_threshold_exceeded | Exclusion Message | Description | | --------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `short_reads_filter_ratio_threshold_exceeded` | The maximum fraction of reads that are allowed to be filtered out. If exceeded, it flags excessive filtering. The default value is 0.9, meaning that if more than 90% of the reads are filtered out, the threshold is considered exceeded, and the run is not assembled. | +| `short_reads_filter_ratio_threshold_exceeded` | The maximum fraction of reads that are allowed to be filtered out. If exceeded, it flags excessive filtering. The default value is 0.1, meaning that if less than 10% of the reads are retained after filtering, the threshold is considered exceeded, and the run is not assembled. | | `short_reads_low_reads_count_threshold` | The minimum number of reads required after filtering. If below, it flags a low read count, and the run is not assembled. 
| #### Assembled Runs diff --git a/nextflow.config b/nextflow.config index e89c6d5..52f1cd3 100644 --- a/nextflow.config +++ b/nextflow.config @@ -32,7 +32,7 @@ params { // QC FILTERING // Short reads QC filtering options - short_reads_filter_ratio_threshold = 0.9 + short_reads_filter_ratio_threshold = 0.1 short_reads_low_reads_count_threshold = 1000 // Long reads options diff --git a/workflows/short_reads_assembler.nf b/workflows/short_reads_assembler.nf index f159e84..e38a676 100644 --- a/workflows/short_reads_assembler.nf +++ b/workflows/short_reads_assembler.nf @@ -94,7 +94,7 @@ workflow SHORT_READS_ASSEMBLER { /******************************************/ /* Reads that fail the following rules: */ - /* - Reads discarded by fastp > 90% (default value) */ + /* - Reads kept by fastp < 10% (default value) */ /* - Less than 1k reads */ /******************************************/ extended_qc = SHORT_READS_QC.out.fastp_json.map { meta, json -> { From 8aa06e74108adaad241fffb9dfe1e5dadba12ef8 Mon Sep 17 00:00:00 2001 From: jmattock5 <80533767+jmattock5@users.noreply.github.com> Date: Wed, 20 Nov 2024 16:19:11 +0000 Subject: [PATCH 22/33] Update nextflow_schema.json Changed ratio threshold default to 0.1 and amended description --- nextflow_schema.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index da9c14b..f3e69d7 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -194,8 +194,8 @@ "properties": { "short_reads_filter_ratio_threshold": { "type": "number", - "description": "The maximum fraction of reads that are allowed to be filtered out. If exceeded, it flags excessive filtering. The default value is 0.9, meaning that if more than 90% of the reads are filtered out, the threshold is considered exceeded, and the run is not assembled.", - "default": 0.9, + "description": "The maximum fraction of reads that are allowed to be filtered out. If exceeded, it flags excessive filtering. The default value is 0.1, meaning that if less than 10% of the reads are retained after filtering, the threshold is considered exceeded, and the run is not assembled.", + "default": 0.1, "minimum": 0.0, "maximum": 1.0 }, From 703c0c720115192664dcfd4286bae907b10e1e5b Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Wed, 20 Nov 2024 17:29:03 +0000 Subject: [PATCH 23/33] WIP - EBI FIRE S3 module for embargoed data --- bin/s3fire_downloader.py | 141 ++++++++++++++++++++++++++++ modules/local/download_from_fire.nf | 49 ++++++++++ workflows/miassembler.nf | 24 ++++- 3 files changed, 212 insertions(+), 2 deletions(-) create mode 100755 bin/s3fire_downloader.py create mode 100644 modules/local/download_from_fire.nf diff --git a/bin/s3fire_downloader.py b/bin/s3fire_downloader.py new file mode 100755 index 0000000..09a7282 --- /dev/null +++ b/bin/s3fire_downloader.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +import argparse +import logging +from typing import Optional, Tuple, List +import os + +import boto3 +from botocore import UNSIGNED +from botocore.config import Config + + +FIRE_ENDPOINT: str = "https://hl.fire.sdo.ebi.ac.uk" +PUBLIC_BUCKET: str = "era-public" +PRIVATE_BUCKET: str = "era-private" + + +logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") +logger = logging.getLogger(__name__) + + +def transform_ftp_to_s3(ftp_path: str) -> Tuple[str, str]: + """ + Transforms an FTP path to a FIRE S3 object key, it also returns if it's public or private. 
+ + :param ftp_path: The FTP path of the file to be transformed. + :type ftp_path: str + :return: A tuple containing the S3 object key and the corresponding bucket name. + :rtype: Tuple[str, str] + :raises ValueError: If the FTP path does not match the expected format. + """ + if ftp_path.startswith("ftp.sra.ebi.ac.uk/vol1/"): + s3_key = ftp_path.replace("ftp.sra.ebi.ac.uk/vol1/", "") + logger.info(f"Detected a public file for FTP path: {ftp_path}") + return s3_key, PUBLIC_BUCKET + elif ftp_path.startswith("ftp.dcc-private.ebi.ac.uk/vol1/"): + s3_key = ftp_path.replace("ftp.dcc-private.ebi.ac.uk/vol1/", "") + logger.info(f"Detected a private file for FTP path: {ftp_path}") + return s3_key, PRIVATE_BUCKET + else: + raise ValueError( + f"Invalid FTP path: {ftp_path}. Must start with 'ftp.sra.ebi.ac.uk/vol1/' or 'ftp.dcc-private.ebi.ac.uk/vol1/'." + ) + + +def download_file_from_fire( + s3_key: str, bucket: str, outdir: str, access_key: Optional[str] = None, secret_key: Optional[str] = None +) -> None: + """ + Downloads an individual file from FIRE S3 using its object key. + + :param s3_key: The S3 object key of the file to download. + :type s3_key: str + :param bucket: The name of the S3 bucket. + :type bucket: str + :param outdir: The local directory to save the downloaded file. + :type outdir: str + :param access_key: The access key for private S3 buckets (optional for public files). + :type access_key: Optional[str] + :param secret_key: The secret key for private S3 buckets (optional for public files). + :type secret_key: Optional[str] + :return: None + :rtype: None + :raises ValueError: If credentials are missing for private files. + :raises Exception: For other download errors. + """ + s3_args = {"endpoint_url": FIRE_ENDPOINT} + if bucket == PRIVATE_BUCKET: + if not access_key or not secret_key: + logger.error("Missing credentials for private files.") + raise ValueError("Access key and secret key are required for private files.") + s3_args.update( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + ) + else: + # Public bucket configuration with unsigned requests + s3_args.update({"config": Config(signature_version=UNSIGNED)}) + + s3 = boto3.client("s3", **s3_args) + + os.makedirs(outdir, exist_ok=True) + local_file_path = os.path.join(outdir, os.path.basename(s3_key)) + + try: + logger.info(f"Downloading {s3_key} from S3 bucket {bucket} to {local_file_path}...") + s3.download_file(bucket, s3_key, local_file_path) + logger.info(f"File successfully downloaded to: {local_file_path}") + except Exception as e: + logger.error(f"Error downloading file from S3: {e}") + raise + + +def download_files(ftp_paths: List[str], outdir: str, access_key: Optional[str], secret_key: Optional[str]) -> None: + """ + Downloads multiple files from their FTP paths. + + :param ftp_paths: List of FTP paths to download. + :type ftp_paths: List[str] + :param outdir: Directory to save the downloaded files. + :type outdir: str + :param access_key: Access key for private files. + :type access_key: Optional[str] + :param secret_key: Secret key for private files. 
+ :type secret_key: Optional[str] + """ + for ftp_path in ftp_paths: + try: + s3_key, bucket = transform_ftp_to_s3(ftp_path) + download_file_from_fire(s3_key, bucket, outdir, access_key, secret_key) + except ValueError as ve: + logger.error(f"Skipping download due to error: {ve}") + except Exception as e: + logger.error(f"Unexpected error while downloading {ftp_path}: {e}") + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Download multiple files from FTP paths via FIRE S3 (supports public and private files)." + ) + parser.add_argument( + "--ftp_paths", + nargs="+", + required=True, + help="Space-separated list of FTP paths to download (e.g., ftp.sra.ebi.ac.uk/vol1/.../file1 ftp.sra.ebi.ac.uk/vol1/.../file2).", + ) + parser.add_argument("--outdir", required=True, help="Local destination directory for the downloaded files.") + parser.add_argument("--access-key", required=False, help="S3 access key (required for private files).") + parser.add_argument("--secret-key", required=False, help="S3 secret key (required for private files).") + args = parser.parse_args() + + try: + logger.info("Starting the file download process...") + download_files(args.ftp_paths, args.outdir, args.access_key, args.secret_key) + logger.info("All files have been processed.") + except Exception as e: + logger.error(f"Unexpected error: {e}") + + +if __name__ == "__main__": + main() diff --git a/modules/local/download_from_fire.nf b/modules/local/download_from_fire.nf new file mode 100644 index 0000000..a6b81e9 --- /dev/null +++ b/modules/local/download_from_fire.nf @@ -0,0 +1,49 @@ +process DOWNLOAD_FROM_FIRE { + + secret 'FIRE_ACCESS_KEY' + secret 'FIRE_SECRET_KEY' + + tag "${meta.id}" + + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'oras://community.wave.seqera.io/library/boto3:1.35.37--a82b4d378d332259' : + 'community.wave.seqera.io/library/pip_boto3:501beb4bd409b3e1' }" + + input: + tuple val(meta), val(input_reads) + + output: + tuple val(meta), path("fastq_files/*fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + script: + """ + s3fire_downloader.py \\ + --access-key \${FIRE_ACCESS_KEY} \\ + --secret-key \${FIRE_SECRET_KEY} \\ + --ftp_paths ${input_reads.join(" ")} \\ + --outdir fastq_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | sed 's/Python //g') + boto: \$(python -c "import boto3; print(boto3.__version__)") + END_VERSIONS + """ + + stub: + """ + mkdir -p fastq_files + touch fastq_files/${meta.id}_1.fastq + touch fastq_files/${meta.id}_2.fastq + gzip fastq_files/* + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | sed 's/Python //g') + boto: \$(python -c "import boto3; print(boto3.__version__)") + END_VERSIONS + """ +} diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index 7f029bf..eb8a7dd 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -68,6 +68,7 @@ include { LONG_READS_ASSEMBLER } from '../workflows/long_reads_assembler' */ include { FETCHTOOL_READS } from '../modules/local/fetchtool_reads' +include { DOWNLOAD_FROM_FIRE } from '../modules/local/download_from_fire.nf' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -173,7 +174,7 @@ workflow MIASSEMBLER { } } - classified_reads.branch { meta, reads -> + classified_reads.branch { meta, _reads -> short_reads: meta.short_reads long_reads: meta.long_reads }.set { reads_to_assemble } @@ -182,8 +183,27 @@ workflow MIASSEMBLER { /* Assemble short reads and long reads */ /***************************************/ + def short_reads_to_assemble = channel.empty() + + // If running for a private study on EBI infrastructure // + if ( params.private_study ) { + /* + * For private studies we need to bypass Nextflow S3 integration until https://github.com/nextflow-io/nextflow/issues/4873 is fixed + * The EBI parameter is needed as this only works on EBI network, FIRE is not accessible otherwise + */ + DOWNLOAD_FROM_FIRE( + reads_to_assemble.short_reads + ) + + short_reads_to_assemble = DOWNLOAD_FROM_FIRE.out.reads + + } else { + // Carry on + short_reads_to_assemble = reads_to_assemble.short_reads + } + SHORT_READS_ASSEMBLER( - reads_to_assemble.short_reads + short_reads_to_assemble ) ch_versions = ch_versions.mix( SHORT_READS_ASSEMBLER.out.versions ) From 2e51c9202131678bf37a0f51d44e633aea2b9f32 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Wed, 20 Nov 2024 23:20:17 +0000 Subject: [PATCH 24/33] Fire - private study - test case works on my laptop :) --- .nf-core.yml | 2 +- README.md | 17 +- modules.json | 7 +- modules/local/download_from_fire.nf | 4 +- modules/nf-core/fastqc/environment.yml | 2 - modules/nf-core/fastqc/fastqc.diff | 27 -- modules/nf-core/fastqc/main.nf | 18 +- modules/nf-core/fastqc/meta.yml | 58 +-- modules/nf-core/fastqc/tests/main.nf.test | 368 +++++++++++++---- .../nf-core/fastqc/tests/main.nf.test.snap | 386 +++++++++++++++++- modules/nf-core/multiqc/environment.yml | 4 +- modules/nf-core/multiqc/main.nf | 20 +- modules/nf-core/multiqc/meta.yml | 78 ++-- modules/nf-core/multiqc/tests/main.nf.test | 8 + .../nf-core/multiqc/tests/main.nf.test.snap | 24 +- modules/nf-core/multiqc/tests/nextflow.config | 5 + nextflow_schema.json | 8 +- 
workflows/miassembler.nf | 331 ++++++++------- workflows/short_reads_assembler.nf | 141 +++---- 19 files changed, 1063 insertions(+), 445 deletions(-) delete mode 100644 modules/nf-core/fastqc/fastqc.diff create mode 100644 modules/nf-core/multiqc/tests/nextflow.config diff --git a/.nf-core.yml b/.nf-core.yml index b48640b..4db2825 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -38,7 +38,7 @@ lint: - .gitignore multiqc_config: - report_comment - nextflow_config: False + nextflow_config: - params.input - params.validationSchemaIgnoreParams - params.custom_config_version diff --git a/README.md b/README.md index 2d15f73..812293b 100644 --- a/README.md +++ b/README.md @@ -28,14 +28,14 @@ Typical pipeline command: Input/output options --study_accession [string] The ENA Study secondary accession --reads_accession [string] The ENA Run primary accession - --private_study [boolean] To use if the ENA study is private + --private_study [boolean] To use if the ENA study is private, *this feature only works on EBI infrastructure at the moment* --samplesheet [string] Path to comma-separated file containing information about the raw reads with the prefix to be used. --assembler [string] The short reads assembler (accepted: spades, metaspades, megahit) --single_end [boolean] Force the single_end value for the study / reads --library_strategy [string] Force the library_strategy value for the study / reads (accepted: metagenomic, metatranscriptomic, genomic, transcriptomic, other) --library_layout [string] Force the library_layout value for the study / reads (accepted: single, paired) - --platform [string] Force the sequencing_platform value for the study / reads + --platform [string] Force the sequencing_platform value for the study / reads --spades_version [string] null [default: 3.15.5] --megahit_version [string] null [default: 1.2.9] --flye_version [string] null [default: 2.9] @@ -45,7 +45,7 @@ Input/output options --blast_reference_genomes_folder [string] The folder with the reference genome blast indexes, defaults to the Microbiome Informatics internal directory. --bwamem2_reference_genomes_folder [string] The folder with the reference genome bwa-mem2 indexes, defaults to the Microbiome Informatics internal - + --reference_genomes_folder [string] The folder with reference genomes, defaults to the Microbiome Informatics internal directory. --remove_human_phix [boolean] Remove human and phiX reads pre assembly, and contigs matching those genomes. [default: true] @@ -64,7 +64,6 @@ Generic options --multiqc_methods_description [string] Custom MultiQC yaml file containing HTML including a methods description. ``` - Example: ```bash @@ -78,6 +77,7 @@ nextflow run ebi-metagenomics/miassembler \ ``` ### Required DBs: + - `--reference_genome`: reference genome in FASTA format - `--blast_reference_genomes_folder`: mandatory **human_phiX** is provided on [FTP](https://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/references/) - `--bwamem2_reference_genomes_folder`: mandatory **human_phiX** is provided on [FTP](https://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/references/) @@ -85,7 +85,9 @@ nextflow run ebi-metagenomics/miassembler \ Blast and bwa-mem2 reference databases can be generated for any reference genome to polish input sequences with. 
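For example, for a custom reference genome (hypothetical file name `my_genome.fa`; all folder and index names below are illustrative), the two index folders could be prepared roughly as follows and then passed to the pipeline via `--bwamem2_reference_genomes_folder` and `--blast_reference_genomes_folder`. The individual commands are detailed in the next two subsections:

```
# Hypothetical layout: one folder per index type
mkdir -p my_bwamem2_indexes my_blast_indexes

# bwa-mem2 index files, written with a chosen prefix inside the folder
bwa-mem2 index -p my_bwamem2_indexes/my_genome my_genome.fa

# BLAST nucleotide database inside its own folder
makeblastdb -in my_genome.fa -dbtype nucl -out my_blast_indexes/my_genome
```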
#### BWA-MEM2 + As explained in [bwa-mem2's README](https://github.com/bwa-mem2/bwa-mem2?tab=readme-ov-file#getting-started): + ``` # Use precompiled binaries (recommended) curl -L https://github.com/bwa-mem2/bwa-mem2/releases/download/v2.2.1/bwa-mem2-2.2.1_x64-linux.tar.bz2 \ @@ -98,6 +100,7 @@ bwa-mem2-2.2.1_x64-linux/bwa-mem2 index ref.fa This will generate multiple index files in a folder. The folder containing them is the one to use as `bwamem2_reference_genomes_folder`. #### BLAST + ``` makeblastdb -in -dbtype nucl -out ``` @@ -219,10 +222,10 @@ SRR6180434,short_reads_filter_ratio_threshold_exceeded ##### Runs exclusion messages -| Exclusion Message | Description | -| --------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Exclusion Message | Description | +| --------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | | `short_reads_filter_ratio_threshold_exceeded` | The maximum fraction of reads that are allowed to be filtered out. If exceeded, it flags excessive filtering. The default value is 0.9, meaning that if more than 90% of the reads are filtered out, the threshold is considered exceeded, and the run is not assembled. | -| `short_reads_low_reads_count_threshold` | The minimum number of reads required after filtering. If below, it flags a low read count, and the run is not assembled. | +| `short_reads_low_reads_count_threshold` | The minimum number of reads required after filtering. If below, it flags a low read count, and the run is not assembled. 
| #### Assembled Runs diff --git a/modules.json b/modules.json index f510e07..c34d7cc 100644 --- a/modules.json +++ b/modules.json @@ -50,9 +50,8 @@ }, "fastqc": { "branch": "master", - "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", - "installed_by": ["modules"], - "patch": "modules/nf-core/fastqc/fastqc.diff" + "git_sha": "21f230b8cca43755bf73470e6fd0290832a98aef", + "installed_by": ["modules"] }, "flye": { "branch": "master", @@ -82,7 +81,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "314d742bdb357a1df5f9b88427b3b6ac78aa33f7", + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", "installed_by": ["modules"] }, "porechop/abi": { diff --git a/modules/local/download_from_fire.nf b/modules/local/download_from_fire.nf index a6b81e9..7226f72 100644 --- a/modules/local/download_from_fire.nf +++ b/modules/local/download_from_fire.nf @@ -29,7 +29,7 @@ process DOWNLOAD_FROM_FIRE { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version 2>&1 | sed 's/Python //g') - boto: \$(python -c "import boto3; print(boto3.__version__)") + boto3: \$(python -c "import boto3; print(boto3.__version__)") END_VERSIONS """ @@ -43,7 +43,7 @@ process DOWNLOAD_FROM_FIRE { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version 2>&1 | sed 's/Python //g') - boto: \$(python -c "import boto3; print(boto3.__version__)") + boto3: \$(python -c "import boto3; print(boto3.__version__)") END_VERSIONS """ } diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index 1787b38..691d4c7 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,7 +1,5 @@ -name: fastqc channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/fastqc.diff b/modules/nf-core/fastqc/fastqc.diff deleted file mode 100644 index 0dd7d4d..0000000 --- a/modules/nf-core/fastqc/fastqc.diff +++ /dev/null @@ -1,27 +0,0 @@ -Changes in module 'nf-core/fastqc' ---- modules/nf-core/fastqc/main.nf -+++ modules/nf-core/fastqc/main.nf -@@ -21,19 +21,12 @@ - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" -- // Make list of old name and new name pairs to use for renaming in the bash while loop -- def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } -- def rename_to = old_new_pairs*.join(' ').join(' ') -- def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') -+ - """ -- printf "%s %s\\n" $rename_to | while read old_name new_name; do -- [ -f "\${new_name}" ] || ln -s \$old_name \$new_name -- done -- - fastqc \\ - $args \\ - --threads $task.cpus \\ -- $renamed_files -+ $reads - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - -************************************************************ diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 0a11817..d8989f4 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -21,12 +21,28 @@ process FASTQC { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? 
[[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + fastqc \\ $args \\ --threads $task.cpus \\ - $reads + --memory $fastqc_memory \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index ee5507e..2b2e62b 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -11,40 +11,50 @@ tools: FastQC gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other overrepresented sequences. homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ licence: ["GPL-2.0-only"] + identifier: biotools:fastqc input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index b9e8f92..e9d79a0 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,107 +3,307 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
<div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { process { """ - input[0] = [ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, 
+ { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 interleaved [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 multiple [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 custom_prefix - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) """ } } then { assertAll ( - { assert process.success }, - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
<div id="header_filename">Mon 2 Oct 2023<br/>test.gz</div>
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + { assert process.success }, + { assert snapshot(process.out).match() } ) } } -// TODO -// // -// // Test with paired-end data -// // -// workflow test_fastqc_paired_end { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with interleaved data -// // -// workflow test_fastqc_interleaved { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with bam data -// // -// workflow test_fastqc_bam { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with multiple samples -// // -// workflow test_fastqc_multiple { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with custom prefix -// // -// workflow test_fastqc_custom_prefix { -// input = [ -// [ id:'mysample', single_end:true ], // meta map -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32c..d5db309 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,392 @@ { - "versions": { + "sarscov2 custom_prefix": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:16.374038" + }, + "sarscov2 single-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + 
"timestamp": "2024-07-22T11:02:24.993809" + }, + "sarscov2 custom_prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:03:10.93942" + }, + "sarscov2 interleaved [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:42.355718" + }, + "sarscov2 paired-end [bam]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:53.276274" + }, + "sarscov2 multiple [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:05.527626" + }, + "sarscov2 paired-end [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:31.188871" + }, + "sarscov2 paired-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:34.273566" + }, + "sarscov2 multiple [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:03:02.304411" + }, + "sarscov2 single-end [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + 
"nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:19.095607" + }, + "sarscov2 interleaved [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:44.640184" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:53.550742" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index ecb7dd7..6f5b867 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,7 +1,5 @@ -name: multiqc channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::multiqc=1.22.3 + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 0c81a3b..8a816ac 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,15 +3,17 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.22.3--pyhdfd78af_0' : - 'biocontainers/multiqc:1.22.3--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" input: - path(multiqc_base_files, stageAs: "?/*") - tuple val(meta), path(files, stageAs: "?/*") + path(multiqc_files, stageAs: "?/*") + tuple val(meta), path(pipeline_files, stageAs: "?/*") path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -24,16 +26,22 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' - def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? 
"--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml @@ -45,7 +53,7 @@ process MULTIQC { stub: """ mkdir multiqc_data - touch multiqc_plots + mkdir multiqc_plots touch multiqc_report.html cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 45a9bc3..b16c187 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,6 @@ name: multiqc -description: Aggregate results from bioinformatics analyses across many samples into a single report +description: Aggregate results from bioinformatics analyses across many samples into + a single report keywords: - QC - bioinformatics tools @@ -12,40 +13,59 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. - pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. 
+ pattern: "*.{tsv}" output: - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@abhi18av" - "@bunop" diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index f1c4242..33316a7 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -8,6 +8,8 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" + config "./nextflow.config" + test("sarscov2 single-end [fastqc]") { when { @@ -17,6 +19,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -41,6 +45,8 @@ nextflow_process { input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", checkIfExists: true)) input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -66,6 +72,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 0a4760e..2fcbb5f 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,bf3b209659477254bb8fa5a9405f9984" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-25T12:31:21.878452033" + "timestamp": "2024-10-02T17:51:46.317523" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,bf3b209659477254bb8fa5a9405f9984" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-25T12:32:02.322196503" + "timestamp": "2024-10-02T17:52:20.680978" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,bf3b209659477254bb8fa5a9405f9984" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-25T12:31:50.064227638" + "timestamp": "2024-10-02T17:52:09.185842" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 0000000..c537a6a --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/nextflow_schema.json b/nextflow_schema.json index da9c14b..590f635 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,9 +10,7 
@@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "outdir" - ], + "required": ["outdir"], "properties": { "samplesheet": { "type": "string", @@ -43,7 +41,7 @@ }, "private_study": { "type": "boolean", - "description": "To use if the ENA study is private" + "description": "To use if the ENA study is private, *this feature ony works on EBI infrastructure at the moment*" }, "assembler": { "type": "string", @@ -52,7 +50,7 @@ }, "long_reads_assembler_config": { "type": "string", - "description": "Configuration to use flye with. Pick from nano-raw, nano-corr, nano-hq, pacbio-raw, pacbio-corr, pacbio-hifi", + "description": "Configuration to use flye with. Pick from nano-raw, nano-corr, nano-hq, pacbio-raw, pacbio-corr, pacbio-hifi", "default": "" }, "single_end": { diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index eb8a7dd..2c61bac 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -1,39 +1,16 @@ -// Groovy // -import groovy.json.JsonSlurper - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PRINT PARAMS SUMMARY ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsSummaryLog; paramsSummaryMap; samplesheetToList } from 'plugin/nf-schema' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -validateParameters() - -if (params.help) { - log.info paramsHelp("nextflow run ebi-metagenomics/miassembler --help") - exit 0 -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? file( params.multiqc_config, checkIfExists: true ) : [] -ch_multiqc_logo = params.multiqc_logo ? file( params.multiqc_logo, checkIfExists: true ) : file("$projectDir/assets/mgnify_logo.png", checkIfExists: true) -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - +include { + validateParameters ; + paramsSummaryLog ; + paramsSummaryMap ; + samplesheetToList ; + paramsHelp +} from 'plugin/nf-schema' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -45,9 +22,9 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
fil // MODULE: Installed directly from nf-core/modules // -include { MULTIQC as MULTIQC_STUDY } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_RUN } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' +include { MULTIQC as MULTIQC_STUDY } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_RUN } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -58,8 +35,8 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsof // // WORKFLOWS // -include { SHORT_READS_ASSEMBLER } from '../workflows/short_reads_assembler' -include { LONG_READS_ASSEMBLER } from '../workflows/long_reads_assembler' +include { SHORT_READS_ASSEMBLER } from '../workflows/short_reads_assembler' +include { LONG_READS_ASSEMBLER } from '../workflows/long_reads_assembler' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -67,8 +44,7 @@ include { LONG_READS_ASSEMBLER } from '../workflows/long_reads_assembler' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { FETCHTOOL_READS } from '../modules/local/fetchtool_reads' -include { DOWNLOAD_FROM_FIRE } from '../modules/local/download_from_fire.nf' +include { FETCHTOOL_READS } from '../modules/local/fetchtool_reads' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -76,168 +52,189 @@ include { DOWNLOAD_FROM_FIRE } from '../modules/local/download_from_fire.nf' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow MIASSEMBLER { - ch_versions = Channel.empty() - fetch_tool_metadata = Channel.empty() + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + INIT + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def summary_params = paramsSummaryMap(workflow) + + // Print parameter summary log to screen + log.info(logo + paramsSummaryLog(workflow) + citation) + + validateParameters() + + if (params.help) { + log.info(paramsHelp("nextflow run ebi-metagenomics/miassembler --help")) + exit(0) + } + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + def ch_multiqc_config = file("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) + def ch_multiqc_custom_config = params.multiqc_config ? file(params.multiqc_config, checkIfExists: true) : [] + def ch_multiqc_logo = params.multiqc_logo ? file(params.multiqc_logo, checkIfExists: true) : file("${projectDir}/assets/mgnify_logo.png", checkIfExists: true) + def ch_multiqc_custom_methods_description = params.multiqc_methods_description ? 
file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) + + + def ch_versions = Channel.empty() + def fetch_tool_metadata = Channel.empty() + def fetch_reads_transformed = Channel.empty() - if ( params.samplesheet ) { + if (params.samplesheet) { - groupReads = { study_accession, reads_accession, fq1, fq2, library_layout, library_strategy, platform, assembler, assembly_memory, assembler_config -> + def groupReads = { study_accession, reads_accession, fq1, fq2, library_layout, library_strategy, platform, assembler, assembly_memory, assembler_config -> if (fq2 == []) { - return tuple(["id": reads_accession, - "study_accession": study_accession, - "library_layout": library_layout, - "library_strategy": library_strategy, - "platform": params.platform ?: platform, - "single_end": true, - "assembler": assembler ?: params.assembler, - "assembly_memory": assembly_memory ?: params.assembly_memory, - "assembler_config": params.long_reads_assembler_config - ], - [fq1] - ) - } else { - return tuple(["id": reads_accession, - "study_accession": study_accession, - "library_strategy": library_strategy, - "library_layout": library_layout, - "single_end": false, - "assembler": assembler ?: params.assembler, - "assembly_memory": assembly_memory ?: params.assembly_memory, - "assembler_config": params.long_reads_assembler_config, - "platform": params.platform ?: platform - ], - [fq1, fq2]) + return tuple( + [ + "id": reads_accession, + "study_accession": study_accession, + "library_layout": library_layout, + "library_strategy": library_strategy, + "platform": params.platform ?: platform, + "single_end": true, + "assembler": assembler ?: params.assembler, + "assembly_memory": assembly_memory ?: params.assembly_memory, + "assembler_config": assembler_config ?: params.long_reads_assembler_config + ], + [fq1] + ) + } + else { + return tuple( + [ + "id": reads_accession, + "study_accession": study_accession, + "library_strategy": library_strategy, + "library_layout": library_layout, + "single_end": false, + "assembler": assembler ?: params.assembler, + "assembly_memory": assembly_memory ?: params.assembly_memory, + "assembler_config": assembler_config ?: params.long_reads_assembler_config, + "platform": params.platform ?: platform + ], + [fq1, fq2] + ) } } - samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json")) + def samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json")) // [ study, sample, read1, [read2], library_layout, library_strategy, platform, assembly_memory] fetch_reads_transformed = samplesheet.map(groupReads) - - } else { + } + else { // TODO: remove when the fetch tools get's published on bioconda - fetch_tool_config = file("${projectDir}/assets/fetch_tool_anonymous.json", checkIfExists: true) + def fetch_tool_config = file("${projectDir}/assets/fetch_tool_anonymous.json", checkIfExists: true) - if ( params.private_study ) { + if (params.private_study) { fetch_tool_config = file("${projectDir}/assets/fetch_tool_credentials.json", checkIfExists: true) } FETCHTOOL_READS( - [ [id: params.reads_accession], params.study_accession, params.reads_accession ], + [[id: params.reads_accession], params.study_accession, params.reads_accession], fetch_tool_config ) ch_versions = ch_versions.mix(FETCHTOOL_READS.out.versions) // Push the library strategy into the meta of the reads, this is to make it easier to handle downstream - 
fetch_reads_transformed = FETCHTOOL_READS.out.reads.map { meta, reads, library_strategy, library_layout, platform -> { - [ meta + [ - // -- The metadata will be overriden by the parameters -- // - "assembler": params.assembler, - "assembler_config": params.long_reads_assembler_config, - "assembly_memory": params.assembly_memory, - "library_strategy": params.library_strategy ?: library_strategy, - "library_layout": params.library_layout ?: library_layout, - "single_end": params.single_end ?: library_layout == "single", - "platform": params.platform ?: platform - ], reads ] + fetch_reads_transformed = FETCHTOOL_READS.out.reads.map { meta, reads, library_strategy, library_layout, platform -> + { + [ + meta + [ + "assembler": params.assembler, + "assembler_config": params.long_reads_assembler_config, + "assembly_memory": params.assembly_memory, + "library_strategy": params.library_strategy ?: library_strategy, + "library_layout": params.library_layout ?: library_layout, + "single_end": params.single_end ?: library_layout == "single", + "platform": params.platform ?: platform + ], + reads + ] } } // Metadata for MultiQC - fetch_tool_metadata = FETCHTOOL_READS.out.metadata_tsv.map { it[1] }.collectFile( - name: 'fetch_tool_mqc.tsv', - newLine: true, - keepHeader: true, - skip: 1 - ) + fetch_tool_metadata = FETCHTOOL_READS.out.metadata_tsv + .map { it[1] } + .collectFile( + name: 'fetch_tool_mqc.tsv', + newLine: true, + keepHeader: true, + skip: 1 + ) } /********************************************/ /* Selecting the assembly pipeline flavour */ /*******************************************/ - - classified_reads = fetch_reads_transformed.map { meta, reads -> + def classified_reads = fetch_reads_transformed.map { meta, reads -> // Long reads // - if ( ["ont", "pacbio"].contains( meta.platform ) ) { - return [ meta + [long_reads: true], reads] - // Short reads // - } else { - return [ meta + [short_reads: true], reads] + if (["ont", "pacbio"].contains(meta.platform)) { + return [meta + [long_reads: true], reads] + } + else { + return [meta + [short_reads: true], reads] } } - classified_reads.branch { meta, _reads -> - short_reads: meta.short_reads - long_reads: meta.long_reads - }.set { reads_to_assemble } + classified_reads + .branch { meta, _reads -> + short_reads: meta.short_reads + long_reads: meta.long_reads + } + .set { reads_to_assemble } /***************************************/ /* Assemble short reads and long reads */ /***************************************/ - - def short_reads_to_assemble = channel.empty() - - // If running for a private study on EBI infrastructure // - if ( params.private_study ) { - /* - * For private studies we need to bypass Nextflow S3 integration until https://github.com/nextflow-io/nextflow/issues/4873 is fixed - * The EBI parameter is needed as this only works on EBI network, FIRE is not accessible otherwise - */ - DOWNLOAD_FROM_FIRE( - reads_to_assemble.short_reads - ) - - short_reads_to_assemble = DOWNLOAD_FROM_FIRE.out.reads - - } else { - // Carry on - short_reads_to_assemble = reads_to_assemble.short_reads - } - SHORT_READS_ASSEMBLER( - short_reads_to_assemble + reads_to_assemble.short_reads ) - ch_versions = ch_versions.mix( SHORT_READS_ASSEMBLER.out.versions ) + ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLER.out.versions) LONG_READS_ASSEMBLER( reads_to_assemble.long_reads ) - ch_versions = ch_versions.mix( LONG_READS_ASSEMBLER.out.versions ) + ch_versions = ch_versions.mix(LONG_READS_ASSEMBLER.out.versions) - CUSTOM_DUMPSOFTWAREVERSIONS ( + 
CUSTOM_DUMPSOFTWAREVERSIONS( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) // // MODULE: MultiQC // - workflow_summary = WorkflowMiassembler.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) + def workflow_summary = WorkflowMiassembler.paramsSummaryMultiqc(workflow, summary_params) + def ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowMiassembler.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) + def methods_description = WorkflowMiassembler.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + def ch_methods_description = Channel.value(methods_description) - ch_multiqc_base_files = Channel.empty() - ch_multiqc_base_files = ch_multiqc_base_files.mix( CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() ) - ch_multiqc_base_files = ch_multiqc_base_files.mix( ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') ) - ch_multiqc_base_files = ch_multiqc_base_files.mix( ch_methods_description.collectFile(name: 'methods_description_mqc.yaml') ) + def ch_multiqc_base_files = Channel.empty() + ch_multiqc_base_files = ch_multiqc_base_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + ch_multiqc_base_files = ch_multiqc_base_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_base_files = ch_multiqc_base_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) /**************************************/ /* MultiQC report for the whole study */ /**************************************/ def meta_by_study = { meta, result_artifact -> - [ meta.subMap("study_accession"), result_artifact ] + [meta.subMap("study_accession"), result_artifact] } // Helper method for the MultiQC aggregation by study and runs // @@ -260,22 +257,25 @@ workflow MIASSEMBLER { } } - ch_multiqc_study_tools_files = Channel.empty() + def ch_multiqc_study_tools_files = Channel.empty() - study_multiqc_files = SHORT_READS_ASSEMBLER.out.fastqc_before_zip.map(meta_by_study) - .join( SHORT_READS_ASSEMBLER.out.fastqc_after_zip.map(meta_by_study) ) - .join( SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats.map(meta_by_study), remainder: true ) // the assembly step could fail - .join( SHORT_READS_ASSEMBLER.out.quast_results.map(meta_by_study), remainder: true ) // the assembly step could fail + def study_multiqc_files = SHORT_READS_ASSEMBLER.out.fastqc_before_zip.map(meta_by_study) \ + .join(SHORT_READS_ASSEMBLER.out.fastqc_after_zip.map(meta_by_study)) \ + .join(SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats.map(meta_by_study), remainder: true) \ + .join(SHORT_READS_ASSEMBLER.out.quast_results.map(meta_by_study), remainder: true) - ch_multiqc_study_tools_files = study_multiqc_files.flatMap( combineFiles ).groupTuple() + ch_multiqc_study_tools_files = study_multiqc_files.flatMap(combineFiles).groupTuple() // TODO: add the fetch tool log file - MULTIQC_STUDY ( + + MULTIQC_STUDY( ch_multiqc_base_files.collect(), ch_multiqc_study_tools_files, ch_multiqc_config, ch_multiqc_custom_config, - ch_multiqc_logo + ch_multiqc_logo, + [], + [] ) /**************************/ @@ -283,27 +283,28 @@ workflow MIASSEMBLER { /*************************/ def meta_by_run = { meta, result_artifact -> - [ meta.subMap("study_accession", "id", "assembler", "assembler_version"), result_artifact ] + [meta.subMap("study_accession", "id", 
"assembler", "assembler_version"), result_artifact] } - run_multiqc_files = SHORT_READS_ASSEMBLER.out.fastqc_before_zip.map(meta_by_run) - .join( SHORT_READS_ASSEMBLER.out.fastqc_after_zip.map(meta_by_run) ) - .join( SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats.map(meta_by_run), remainder: true ) // the assembly step could fail - .join( SHORT_READS_ASSEMBLER.out.quast_results.map(meta_by_run), remainder: true ) // the assembly step could fail + def run_multiqc_files = SHORT_READS_ASSEMBLER.out.fastqc_before_zip.map(meta_by_run).join(SHORT_READS_ASSEMBLER.out.fastqc_after_zip.map(meta_by_run)).join(SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats.map(meta_by_run), remainder: true).join(SHORT_READS_ASSEMBLER.out.quast_results.map(meta_by_run), remainder: true) + // the assembly step could fail // Filter out the non-assembled runs // - ch_multiqc_run_tools_files = run_multiqc_files.filter { meta, fastqc_before, fastqc_after, assembly_coverage, quast -> { + def ch_multiqc_run_tools_files = run_multiqc_files.filter { _meta, _fastqc_before, _fastqc_after, assembly_coverage, quast -> + { return assembly_coverage != null && quast != null } - } .flatMap( combineFiles ).groupTuple() + }.flatMap(combineFiles).groupTuple() // TODO: add the fetch tool log file - MULTIQC_RUN ( + MULTIQC_RUN( ch_multiqc_base_files.collect(), ch_multiqc_run_tools_files, ch_multiqc_config, ch_multiqc_custom_config, - ch_multiqc_logo + ch_multiqc_logo, + [], + [] ) /*****************************/ @@ -313,30 +314,26 @@ workflow MIASSEMBLER { // TODO: we need to add LR end-of-run reports // Short reads asssembled runs // - SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats.map { - meta, _ -> { - return "${meta.id},${meta.assembler},${meta.assembler_version}" + SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats + .map { meta, __ -> + { + return "${meta.id},${meta.assembler},${meta.assembler_version}" + } } - }.collectFile(name: "assembled_runs.csv", storeDir: "${params.outdir}", newLine: true, cache: false) + .collectFile(name: "assembled_runs.csv", storeDir: "${params.outdir}", newLine: true, cache: false) // Short reads QC failed // - short_reads_qc_failed_entries = SHORT_READS_ASSEMBLER.out.qc_failed.map { - meta, _, extended_meta -> { - if ( extended_meta.low_reads_count ) { + def short_reads_qc_failed_entries = SHORT_READS_ASSEMBLER.out.qc_failed.map { meta, __, extended_meta -> + { + if (extended_meta.low_reads_count) { return "${meta.id},low_reads_count" } - if ( extended_meta.filter_ratio_threshold_exceeded ) { + if (extended_meta.filter_ratio_threshold_exceeded) { return "${meta.id},filter_ratio_threshold_exceeded" } - error "Unexpected. meta: ${meta}, extended_meta: ${extended_meta}" + error("Unexpected. 
meta: ${meta}, extended_meta: ${extended_meta}") } } short_reads_qc_failed_entries.collectFile(name: "qc_failed_runs.csv", storeDir: "${params.outdir}", newLine: true, cache: false) } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/workflows/short_reads_assembler.nf b/workflows/short_reads_assembler.nf index f159e84..1aca4a5 100644 --- a/workflows/short_reads_assembler.nf +++ b/workflows/short_reads_assembler.nf @@ -1,16 +1,3 @@ -import groovy.json.JsonSlurper - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? file( params.multiqc_config, checkIfExists: true ) : [] -ch_multiqc_logo = params.multiqc_logo ? file( params.multiqc_logo, checkIfExists: true ) : file("$projectDir/assets/mgnify_logo.png", checkIfExists: true) -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ IMPORT LOCAL MODULES/SUBWORKFLOWS @@ -21,9 +8,11 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { SHORT_READS_QC } from '../subworkflows/local/short_reads_qc' -include { SHORT_READS_ASSEMBLY_QC } from '../subworkflows/local/short_reads_assembly_qc' -include { SHORT_READS_ASSEMBLY_COVERAGE } from '../subworkflows/local/short_reads_assembly_coverage' +include { DOWNLOAD_FROM_FIRE } from '../modules/local/download_from_fire.nf' + +include { SHORT_READS_QC } from '../subworkflows/local/short_reads_qc' +include { SHORT_READS_ASSEMBLY_QC } from '../subworkflows/local/short_reads_assembly_qc' +include { SHORT_READS_ASSEMBLY_COVERAGE } from '../subworkflows/local/short_reads_assembly_coverage' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -34,11 +23,11 @@ include { SHORT_READS_ASSEMBLY_COVERAGE } from '../subworkflows/local/short_rea // // MODULE: Installed directly from nf-core/modules // -include { FASTQC as FASTQC_BEFORE } from '../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_AFTER } from '../modules/nf-core/fastqc/main' -include { SPADES } from '../modules/nf-core/spades/main' -include { MEGAHIT } from '../modules/nf-core/megahit/main' -include { QUAST } from '../modules/nf-core/quast/main' +include { FASTQC as FASTQC_BEFORE } from '../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_AFTER } from '../modules/nf-core/fastqc/main' +include { SPADES } from '../modules/nf-core/spades/main' +include { MEGAHIT } from '../modules/nf-core/megahit/main' +include { QUAST } from '../modules/nf-core/quast/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -47,13 +36,28 @@ include { QUAST } from '../modules/nf-core/quast/main' */ workflow SHORT_READS_ASSEMBLER { - take: - reads // tuple(meta), path(reads) + input_reads // tuple(meta), path(reads) main: - ch_versions = Channel.empty() + def ch_versions 
= Channel.empty() + def reads_to_assemble = input_reads + + // If running for a private study on EBI infrastructure // + if (params.private_study) { + /* + * For private studies we need to bypass Nextflow S3 integration until https://github.com/nextflow-io/nextflow/issues/4873 is fixed + * The EBI parameter is needed as this only works on EBI network, FIRE is not accessible otherwise + */ + DOWNLOAD_FROM_FIRE( + input_reads + ) + + ch_versions = ch_versions.mix(DOWNLOAD_FROM_FIRE.out.versions.first()) + + reads_to_assemble = DOWNLOAD_FROM_FIRE.out.reads + } /***************************/ /* Selecting the assembler */ @@ -65,19 +69,22 @@ workflow SHORT_READS_ASSEMBLER { - Paired-end reads are assembled with MetaSPAdes, unless specified otherwise - An error is raised if the assembler and read layout are incompatible (shouldn't happen...) */ - reads_by_assembler = reads.map { meta, reads -> - def selected_assembler = meta.assembler; - if ( selected_assembler == "megahit" || ( meta.single_end && selected_assembler == null ) ) { - return [ meta + [assembler: "megahit", assembler_version: params.megahit_version], reads] - } else if ( ["metaspades", "spades"].contains(selected_assembler) || ( !meta.single_end && selected_assembler == null ) ) { - def xspades_assembler = selected_assembler ?: "metaspades" // Default to "metaspades" if the user didn't select one - return [ meta + [assembler: xspades_assembler, assembler_version: params.spades_version], reads] - } else { - error "Incompatible assembler and/or reads layout. We can't assembly data that is. Reads - single end value: ${meta.single_end}." + def reads_by_assembler = reads_to_assemble.map { meta, reads -> + def selected_assembler = meta.assembler + if (selected_assembler == "megahit" || (meta.single_end && selected_assembler == null)) { + return [meta + [assembler: "megahit", assembler_version: params.megahit_version], reads] + } + else if (["metaspades", "spades"].contains(selected_assembler) || (!meta.single_end && selected_assembler == null)) { + def xspades_assembler = selected_assembler ?: "metaspades" + // Default to "metaspades" if the user didn't select one + return [meta + [assembler: xspades_assembler, assembler_version: params.spades_version], reads] + } + else { + error("Incompatible assembler and/or reads layout. We can't assembly data that is. 
Reads - single end value: ${meta.single_end}.") } } - FASTQC_BEFORE ( + FASTQC_BEFORE( reads_by_assembler ) ch_versions = ch_versions.mix(FASTQC_BEFORE.out.versions) @@ -88,7 +95,7 @@ workflow SHORT_READS_ASSEMBLER { ) ch_versions = ch_versions.mix(SHORT_READS_QC.out.versions) - FASTQC_AFTER ( + FASTQC_AFTER( SHORT_READS_QC.out.qc_reads ) @@ -97,43 +104,45 @@ workflow SHORT_READS_ASSEMBLER { /* - Reads discarded by fastp > 90% (default value) */ /* - Less than 1k reads */ /******************************************/ - extended_qc = SHORT_READS_QC.out.fastp_json.map { meta, json -> { - json_txt = new JsonSlurper().parseText(json.text) - bf_total_reads = json_txt?.summary?.before_filtering?.total_reads ?: 0; - af_total_reads = json_txt?.summary?.after_filtering?.total_reads ?: 0; - reads_qc_meta = [ + def extended_qc = SHORT_READS_QC.out.fastp_json.map { meta, json -> + { + def json_txt = new groovy.json.JsonSlurper().parseText(json.text) + def bf_total_reads = json_txt.summary.before_filtering.total_reads ?: 0 + def af_total_reads = json_txt.summary.after_filtering.total_reads ?: 0 + def reads_qc_meta = [ "low_reads_count": af_total_reads <= params.short_reads_low_reads_count_threshold, - "filter_ratio_threshold_exceeded": af_total_reads == 0 || ((af_total_reads / bf_total_reads) <= params.short_reads_filter_ratio_threshold ) + "filter_ratio_threshold_exceeded": af_total_reads == 0 || ((af_total_reads / bf_total_reads) <= params.short_reads_filter_ratio_threshold) ] return [meta, reads_qc_meta] } } - extended_reads_qc = SHORT_READS_QC.out.qc_reads.join( extended_qc ) + def extended_reads_qc = SHORT_READS_QC.out.qc_reads.join(extended_qc) - extended_reads_qc.branch { meta, reads, reads_qc_meta -> - // Filter out failed reads // - qc_failed: reads_qc_meta.low_reads_count || reads_qc_meta.filter_ratio_threshold_exceeded - megahit: meta.assembler == "megahit" - xspades: ["metaspades", "spades"].contains(meta.assembler) - }.set { qc_filtered_reads } + extended_reads_qc + .branch { meta, _reads, reads_qc_meta -> + qc_failed: reads_qc_meta.low_reads_count || reads_qc_meta.filter_ratio_threshold_exceeded + megahit: meta.assembler == "megahit" + xspades: ["metaspades", "spades"].contains(meta.assembler) + } + .set { qc_filtered_reads } /*********************/ /* Assembly */ /********************/ SPADES( - qc_filtered_reads.xspades.map { meta, reads, _ -> [meta, reads, [], []] }, - [], // yml input parameters, which we don't use - [] // hmm, not used + qc_filtered_reads.xspades.map { meta, reads, __ -> [meta, reads, [], []] }, + [], + [] ) ch_versions = ch_versions.mix(SPADES.out.versions) MEGAHIT( - qc_filtered_reads.megahit.map { meta, reads, _ -> [meta, reads] } + qc_filtered_reads.megahit.map { meta, reads, __ -> [meta, reads] } ) ch_versions = ch_versions.mix(MEGAHIT.out.versions) - - assembly = SPADES.out.contigs.mix( MEGAHIT.out.contigs ) + + assembly = SPADES.out.contigs.mix(MEGAHIT.out.contigs) // Clean the assembly contigs // SHORT_READS_ASSEMBLY_QC( @@ -144,7 +153,7 @@ workflow SHORT_READS_ASSEMBLER { // Coverage // SHORT_READS_ASSEMBLY_COVERAGE( - SHORT_READS_ASSEMBLY_QC.out.filtered_contigs.join( SHORT_READS_QC.out.qc_reads, remainder: false ) + SHORT_READS_ASSEMBLY_QC.out.filtered_contigs.join(SHORT_READS_QC.out.qc_reads, remainder: false) ) ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLY_COVERAGE.out.versions) @@ -153,23 +162,17 @@ workflow SHORT_READS_ASSEMBLER { /* The QUAST module was modified to run metaQUAST instead */ QUAST( SHORT_READS_ASSEMBLY_QC.out.filtered_contigs, - [ 
[], [] ], // reference - [ [], [] ] // gff + [[], []], + [[], []] ) ch_versions = ch_versions.mix(QUAST.out.versions) emit: - fastqc_before_zip = FASTQC_BEFORE.out.zip // tuple(meta) - qc_failed = qc_filtered_reads.qc_failed // tuple(meta) - fastqc_after_zip = FASTQC_AFTER.out.zip // tuple(meta) - assembly_coverage_samtools_idxstats = SHORT_READS_ASSEMBLY_COVERAGE.out.samtools_idxstats // tuple(meta) - quast_results = QUAST.out.results // tuple(meta) - versions = ch_versions + fastqc_before_zip = FASTQC_BEFORE.out.zip // tuple(meta) + qc_failed = qc_filtered_reads.qc_failed // tuple(meta) + fastqc_after_zip = FASTQC_AFTER.out.zip // tuple(meta) + assembly_coverage_samtools_idxstats = SHORT_READS_ASSEMBLY_COVERAGE.out.samtools_idxstats // tuple(meta) + quast_results = QUAST.out.results // tuple(meta) + versions = ch_versions } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ From f97e774a97967e442f3710887a0144aeebc8404d Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 21 Nov 2024 12:45:13 +0000 Subject: [PATCH 25/33] Add docs around private studies and one shallow test --- README.md | 15 ++++++++++--- tests/main.nf.test | 55 +++++++++++++++++++++++++++++++++++----------- 2 files changed, 54 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 812293b..2c4a906 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,6 @@ This pipeline is still in early development. It's mostly a direct port of the mi ## Usage -> [!WARNING] -> It only runs in EBI Codon cluster using Slurm ATM. - Pipeline help: ```bash @@ -150,6 +147,18 @@ PRJ1,ERR1,/path/to/reads/ERR1_1.fq.gz,/path/to/reads/ERR1_2.fq.gz,paired,metagen PRJ2,ERR2,/path/to/reads/ERR2.fq.gz,,single,genomic,megahit,32 ``` +### ENA Private Data + +The pipeline includes a module to download private data from ENA using the EMBL-EBI FIRE (File Replication) system. This system is restricted for use within the EMBL-EBI network and will not work unless connected to that network. + +If you have private data to assemble, you must provide the full path to the files on a system that Nextflow can access. + +#### Microbiome Informatics Team + +To process private data, the pipeline should be launched with the `--private_study` flag, and the samplesheet must include the private FTP (transfer services) paths. The `download_from_fire` module will be utilized to download the files. + +This module uses [Nextflow secrets](https://www.nextflow.io/docs/latest/secrets.html#how-it-works). Specifically, it requires the `FIRE_ACCESS_KEY` and `FIRE_SECRET_KEY` secrets to authenticate and download the files. 
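+
+For example, the secrets can be registered once with the Nextflow CLI and the pipeline then launched with the `--private_study` flag. The key values and samplesheet path below are placeholders; substitute your own credentials and files:
+
+```bash
+# Register the FIRE credentials as Nextflow secrets (kept outside the pipeline code and logs)
+nextflow secrets set FIRE_ACCESS_KEY "<your-fire-access-key>"
+nextflow secrets set FIRE_SECRET_KEY "<your-fire-secret-key>"
+
+# Launch the pipeline; the samplesheet must list the private FTP paths of the runs to assemble
+nextflow run ebi-metagenomics/miassembler \
+    --private_study \
+    --samplesheet /path/to/private_samplesheet.csv \
+    --outdir <OUTDIR>
+```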
+ ## Outputs The outputs of the pipeline are organized as follows: diff --git a/tests/main.nf.test b/tests/main.nf.test index 3dcfd64..fb6ab8f 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -37,7 +37,7 @@ nextflow_pipeline { short_reads_low_reads_count_threshold = 1000000 - samplesheet = "${projectDir}/tests/samplesheet/test.csv" + samplesheet = "${projectDir}/tests/samplesheet/test.csv" } } @@ -52,6 +52,8 @@ nextflow_pipeline { assert trace.succeeded().count{ task -> task.name.contains("MULTIQC_STUDY") } == 2 assert trace.succeeded().count{ task -> task.name.contains("SPADES") } == 0 assert trace.succeeded().count{ task -> task.name.contains("MEGAHIT") } == 0 + // This process should not have been called + assert trace.succeeded().count{ task -> task.name.contains("DOWNLOAD_FROM_FIRE") == 0} } } @@ -67,10 +69,10 @@ nextflow_pipeline { outdir = "tests/results" // Force the assembly - short_reads_filter_ratio_threshold = 0.1 + short_reads_filter_ratio_threshold = 0.1 - study_accession = "SRP115494" - reads_accession = "SRR6180434" + study_accession = "SRP115494" + reads_accession = "SRR6180434" } } @@ -91,14 +93,14 @@ nextflow_pipeline { when { params { - outdir = "tests/results" - assembler = "megahit" + outdir = "tests/results" + assembler = "megahit" // Force the assembly - short_reads_filter_ratio_threshold = 0.1 + short_reads_filter_ratio_threshold = 0.1 - study_accession = "SRP115494" - reads_accession = "SRR6180434" + study_accession = "SRP115494" + reads_accession = "SRR6180434" } } @@ -165,7 +167,7 @@ nextflow_pipeline { when { params { outdir = "tests/results" - + study_accession = "DRP007622" reads_accession = "DRR280712" } @@ -190,7 +192,7 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "spades" - + study_accession = "DRP007622" reads_accession = "DRR280712" } @@ -218,7 +220,7 @@ nextflow_pipeline { blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" samplesheet = "${projectDir}/tests/samplesheet/test_mem.csv" assembly_memory = 0.5 - // will will be [0.5GB, 0.75GB, 1.13GB, ...] which rounds down to [0, 0, 1, ...] so should definitely fail twice before succeeding. after a few trys. + // will will be [0.5GB, 0.75GB, 1.13GB, ...] which rounds down to [0, 0, 1, ...] so should definitely fail twice before succeeding. after a few trys. 
max_spades_retries = 5 } } @@ -235,4 +237,31 @@ nextflow_pipeline { } } -} \ No newline at end of file + + test("Private study reads - this one should fail") { + + tag "samplesheet" + tag "private" + + when { + params { + outdir = "tests/results" + assembler = "spades" + bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" + blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + samplesheet = "${projectDir}/tests/samplesheet/test.csv" + private_study = true + } + } + + // Complete this test when secrets are implemented in nf-test https://github.com/askimed/nf-test/issues/145 + then { + with(workflow) { + assert !success + assert stdout.count{ line -> line.contains("Required secrets are missing") } == 1 + } + } + + } + +} From dfaffbadf881c6700421c822647fb5c559255806 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 21 Nov 2024 13:43:16 +0000 Subject: [PATCH 26/33] Adjust the version on the wf metadata --- nextflow.config | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nextflow.config b/nextflow.config index 52f1cd3..3080801 100644 --- a/nextflow.config +++ b/nextflow.config @@ -20,7 +20,7 @@ params { // For already fetched data samplesheet = null - + // The pipeline will use the metadata from ENA (obtained by the fetch_tool) // As the metadata can be incorrect, we provide the following parameters to // "force" them @@ -288,7 +288,7 @@ manifest { description = """Microbiome Informatics metagenomes assembly pipeline""" mainScript = 'main.nf' nextflowVersion = '!>=23.04.0' - version = '1.0dev' + version = 'v1.0.0' doi = '' } From eb26726659a604145e6a72bdc691d90ca4ff850d Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 21 Nov 2024 15:56:13 +0000 Subject: [PATCH 27/33] Massive commit, sorry about that. nf-core linting + fixes all over the place to follow sensible nf-core linting rules Updates multiqc and blastn Enabled a nf-core linting github action Tweaked the tests slightly (I think they are still failling .. 
testing and fixing ATM) Upgraded to nf-schema 2.0.2 - also pinned this dependency Remove check_max and moved to resources limits (require nextflow 24.0.0 as min - which I've set on the config) Created some missing .diff for some modules --- .github/workflows/linting.yml | 80 ++++++++ .github/workflows/{ci.yml => nf_tests.yml} | 20 +- .nf-core.yml | 4 + assets/multiqc_config.yml | 6 +- conf/base.config | 12 +- conf/codon_slurm.config | 1 - conf/modules.config | 124 ++++++------- conf/test.config | 11 +- main.nf | 30 ++- modules.json | 16 +- .../bwamem2/mem/bwamem2-mem.diff | 29 +++ .../nf-core/blast/blastn/blast-blastn.diff | 7 + modules/nf-core/blast/blastn/environment.yml | 4 +- modules/nf-core/blast/blastn/main.nf | 4 +- modules/nf-core/blast/blastn/meta.yml | 61 ++++--- .../nf-core/blast/blastn/tests/main.nf.test | 6 +- .../blast/blastn/tests/main.nf.test.snap | 4 +- modules/nf-core/canu/canu.diff | 37 ++++ modules/nf-core/megahit/environment.yml | 4 +- modules/nf-core/megahit/main.nf | 104 +++++------ modules/nf-core/megahit/megahit.diff | 53 +++--- modules/nf-core/megahit/meta.yml | 123 +++++++++---- modules/nf-core/megahit/tests/main.nf.test | 126 +++++++++++++ .../nf-core/megahit/tests/main.nf.test.snap | 172 ++++++++++++++++++ modules/nf-core/megahit/tests/tags.yml | 2 + .../minimap2/align/minimap2-align.diff | 59 ++++++ modules/nf-core/multiqc/multiqc.diff | 22 +++ modules/nf-core/seqkit/grep/seqkit-grep.diff | 18 ++ nextflow.config | 52 +----- nextflow_schema.json | 40 +--- nf-test.config | 1 - tests/main.nf.test | 14 +- workflows/miassembler.nf | 15 +- 33 files changed, 897 insertions(+), 364 deletions(-) create mode 100644 .github/workflows/linting.yml rename .github/workflows/{ci.yml => nf_tests.yml} (65%) create mode 100644 modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff create mode 100644 modules/nf-core/canu/canu.diff create mode 100644 modules/nf-core/megahit/tests/main.nf.test create mode 100644 modules/nf-core/megahit/tests/main.nf.test.snap create mode 100644 modules/nf-core/megahit/tests/tags.yml create mode 100644 modules/nf-core/minimap2/align/minimap2-align.diff create mode 100644 modules/nf-core/multiqc/multiqc.diff diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..62a2d28 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,80 @@ +name: nf-core linting +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files + + nf-core: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@1.1.0 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Run nf-core pipelines lint + 
if: ${{ github.base_ref != 'main' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Run nf-core pipelines lint --release + if: ${{ github.base_ref == 'main' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/nf_tests.yml similarity index 65% rename from .github/workflows/ci.yml rename to .github/workflows/nf_tests.yml index b55e0f6..f29e7d8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/nf_tests.yml @@ -1,11 +1,9 @@ name: nf-test CI on: - push: - branches: - - dev pull_request: release: types: [published] + workflow_dispatch: env: NXF_ANSI_LOG: false @@ -15,22 +13,24 @@ jobs: name: Run pipeline with test data runs-on: ubuntu-latest + strategy: + matrix: + # Nextflow versions: check pipeline minimum and current latest + NXF_VER: ["24.04.0"] + steps: - name: Check out pipeline code uses: actions/checkout@v4 - - uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4 - with: - distribution: "temurin" - java-version: "17" - - name: Setup Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v2.0.0 + with: + version: "${{ matrix.NXF_VER }}" - name: Install nf-test uses: nf-core/setup-nf-test@v1 with: - version: 0.9.0 + install-pdiff: true - name: Run pipeline with test data run: | diff --git a/.nf-core.yml b/.nf-core.yml index 4db2825..6074a56 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -20,6 +20,7 @@ lint: - .github/workflows/ci.yml - .github/workflows/linting_comment.yml - .github/workflows/linting.yml + - .github/workflows/ci.yml - conf/test_full.config - lib/Utils.groovy - lib/WorkflowMain.groovy @@ -32,7 +33,9 @@ lint: - docs/images/nf-core-miassembler_logo_light.png - docs/images/nf-core-miassembler_logo_dark.png - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/PULL_REQUEST_TEMPLATE.md - .github/CONTRIBUTING.md + - .github/workflows/linting.yml - LICENSE - docs/README.md - .gitignore @@ -45,5 +48,6 @@ lint: - params.custom_config_base - manifest.name - manifest.homePage + - custom_config readme: - nextflow_badge diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 2986e13..b9feb24 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,12 +3,12 @@ report_comment: > analysis pipeline. 
report_section_order: - "software_versions": - order: -1000 "ebi-metagenomics-miassembler-methods-description": order: -1001 - "ebi-metagenomics-miassembler-summary": + "software_versions": order: -1002 + "ebi-metagenomics-miassembler-summary": + order: -1003 export_plots: true diff --git a/conf/base.config b/conf/base.config index aff79f6..7170d21 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,9 +10,15 @@ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + resourceLimits = [ + cpus: 32, + memory: '1.TB', + time: '168.h' + ] + + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } errorStrategy = { task.exitStatus in ((130..155) + 104) ? 'retry' : 'finish' } maxRetries = 1 diff --git a/conf/codon_slurm.config b/conf/codon_slurm.config index 7fb4789..c658798 100644 --- a/conf/codon_slurm.config +++ b/conf/codon_slurm.config @@ -12,7 +12,6 @@ executor { queueGlobalStatus = true submitRateLimit = "10 sec" pollInterval = "10 sec" - } conda.enabled = false diff --git a/conf/modules.config b/conf/modules.config index e6814bc..5e58f41 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -13,17 +13,17 @@ process { withName: 'FETCHTOOL*' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } ext.args = params.private_study ? "--private" : "" } withName: 'FASTP*' { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } publishDir = [ [ path: "${params.outdir}", @@ -61,9 +61,9 @@ process { } withName: 'FASTQC' { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } publishDir = [ [ path: "${params.outdir}", @@ -84,9 +84,9 @@ process { // This BWAMEM2_MEM belongs to the coverage module withName: 'BWAMEM2_MEM_COVERAGE' { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 20.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } + cpus = { 12 * task.attempt } + memory = { 20.GB * task.attempt } + time = { 1.h * task.attempt } ext.args = "-M" ext.args2 = "-F 268 -uS" @@ -94,23 +94,23 @@ process { /* Decontamination */ withName: 'BWAMEM2DECONTNOBAMS' { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + time = { 8.h * task.attempt } ext.prefix = "decontaminated" } withName: 'HUMAN*_DECONTAMINATION' { - memory = { check_max( 64.GB * task.attempt, 'memory' ) } + memory = { 64.GB * task.attempt } } withName: 'HOST_DECONTAMINATION' { - memory = { check_max( 24.GB * task.attempt, 'memory' ) } + memory = { 24.GB * task.attempt } } withName: 'CANU*' { - cpus = { check_max( 4 , 'cpus' ) } - memory = { check_max( 3.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 4 } + memory = { 3.GB * task.attempt } + time = { 4.h * task.attempt } ext.args = [ '-trim', @@ -141,10 
+141,10 @@ process { ].join(' ').trim() } - withName: 'PORECHOP_ONT' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + withName: 'PORECHOP_ABI' { + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } } /* --------- */ @@ -154,11 +154,11 @@ process { // We increase the memory 50% with each try memory = { def assembly_memory = meta.assembly_memory ?: params.assembly_memory; - check_max( assembly_memory.GB + ( assembly_memory.GB * 0.5 * ( task.attempt - 1 ) ), 'memory') + assembly_memory.GB + ( assembly_memory.GB * 0.5 * ( task.attempt - 1 ) ) } - cpus = { check_max( 32 * task.attempt, 'cpus') } + cpus = { 32 * task.attempt } // TODO: tweak this based on input ( using the biome maybe? ) - time = { check_max( 168.h * task.attempt, 'time') } + time = { 168.h * task.attempt } ext.args = params.spades_only_assembler ? "--only-assembler" : "" errorStrategy = 'retry' maxRetries = params.max_spades_retries @@ -194,10 +194,10 @@ process { withName: 'MEGAHIT' { memory = { def assembly_memory = meta.assembly_memory ?: params.assembly_memory; - check_max( assembly_memory.GB + ( assembly_memory.GB * 0.5 * ( task.attempt - 1 ) ), 'memory') + assembly_memory.GB + ( assembly_memory.GB * 0.5 * ( task.attempt - 1 ) ) } - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { 12 * task.attempt } + time = { 16.h * task.attempt } errorStrategy = 'retry' maxRetries = params.max_megahit_retries @@ -218,15 +218,15 @@ process { } withName: 'SEQKIT_SEQ' { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } } withName: 'BLAST_BLASTN*' { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } ext.args = [ '-task', @@ -274,17 +274,17 @@ process { } withName: 'SEQKIT_GREP' { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } ext.args = "--invert-match" } // Dummy process to published the filtered and decontaminated contigs withName: 'PUBLISH_CLEANED_CONTIGS' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 250.MB * task.attempt , 'memory' ) } - time = { check_max( 30.m * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 250.MB * task.attempt } + time = { 30.m * task.attempt } publishDir = [ [ path: "${params.outdir}", @@ -300,15 +300,15 @@ process { } withName: 'BWAMEM2_INDEX' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 6.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 16.GB * task.attempt } + time = { 6.h * task.attempt } } withName: 'METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS' { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * 
task.attempt } publishDir = [ [ path: "${params.outdir}", @@ -325,15 +325,15 @@ process { } withName: 'SAMTOOLS_IDXSTATS' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } } withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } publishDir = [ [ @@ -345,9 +345,9 @@ process { } withName: 'MULTIQC_STUDY' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } @@ -370,9 +370,9 @@ process { } withName: 'MULTIQC_RUN' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } @@ -395,9 +395,9 @@ process { } withName: 'QUAST' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } publishDir = [ [ diff --git a/conf/test.config b/conf/test.config index 2e734c2..b3b6265 100644 --- a/conf/test.config +++ b/conf/test.config @@ -13,11 +13,14 @@ profiles { // Limit resources so that this can run on GitHub Actions test { + process { + resourceLimits = [ + cpus: 2, + memory: 6.GB, + time: 1.h + ] + } params { - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" reference_genomes_folder = "${projectDir}/tests/human/" diff --git a/main.nf b/main.nf index f1d5494..b2321bd 100644 --- a/main.nf +++ b/main.nf @@ -15,25 +15,7 @@ nextflow.enable.dsl = 2 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp; paramsSummaryLog; paramsSummaryMap; } from 'plugin/nf-schema' - -def summary_params = paramsSummaryMap(workflow) - -if (params.help) { - log.info paramsHelp("nextflow run ebi-metagenomics/miassembler --help") - exit 0 -} - -validateParameters() - -// Custom validation // -// The conditional validation doesn't work yet -> https://github.com/nf-core/tools/issues/2619 -if ( !params.samplesheet && ( !params.study_accession || !params.reads_accession ) ) { - error "Either --samplesheet or both --study_accession and --reads_accession are required." 
- exit 1 -} - -log.info paramsSummaryLog(workflow) +include { validateParameters } from 'plugin/nf-schema' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -61,6 +43,16 @@ workflow EBIMETAGENOMICS_MIASSEMBLER { // See: https://github.com/nf-core/rnaseq/issues/619 // workflow { + + validateParameters() + + // Custom validation // + // The conditional validation doesn't work yet -> https://github.com/nf-core/tools/issues/2619 + if ( !params.samplesheet && ( !params.study_accession || !params.reads_accession ) ) { + error "Either --samplesheet or both --study_accession and --reads_accession are required." + exit 1 + } + EBIMETAGENOMICS_MIASSEMBLER () } diff --git a/modules.json b/modules.json index c34d7cc..2b2ccea 100644 --- a/modules.json +++ b/modules.json @@ -8,7 +8,8 @@ "bwamem2/mem": { "branch": "main", "git_sha": "75707538d91ddd27fb6007b4ac3710cb05154780", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff" }, "bwamem2decontnobams": { "branch": "main", @@ -23,7 +24,7 @@ "nf-core": { "blast/blastn": { "branch": "master", - "git_sha": "209e5a3e2753c5e628736a662c877c20f341ee15", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/blast/blastn/blast-blastn.diff" }, @@ -35,7 +36,8 @@ "canu": { "branch": "master", "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/canu/canu.diff" }, "custom/dumpsoftwareversions": { "branch": "master", @@ -65,7 +67,7 @@ }, "megahit": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "7755db15e36b30da564cd67fffdfe18a255092aa", "installed_by": ["modules"], "patch": "modules/nf-core/megahit/megahit.diff" }, @@ -77,12 +79,14 @@ "minimap2/align": { "branch": "master", "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "multiqc": { "branch": "master", "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/multiqc/multiqc.diff" }, "porechop/abi": { "branch": "master", diff --git a/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff b/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff new file mode 100644 index 0000000..759865c --- /dev/null +++ b/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff @@ -0,0 +1,29 @@ +Changes in module 'ebi-metagenomics/bwamem2/mem' +'modules/ebi-metagenomics/bwamem2/mem/environment.yml' is unchanged +Changes in 'bwamem2/mem/main.nf': +--- modules/ebi-metagenomics/bwamem2/mem/main.nf ++++ modules/ebi-metagenomics/bwamem2/mem/main.nf +@@ -7,8 +7,7 @@ + 'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' }" + + input: +- tuple val(meta), path(reads) +- tuple val(meta2), path(index) ++ tuple val(meta), path(reads), path(index) + + output: + tuple val(meta), path("*_sorted.bam"), path("*_sorted.bam.bai"), emit: bam +@@ -21,7 +20,6 @@ + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: meta.id +- def database = task.ext.database ?: meta2.id + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + +'modules/ebi-metagenomics/bwamem2/mem/meta.yml' is unchanged 
+'modules/ebi-metagenomics/bwamem2/mem/tests/tags.yml' is unchanged +'modules/ebi-metagenomics/bwamem2/mem/tests/main.nf.test.snap' is unchanged +'modules/ebi-metagenomics/bwamem2/mem/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/nf-core/blast/blastn/blast-blastn.diff b/modules/nf-core/blast/blastn/blast-blastn.diff index 888e64e..e596c33 100644 --- a/modules/nf-core/blast/blastn/blast-blastn.diff +++ b/modules/nf-core/blast/blastn/blast-blastn.diff @@ -1,4 +1,6 @@ Changes in module 'nf-core/blast/blastn' +'modules/nf-core/blast/blastn/environment.yml' is unchanged +Changes in 'blast/blastn/main.nf': --- modules/nf-core/blast/blastn/main.nf +++ modules/nf-core/blast/blastn/main.nf @@ -20,7 +20,7 @@ @@ -11,4 +13,9 @@ Changes in module 'nf-core/blast/blastn' def fasta_name = is_compressed ? fasta.getBaseName() : fasta +'modules/nf-core/blast/blastn/meta.yml' is unchanged +'modules/nf-core/blast/blastn/tests/tags.yml' is unchanged +'modules/nf-core/blast/blastn/tests/nextflow.config' is unchanged +'modules/nf-core/blast/blastn/tests/main.nf.test.snap' is unchanged +'modules/nf-core/blast/blastn/tests/main.nf.test' is unchanged ************************************************************ diff --git a/modules/nf-core/blast/blastn/environment.yml b/modules/nf-core/blast/blastn/environment.yml index cb9b15d..777e097 100644 --- a/modules/nf-core/blast/blastn/environment.yml +++ b/modules/nf-core/blast/blastn/environment.yml @@ -1,7 +1,5 @@ -name: blast_blastn channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::blast=2.14.1 + - bioconda::blast=2.15.0 diff --git a/modules/nf-core/blast/blastn/main.nf b/modules/nf-core/blast/blastn/main.nf index 9b44592..587e799 100644 --- a/modules/nf-core/blast/blastn/main.nf +++ b/modules/nf-core/blast/blastn/main.nf @@ -4,8 +4,8 @@ process BLAST_BLASTN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/blast:2.14.1--pl5321h6f7f691_0': - 'biocontainers/blast:2.14.1--pl5321h6f7f691_0' }" + 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': + 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" input: tuple val(meta) , path(fasta) diff --git a/modules/nf-core/blast/blastn/meta.yml b/modules/nf-core/blast/blastn/meta.yml index a0d64dd..0f5e41b 100644 --- a/modules/nf-core/blast/blastn/meta.yml +++ b/modules/nf-core/blast/blastn/meta.yml @@ -13,39 +13,42 @@ tools: documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs doi: 10.1016/S0022-2836(05)80360-2 licence: ["US-Government-Work"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input fasta file containing queries sequences - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" - - meta2: - type: map - description: | - Groovy Map containing db information - e.g. [ id:'test2', single_end:false ] - - db: - type: directory - description: Directory containing the blast database - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing queries sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: directory + description: Directory containing the blast database + pattern: "*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - txt: - type: file - description: File containing blastn hits - pattern: "*.txt" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.txt": + type: file + description: File containing blastn hits + pattern: "*.txt" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test b/modules/nf-core/blast/blastn/tests/main.nf.test index 02ecfab..aacc93c 100644 --- a/modules/nf-core/blast/blastn/tests/main.nf.test +++ b/modules/nf-core/blast/blastn/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { script "../../makeblastdb/main.nf" process { """ - input[0] = [ [id:'test2'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } } @@ -29,7 +29,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[1] = BLAST_MAKEBLASTDB.out.db """ } @@ -53,7 +53,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] input[1] = BLAST_MAKEBLASTDB.out.db """ } diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test.snap b/modules/nf-core/blast/blastn/tests/main.nf.test.snap index d1b5f3f..dd8b775 100644 --- a/modules/nf-core/blast/blastn/tests/main.nf.test.snap +++ b/modules/nf-core/blast/blastn/tests/main.nf.test.snap @@ -2,7 +2,7 @@ "versions": { "content": [ [ - "versions.yml:md5,2d5ffadc7035672f6a9e00b01d1751ea" + "versions.yml:md5,faf2471d836ebbf24d96d3e1f8720b17" ] ], "timestamp": "2023-12-11T07:20:03.54997013" @@ -10,7 +10,7 @@ "versions_zipped": { "content": [ [ - "versions.yml:md5,2d5ffadc7035672f6a9e00b01d1751ea" + "versions.yml:md5,faf2471d836ebbf24d96d3e1f8720b17" ] ], "timestamp": "2023-12-11T07:20:12.925782708" diff --git a/modules/nf-core/canu/canu.diff b/modules/nf-core/canu/canu.diff new file mode 100644 index 0000000..1e6aba1 --- /dev/null +++ b/modules/nf-core/canu/canu.diff @@ -0,0 +1,37 @@ +Changes in module 'nf-core/canu' +'modules/nf-core/canu/environment.yml' is unchanged +Changes in 'canu/main.nf': +--- modules/nf-core/canu/main.nf ++++ modules/nf-core/canu/main.nf +@@ -15,7 +15,7 @@ + output: + tuple val(meta), path("*.report") , emit: report + tuple val(meta), path("*.contigs.fasta.gz") , emit: assembly , optional: true +- tuple val(meta), 
path("*.unassembled.fasta.gz") , emit: contigs ++ tuple val(meta), path("*.unassembled.fasta.gz") , emit: contigs , optional: true + tuple val(meta), path("*.correctedReads.fasta.gz") , emit: corrected_reads , optional: true + tuple val(meta), path("*.trimmedReads.fasta.gz") , emit: corrected_trimmed_reads , optional: true + tuple val(meta), path("*.contigs.layout") , emit: metadata , optional: true +@@ -28,6 +28,7 @@ + + script: + def args = task.ext.args ?: '' ++ def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def valid_mode = ["-pacbio", "-nanopore", "-pacbio-hifi"] + if ( !valid_mode.contains(mode) ) { error "Unrecognised mode to run Canu. Options: ${valid_mode.join(', ')}" } +@@ -37,10 +38,9 @@ + $mode \\ + genomeSize=${genomesize} \\ + $args \\ ++ $args2 \\ + maxThreads=$task.cpus \\ + $reads +- +- gzip *.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +'modules/nf-core/canu/meta.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/megahit/environment.yml b/modules/nf-core/megahit/environment.yml index aac2f99..eed8b72 100644 --- a/modules/nf-core/megahit/environment.yml +++ b/modules/nf-core/megahit/environment.yml @@ -1,8 +1,6 @@ -name: megahit channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::megahit=1.2.9 - - conda-forge::pigz=2.6 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/megahit/main.nf b/modules/nf-core/megahit/main.nf index 750e3ec..dc9bc4b 100644 --- a/modules/nf-core/megahit/main.nf +++ b/modules/nf-core/megahit/main.nf @@ -1,22 +1,22 @@ process MEGAHIT { - tag "$meta.id" + tag "${meta.id}" label 'process_high' - conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-0f92c152b180c7cd39d9b0e6822f8c89ccb59c99:8ec213d21e5d03f9db54898a2baeaf8ec729b447-0' : - 'biocontainers/mulled-v2-0f92c152b180c7cd39d9b0e6822f8c89ccb59c99:8ec213d21e5d03f9db54898a2baeaf8ec729b447-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f2/f2cb827988dca7067ff8096c37cb20bc841c878013da52ad47a50865d54efe83/data' : + 'community.wave.seqera.io/library/megahit_pigz:87a590163e594224' }" input: tuple val(meta), path(reads) output: - tuple val(meta), path("megahit_out/*.contigs.fa.gz") , emit: contigs - tuple val(meta), path("megahit_out/intermediate_contigs/k*.contigs.fa.gz") , emit: k_contigs - tuple val(meta), path("megahit_out/intermediate_contigs/k*.addi.fa.gz") , emit: addi_contigs - tuple val(meta), path("megahit_out/intermediate_contigs/k*.local.fa.gz") , emit: local_contigs - tuple val(meta), path("megahit_out/intermediate_contigs/k*.final.contigs.fa.gz"), emit: kfinal_contigs - path "versions.yml" , emit: versions + tuple val(meta), path("*.contigs.fa.gz") , emit: contigs + tuple val(meta), path("intermediate_contigs/k*.contigs.fa.gz") , emit: k_contigs + tuple val(meta), path("intermediate_contigs/k*.addi.fa.gz") , emit: addi_contigs + tuple val(meta), path("intermediate_contigs/k*.local.fa.gz") , emit: local_contigs + tuple val(meta), path("intermediate_contigs/k*.final.contigs.fa.gz"), emit: kfinal_contigs + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,56 +25,46 @@ process MEGAHIT { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - megahit \\ - -r ${reads} \\ - -t $task.cpus \\ - $args \\ - --out-prefix $prefix - - if [ ! -s megahit_out/*.fa ]; then - echo "No contigs assembled" | tee /dev/stderr - exit 1 - fi + def reads_command = meta.single_end ? "-r ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + """ + megahit \\ + ${reads_command} \\ + ${args} \\ + -t ${task.cpus} \\ + --out-prefix ${prefix} - pigz \\ - --no-name \\ - -p $task.cpus \\ - $args2 \\ - megahit_out/*.fa \\ - megahit_out/intermediate_contigs/*.fa + pigz \\ + --no-name \\ + -p ${task.cpus} \\ + ${args2} \\ + megahit_out/*.fa \\ + megahit_out/intermediate_contigs/*.fa - cat <<-END_VERSIONS > versions.yml - "${task.process}": - megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') - END_VERSIONS - """ - } else { - """ - megahit \\ - -1 ${reads[0]} \\ - -2 ${reads[1]} \\ - -t $task.cpus \\ - $args \\ - --out-prefix $prefix + mv megahit_out/* . - if [ ! -s megahit_out/*.fa ]; then - echo "No contigs assembled" | tee /dev/stderr - exit 1 - fi + cat <<-END_VERSIONS > versions.yml + "${task.process}": + megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') + END_VERSIONS + """ - pigz \\ - --no-name \\ - -p $task.cpus \\ - $args2 \\ - megahit_out/*.fa \\ - megahit_out/intermediate_contigs/*.fa + stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reads_command = meta.single_end ? 
"-r ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + """ + mkdir -p intermediate_contigs + echo "" | gzip > ${prefix}.contigs.fa.gz + echo "" | gzip > intermediate_contigs/k21.contigs.fa.gz + echo "" | gzip > intermediate_contigs/k21.addi.fa.gz + echo "" | gzip > intermediate_contigs/k21.local.fa.gz + echo "" | gzip > intermediate_contigs/k21.final.contigs.fa.gz + touch ${prefix}.log - cat <<-END_VERSIONS > versions.yml - "${task.process}": - megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') - END_VERSIONS - """ - } + cat <<-END_VERSIONS > versions.yml + "${task.process}": + megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') + END_VERSIONS + """ } diff --git a/modules/nf-core/megahit/megahit.diff b/modules/nf-core/megahit/megahit.diff index 9f4fe6f..0f2f60b 100644 --- a/modules/nf-core/megahit/megahit.diff +++ b/modules/nf-core/megahit/megahit.diff @@ -1,29 +1,38 @@ Changes in module 'nf-core/megahit' +'modules/nf-core/megahit/environment.yml' is unchanged +Changes in 'megahit/main.nf': --- modules/nf-core/megahit/main.nf +++ modules/nf-core/megahit/main.nf -@@ -33,6 +33,11 @@ - $args \\ - --out-prefix $prefix +@@ -7,7 +7,7 @@ + 'community.wave.seqera.io/library/megahit_pigz:87a590163e594224' }" -+ if [ ! -s megahit_out/*.fa ]; then -+ echo "No contigs assembled" | tee /dev/stderr -+ exit 1 -+ fi -+ - pigz \\ - --no-name \\ - -p $task.cpus \\ -@@ -54,6 +59,11 @@ - $args \\ - --out-prefix $prefix + input: +- tuple val(meta), path(reads1), path(reads2) ++ tuple val(meta), path(reads) -+ if [ ! -s megahit_out/*.fa ]; then -+ echo "No contigs assembled" | tee /dev/stderr -+ exit 1 -+ fi -+ - pigz \\ - --no-name \\ - -p $task.cpus \\ + output: + tuple val(meta), path("*.contigs.fa.gz") , emit: contigs +@@ -25,7 +25,7 @@ + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" +- def reads_command = meta.single_end || !reads2 ? "-r ${reads1}" : "-1 ${reads1.join(',')} -2 ${reads2.join(',')}" ++ def reads_command = meta.single_end ? "-r ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + """ + megahit \\ + ${reads_command} \\ +@@ -52,7 +52,7 @@ + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" +- def reads_command = meta.single_end || !reads2 ? "-r ${reads1}" : "-1 ${reads1.join(',')} -2 ${reads2.join(',')}" ++ def reads_command = meta.single_end ? 
"-r ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + """ + mkdir -p intermediate_contigs + echo "" | gzip > ${prefix}.contigs.fa.gz +'modules/nf-core/megahit/meta.yml' is unchanged +'modules/nf-core/megahit/tests/tags.yml' is unchanged +'modules/nf-core/megahit/tests/main.nf.test.snap' is unchanged +'modules/nf-core/megahit/tests/main.nf.test' is unchanged ************************************************************ diff --git a/modules/nf-core/megahit/meta.yml b/modules/nf-core/megahit/meta.yml index 83b718f..04dab4c 100644 --- a/modules/nf-core/megahit/meta.yml +++ b/modules/nf-core/megahit/meta.yml @@ -8,53 +8,106 @@ keywords: - metagenomics tools: - megahit: - description: "An ultra-fast single-node solution for large and complex metagenomics assembly via succinct de Bruijn graph" + description: "An ultra-fast single-node solution for large and complex metagenomics + assembly via succinct de Bruijn graph" homepage: https://github.com/voutcn/megahit documentation: https://github.com/voutcn/megahit tool_dev_url: https://github.com/voutcn/megahit doi: "10.1093/bioinformatics/btv033" licence: ["GPL v3"] + args_id: "$args" + identifier: biotools:megahit + - pigz: + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + args_id: "$args2" + + identifier: biotools:megahit input: - - meta: - type: map - description: | - Groovy Map containing sample information and input single, or paired-end FASTA/FASTQ files (optionally decompressed) - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively in gzipped or uncompressed FASTQ or FASTA format. + - - meta: + type: map + description: | + Groovy Map containing sample information and input single, or paired-end FASTA/FASTQ files (optionally decompressed) + e.g. [ id:'test', single_end:false ] + - reads1: + type: file + description: | + A single or list of input FastQ files for single-end or R1 of paired-end library(s), + respectively in gzipped or uncompressed FASTQ or FASTA format. + - reads2: + type: file + description: | + A single or list of input FastQ files for R2 of paired-end library(s), + respectively in gzipped or uncompressed FASTQ or FASTA format. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - contigs: - type: file - description: Final final contigs result of the assembly in FASTA format. - pattern: "*.contigs.fa.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.contigs.fa.gz": + type: file + description: Final final contigs result of the assembly in FASTA format. + pattern: "*.contigs.fa.gz" - k_contigs: - type: file - description: Contigs assembled from the de Bruijn graph of order-K - pattern: "k*.contigs.fa.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - intermediate_contigs/k*.contigs.fa.gz: + type: file + description: Contigs assembled from the de Bruijn graph of order-K + pattern: "k*.contigs.fa.gz" - addi_contigs: - type: file - description: Contigs assembled after iteratively removing local low coverage unitigs in the de Bruijn graph of order-K - pattern: "k*.addi.fa.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intermediate_contigs/k*.addi.fa.gz: + type: file + description: Contigs assembled after iteratively removing local low coverage + unitigs in the de Bruijn graph of order-K + pattern: "k*.addi.fa.gz" - local_contigs: - type: file - description: Contigs of the locally assembled contigs for k=K - pattern: "k*.local.fa.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intermediate_contigs/k*.local.fa.gz: + type: file + description: Contigs of the locally assembled contigs for k=K + pattern: "k*.local.fa.gz" - kfinal_contigs: - type: file - description: Stand-alone contigs for k=K; if local assembly is turned on, the file will be empty - pattern: "k*.final.contigs.fa.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intermediate_contigs/k*.final.contigs.fa.gz: + type: file + description: Stand-alone contigs for k=K; if local assembly is turned on, the + file will be empty + pattern: "k*.final.contigs.fa.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Log file containing statistics of the assembly output + pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@jfy133" maintainers: diff --git a/modules/nf-core/megahit/tests/main.nf.test b/modules/nf-core/megahit/tests/main.nf.test new file mode 100644 index 0000000..b52765d --- /dev/null +++ b/modules/nf-core/megahit/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process MEGAHIT" + script "../main.nf" + process "MEGAHIT" + + tag "modules" + tag "modules_nfcore" + tag "megahit" + + test("sarscov2 - fastq - se") { + + when { + process { + """ + input[0] = [ [id:"test", single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") }, + { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert snapshot( + path(process.out.log[0][1]).readLines().last().contains("ALL DONE. 
Time elapsed"), + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - fastq - pe") { + + when { + process { + """ + input[0] = [ [id:"test", single_end:false], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") }, + { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert snapshot( + path(process.out.log[0][1]).readLines().last().contains("ALL DONE. Time elapsed"), + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - fastq - pe - coassembly") { + + when { + process { + """ + input[0] = [ [id:"test", single_end:false], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true)] , + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") }, + { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert snapshot( + path(process.out.log[0][1]).readLines().last().contains("ALL DONE. 
Time elapsed"), + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id:"test", single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/megahit/tests/main.nf.test.snap b/modules/nf-core/megahit/tests/main.nf.test.snap new file mode 100644 index 0000000..4677cc3 --- /dev/null +++ b/modules/nf-core/megahit/tests/main.nf.test.snap @@ -0,0 +1,172 @@ +{ + "sarscov2 - fastq - se": { + "content": [ + true, + [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:45:42.387947698" + }, + "sarscov2 - fastq - pe": { + "content": [ + true, + [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:45:48.679485983" + }, + "sarscov2 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "k21.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.addi.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.local.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ], + "addi_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.addi.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "k_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "k21.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "kfinal_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "local_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.local.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:44:35.245399991" + }, + "sarscov2 - fastq - pe - coassembly": { + "content": [ + true, + [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:45:56.23363342" + } +} \ No newline at end of file diff --git a/modules/nf-core/megahit/tests/tags.yml b/modules/nf-core/megahit/tests/tags.yml new file 
mode 100644 index 0000000..9e86584 --- /dev/null +++ b/modules/nf-core/megahit/tests/tags.yml @@ -0,0 +1,2 @@ +megahit: + - "modules/nf-core/megahit/**" diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff new file mode 100644 index 0000000..647611a --- /dev/null +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -0,0 +1,59 @@ +Changes in module 'nf-core/minimap2/align' +'modules/nf-core/minimap2/align/environment.yml' is unchanged +Changes in 'minimap2/align/main.nf': +--- modules/nf-core/minimap2/align/main.nf ++++ modules/nf-core/minimap2/align/main.nf +@@ -11,12 +11,14 @@ + input: + tuple val(meta), path(reads) + tuple val(meta2), path(reference) ++ val prefix2 + val bam_format + val bam_index_extension + val cigar_paf_format + val cigar_bam + + output: ++ tuple val(meta), path("*.minimap*") , optional: true, emit: filtered_fastq + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index +@@ -32,7 +34,8 @@ + def args4 = task.ext.args4 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" +- def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" ++ def map_mode = "${meta.platform}" ? "-x map-${meta.platform}" : '' ++ def bam_output = bam_format ? "-a | samtools fastq -f 4 | gzip > ${prefix}.${prefix2}.minimap.fastq.gz" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') +@@ -45,12 +48,12 @@ + minimap2 \\ + $args \\ + -t $task.cpus \\ ++ $map_mode \\ + $target \\ + $query \\ + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output +- + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +@@ -60,7 +63,7 @@ + """ + + stub: +- def prefix = task.ext.prefix ?: "${meta.id}" ++ def prefix = task.ext.prefix ?: c + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? 
"touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + +'modules/nf-core/minimap2/align/meta.yml' is unchanged +'modules/nf-core/minimap2/align/tests/tags.yml' is unchanged +'modules/nf-core/minimap2/align/tests/main.nf.test.snap' is unchanged +'modules/nf-core/minimap2/align/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/nf-core/multiqc/multiqc.diff b/modules/nf-core/multiqc/multiqc.diff new file mode 100644 index 0000000..1f02e13 --- /dev/null +++ b/modules/nf-core/multiqc/multiqc.diff @@ -0,0 +1,22 @@ +Changes in module 'nf-core/multiqc' +'modules/nf-core/multiqc/environment.yml' is unchanged +Changes in 'multiqc/main.nf': +--- modules/nf-core/multiqc/main.nf ++++ modules/nf-core/multiqc/main.nf +@@ -7,7 +7,8 @@ + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" + + input: +- path multiqc_files, stageAs: "?/*" ++ path(multiqc_files, stageAs: "?/*") ++ tuple val(meta), path(pipeline_files, stageAs: "?/*") + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + +'modules/nf-core/multiqc/meta.yml' is unchanged +'modules/nf-core/multiqc/tests/tags.yml' is unchanged +'modules/nf-core/multiqc/tests/nextflow.config' is unchanged +'modules/nf-core/multiqc/tests/main.nf.test.snap' is unchanged +'modules/nf-core/multiqc/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/nf-core/seqkit/grep/seqkit-grep.diff b/modules/nf-core/seqkit/grep/seqkit-grep.diff index 30b7db0..60ca678 100644 --- a/modules/nf-core/seqkit/grep/seqkit-grep.diff +++ b/modules/nf-core/seqkit/grep/seqkit-grep.diff @@ -1,4 +1,6 @@ Changes in module 'nf-core/seqkit/grep' +'modules/nf-core/seqkit/grep/environment.yml' is unchanged +Changes in 'seqkit/grep/main.nf': --- modules/nf-core/seqkit/grep/main.nf +++ modules/nf-core/seqkit/grep/main.nf @@ -9,8 +9,7 @@ @@ -11,5 +13,21 @@ Changes in module 'nf-core/seqkit/grep' output: tuple val(meta), path("*.{fa,fq}.gz") , emit: filter +@@ -21,10 +20,13 @@ + + script: + def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" + // fasta or fastq. Exact pattern match .fasta or .fa suffix with optional .gz (gzip) suffix + def suffix = task.ext.suffix ?: "${sequence}" ==~ /(.*f[astn]*a(.gz)?$)/ ? "fa" : "fq" + def pattern_file = pattern ? 
"-f ${pattern}" : "" ++ ++ def pattern_filename = pattern.getName() ++ def pattern_name = pattern_filename.split('\\.')[0] ++ def prefix = task.ext.prefix ?: "${meta.id}_${pattern_name}" + + """ + seqkit \\ +'modules/nf-core/seqkit/grep/meta.yml' is unchanged ************************************************************ diff --git a/nextflow.config b/nextflow.config index 3080801..e3bf5ff 100644 --- a/nextflow.config +++ b/nextflow.config @@ -6,10 +6,6 @@ ---------------------------------------------------------------------------------------- */ -plugins { - id 'nf-schema' -} - // Global default params, used in configs params { @@ -108,11 +104,6 @@ params { help = false version = false - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '1.TB' - max_cpus = 32 - max_time = '168.h' // 7 days max_spades_retries = 3 max_megahit_retries = 3 @@ -126,7 +117,11 @@ params { validation { failUnrecognisedParams = true lenientMode = false - showHiddenParams = false + help { + enabled = true + showHidden = false + command = "nextflow run ebi-metagenomics/miassembler --samplesheet samplesheet.csv --outdir output" + } } // Load base.config by default for all pipelines @@ -243,7 +238,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-schema@2.0.0' + id 'nf-schema@2.2.0' } // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -287,7 +282,7 @@ manifest { homePage = 'https://github.com/ebi-metagenomics/miassembler' description = """Microbiome Informatics metagenomes assembly pipeline""" mainScript = 'main.nf' - nextflowVersion = '!>=23.04.0' + nextflowVersion = '!>=24.04.0' version = 'v1.0.0' doi = '' } @@ -314,36 +309,3 @@ def study_folder( meta = null ) { study_accession, ].join("/") } - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} diff --git a/nextflow_schema.json b/nextflow_schema.json index d6f3d00..65d981c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -4,7 +4,7 @@ "title": "ebi-metagenomics/miassembler pipeline parameters", "description": "Microbiome Informatics metagenomes assembly pipeline", "type": "object", - "defs": { + "$defs": { "input_output_options": { "title": "Input/output options", "type": "object", @@ -51,7 +51,7 @@ "long_reads_assembler_config": { "type": "string", "description": "Configuration to use flye with. 
Pick from nano-raw, nano-corr, nano-hq, pacbio-raw, pacbio-corr, pacbio-hifi", - "default": "" + "default": null }, "single_end": { "type": "boolean", @@ -70,7 +70,7 @@ "platform": { "type": "string", "description": "Force the instrument_platform value for the study / reads", - "default": "ont" + "default": null }, "flye_version": { "type": "string", @@ -211,32 +211,6 @@ "description": "Set the top limit for requested resources for any single job.", "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", "properties": { - "max_cpus": { - "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 32, - "fa_icon": "fas fa-microchip", - "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" - }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "1.TB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "168.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", - "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. 
`--max_time '2.h'`" - }, "max_spades_retries": { "type": "integer", "description": "Maximum number of task attempt retries for (meta)spades assembly steps only.", @@ -341,16 +315,16 @@ }, "allOf": [ { - "$ref": "#/defs/input_output_options" + "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/defs/reads_qc" + "$ref": "#/$defs/reads_qc" }, { - "$ref": "#/defs/max_job_request_options" + "$ref": "#/$defs/max_job_request_options" }, { - "$ref": "#/defs/generic_options" + "$ref": "#/$defs/generic_options" } ] } diff --git a/nf-test.config b/nf-test.config index ac040d5..853c892 100644 --- a/nf-test.config +++ b/nf-test.config @@ -1,5 +1,4 @@ config { - testsDir "tests" workDir ".nf-test" configFile "tests/nextflow.config" diff --git a/tests/main.nf.test b/tests/main.nf.test index fb6ab8f..bd389ab 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -19,7 +19,7 @@ nextflow_pipeline { then { with(workflow) { assert success - assert trace.succeeded().count{ task -> task.name.contains("SPADES") } == 1 + assert trace.succeeded().count{ task -> task.name.contains("SPADES") } == 2 assert trace.succeeded().count{ task -> task.name.contains("MEGAHIT") } == 1 } } @@ -53,7 +53,7 @@ nextflow_pipeline { assert trace.succeeded().count{ task -> task.name.contains("SPADES") } == 0 assert trace.succeeded().count{ task -> task.name.contains("MEGAHIT") } == 0 // This process should not have been called - assert trace.succeeded().count{ task -> task.name.contains("DOWNLOAD_FROM_FIRE") == 0} + assert trace.succeeded().count{ task -> task.name.contains("DOWNLOAD_FROM_FIRE") } == 0 } } @@ -216,11 +216,13 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "spades" - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" samplesheet = "${projectDir}/tests/samplesheet/test_mem.csv" assembly_memory = 0.5 - // will will be [0.5GB, 0.75GB, 1.13GB, ...] which rounds down to [0, 0, 1, ...] so should definitely fail twice before succeeding. after a few trys. + /* Memory jumping testing */ + /* will try with [0.5GB, 0.75GB, 1.13GB, ...] + /* which rounds down to [0, 0, 1, ...] + /* so should definitely fail twice before succeeding. after a few trys. 
+ /* ~~~ */ max_spades_retries = 5 } } @@ -247,8 +249,6 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "spades" - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" samplesheet = "${projectDir}/tests/samplesheet/test.csv" private_study = true } diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index 2c61bac..d896ebd 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -4,13 +4,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { - validateParameters ; - paramsSummaryLog ; - paramsSummaryMap ; - samplesheetToList ; - paramsHelp -} from 'plugin/nf-schema' +include { paramsSummaryLog; paramsSummaryMap; samplesheetToList } from 'plugin/nf-schema' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -67,13 +61,6 @@ workflow MIASSEMBLER { // Print parameter summary log to screen log.info(logo + paramsSummaryLog(workflow) + citation) - validateParameters() - - if (params.help) { - log.info(paramsHelp("nextflow run ebi-metagenomics/miassembler --help")) - exit(0) - } - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CONFIG FILES From ebb527862c10570b004e6a51c7c9d6304aa6d2a7 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 21 Nov 2024 16:18:24 +0000 Subject: [PATCH 28/33] Adjust code based on feedback --- bin/s3fire_downloader.py | 20 ++++++-------------- modules/local/download_from_fire.nf | 2 +- nextflow_schema.json | 3 ++- 3 files changed, 9 insertions(+), 16 deletions(-) diff --git a/bin/s3fire_downloader.py b/bin/s3fire_downloader.py index 09a7282..5291d3c 100755 --- a/bin/s3fire_downloader.py +++ b/bin/s3fire_downloader.py @@ -105,13 +105,8 @@ def download_files(ftp_paths: List[str], outdir: str, access_key: Optional[str], :type secret_key: Optional[str] """ for ftp_path in ftp_paths: - try: - s3_key, bucket = transform_ftp_to_s3(ftp_path) - download_file_from_fire(s3_key, bucket, outdir, access_key, secret_key) - except ValueError as ve: - logger.error(f"Skipping download due to error: {ve}") - except Exception as e: - logger.error(f"Unexpected error while downloading {ftp_path}: {e}") + s3_key, bucket = transform_ftp_to_s3(ftp_path) + download_file_from_fire(s3_key, bucket, outdir, access_key, secret_key) def main() -> None: @@ -119,7 +114,7 @@ def main() -> None: description="Download multiple files from FTP paths via FIRE S3 (supports public and private files)." 
) parser.add_argument( - "--ftp_paths", + "--ftp-paths", nargs="+", required=True, help="Space-separated list of FTP paths to download (e.g., ftp.sra.ebi.ac.uk/vol1/.../file1 ftp.sra.ebi.ac.uk/vol1/.../file2).", @@ -129,12 +124,9 @@ def main() -> None: parser.add_argument("--secret-key", required=False, help="S3 secret key (required for private files).") args = parser.parse_args() - try: - logger.info("Starting the file download process...") - download_files(args.ftp_paths, args.outdir, args.access_key, args.secret_key) - logger.info("All files have been processed.") - except Exception as e: - logger.error(f"Unexpected error: {e}") + logger.info("Starting the file download process...") + download_files(args.ftp_paths, args.outdir, args.access_key, args.secret_key) + logger.info("All files have been processed.") if __name__ == "__main__": diff --git a/modules/local/download_from_fire.nf b/modules/local/download_from_fire.nf index 7226f72..b8c7242 100644 --- a/modules/local/download_from_fire.nf +++ b/modules/local/download_from_fire.nf @@ -23,7 +23,7 @@ process DOWNLOAD_FROM_FIRE { s3fire_downloader.py \\ --access-key \${FIRE_ACCESS_KEY} \\ --secret-key \${FIRE_SECRET_KEY} \\ - --ftp_paths ${input_reads.join(" ")} \\ + --ftp-paths ${input_reads.join(" ")} \\ --outdir fastq_files cat <<-END_VERSIONS > versions.yml diff --git a/nextflow_schema.json b/nextflow_schema.json index 65d981c..4df56ed 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -50,7 +50,8 @@ }, "long_reads_assembler_config": { "type": "string", - "description": "Configuration to use flye with. Pick from nano-raw, nano-corr, nano-hq, pacbio-raw, pacbio-corr, pacbio-hifi", + "enum": ["nano-raw", "nano-corr", "nano-hq", "pacbio-raw", "pacbio-corr", "pacbio-hifi"], + "description": "Configuration to use flye with.", "default": null }, "single_end": { From 48cc7ee90026af1df2a6d8629dec0c453aa44550 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 21 Nov 2024 16:18:45 +0000 Subject: [PATCH 29/33] Adjust code based on feedback --- tests/main.nf.test | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/main.nf.test b/tests/main.nf.test index bd389ab..644b429 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -139,7 +139,9 @@ nextflow_pipeline { } - test("MEGAHIT - single end - should fail") { + test("MEGAHIT - single end - should produce an empty contigs file") { + + // TODO: fix along with - https://github.com/EBI-Metagenomics/miassembler/pull/21 tag "ena-portal-api" @@ -154,7 +156,7 @@ nextflow_pipeline { then { with(workflow) { - assert !success + assert success assert trace.failed().count{ task -> task.name.contains("MEGAHIT") } == 1 } } From a244fa0bf249bf604a8198c292e6ac8139602d14 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 21 Nov 2024 16:22:04 +0000 Subject: [PATCH 30/33] Add nf-core version to .nf-core.yml --- .nf-core.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.nf-core.yml b/.nf-core.yml index 6074a56..6c1e96b 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -51,3 +51,4 @@ lint: - custom_config readme: - nextflow_badge +nf_core_version: 3.0.2 From 5592bc48c65d1d9c530ea8f57c67c9c3ab0f1ebb Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 21 Nov 2024 16:24:44 +0000 Subject: [PATCH 31/33] Bump version of nf-test for nf_tests github aciton --- .github/workflows/nf_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/nf_tests.yml b/.github/workflows/nf_tests.yml index f29e7d8..70e3e58 100644 --- 
a/.github/workflows/nf_tests.yml +++ b/.github/workflows/nf_tests.yml @@ -31,6 +31,7 @@ jobs: uses: nf-core/setup-nf-test@v1 with: install-pdiff: true + version: 0.9.2 - name: Run pipeline with test data run: | From e42163902d45a7427ffea88ae2a3140866d5139d Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Thu, 21 Nov 2024 16:38:59 +0000 Subject: [PATCH 32/33] Adjust test - megahit with empty contigs will succedd --- tests/main.nf.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/main.nf.test b/tests/main.nf.test index 644b429..f77f1f5 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -157,7 +157,7 @@ nextflow_pipeline { then { with(workflow) { assert success - assert trace.failed().count{ task -> task.name.contains("MEGAHIT") } == 1 + assert trace.succeeded().count{ task -> task.name.contains("MEGAHIT") } == 1 } } } From 570059344637134ac8691da7442152b096b8bd12 Mon Sep 17 00:00:00 2001 From: Martin Beracochea Date: Tue, 10 Dec 2024 16:47:44 +0000 Subject: [PATCH 33/33] Retrofit some fixes --- conf/modules.config | 6 +++--- nf-test.config | 2 +- workflows/short_reads_assembler.nf | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7f4b2c1..4a2209e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -325,9 +325,9 @@ process { } withName: 'CALCULATE_ASSEMBLY_COVERAGE' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 100.MB * task.attempt, 'memory' ) } - time = { check_max( 30.m * task.attempt, 'time' ) } + cpus = { 1 * task.attempt } + memory = { 100.MB * task.attempt } + time = { 30.m * task.attempt } publishDir = [ [ path: "${params.outdir}", diff --git a/nf-test.config b/nf-test.config index c47d9bb..853c892 100644 --- a/nf-test.config +++ b/nf-test.config @@ -2,5 +2,5 @@ config { testsDir "tests" workDir ".nf-test" configFile "tests/nextflow.config" - profile "test_ci,docker" + profile "test,docker" } diff --git a/workflows/short_reads_assembler.nf b/workflows/short_reads_assembler.nf index 6929e41..2beb545 100644 --- a/workflows/short_reads_assembler.nf +++ b/workflows/short_reads_assembler.nf @@ -153,7 +153,8 @@ workflow SHORT_READS_ASSEMBLER { // Coverage // SHORT_READS_ASSEMBLY_COVERAGE( - SHORT_READS_ASSEMBLY_QC.out.filtered_contigs.join(SHORT_READS_QC.out.qc_reads, remainder: false) + SHORT_READS_ASSEMBLY_QC.out.filtered_contigs.join(SHORT_READS_QC.out.qc_reads, remainder: false), + SHORT_READS_QC.out.fastp_json ) ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLY_COVERAGE.out.versions)
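
Editor's note on the conf/modules.config hunk above: with check_max() and the --max_cpus/--max_memory/--max_time parameters removed earlier in this series, per-process requests now scale directly with task.attempt. Below is a minimal sketch of the resulting pattern, assuming the old defaults (32 CPUs, 1 TB, 168 h) are reinstated as a site-level cap through Nextflow's resourceLimits directive (available from 24.04, the minimum version this series pins); the cap values are illustrative and not part of the patch.

    // Sketch only: retry-scaled requests plus an assumed global cap replacing check_max()
    process {
        // assumed cap, mirroring the removed --max_* defaults; not part of this patch series
        resourceLimits = [ cpus: 32, memory: 1.TB, time: 168.h ]

        withName: 'CALCULATE_ASSEMBLY_COVERAGE' {
            cpus   = { 1      * task.attempt }
            memory = { 100.MB * task.attempt }
            time   = { 30.m   * task.attempt }
        }
    }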
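
The reworked comment in tests/main.nf.test ("will try with [0.5GB, 0.75GB, 1.13GB, ...] which rounds down to [0, 0, 1, ...]") describes a 1.5x jump in requested assembly memory on every retry. The closure below is a hypothetical reconstruction of that behaviour for illustration only; the process name and directive layout are assumptions, and only the arithmetic comes from the test comment.

    // Hypothetical: 1.5x memory escalation per attempt, starting from params.assembly_memory (GB).
    // With assembly_memory = 0.5 the requests are 0.5, 0.75, 1.125, ... GB; an assembler flag that
    // only accepts whole GB rounds the first two down to 0, so the task fails twice before a retry succeeds.
    process {
        withName: 'SPADES' {                            // assumed process name
            errorStrategy = 'retry'
            maxRetries    = params.max_spades_retries   // 5 in the memory-jumping test shown above
            memory        = { "${params.assembly_memory * (1.5 ** (task.attempt - 1))} GB" as nextflow.util.MemoryUnit }
        }
    }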
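
On the minimap2/align patch earlier in this series: the changed bam_output line swaps the sorted-BAM output for samtools fastq -f 4, which keeps only reads carrying the SAM "unmapped" flag, so the emitted *.minimap.fastq.gz holds the reads that survived screening against the given reference. The runnable Groovy sketch below restates the two ternaries with illustrative stand-in values (sample id, prefix2 and file names are not taken from the patch); note that the patched module tests the interpolated string "${meta.platform}", which is still truthy when platform is null, whereas the sketch checks the raw value.

    // Illustrative stand-ins for the module's inputs
    def meta       = [id: 'sample1', platform: 'ont']
    def prefix     = meta.id
    def prefix2    = 'human_phix'      // assumed: name of the reference being screened against
    def bam_format = true

    def map_mode   = meta.platform ? "-x map-${meta.platform}" : ''   // e.g. -x map-ont
    def bam_output = bam_format
        ? "-a | samtools fastq -f 4 | gzip > ${prefix}.${prefix2}.minimap.fastq.gz"  // unmapped reads only
        : "-o ${prefix}.paf"

    println "minimap2 ${map_mode} reference.fa.gz reads.fastq.gz ${bam_output}"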
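
Finally, the updated seqkit/grep patch derives the output prefix from the decontamination pattern file rather than the bare sample id, presumably so outputs filtered against different pattern files do not overwrite each other. A worked example of that snippet, using illustrative names:

    // Worked example of the prefix logic added to nf-core/seqkit/grep (names are illustrative)
    def pattern_filename = 'human_phix.fa'                    // pattern.getName() in the module
    def pattern_name     = pattern_filename.split('\\.')[0]   // -> 'human_phix'
    def meta             = [id: 'sample1']
    def prefix           = "${meta.id}_${pattern_name}"       // -> 'sample1_human_phix'
    println prefix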