diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml new file mode 100644 index 0000000..62a2d28 --- /dev/null +++ b/.github/workflows/linting.yml @@ -0,0 +1,80 @@ +name: nf-core linting +on: + push: + branches: + - dev + pull_request: + release: + types: [published] + +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Python 3.12 + uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + + - name: Install pre-commit + run: pip install pre-commit + + - name: Run pre-commit + run: pre-commit run --all-files + + nf-core: + runs-on: ubuntu-latest + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + + - uses: actions/setup-python@82c7e631bb3cdc910f68e0081d67478d79c6982d # v5 + with: + python-version: "3.12" + architecture: "x64" + + - name: read .nf-core.yml + uses: pietrobolcato/action-read-yaml@1.1.0 + id: read_yml + with: + config: ${{ github.workspace }}/.nf-core.yml + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install nf-core==${{ steps.read_yml.outputs['nf_core_version'] }} + + - name: Run nf-core pipelines lint + if: ${{ github.base_ref != 'main' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Run nf-core pipelines lint --release + if: ${{ github.base_ref == 'main' }} + env: + GITHUB_COMMENTS_URL: ${{ github.event.pull_request.comments_url }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_PR_COMMIT: ${{ github.event.pull_request.head.sha }} + run: nf-core -l lint_log.txt pipelines lint --release --dir ${GITHUB_WORKSPACE} --markdown lint_results.md + + - name: Save PR number + if: ${{ always() }} + run: echo ${{ github.event.pull_request.number }} > PR_number.txt + + - name: Upload linting log file artifact + if: ${{ always() }} + uses: actions/upload-artifact@65462800fd760344b1a7b4382951275a0abb4808 # v4 + with: + name: linting-logs + path: | + lint_log.txt + lint_results.md + PR_number.txt diff --git a/.github/workflows/ci.yml b/.github/workflows/nf_tests.yml similarity index 63% rename from .github/workflows/ci.yml rename to .github/workflows/nf_tests.yml index b55e0f6..70e3e58 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/nf_tests.yml @@ -1,11 +1,9 @@ name: nf-test CI on: - push: - branches: - - dev pull_request: release: types: [published] + workflow_dispatch: env: NXF_ANSI_LOG: false @@ -15,22 +13,25 @@ jobs: name: Run pipeline with test data runs-on: ubuntu-latest + strategy: + matrix: + # Nextflow versions: check pipeline minimum and current latest + NXF_VER: ["24.04.0"] + steps: - name: Check out pipeline code uses: actions/checkout@v4 - - uses: actions/setup-java@99b8673ff64fbf99d8d325f52d9a5bdedb8483e9 # v4 - with: - distribution: "temurin" - java-version: "17" - - name: Setup Nextflow - uses: nf-core/setup-nextflow@v2 + uses: nf-core/setup-nextflow@v2.0.0 + with: + version: "${{ matrix.NXF_VER }}" - name: Install nf-test uses: nf-core/setup-nf-test@v1 with: - version: 0.9.0 + install-pdiff: true + version: 0.9.2 - name: Run pipeline with test data run: | diff --git a/.nf-core.yml 
b/.nf-core.yml index b48640b..6c1e96b 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -20,6 +20,7 @@ lint: - .github/workflows/ci.yml - .github/workflows/linting_comment.yml - .github/workflows/linting.yml + - .github/workflows/ci.yml - conf/test_full.config - lib/Utils.groovy - lib/WorkflowMain.groovy @@ -32,18 +33,22 @@ lint: - docs/images/nf-core-miassembler_logo_light.png - docs/images/nf-core-miassembler_logo_dark.png - .github/ISSUE_TEMPLATE/bug_report.yml + - .github/PULL_REQUEST_TEMPLATE.md - .github/CONTRIBUTING.md + - .github/workflows/linting.yml - LICENSE - docs/README.md - .gitignore multiqc_config: - report_comment - nextflow_config: False + nextflow_config: - params.input - params.validationSchemaIgnoreParams - params.custom_config_version - params.custom_config_base - manifest.name - manifest.homePage + - custom_config readme: - nextflow_badge +nf_core_version: 3.0.2 diff --git a/README.md b/README.md index 2a01be5..39d05bc 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,6 @@ This pipeline is still in early development. It's mostly a direct port of the mi ## Usage -> [!WARNING] -> It only runs in Codon using Slurm ATM. - Pipeline help: ```bash @@ -28,27 +25,31 @@ Typical pipeline command: Input/output options --study_accession [string] The ENA Study secondary accession --reads_accession [string] The ENA Run primary accession - --private_study [boolean] To use if the ENA study is private + --private_study [boolean] To use if the ENA study is private, *this feature only works on EBI infrastructure at the moment* --samplesheet [string] Path to comma-separated file containing information about the raw reads with the prefix to be used. --assembler [string] The short reads assembler (accepted: spades, metaspades, megahit) --single_end [boolean] Force the single_end value for the study / reads --library_strategy [string] Force the library_strategy value for the study / reads (accepted: metagenomic, metatranscriptomic, genomic, transcriptomic, other) --library_layout [string] Force the library_layout value for the study / reads (accepted: single, paired) + --platform [string] Force the sequencing_platform value for the study / reads --spades_version [string] null [default: 3.15.5] --megahit_version [string] null [default: 1.2.9] - --reference_genome [string] The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics + --flye_version [string] null [default: 2.9] + --reference_genome [string] The genome to be used to clean the assembly, the genome will be taken from the Microbiome Informatics internal directory (accepted: chicken.fna, salmon.fna, cod.fna, pig.fna, cow.fna, mouse.fna, honeybee.fna, rainbow_trout.fna, ...) --blast_reference_genomes_folder [string] The folder with the reference genome blast indexes, defaults to the Microbiome Informatics internal directory. --bwamem2_reference_genomes_folder [string] The folder with the reference genome bwa-mem2 indexes, defaults to the Microbiome Informatics internal + + --reference_genomes_folder [string] The folder with reference genomes, defaults to the Microbiome Informatics internal directory. --remove_human_phix [boolean] Remove human and phiX reads pre assembly, and contigs matching those genomes. [default: true] --human_phix_blast_index_name [string] Combined Human and phiX BLAST db. [default: human_phix] --human_phix_bwamem2_index_name [string] Combined Human and phiX bwa-mem2 index. [default: human_phix] - --min_contig_length [integer] Minimum contig length filter. 
[default: 500] - --min_contig_length_metatranscriptomics [integer] Minimum contig length filter for metaT. [default: 200] + --short_reads_min_contig_length [integer] Minimum contig length filter. [default: 500] + --short_reads_min_contig_length_metat [integer] Minimum contig length filter for metaT. [default: 200] --assembly_memory [integer] Default memory allocated for the assembly process. [default: 100] --spades_only_assembler [boolean] Run SPAdes/metaSPAdes without the error correction step. [default: true] --outdir [string] The output directory where the results will be saved. You have to use absolute paths to storage on Cloud @@ -72,6 +73,37 @@ nextflow run ebi-metagenomics/miassembler \ --reads_accession SRR1631361 ``` +### Required DBs + +- `--reference_genome`: reference genome in FASTA format +- `--blast_reference_genomes_folder`: the mandatory **human_phiX** BLAST database is provided on the [FTP](https://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/references/) +- `--bwamem2_reference_genomes_folder`: the mandatory **human_phiX** bwa-mem2 index is provided on the [FTP](https://ftp.ebi.ac.uk/pub/databases/metagenomics/pipelines/references/) + +BLAST and bwa-mem2 reference databases can be generated for any reference genome you want to use to clean the input reads and assemblies. + +#### BWA-MEM2 + +As explained in [bwa-mem2's README](https://github.com/bwa-mem2/bwa-mem2?tab=readme-ov-file#getting-started): + +``` +# Use precompiled binaries (recommended) +curl -L https://github.com/bwa-mem2/bwa-mem2/releases/download/v2.2.1/bwa-mem2-2.2.1_x64-linux.tar.bz2 \ + | tar jxf - + +# Index your reference genome with +bwa-mem2-2.2.1_x64-linux/bwa-mem2 index ref.fa +``` + +This will generate multiple index files in a folder. The folder containing them is the one to use as `bwamem2_reference_genomes_folder`. + +#### BLAST + +``` +makeblastdb -in <reference_genome.fasta> -dbtype nucl -out <reference_genome> +``` + +As with bwa-mem2, several index files will be generated in the same folder, which should then be used as `blast_reference_genomes_folder`. + ### Samplesheet The samplesheet is a comma-separated file (.csv) with the following columns: @@ -115,6 +147,18 @@ PRJ1,ERR1,/path/to/reads/ERR1_1.fq.gz,/path/to/reads/ERR1_2.fq.gz,paired,metagen PRJ2,ERR2,/path/to/reads/ERR2.fq.gz,,single,genomic,megahit,32 ``` +### ENA Private Data + +The pipeline includes a module to download private data from ENA using the EMBL-EBI FIRE (File Replication) system. This system is restricted for use within the EMBL-EBI network and will not work unless connected to that network. + +If you have private data to assemble, you must provide the full path to the files on a system that Nextflow can access. + +#### Microbiome Informatics Team + +To process private data, the pipeline should be launched with the `--private_study` flag, and the samplesheet must include the private FTP (transfer services) paths. The `download_from_fire` module will be used to download the files. + +This module uses [Nextflow secrets](https://www.nextflow.io/docs/latest/secrets.html#how-it-works). Specifically, it requires the `FIRE_ACCESS_KEY` and `FIRE_SECRET_KEY` secrets to authenticate and download the files, as shown in the example below. +
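A minimal sketch of registering these secrets with the Nextflow CLI before launching the pipeline (the key values below are placeholders; the `nextflow secrets` commands assume a Nextflow release with secrets support):

```bash
# Register the FIRE credentials as Nextflow secrets (placeholder values)
nextflow secrets set FIRE_ACCESS_KEY "<your-fire-access-key>"
nextflow secrets set FIRE_SECRET_KEY "<your-fire-secret-key>"

# Check which secrets are registered
nextflow secrets list
```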
## Outputs The outputs of the pipeline are organized as follows: @@ -225,15 +269,15 @@ Runs that fail QC checks are excluded from the assembly process. These runs are Example: ```csv -SRR6180434,filter_ratio_threshold_exceeded +SRR6180434,short_reads_filter_ratio_threshold_exceeded ``` ##### Runs exclusion messages | Exclusion Message | Description | | --------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `filter_ratio_threshold_exceeded` | The maximum fraction of reads that are allowed to be filtered out. If exceeded, it flags excessive filtering. The default value is 0.9, meaning that if more than 90% of the reads are filtered out, the threshold is considered exceeded, and the run is not assembled. | -| `low_reads_count_threshold` | The minimum number of reads required after filtering. If below, it flags a low read count, and the run is not assembled. | +| Exclusion Message | Description | +| --------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `short_reads_filter_ratio_threshold_exceeded` | The threshold on the fraction of reads retained after filtering, used to flag excessive filtering. The default value is 0.1, meaning that if less than 10% of the reads are retained after filtering, the threshold is considered exceeded, and the run is not assembled. | +| `short_reads_low_reads_count_threshold` | The minimum number of reads required after filtering. If below, it flags a low read count, and the run is not assembled. | #### Assembled Runs diff --git a/assets/multiqc_config.yml b/assets/multiqc_config.yml index 2986e13..b9feb24 100644 --- a/assets/multiqc_config.yml +++ b/assets/multiqc_config.yml @@ -3,12 +3,12 @@ report_comment: > analysis pipeline. report_section_order: - "software_versions": - order: -1000 "ebi-metagenomics-miassembler-methods-description": order: -1001 - "ebi-metagenomics-miassembler-summary": + "software_versions": order: -1002 + "ebi-metagenomics-miassembler-summary": + order: -1003 export_plots: true diff --git a/assets/schema_input.json b/assets/schema_input.json index 84444d1..5da904b 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -47,6 +47,9 @@ "enum": ["metagenomic", "metatranscriptomic", "genomic", "transcriptomic", "other"], "errorMessage": "library strategy should be only value from list: 'metagenomic', 'metatranscriptomic', 'genomic', 'transcriptomic', 'other'" }, + "platform": { + "type": "string" + }, "assembler": { "type": "string", "enum": ["spades", "metaspades", "megahit"], @@ -57,6 +60,9 @@ "type": "integer", "default": null, "description": "Default memory (in GB) allocated for the assembly process for the run."
+ }, + "assembler_config": { + "type": "string" } }, "required": ["study_accession", "reads_accession", "fastq_1", "library_layout", "library_strategy"] diff --git a/bin/s3fire_downloader.py b/bin/s3fire_downloader.py new file mode 100755 index 0000000..5291d3c --- /dev/null +++ b/bin/s3fire_downloader.py @@ -0,0 +1,133 @@ +#!/usr/bin/env python + +import argparse +import logging +from typing import Optional, Tuple, List +import os + +import boto3 +from botocore import UNSIGNED +from botocore.config import Config + + +FIRE_ENDPOINT: str = "https://hl.fire.sdo.ebi.ac.uk" +PUBLIC_BUCKET: str = "era-public" +PRIVATE_BUCKET: str = "era-private" + + +logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s") +logger = logging.getLogger(__name__) + + +def transform_ftp_to_s3(ftp_path: str) -> Tuple[str, str]: + """ + Transforms an FTP path to a FIRE S3 object key, it also returns if it's public or private. + + :param ftp_path: The FTP path of the file to be transformed. + :type ftp_path: str + :return: A tuple containing the S3 object key and the corresponding bucket name. + :rtype: Tuple[str, str] + :raises ValueError: If the FTP path does not match the expected format. + """ + if ftp_path.startswith("ftp.sra.ebi.ac.uk/vol1/"): + s3_key = ftp_path.replace("ftp.sra.ebi.ac.uk/vol1/", "") + logger.info(f"Detected a public file for FTP path: {ftp_path}") + return s3_key, PUBLIC_BUCKET + elif ftp_path.startswith("ftp.dcc-private.ebi.ac.uk/vol1/"): + s3_key = ftp_path.replace("ftp.dcc-private.ebi.ac.uk/vol1/", "") + logger.info(f"Detected a private file for FTP path: {ftp_path}") + return s3_key, PRIVATE_BUCKET + else: + raise ValueError( + f"Invalid FTP path: {ftp_path}. Must start with 'ftp.sra.ebi.ac.uk/vol1/' or 'ftp.dcc-private.ebi.ac.uk/vol1/'." + ) + + +def download_file_from_fire( + s3_key: str, bucket: str, outdir: str, access_key: Optional[str] = None, secret_key: Optional[str] = None +) -> None: + """ + Downloads an individual file from FIRE S3 using its object key. + + :param s3_key: The S3 object key of the file to download. + :type s3_key: str + :param bucket: The name of the S3 bucket. + :type bucket: str + :param outdir: The local directory to save the downloaded file. + :type outdir: str + :param access_key: The access key for private S3 buckets (optional for public files). + :type access_key: Optional[str] + :param secret_key: The secret key for private S3 buckets (optional for public files). + :type secret_key: Optional[str] + :return: None + :rtype: None + :raises ValueError: If credentials are missing for private files. + :raises Exception: For other download errors. 
+ """ + s3_args = {"endpoint_url": FIRE_ENDPOINT} + if bucket == PRIVATE_BUCKET: + if not access_key or not secret_key: + logger.error("Missing credentials for private files.") + raise ValueError("Access key and secret key are required for private files.") + s3_args.update( + aws_access_key_id=access_key, + aws_secret_access_key=secret_key, + ) + else: + # Public bucket configuration with unsigned requests + s3_args.update({"config": Config(signature_version=UNSIGNED)}) + + s3 = boto3.client("s3", **s3_args) + + os.makedirs(outdir, exist_ok=True) + local_file_path = os.path.join(outdir, os.path.basename(s3_key)) + + try: + logger.info(f"Downloading {s3_key} from S3 bucket {bucket} to {local_file_path}...") + s3.download_file(bucket, s3_key, local_file_path) + logger.info(f"File successfully downloaded to: {local_file_path}") + except Exception as e: + logger.error(f"Error downloading file from S3: {e}") + raise + + +def download_files(ftp_paths: List[str], outdir: str, access_key: Optional[str], secret_key: Optional[str]) -> None: + """ + Downloads multiple files from their FTP paths. + + :param ftp_paths: List of FTP paths to download. + :type ftp_paths: List[str] + :param outdir: Directory to save the downloaded files. + :type outdir: str + :param access_key: Access key for private files. + :type access_key: Optional[str] + :param secret_key: Secret key for private files. + :type secret_key: Optional[str] + """ + for ftp_path in ftp_paths: + s3_key, bucket = transform_ftp_to_s3(ftp_path) + download_file_from_fire(s3_key, bucket, outdir, access_key, secret_key) + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Download multiple files from FTP paths via FIRE S3 (supports public and private files)." + ) + parser.add_argument( + "--ftp-paths", + nargs="+", + required=True, + help="Space-separated list of FTP paths to download (e.g., ftp.sra.ebi.ac.uk/vol1/.../file1 ftp.sra.ebi.ac.uk/vol1/.../file2).", + ) + parser.add_argument("--outdir", required=True, help="Local destination directory for the downloaded files.") + parser.add_argument("--access-key", required=False, help="S3 access key (required for private files).") + parser.add_argument("--secret-key", required=False, help="S3 secret key (required for private files).") + args = parser.parse_args() + + logger.info("Starting the file download process...") + download_files(args.ftp_paths, args.outdir, args.access_key, args.secret_key) + logger.info("All files have been processed.") + + +if __name__ == "__main__": + main() diff --git a/conf/base.config b/conf/base.config index aff79f6..7170d21 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,9 +10,15 @@ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + resourceLimits = [ + cpus: 32, + memory: '1.TB', + time: '168.h' + ] + + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } errorStrategy = { task.exitStatus in ((130..155) + 104) ? 
'retry' : 'finish' } maxRetries = 1 diff --git a/conf/codon_slurm.config b/conf/codon_slurm.config index 541a69d..c658798 100644 --- a/conf/codon_slurm.config +++ b/conf/codon_slurm.config @@ -1,4 +1,5 @@ params { + reference_genomes_folder = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/bwa-mem2/" bwamem2_reference_genomes_folder = "/hps/nobackup/rdf/metagenomics/service-team/ref-dbs/bwa-mem2/" blast_reference_genomes_folder = "/nfs/production/rdf/metagenomics/pipelines/prod/assembly-pipeline/blast_dbs/" human_phix_blast_index_name = "human_phix" @@ -11,7 +12,6 @@ executor { queueGlobalStatus = true submitRateLimit = "10 sec" pollInterval = "10 sec" - } conda.enabled = false diff --git a/conf/modules.config b/conf/modules.config index 42a0c54..4a2209e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -13,17 +13,17 @@ process { withName: 'FETCHTOOL*' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } ext.args = params.private_study ? "--private" : "" } - withName: 'FASTP' { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + withName: 'FASTP*' { + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } publishDir = [ [ path: "${params.outdir}", @@ -50,10 +50,20 @@ process { ] } + withName: 'FASTP_LR' { + ext.args = [ + '--average_qual', + '10', + '--length_required', + "${params.long_reads_min_read_length}", + '--disable_adapter_trimming' + ].join(' ').trim() + } + withName: 'FASTQC' { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } publishDir = [ [ path: "${params.outdir}", @@ -74,9 +84,9 @@ process { // This BWAMEM2_MEM belongs to the coverage module withName: 'BWAMEM2_MEM_COVERAGE' { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 20.GB * task.attempt, 'memory' ) } - time = { check_max( 1.h * task.attempt, 'time' ) } + cpus = { 12 * task.attempt } + memory = { 20.GB * task.attempt } + time = { 1.h * task.attempt } ext.args = "-M" ext.args2 = "-F 268 -uS" @@ -84,18 +94,59 @@ process { /* Decontamination */ withName: 'BWAMEM2DECONTNOBAMS' { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + time = { 8.h * task.attempt } ext.prefix = "decontaminated" } - withName: 'HUMAN_PHIX_DECONTAMINATION' { - memory = { check_max( 64.GB * task.attempt, 'memory' ) } + withName: 'HUMAN*_DECONTAMINATION' { + memory = { 64.GB * task.attempt } } withName: 'HOST_DECONTAMINATION' { - memory = { check_max( 24.GB * task.attempt, 'memory' ) } + memory = { 24.GB * task.attempt } + } + + withName: 'CANU*' { + cpus = { 4 } + memory = { 3.GB * task.attempt } + time = { 4.h * task.attempt } + + ext.args = [ + '-trim', + '-corrected', + 'corMinCoverage=0', + 'stopOnLowCoverage=0', + 'minInputCoverage=0', + 'maxInputCoverage=10000', + 'corOutCoverage=all', + 'corMhapSensitivity=high', + 'corMaxEvidenceCoverageLocal=10', + 'corMaxEvidenceCoverageGlobal=10', + 'oeaMemory=10', + 'redMemory=10', + 'batMemory=10', + ].join(' ').trim() } + + withName: 
'CANU_ONT' { + ext.args2 = [ + 'correctedErrorRate=0.16', + ].join(' ').trim() + } + + withName: 'CANU_PACBIO' { + ext.args2 = [ + 'correctedErrorRate=0.105', + ].join(' ').trim() + } + + withName: 'PORECHOP_ABI' { + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + } + /* --------- */ /* Assembly */ @@ -103,12 +154,14 @@ process { // We increase the memory 50% with each try memory = { def assembly_memory = meta.assembly_memory ?: params.assembly_memory; - check_max( assembly_memory.GB + ( assembly_memory.GB * 0.5 * ( task.attempt - 1 ) ), 'memory') + assembly_memory.GB + ( assembly_memory.GB * 0.5 * ( task.attempt - 1 ) ) } - cpus = { check_max( 32 * task.attempt, 'cpus') } + cpus = { 32 * task.attempt } // TODO: tweak this based on input ( using the biome maybe? ) - time = { check_max( 168.h * task.attempt, 'time') } + time = { 168.h * task.attempt } ext.args = params.spades_only_assembler ? "--only-assembler" : "" + errorStrategy = 'retry' + maxRetries = params.max_spades_retries publishDir = [ [ @@ -141,10 +194,12 @@ process { withName: 'MEGAHIT' { memory = { def assembly_memory = meta.assembly_memory ?: params.assembly_memory; - check_max( assembly_memory.GB + ( assembly_memory.GB * 0.5 * ( task.attempt - 1 ) ), 'memory') + assembly_memory.GB + ( assembly_memory.GB * 0.5 * ( task.attempt - 1 ) ) } - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { 12 * task.attempt } + time = { 16.h * task.attempt } + errorStrategy = 'retry' + maxRetries = params.max_megahit_retries publishDir = [ [ @@ -163,15 +218,15 @@ process { } withName: 'SEQKIT_SEQ' { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } } withName: 'BLAST_BLASTN*' { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } ext.args = [ '-task', @@ -219,17 +274,17 @@ process { } withName: 'SEQKIT_GREP' { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } ext.args = "--invert-match" } // Dummy process to published the filtered and decontaminated contigs withName: 'PUBLISH_CLEANED_CONTIGS' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 250.MB * task.attempt , 'memory' ) } - time = { check_max( 30.m * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 250.MB * task.attempt } + time = { 30.m * task.attempt } publishDir = [ [ path: "${params.outdir}", @@ -245,15 +300,15 @@ process { } withName: 'BWAMEM2_INDEX' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 16.GB * task.attempt, 'memory' ) } - time = { check_max( 6.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 16.GB * task.attempt } + time = { 6.h * task.attempt } } withName: 'METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS' { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * 
task.attempt } + time = { 8.h * task.attempt } publishDir = [ [ path: "${params.outdir}", @@ -270,9 +325,9 @@ process { } withName: 'CALCULATE_ASSEMBLY_COVERAGE' { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 100.MB * task.attempt, 'memory' ) } - time = { check_max( 30.m * task.attempt, 'time' ) } + cpus = { 1 * task.attempt } + memory = { 100.MB * task.attempt } + time = { 30.m * task.attempt } publishDir = [ [ path: "${params.outdir}", @@ -289,15 +344,15 @@ process { } withName: 'SAMTOOLS_IDXSTATS' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } } withName: 'CUSTOM_DUMPSOFTWAREVERSIONS' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } publishDir = [ [ @@ -309,9 +364,9 @@ process { } withName: 'MULTIQC_STUDY' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } @@ -334,9 +389,9 @@ process { } withName: 'MULTIQC_RUN' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } @@ -359,9 +414,9 @@ process { } withName: 'QUAST' { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } publishDir = [ [ diff --git a/conf/test.config b/conf/test.config index 848e884..822eaeb 100644 --- a/conf/test.config +++ b/conf/test.config @@ -13,31 +13,25 @@ profiles { test { + process { + resourceLimits = [ + cpus: 2, + memory: 6.GB, + time: 1.h + ] + } params { - max_cpus = 4 - max_memory = '8.GB' - max_time = '6.h' - - samplesheet = "tests/samplesheet/test.csv" + bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" + blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + reference_genomes_folder = "${projectDir}/tests/human/" - bwamem2_reference_genomes_folder = "tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "tests/human_phix/blast" - human_phix_blast_index_name = "human_phix" - human_phix_bwamem2_index_name = "human_phix" + max_spades_retries = -1 + max_megahit_retries = -1 } - } - // Limit resources so that this can run on GitHub Actions - test_ci { - params { - max_cpus = 2 - max_memory = '6.GB' - max_time = '6.h' - - bwamem2_reference_genomes_folder = "tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "tests/human_phix/blast" - human_phix_blast_index_name = "human_phix" - human_phix_bwamem2_index_name = "human_phix" + process { + errorStrategy = 'ignore' + maxRetries = 0 } } } diff --git a/main.nf b/main.nf index f1d5494..b2321bd 100644 --- a/main.nf +++ b/main.nf @@ -15,25 +15,7 @@ nextflow.enable.dsl = 2 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsHelp; paramsSummaryLog; paramsSummaryMap; } from 'plugin/nf-schema' - -def summary_params = paramsSummaryMap(workflow) - -if (params.help) { - log.info paramsHelp("nextflow run ebi-metagenomics/miassembler --help") - exit 0 -} - -validateParameters() - -// Custom validation // -// The conditional validation doesn't work yet -> https://github.com/nf-core/tools/issues/2619 -if ( !params.samplesheet && ( !params.study_accession || !params.reads_accession ) ) { - error "Either --samplesheet or both --study_accession and --reads_accession are required." - exit 1 -} - -log.info paramsSummaryLog(workflow) +include { validateParameters } from 'plugin/nf-schema' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -61,6 +43,16 @@ workflow EBIMETAGENOMICS_MIASSEMBLER { // See: https://github.com/nf-core/rnaseq/issues/619 // workflow { + + validateParameters() + + // Custom validation // + // The conditional validation doesn't work yet -> https://github.com/nf-core/tools/issues/2619 + if ( !params.samplesheet && ( !params.study_accession || !params.reads_accession ) ) { + error "Either --samplesheet or both --study_accession and --reads_accession are required." + exit 1 + } + EBIMETAGENOMICS_MIASSEMBLER () } diff --git a/modules.json b/modules.json index 54f81f3..2b2ccea 100644 --- a/modules.json +++ b/modules.json @@ -8,7 +8,8 @@ "bwamem2/mem": { "branch": "main", "git_sha": "75707538d91ddd27fb6007b4ac3710cb05154780", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff" }, "bwamem2decontnobams": { "branch": "main", @@ -23,7 +24,7 @@ "nf-core": { "blast/blastn": { "branch": "master", - "git_sha": "209e5a3e2753c5e628736a662c877c20f341ee15", + "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"], "patch": "modules/nf-core/blast/blastn/blast-blastn.diff" }, @@ -32,6 +33,12 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "canu": { + "branch": "master", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"], + "patch": "modules/nf-core/canu/canu.diff" + }, "custom/dumpsoftwareversions": { "branch": "master", "git_sha": "82024cf6325d2ee194e7f056d841ecad2f6856e9", @@ -39,19 +46,28 @@ }, "fastp": { "branch": "master", - "git_sha": "95cf5fe0194c7bf5cb0e3027a2eb7e7c89385080", + "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", "installed_by": ["modules"], "patch": "modules/nf-core/fastp/fastp.diff" }, "fastqc": { "branch": "master", - "git_sha": "65ad3e0b9a4099592e1102e92e10455dc661cf53", - "installed_by": ["modules"], - "patch": "modules/nf-core/fastqc/fastqc.diff" + "git_sha": "21f230b8cca43755bf73470e6fd0290832a98aef", + "installed_by": ["modules"] + }, + "flye": { + "branch": "master", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"] + }, + "medaka": { + "branch": "master", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"] }, "megahit": { "branch": "master", - "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "git_sha": "7755db15e36b30da564cd67fffdfe18a255092aa", "installed_by": ["modules"], "patch": "modules/nf-core/megahit/megahit.diff" }, @@ -60,9 +76,21 @@ "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] }, + "minimap2/align": { + 
"branch": "master", + "git_sha": "a33ef9475558c6b8da08c5f522ddaca1ec810306", + "installed_by": ["modules"], + "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" + }, "multiqc": { "branch": "master", - "git_sha": "314d742bdb357a1df5f9b88427b3b6ac78aa33f7", + "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", + "installed_by": ["modules"], + "patch": "modules/nf-core/multiqc/multiqc.diff" + }, + "porechop/abi": { + "branch": "master", + "git_sha": "870f9af2eaf0000c94d74910d762cf153752af98", "installed_by": ["modules"] }, "quast": { @@ -71,6 +99,11 @@ "installed_by": ["modules"], "patch": "modules/nf-core/quast/quast.diff" }, + "racon": { + "branch": "master", + "git_sha": "8fc1d24c710ebe1d5de0f2447ec9439fd3d9d66a", + "installed_by": ["modules"] + }, "samtools/idxstats": { "branch": "master", "git_sha": "a64788f5ad388f1d2ac5bd5f1f3f8fc81476148c", diff --git a/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff b/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff new file mode 100644 index 0000000..759865c --- /dev/null +++ b/modules/ebi-metagenomics/bwamem2/mem/bwamem2-mem.diff @@ -0,0 +1,29 @@ +Changes in module 'ebi-metagenomics/bwamem2/mem' +'modules/ebi-metagenomics/bwamem2/mem/environment.yml' is unchanged +Changes in 'bwamem2/mem/main.nf': +--- modules/ebi-metagenomics/bwamem2/mem/main.nf ++++ modules/ebi-metagenomics/bwamem2/mem/main.nf +@@ -7,8 +7,7 @@ + 'biocontainers/mulled-v2-e5d375990341c5aef3c9aff74f96f66f65375ef6:2d15960ccea84e249a150b7f5d4db3a42fc2d6c3-0' }" + + input: +- tuple val(meta), path(reads) +- tuple val(meta2), path(index) ++ tuple val(meta), path(reads), path(index) + + output: + tuple val(meta), path("*_sorted.bam"), path("*_sorted.bam.bai"), emit: bam +@@ -21,7 +20,6 @@ + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: meta.id +- def database = task.ext.database ?: meta2.id + """ + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` + + +'modules/ebi-metagenomics/bwamem2/mem/meta.yml' is unchanged +'modules/ebi-metagenomics/bwamem2/mem/tests/tags.yml' is unchanged +'modules/ebi-metagenomics/bwamem2/mem/tests/main.nf.test.snap' is unchanged +'modules/ebi-metagenomics/bwamem2/mem/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/local/download_from_fire.nf b/modules/local/download_from_fire.nf new file mode 100644 index 0000000..b8c7242 --- /dev/null +++ b/modules/local/download_from_fire.nf @@ -0,0 +1,49 @@ +process DOWNLOAD_FROM_FIRE { + + secret 'FIRE_ACCESS_KEY' + secret 'FIRE_SECRET_KEY' + + tag "${meta.id}" + + label 'process_single' + + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'oras://community.wave.seqera.io/library/boto3:1.35.37--a82b4d378d332259' : + 'community.wave.seqera.io/library/pip_boto3:501beb4bd409b3e1' }" + + input: + tuple val(meta), val(input_reads) + + output: + tuple val(meta), path("fastq_files/*fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + script: + """ + s3fire_downloader.py \\ + --access-key \${FIRE_ACCESS_KEY} \\ + --secret-key \${FIRE_SECRET_KEY} \\ + --ftp-paths ${input_reads.join(" ")} \\ + --outdir fastq_files + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | sed 's/Python //g') + boto3: \$(python -c "import boto3; print(boto3.__version__)") + END_VERSIONS + """ + + stub: + """ + mkdir -p fastq_files + touch fastq_files/${meta.id}_1.fastq + touch fastq_files/${meta.id}_2.fastq + gzip fastq_files/* + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version 2>&1 | sed 's/Python //g') + boto3: \$(python -c "import boto3; print(boto3.__version__)") + END_VERSIONS + """ +} diff --git a/modules/local/fetchtool_reads.nf b/modules/local/fetchtool_reads.nf index 129e452..3dbeae1 100644 --- a/modules/local/fetchtool_reads.nf +++ b/modules/local/fetchtool_reads.nf @@ -3,17 +3,17 @@ process FETCHTOOL_READS { label 'process_single' - container "quay.io/microbiome-informatics/fetch-tool:v1.0.0rc" + container "quay.io/microbiome-informatics/fetch-tool:v1.0.2" input: tuple val(meta), val(study_accession), val(reads_accession) path fetchtool_config output: - tuple val(meta), path("download_folder/${study_accession}/raw/${reads_accession}*.fastq.gz"), env(library_strategy), env(library_layout), emit: reads + tuple val(meta), path("download_folder/${study_accession}/raw/${reads_accession}*.fastq.gz"), env(library_strategy), env(library_layout), env(platform), emit: reads // The '_mqc.' 
is for multiQC - tuple val(meta), path("download_folder/${study_accession}/${study_accession}.txt") , emit: metadata_tsv - path "versions.yml" , emit: versions + tuple val(meta), path("download_folder/${study_accession}/${study_accession}.txt") , emit: metadata_tsv + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -32,6 +32,15 @@ process FETCHTOOL_READS { library_strategy=\$(echo "\$(grep ${reads_accession} download_folder/${study_accession}/${study_accession}.txt | cut -f 7)" | tr '[:upper:]' '[:lower:]') library_layout=\$(echo "\$(grep ${reads_accession} download_folder/${study_accession}/${study_accession}.txt | cut -f 5)" | tr '[:upper:]' '[:lower:]') + export metadata_platform=\$(echo "\$(grep ${reads_accession} download_folder/${study_accession}/${study_accession}.txt | cut -f 8)" | tr '[:upper:]' '[:lower:]') + if [[ \$metadata_platform == "minion" || \$metadata_platform == "promethion" || \$metadata_platform == "gridion" ]]; then + platform="ont" + elif [[ \$metadata_platform == "pacbio rs" || \$metadata_platform == "pacbio rs ii" ]]; then + platform="pacbio" + else + platform="\$metadata_platform" + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": fetch-tool: \$(fetch-read-tool --version) @@ -53,4 +62,4 @@ process FETCHTOOL_READS { fetch-tool: \$(fetch-read-tool --version) END_VERSIONS """ -} +} \ No newline at end of file diff --git a/modules/nf-core/blast/blastn/blast-blastn.diff b/modules/nf-core/blast/blastn/blast-blastn.diff index 888e64e..e596c33 100644 --- a/modules/nf-core/blast/blastn/blast-blastn.diff +++ b/modules/nf-core/blast/blastn/blast-blastn.diff @@ -1,4 +1,6 @@ Changes in module 'nf-core/blast/blastn' +'modules/nf-core/blast/blastn/environment.yml' is unchanged +Changes in 'blast/blastn/main.nf': --- modules/nf-core/blast/blastn/main.nf +++ modules/nf-core/blast/blastn/main.nf @@ -20,7 +20,7 @@ @@ -11,4 +13,9 @@ Changes in module 'nf-core/blast/blastn' def fasta_name = is_compressed ? fasta.getBaseName() : fasta +'modules/nf-core/blast/blastn/meta.yml' is unchanged +'modules/nf-core/blast/blastn/tests/tags.yml' is unchanged +'modules/nf-core/blast/blastn/tests/nextflow.config' is unchanged +'modules/nf-core/blast/blastn/tests/main.nf.test.snap' is unchanged +'modules/nf-core/blast/blastn/tests/main.nf.test' is unchanged ************************************************************ diff --git a/modules/nf-core/blast/blastn/environment.yml b/modules/nf-core/blast/blastn/environment.yml index cb9b15d..777e097 100644 --- a/modules/nf-core/blast/blastn/environment.yml +++ b/modules/nf-core/blast/blastn/environment.yml @@ -1,7 +1,5 @@ -name: blast_blastn channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::blast=2.14.1 + - bioconda::blast=2.15.0 diff --git a/modules/nf-core/blast/blastn/main.nf b/modules/nf-core/blast/blastn/main.nf index 9b44592..587e799 100644 --- a/modules/nf-core/blast/blastn/main.nf +++ b/modules/nf-core/blast/blastn/main.nf @@ -4,8 +4,8 @@ process BLAST_BLASTN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/blast:2.14.1--pl5321h6f7f691_0': - 'biocontainers/blast:2.14.1--pl5321h6f7f691_0' }" + 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': + 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" input: tuple val(meta) , path(fasta) diff --git a/modules/nf-core/blast/blastn/meta.yml b/modules/nf-core/blast/blastn/meta.yml index a0d64dd..0f5e41b 100644 --- a/modules/nf-core/blast/blastn/meta.yml +++ b/modules/nf-core/blast/blastn/meta.yml @@ -13,39 +13,42 @@ tools: documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs doi: 10.1016/S0022-2836(05)80360-2 licence: ["US-Government-Work"] + identifier: "" input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: Input fasta file containing queries sequences - pattern: "*.{fa,fasta,fa.gz,fasta.gz}" - - meta2: - type: map - description: | - Groovy Map containing db information - e.g. [ id:'test2', single_end:false ] - - db: - type: directory - description: Directory containing the blast database - pattern: "*" + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing queries sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - - meta2: + type: map + description: | + Groovy Map containing db information + e.g. [ id:'test2', single_end:false ] + - db: + type: directory + description: Directory containing the blast database + pattern: "*" output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - txt: - type: file - description: File containing blastn hits - pattern: "*.txt" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.txt": + type: file + description: File containing blastn hits + pattern: "*.txt" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@joseespinosa" - "@drpatelh" diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test b/modules/nf-core/blast/blastn/tests/main.nf.test index 02ecfab..aacc93c 100644 --- a/modules/nf-core/blast/blastn/tests/main.nf.test +++ b/modules/nf-core/blast/blastn/tests/main.nf.test @@ -15,7 +15,7 @@ nextflow_process { script "../../makeblastdb/main.nf" process { """ - input[0] = [ [id:'test2'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test2'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] """ } } @@ -29,7 +29,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ] input[1] = BLAST_MAKEBLASTDB.out.db """ } @@ -53,7 +53,7 @@ nextflow_process { } process { """ - input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) ] + input[0] = [ [id:'test'], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) ] input[1] = BLAST_MAKEBLASTDB.out.db """ } diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test.snap b/modules/nf-core/blast/blastn/tests/main.nf.test.snap index d1b5f3f..dd8b775 100644 --- a/modules/nf-core/blast/blastn/tests/main.nf.test.snap +++ b/modules/nf-core/blast/blastn/tests/main.nf.test.snap @@ -2,7 +2,7 @@ "versions": { "content": [ [ - "versions.yml:md5,2d5ffadc7035672f6a9e00b01d1751ea" + "versions.yml:md5,faf2471d836ebbf24d96d3e1f8720b17" ] ], "timestamp": "2023-12-11T07:20:03.54997013" @@ -10,7 +10,7 @@ "versions_zipped": { "content": [ [ - "versions.yml:md5,2d5ffadc7035672f6a9e00b01d1751ea" + "versions.yml:md5,faf2471d836ebbf24d96d3e1f8720b17" ] ], "timestamp": "2023-12-11T07:20:12.925782708" diff --git a/modules/nf-core/canu/canu.diff b/modules/nf-core/canu/canu.diff new file mode 100644 index 0000000..1e6aba1 --- /dev/null +++ b/modules/nf-core/canu/canu.diff @@ -0,0 +1,37 @@ +Changes in module 'nf-core/canu' +'modules/nf-core/canu/environment.yml' is unchanged +Changes in 'canu/main.nf': +--- modules/nf-core/canu/main.nf ++++ modules/nf-core/canu/main.nf +@@ -15,7 +15,7 @@ + output: + tuple val(meta), path("*.report") , emit: report + tuple val(meta), path("*.contigs.fasta.gz") , emit: assembly , optional: true +- tuple val(meta), path("*.unassembled.fasta.gz") , emit: contigs ++ tuple val(meta), path("*.unassembled.fasta.gz") , emit: contigs , optional: true + tuple val(meta), path("*.correctedReads.fasta.gz") , emit: corrected_reads , optional: true + tuple val(meta), path("*.trimmedReads.fasta.gz") , emit: corrected_trimmed_reads , optional: true + tuple val(meta), path("*.contigs.layout") , emit: metadata , optional: true +@@ -28,6 +28,7 @@ + + script: + def args = task.ext.args ?: '' ++ def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def valid_mode = ["-pacbio", "-nanopore", "-pacbio-hifi"] + if ( !valid_mode.contains(mode) ) { error "Unrecognised mode to 
run Canu. Options: ${valid_mode.join(', ')}" } +@@ -37,10 +38,9 @@ + $mode \\ + genomeSize=${genomesize} \\ + $args \\ ++ $args2 \\ + maxThreads=$task.cpus \\ + $reads +- +- gzip *.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + +'modules/nf-core/canu/meta.yml' is unchanged +************************************************************ diff --git a/modules/nf-core/canu/environment.yml b/modules/nf-core/canu/environment.yml new file mode 100644 index 0000000..7b601cb --- /dev/null +++ b/modules/nf-core/canu/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::canu=2.2 diff --git a/modules/nf-core/canu/main.nf b/modules/nf-core/canu/main.nf new file mode 100644 index 0000000..7c5deab --- /dev/null +++ b/modules/nf-core/canu/main.nf @@ -0,0 +1,50 @@ +process CANU { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/canu:2.2--ha47f30e_0': + 'biocontainers/canu:2.2--ha47f30e_0' }" + + input: + tuple val(meta), path(reads) + val mode + val genomesize + + output: + tuple val(meta), path("*.report") , emit: report + tuple val(meta), path("*.contigs.fasta.gz") , emit: assembly , optional: true + tuple val(meta), path("*.unassembled.fasta.gz") , emit: contigs , optional: true + tuple val(meta), path("*.correctedReads.fasta.gz") , emit: corrected_reads , optional: true + tuple val(meta), path("*.trimmedReads.fasta.gz") , emit: corrected_trimmed_reads , optional: true + tuple val(meta), path("*.contigs.layout") , emit: metadata , optional: true + tuple val(meta), path("*.contigs.layout.readToTig") , emit: contig_position , optional: true + tuple val(meta), path("*.contigs.layout.tigInfo") , emit: contig_info , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def valid_mode = ["-pacbio", "-nanopore", "-pacbio-hifi"] + if ( !valid_mode.contains(mode) ) { error "Unrecognised mode to run Canu. Options: ${valid_mode.join(', ')}" } + """ + canu \\ + -p ${prefix} \\ + $mode \\ + genomeSize=${genomesize} \\ + $args \\ + $args2 \\ + maxThreads=$task.cpus \\ + $reads + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + canu: \$(echo \$(canu --version 2>&1) | sed 's/^.*canu //; s/Using.*\$//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/canu/meta.yml b/modules/nf-core/canu/meta.yml new file mode 100644 index 0000000..2feed43 --- /dev/null +++ b/modules/nf-core/canu/meta.yml @@ -0,0 +1,79 @@ +name: "canu" +description: Accurate assembly of segmental duplications, satellites, and allelic variants from high-fidelity long reads. +keywords: + - Assembly + - pacbio + - hifi + - nanopore +tools: + - "canu": + description: "Canu is a fork of the Celera Assembler designed for high-noise single-molecule sequencing." + homepage: "https://canu.readthedocs.io/en/latest/index.html#" + documentation: "https://canu.readthedocs.io/en/latest/tutorial.html" + tool_dev_url: "https://github.com/marbl/canu" + doi: "10.1101/gr.215087.116" + licence: "['GPL v2 and others']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:true ] + - reads: + type: file + description: fasta/fastq file + pattern: "*.{fasta,fastq}" + - mode: + type: value + description: Canu mode depending on the input data (source and error rate) + pattern: "-pacbio|-nanopore|-pacbio-hifi" + - genomesize: + type: value + description: An estimate of the size of the genome. Common suffices are allowed, for example, 3.7m or 2.8g + pattern: "[g|m|k]" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - report: + type: file + description: Most of the analysis reported during assembly + pattern: "*.report" + - assembly: + type: file + description: Everything which could be assembled and is the full assembly, including both unique, repetitive, and bubble elements. + pattern: "*.contigs.fasta" + - contigs: + type: file + description: Reads and low-coverage contigs which could not be incorporated into the primary assembly. + pattern: "*.unassembled.fasta" + - corrected_reads: + type: file + description: The reads after correction. + pattern: "*.correctedReads.fasta.gz" + - corrected_trimmed_reads: + type: file + description: The corrected reads after overlap based trimming + pattern: "*.trimmedReads.fasta.gz" + - metadata: + type: file + description: (undocumented) + pattern: "*.contigs.layout" + - contig_position: + type: file + description: The position of each read in a contig + pattern: "*.contigs.layout.readToTig" + - contig_info: + type: file + description: A list of the contigs, lengths, coverage, number of reads and other metadata. Essentially the same information provided in the FASTA header line. + pattern: "*.contigs.layout.tigInfo" +authors: + - "@scorreard" +maintainers: + - "@scorreard" diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index 3d97ca9..7c51260 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -10,6 +10,7 @@ process FASTP { input: tuple val(meta), path(reads) path adapter_fasta + val discard_trimmed_pass val save_trimmed_fail val save_merged val trim_polyA @@ -32,8 +33,11 @@ process FASTP { def polyA = ( trim_polyA || meta.library_strategy == "metatranscriptomic" ) ? "--trim_poly_x" : '' def prefix = task.ext.prefix ?: "${meta.id}" def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" - def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ?: ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" ) + def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_2.fastp.fastq.gz" // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. if ( task.ext.args?.contains('--interleaved_in') ) { """ [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz @@ -62,6 +66,7 @@ process FASTP { fastp \\ --in1 ${prefix}.fastq.gz \\ + $out_fq1 \\ --out1 ${prefix}.fastp.fastq.gz \\ --thread $task.cpus \\ --json ${prefix}.fastp.json \\ @@ -85,8 +90,8 @@ process FASTP { fastp \\ --in1 ${prefix}_1.fastq.gz \\ --in2 ${prefix}_2.fastq.gz \\ - --out1 ${prefix}_1.fastp.fastq.gz \\ - --out2 ${prefix}_2.fastp.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $adapter_list \\ @@ -96,7 +101,7 @@ process FASTP { --thread $task.cpus \\ --detect_adapter_for_pe \\ $args \\ - 2> ${prefix}.fastp.log + 2> >(tee ${prefix}.fastp.log >&2) cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml index c22a16a..8dfecc1 100644 --- a/modules/nf-core/fastp/meta.yml +++ b/modules/nf-core/fastp/meta.yml @@ -27,12 +27,16 @@ input: type: file description: File in FASTA format containing possible adapters to remove. pattern: "*.{fasta,fna,fas,fa}" + - discard_trimmed_pass: + type: boolean + description: Specify true to not write any reads that pass trimming thresholds. | + This can be used to use fastp for the output report only. - save_trimmed_fail: type: boolean description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` - save_merged: type: boolean - description: Specify true to save all merged reads to the a file ending in `*.merged.fastq.gz` + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` output: - meta: type: map diff --git a/modules/nf-core/fastqc/environment.yml b/modules/nf-core/fastqc/environment.yml index 1787b38..691d4c7 100644 --- a/modules/nf-core/fastqc/environment.yml +++ b/modules/nf-core/fastqc/environment.yml @@ -1,7 +1,5 @@ -name: fastqc channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::fastqc=0.12.1 diff --git a/modules/nf-core/fastqc/fastqc.diff b/modules/nf-core/fastqc/fastqc.diff deleted file mode 100644 index 0dd7d4d..0000000 --- a/modules/nf-core/fastqc/fastqc.diff +++ /dev/null @@ -1,27 +0,0 @@ -Changes in module 'nf-core/fastqc' ---- modules/nf-core/fastqc/main.nf -+++ modules/nf-core/fastqc/main.nf -@@ -21,19 +21,12 @@ - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" -- // Make list of old name and new name pairs to use for renaming in the bash while loop -- def old_new_pairs = reads instanceof Path || reads.size() == 1 ? 
[[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } -- def rename_to = old_new_pairs*.join(' ').join(' ') -- def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') -+ - """ -- printf "%s %s\\n" $rename_to | while read old_name new_name; do -- [ -f "\${new_name}" ] || ln -s \$old_name \$new_name -- done -- - fastqc \\ - $args \\ - --threads $task.cpus \\ -- $renamed_files -+ $reads - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - -************************************************************ diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 0a11817..d8989f4 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -21,12 +21,28 @@ process FASTQC { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + // Make list of old name and new name pairs to use for renaming in the bash while loop + def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] } + def rename_to = old_new_pairs*.join(' ').join(' ') + def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ') + + // The total amount of allocated RAM by FastQC is equal to the number of threads defined (--threads) time the amount of RAM defined (--memory) + // https://github.com/s-andrews/FastQC/blob/1faeea0412093224d7f6a07f777fad60a5650795/fastqc#L211-L222 + // Dividing the task.memory by task.cpu allows to stick to requested amount of RAM in the label + def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB') / task.cpus + // FastQC memory value allowed range (100 - 10000) + def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) """ + printf "%s %s\\n" $rename_to | while read old_name new_name; do + [ -f "\${new_name}" ] || ln -s \$old_name \$new_name + done + fastqc \\ $args \\ --threads $task.cpus \\ - $reads + --memory $fastqc_memory \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/fastqc/meta.yml b/modules/nf-core/fastqc/meta.yml index ee5507e..2b2e62b 100644 --- a/modules/nf-core/fastqc/meta.yml +++ b/modules/nf-core/fastqc/meta.yml @@ -11,40 +11,50 @@ tools: FastQC gives general quality metrics about your reads. It provides information about the quality score distribution across your reads, the per base sequence content (%A/C/G/T). + You get information about adapter contamination and other overrepresented sequences. homepage: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/ documentation: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/ licence: ["GPL-2.0-only"] + identifier: biotools:fastqc input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
[ id:'test', single_end:false ] - html: - type: file - description: FastQC report - pattern: "*_{fastqc.html}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.html": + type: file + description: FastQC report + pattern: "*_{fastqc.html}" - zip: - type: file - description: FastQC report archive - pattern: "*_{fastqc.zip}" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.zip": + type: file + description: FastQC report archive + pattern: "*_{fastqc.zip}" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@drpatelh" - "@grst" diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test index b9e8f92..e9d79a0 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test +++ b/modules/nf-core/fastqc/tests/main.nf.test @@ -3,107 +3,307 @@ nextflow_process { name "Test Process FASTQC" script "../main.nf" process "FASTQC" + tag "modules" tag "modules_nfcore" tag "fastqc" - test("Single-Read") { + test("sarscov2 single-end [fastq]") { when { - params { - outdir = "$outputDir" + process { + """ + input[0] = Channel.of([ + [ id: 'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ } + } + + then { + assertAll ( + { assert process.success }, + // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. + // looks like this:
Mon 2 Oct 2023 test.gz
+ // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 paired-end [fastq]") { + + when { process { """ - input[0] = [ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 interleaved [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 paired-end [bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/test_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 multiple [fastq]") { + + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1][0] ==~ ".*/test_1_fastqc.html" }, + { assert process.out.html[0][1][1] ==~ ".*/test_2_fastqc.html" }, + { assert process.out.html[0][1][2] ==~ ".*/test_3_fastqc.html" }, + { assert process.out.html[0][1][3] ==~ ".*/test_4_fastqc.html" }, 
+ { assert process.out.zip[0][1][0] ==~ ".*/test_1_fastqc.zip" }, + { assert process.out.zip[0][1][1] ==~ ".*/test_2_fastqc.zip" }, + { assert process.out.zip[0][1][2] ==~ ".*/test_3_fastqc.zip" }, + { assert process.out.zip[0][1][3] ==~ ".*/test_4_fastqc.zip" }, + { assert path(process.out.html[0][1][0]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][1]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][2]).text.contains("File typeConventional base calls") }, + { assert path(process.out.html[0][1][3]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 custom_prefix") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert process.out.html[0][1] ==~ ".*/mysample_fastqc.html" }, + { assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" }, + { assert path(process.out.html[0][1]).text.contains("File typeConventional base calls") }, + { assert snapshot(process.out.versions).match() } + ) + } + } + + test("sarscov2 single-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ [ id: 'test', single_end:true ], - [ - file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) - ] - ] + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 interleaved [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 paired-end [bam] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 multiple [fastq] - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [id: 'test', single_end: false], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ] + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 custom_prefix - stub") { + + options "-stub" + when { + process { + """ + input[0] = Channel.of([ + [ id:'mysample', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ]) """ } } then { assertAll ( - { assert process.success }, - // NOTE The report contains the date inside it, which means that the md5sum is stable per day, but not longer than that. So you can't md5sum it. - // looks like this:
Mon 2 Oct 2023 test.gz
- // https://github.com/nf-core/modules/pull/3903#issuecomment-1743620039 - { assert process.out.html.get(0).get(1) ==~ ".*/test_fastqc.html" }, - { assert path(process.out.html.get(0).get(1)).getText().contains("File typeConventional base calls") }, - { assert snapshot(process.out.versions).match("versions") }, - { assert process.out.zip.get(0).get(1) ==~ ".*/test_fastqc.zip" } + { assert process.success }, + { assert snapshot(process.out).match() } ) } } -// TODO -// // -// // Test with paired-end data -// // -// workflow test_fastqc_paired_end { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with interleaved data -// // -// workflow test_fastqc_interleaved { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with bam data -// // -// workflow test_fastqc_bam { -// input = [ -// [id: 'test', single_end: false], // meta map -// file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with multiple samples -// // -// workflow test_fastqc_multiple { -// input = [ -// [id: 'test', single_end: false], // meta map -// [ -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true), -// file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) -// ] -// ] - -// FASTQC ( input ) -// } - -// // -// // Test with custom prefix -// // -// workflow test_fastqc_custom_prefix { -// input = [ -// [ id:'mysample', single_end:true ], // meta map -// file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) -// ] - -// FASTQC ( input ) -// } } diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap index 636a32c..d5db309 100644 --- a/modules/nf-core/fastqc/tests/main.nf.test.snap +++ b/modules/nf-core/fastqc/tests/main.nf.test.snap @@ -1,10 +1,392 @@ { - "versions": { + "sarscov2 custom_prefix": { "content": [ [ "versions.yml:md5,e1cc25ca8af856014824abd842e93978" ] ], - "timestamp": "2023-10-09T23:40:54+0000" + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:16.374038" + }, + "sarscov2 single-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": true + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + 
"timestamp": "2024-07-22T11:02:24.993809" + }, + "sarscov2 custom_prefix - stub": { + "content": [ + { + "0": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "mysample", + "single_end": true + }, + "mysample.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:03:10.93942" + }, + "sarscov2 interleaved [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:42.355718" + }, + "sarscov2 paired-end [bam]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:53.276274" + }, + "sarscov2 multiple [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:05.527626" + }, + "sarscov2 paired-end [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:31.188871" + }, + "sarscov2 paired-end [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:34.273566" + }, + "sarscov2 multiple [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:03:02.304411" + }, + "sarscov2 single-end [fastq]": { + "content": [ + [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ] + ], + "meta": { + "nf-test": "0.9.0", + 
"nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:01:19.095607" + }, + "sarscov2 interleaved [fastq] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:44.640184" + }, + "sarscov2 paired-end [bam] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e1cc25ca8af856014824abd842e93978" + ], + "zip": [ + [ + { + "id": "test", + "single_end": false + }, + "test.zip:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-07-22T11:02:53.550742" } } \ No newline at end of file diff --git a/modules/nf-core/flye/environment.yml b/modules/nf-core/flye/environment.yml new file mode 100644 index 0000000..f5364d5 --- /dev/null +++ b/modules/nf-core/flye/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::flye=2.9 diff --git a/modules/nf-core/flye/main.nf b/modules/nf-core/flye/main.nf new file mode 100644 index 0000000..3d89218 --- /dev/null +++ b/modules/nf-core/flye/main.nf @@ -0,0 +1,68 @@ +process FLYE { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/flye:2.9--py39h6935b12_1' : + 'biocontainers/flye:2.9--py39h6935b12_1' }" + + input: + tuple val(meta), path(reads) + val mode + + output: + tuple val(meta), path("*.fasta.gz"), emit: fasta + tuple val(meta), path("*.gfa.gz") , emit: gfa + tuple val(meta), path("*.gv.gz") , emit: gv + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*.json") , emit: json + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def valid_mode = ["--pacbio-raw", "--pacbio-corr", "--pacbio-hifi", "--nano-raw", "--nano-corr", "--nano-hq"] + if ( !valid_mode.contains(mode) ) { error "Unrecognised mode to run Flye. Options: ${valid_mode.join(', ')}" } + """ + flye \\ + $mode \\ + $reads \\ + --out-dir . 
\\ + --threads \\ + $task.cpus \\ + $args + + gzip -c assembly.fasta > ${prefix}.assembly.fasta.gz + gzip -c assembly_graph.gfa > ${prefix}.assembly_graph.gfa.gz + gzip -c assembly_graph.gv > ${prefix}.assembly_graph.gv.gz + mv assembly_info.txt ${prefix}.assembly_info.txt + mv flye.log ${prefix}.flye.log + mv params.json ${prefix}.params.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + flye: \$( flye --version ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + echo stub | gzip -c > ${prefix}.assembly.fasta.gz + echo stub | gzip -c > ${prefix}.assembly_graph.gfa.gz + echo stub | gzip -c > ${prefix}.assembly_graph.gv.gz + echo contig_1 > ${prefix}.assembly_info.txt + echo stub > ${prefix}.flye.log + echo stub > ${prefix}.params.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + flye: \$( flye --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/flye/meta.yml b/modules/nf-core/flye/meta.yml new file mode 100644 index 0000000..5c3c816 --- /dev/null +++ b/modules/nf-core/flye/meta.yml @@ -0,0 +1,68 @@ +name: "flye" +description: De novo assembler for single molecule sequencing reads +keywords: + - assembly + - genome + - de novo + - genome assembler + - single molecule +tools: + - "flye": + description: "Fast and accurate de novo assembler for single molecule sequencing reads" + homepage: "https://github.com/fenderglass/Flye" + documentation: "https://github.com/fenderglass/Flye/blob/flye/docs/USAGE.md" + tool_dev_url: "https://github.com/fenderglass/Flye" + doi: "10.1038/s41592-020-00971-x" + licence: "['BSD-3-clause']" +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - reads: + type: file + description: Input reads from Oxford Nanopore or PacBio data in FASTA/FASTQ format. + pattern: "*.{fasta,fastq,fasta.gz,fastq.gz,fa,fq,fa.gz,fq.gz}" + - mode: + type: string + description: Flye mode depending on the input data (source and error rate) + pattern: "--pacbio-raw|--pacbio-corr|--pacbio-hifi|--nano-raw|--nano-corr|--nano-hq" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fasta: + type: file + description: Assembled FASTA file + pattern: "*.fasta.gz" + - gfa: + type: file + description: Repeat graph in gfa format + pattern: "*.gfa.gz" + - gv: + type: file + description: Repeat graph in gv format + pattern: "*.gv.gz" + - txt: + type: file + description: Extra information and statistics about resulting contigs + pattern: "*.txt" + - log: + type: file + description: Flye log file + pattern: "*.log" + - json: + type: file + description: Flye parameters + pattern: "*.json" +authors: + - "@mirpedrol" +maintainers: + - "@mirpedrol" diff --git a/modules/nf-core/flye/tests/main.nf.test b/modules/nf-core/flye/tests/main.nf.test new file mode 100644 index 0000000..f06aa1b --- /dev/null +++ b/modules/nf-core/flye/tests/main.nf.test @@ -0,0 +1,258 @@ +// According to the issue https://github.com/fenderglass/Flye/issues/164 +// Some fluctuations are expected because of the heuristics +// Here we check the that test.assembly_info.txt contains at least one contig + +nextflow_process { + + name "Test Process FLYE" + script "../main.nf" + process "FLYE" + config "./nextflow.config" + tag "flye" + tag "modules" + tag "modules_nfcore" + + + test("flye_pacbio_raw") { + tag "flye_pacbio_raw" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--pacbio-raw" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + ) + } + + } + + + test("flye_pacbio_corr") { + tag "flye_pacbio_corr" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--pacbio-corr" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + + ) + } + + } + + test("flye_pacbio_hifi") { + tag "flye_pacbio_hifi" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--pacbio-hifi" + """ + } + } + + 
then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + + ) + } + + } + + test("flye_nano_raw") { + tag "flye_nano_raw" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--nano-raw" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + + ) + } + + } + + test("flye_nano_corr") { + tag "flye_nano_corr" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--nano-corr" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text =~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + ) + } + + } + + + test("flye_nano_hq") { + tag "flye_nano_hq" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['homo_sapiens']['pacbio']['hifi'], checkIfExists: true) + ] + input[1] = "--nano-hq" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.fasta.get(0).get(1) ==~ '.*/test.assembly.fasta.gz' }, + { assert process.out.gfa.get(0).get(1) ==~ '.*/test.assembly_graph.gfa.gz' }, + { assert process.out.gv.get(0).get(1) ==~ '.*/test.assembly_graph.gv.gz' }, + { assert process.out.log.get(0).get(1) ==~ '.*/test.flye.log' }, + { assert process.out.txt.get(0).get(1) ==~ '.*/test.assembly_info.txt' }, + { assert process.out.json.get(0).get(1) ==~ '.*/test.params.json' }, + + // check for contig_1 in assembly_info + { assert path(process.out.txt.get(0).get(1)).text 
=~ /contig_1/ }, + // Check if test.params.json matches + { assert snapshot(process.out.json).match() } + + + ) + } + + } + + + +} diff --git a/modules/nf-core/flye/tests/main.nf.test.snap b/modules/nf-core/flye/tests/main.nf.test.snap new file mode 100644 index 0000000..a4aef73 --- /dev/null +++ b/modules/nf-core/flye/tests/main.nf.test.snap @@ -0,0 +1,80 @@ +{ + "flye_pacbio_raw": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:38:04.835173617" + }, + "flye_pacbio_hifi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T08:38:39.624137639" + }, + "flye_nano_raw": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:51:24.546896915" + }, + "flye_pacbio_corr": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T08:34:15.751344742" + }, + "flye_nano_corr": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:17:49.861781685" + }, + "flye_nano_hq": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.params.json:md5,54b576cb6d4d27656878a7fd3657bde9" + ] + ] + ], + "timestamp": "2023-10-18T09:26:29.081427909" + } +} \ No newline at end of file diff --git a/modules/nf-core/flye/tests/nextflow.config b/modules/nf-core/flye/tests/nextflow.config new file mode 100644 index 0000000..40cf878 --- /dev/null +++ b/modules/nf-core/flye/tests/nextflow.config @@ -0,0 +1,4 @@ +// profile=docker with tests flye_pacbio_raw and flye_nano_raw need more memory that the default of 3.GB +process { + memory = 6.GB +} diff --git a/modules/nf-core/flye/tests/tags.yml b/modules/nf-core/flye/tests/tags.yml new file mode 100644 index 0000000..31103d1 --- /dev/null +++ b/modules/nf-core/flye/tests/tags.yml @@ -0,0 +1,2 @@ +flye: + - modules/nf-core/flye/** diff --git a/modules/nf-core/medaka/environment.yml b/modules/nf-core/medaka/environment.yml new file mode 100644 index 0000000..fea1532 --- /dev/null +++ b/modules/nf-core/medaka/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::medaka=1.4.4 diff --git a/modules/nf-core/medaka/main.nf b/modules/nf-core/medaka/main.nf new file mode 100644 index 0000000..e87c910 --- /dev/null +++ b/modules/nf-core/medaka/main.nf @@ -0,0 +1,40 @@ +process MEDAKA { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/medaka:1.4.4--py38h130def0_0' : + 'biocontainers/medaka:1.4.4--py38h130def0_0' }" + + input: + tuple val(meta), path(reads), path(assembly) + + output: + tuple val(meta), path("*.fa.gz"), emit: assembly + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + medaka_consensus \\ + -t $task.cpus \\ + $args \\ + -i $reads \\ + -d $assembly \\ + -o ./ + + mv consensus.fasta ${prefix}.fa + + gzip -n ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + medaka: \$( medaka --version 2>&1 | sed 's/medaka //g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/medaka/meta.yml b/modules/nf-core/medaka/meta.yml new file mode 100644 index 0000000..9ed3589 --- /dev/null +++ b/modules/nf-core/medaka/meta.yml @@ -0,0 +1,45 @@ +name: medaka +description: A tool to create consensus sequences and variant calls from nanopore sequencing data +keywords: + - assembly + - polishing + - nanopore +tools: + - medaka: + description: Neural network sequence error correction. + homepage: https://nanoporetech.github.io/medaka/index.html + documentation: https://nanoporetech.github.io/medaka/index.html + tool_dev_url: https://github.com/nanoporetech/medaka + licence: ["Mozilla Public License 2.0"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: List of input nanopore fasta/FastQ files + pattern: "*.{fasta,fa,fastq,fastq.gz,fq,fq.gz}" + - assembly: + type: file + description: Genome assembly + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - assembly: + type: file + description: Polished genome assembly + pattern: "*.fa.gz" +authors: + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/modules/nf-core/medaka/tests/main.nf.test b/modules/nf-core/medaka/tests/main.nf.test new file mode 100644 index 0000000..1c5c55f --- /dev/null +++ b/modules/nf-core/medaka/tests/main.nf.test @@ -0,0 +1,33 @@ +nextflow_process { + + name "Test Process MEDAKA" + tag "modules_nfcore" + tag "modules" + tag "medaka" + script "../main.nf" + process "MEDAKA" + + test("Medaka") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/medaka/tests/main.nf.test.snap b/modules/nf-core/medaka/tests/main.nf.test.snap new file mode 100644 index 0000000..d3fcba2 --- /dev/null +++ b/modules/nf-core/medaka/tests/main.nf.test.snap @@ -0,0 +1,33 @@ +{ + "Medaka": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fa.gz:md5,f42303f1d6c2c79175faeb00e10b9a6e" + ] + ], + "1": [ + "versions.yml:md5,739bb00a08faba4029f9f5ab9c15275a" + ], + "assembly": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fa.gz:md5,f42303f1d6c2c79175faeb00e10b9a6e" + ] + ], + "versions": [ + "versions.yml:md5,739bb00a08faba4029f9f5ab9c15275a" + ] + } + ], + "timestamp": "2023-10-18T12:38:17.806031909" + } +} \ No newline at end of file diff --git a/modules/nf-core/medaka/tests/tags.yml b/modules/nf-core/medaka/tests/tags.yml new file mode 100644 index 0000000..dd9fb10 --- /dev/null +++ b/modules/nf-core/medaka/tests/tags.yml @@ -0,0 +1,2 @@ +medaka: + - modules/nf-core/medaka/** diff --git a/modules/nf-core/megahit/environment.yml b/modules/nf-core/megahit/environment.yml index aac2f99..eed8b72 100644 --- a/modules/nf-core/megahit/environment.yml +++ b/modules/nf-core/megahit/environment.yml @@ -1,8 +1,6 @@ -name: megahit channels: - conda-forge - bioconda - - defaults dependencies: - bioconda::megahit=1.2.9 - - conda-forge::pigz=2.6 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/megahit/main.nf b/modules/nf-core/megahit/main.nf index 750e3ec..dc9bc4b 100644 --- a/modules/nf-core/megahit/main.nf +++ b/modules/nf-core/megahit/main.nf @@ -1,22 +1,22 @@ process MEGAHIT { - tag "$meta.id" + tag "${meta.id}" label 'process_high' - conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/mulled-v2-0f92c152b180c7cd39d9b0e6822f8c89ccb59c99:8ec213d21e5d03f9db54898a2baeaf8ec729b447-0' : - 'biocontainers/mulled-v2-0f92c152b180c7cd39d9b0e6822f8c89ccb59c99:8ec213d21e5d03f9db54898a2baeaf8ec729b447-0' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/f2/f2cb827988dca7067ff8096c37cb20bc841c878013da52ad47a50865d54efe83/data' : + 'community.wave.seqera.io/library/megahit_pigz:87a590163e594224' }" input: tuple val(meta), path(reads) output: - tuple val(meta), path("megahit_out/*.contigs.fa.gz") , emit: contigs - tuple val(meta), path("megahit_out/intermediate_contigs/k*.contigs.fa.gz") , emit: k_contigs - tuple val(meta), path("megahit_out/intermediate_contigs/k*.addi.fa.gz") , emit: addi_contigs - tuple val(meta), path("megahit_out/intermediate_contigs/k*.local.fa.gz") , emit: local_contigs - tuple val(meta), path("megahit_out/intermediate_contigs/k*.final.contigs.fa.gz"), emit: kfinal_contigs - path "versions.yml" , emit: versions + tuple val(meta), path("*.contigs.fa.gz") , emit: contigs + tuple val(meta), path("intermediate_contigs/k*.contigs.fa.gz") , emit: k_contigs + tuple val(meta), path("intermediate_contigs/k*.addi.fa.gz") , emit: addi_contigs + tuple val(meta), path("intermediate_contigs/k*.local.fa.gz") , emit: local_contigs + tuple val(meta), path("intermediate_contigs/k*.final.contigs.fa.gz"), emit: kfinal_contigs + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,56 +25,46 @@ process MEGAHIT { def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - megahit \\ - -r ${reads} \\ - -t $task.cpus \\ - $args \\ - --out-prefix $prefix - - if [ ! -s megahit_out/*.fa ]; then - echo "No contigs assembled" | tee /dev/stderr - exit 1 - fi + def reads_command = meta.single_end ? "-r ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + """ + megahit \\ + ${reads_command} \\ + ${args} \\ + -t ${task.cpus} \\ + --out-prefix ${prefix} - pigz \\ - --no-name \\ - -p $task.cpus \\ - $args2 \\ - megahit_out/*.fa \\ - megahit_out/intermediate_contigs/*.fa + pigz \\ + --no-name \\ + -p ${task.cpus} \\ + ${args2} \\ + megahit_out/*.fa \\ + megahit_out/intermediate_contigs/*.fa - cat <<-END_VERSIONS > versions.yml - "${task.process}": - megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') - END_VERSIONS - """ - } else { - """ - megahit \\ - -1 ${reads[0]} \\ - -2 ${reads[1]} \\ - -t $task.cpus \\ - $args \\ - --out-prefix $prefix + mv megahit_out/* . - if [ ! -s megahit_out/*.fa ]; then - echo "No contigs assembled" | tee /dev/stderr - exit 1 - fi + cat <<-END_VERSIONS > versions.yml + "${task.process}": + megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') + END_VERSIONS + """ - pigz \\ - --no-name \\ - -p $task.cpus \\ - $args2 \\ - megahit_out/*.fa \\ - megahit_out/intermediate_contigs/*.fa + stub: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reads_command = meta.single_end ? 
"-r ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + """ + mkdir -p intermediate_contigs + echo "" | gzip > ${prefix}.contigs.fa.gz + echo "" | gzip > intermediate_contigs/k21.contigs.fa.gz + echo "" | gzip > intermediate_contigs/k21.addi.fa.gz + echo "" | gzip > intermediate_contigs/k21.local.fa.gz + echo "" | gzip > intermediate_contigs/k21.final.contigs.fa.gz + touch ${prefix}.log - cat <<-END_VERSIONS > versions.yml - "${task.process}": - megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') - END_VERSIONS - """ - } + cat <<-END_VERSIONS > versions.yml + "${task.process}": + megahit: \$(echo \$(megahit -v 2>&1) | sed 's/MEGAHIT v//') + END_VERSIONS + """ } diff --git a/modules/nf-core/megahit/megahit.diff b/modules/nf-core/megahit/megahit.diff index 9f4fe6f..0f2f60b 100644 --- a/modules/nf-core/megahit/megahit.diff +++ b/modules/nf-core/megahit/megahit.diff @@ -1,29 +1,38 @@ Changes in module 'nf-core/megahit' +'modules/nf-core/megahit/environment.yml' is unchanged +Changes in 'megahit/main.nf': --- modules/nf-core/megahit/main.nf +++ modules/nf-core/megahit/main.nf -@@ -33,6 +33,11 @@ - $args \\ - --out-prefix $prefix +@@ -7,7 +7,7 @@ + 'community.wave.seqera.io/library/megahit_pigz:87a590163e594224' }" -+ if [ ! -s megahit_out/*.fa ]; then -+ echo "No contigs assembled" | tee /dev/stderr -+ exit 1 -+ fi -+ - pigz \\ - --no-name \\ - -p $task.cpus \\ -@@ -54,6 +59,11 @@ - $args \\ - --out-prefix $prefix + input: +- tuple val(meta), path(reads1), path(reads2) ++ tuple val(meta), path(reads) -+ if [ ! -s megahit_out/*.fa ]; then -+ echo "No contigs assembled" | tee /dev/stderr -+ exit 1 -+ fi -+ - pigz \\ - --no-name \\ - -p $task.cpus \\ + output: + tuple val(meta), path("*.contigs.fa.gz") , emit: contigs +@@ -25,7 +25,7 @@ + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" +- def reads_command = meta.single_end || !reads2 ? "-r ${reads1}" : "-1 ${reads1.join(',')} -2 ${reads2.join(',')}" ++ def reads_command = meta.single_end ? "-r ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + """ + megahit \\ + ${reads_command} \\ +@@ -52,7 +52,7 @@ + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" +- def reads_command = meta.single_end || !reads2 ? "-r ${reads1}" : "-1 ${reads1.join(',')} -2 ${reads2.join(',')}" ++ def reads_command = meta.single_end ? 
"-r ${reads}" : "-1 ${reads[0]} -2 ${reads[1]}" + """ + mkdir -p intermediate_contigs + echo "" | gzip > ${prefix}.contigs.fa.gz +'modules/nf-core/megahit/meta.yml' is unchanged +'modules/nf-core/megahit/tests/tags.yml' is unchanged +'modules/nf-core/megahit/tests/main.nf.test.snap' is unchanged +'modules/nf-core/megahit/tests/main.nf.test' is unchanged ************************************************************ diff --git a/modules/nf-core/megahit/meta.yml b/modules/nf-core/megahit/meta.yml index 83b718f..04dab4c 100644 --- a/modules/nf-core/megahit/meta.yml +++ b/modules/nf-core/megahit/meta.yml @@ -8,53 +8,106 @@ keywords: - metagenomics tools: - megahit: - description: "An ultra-fast single-node solution for large and complex metagenomics assembly via succinct de Bruijn graph" + description: "An ultra-fast single-node solution for large and complex metagenomics + assembly via succinct de Bruijn graph" homepage: https://github.com/voutcn/megahit documentation: https://github.com/voutcn/megahit tool_dev_url: https://github.com/voutcn/megahit doi: "10.1093/bioinformatics/btv033" licence: ["GPL v3"] + args_id: "$args" + identifier: biotools:megahit + - pigz: + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" + args_id: "$args2" + + identifier: biotools:megahit input: - - meta: - type: map - description: | - Groovy Map containing sample information and input single, or paired-end FASTA/FASTQ files (optionally decompressed) - e.g. [ id:'test', single_end:false ] - - reads: - type: file - description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively in gzipped or uncompressed FASTQ or FASTA format. + - - meta: + type: map + description: | + Groovy Map containing sample information and input single, or paired-end FASTA/FASTQ files (optionally decompressed) + e.g. [ id:'test', single_end:false ] + - reads1: + type: file + description: | + A single or list of input FastQ files for single-end or R1 of paired-end library(s), + respectively in gzipped or uncompressed FASTQ or FASTA format. + - reads2: + type: file + description: | + A single or list of input FastQ files for R2 of paired-end library(s), + respectively in gzipped or uncompressed FASTQ or FASTA format. output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" - contigs: - type: file - description: Final final contigs result of the assembly in FASTA format. - pattern: "*.contigs.fa.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.contigs.fa.gz": + type: file + description: Final final contigs result of the assembly in FASTA format. + pattern: "*.contigs.fa.gz" - k_contigs: - type: file - description: Contigs assembled from the de Bruijn graph of order-K - pattern: "k*.contigs.fa.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - intermediate_contigs/k*.contigs.fa.gz: + type: file + description: Contigs assembled from the de Bruijn graph of order-K + pattern: "k*.contigs.fa.gz" - addi_contigs: - type: file - description: Contigs assembled after iteratively removing local low coverage unitigs in the de Bruijn graph of order-K - pattern: "k*.addi.fa.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intermediate_contigs/k*.addi.fa.gz: + type: file + description: Contigs assembled after iteratively removing local low coverage + unitigs in the de Bruijn graph of order-K + pattern: "k*.addi.fa.gz" - local_contigs: - type: file - description: Contigs of the locally assembled contigs for k=K - pattern: "k*.local.fa.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intermediate_contigs/k*.local.fa.gz: + type: file + description: Contigs of the locally assembled contigs for k=K + pattern: "k*.local.fa.gz" - kfinal_contigs: - type: file - description: Stand-alone contigs for k=K; if local assembly is turned on, the file will be empty - pattern: "k*.final.contigs.fa.gz" + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intermediate_contigs/k*.final.contigs.fa.gz: + type: file + description: Stand-alone contigs for k=K; if local assembly is turned on, the + file will be empty + pattern: "k*.final.contigs.fa.gz" + - log: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.log": + type: file + description: Log file containing statistics of the assembly output + pattern: "*.log" + - versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@jfy133" maintainers: diff --git a/modules/nf-core/megahit/tests/main.nf.test b/modules/nf-core/megahit/tests/main.nf.test new file mode 100644 index 0000000..b52765d --- /dev/null +++ b/modules/nf-core/megahit/tests/main.nf.test @@ -0,0 +1,126 @@ +nextflow_process { + + name "Test Process MEGAHIT" + script "../main.nf" + process "MEGAHIT" + + tag "modules" + tag "modules_nfcore" + tag "megahit" + + test("sarscov2 - fastq - se") { + + when { + process { + """ + input[0] = [ [id:"test", single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + []] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") }, + { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert snapshot( + path(process.out.log[0][1]).readLines().last().contains("ALL DONE. 
Time elapsed"), + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - fastq - pe") { + + when { + process { + """ + input[0] = [ [id:"test", single_end:false], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") }, + { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert snapshot( + path(process.out.log[0][1]).readLines().last().contains("ALL DONE. Time elapsed"), + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - fastq - pe - coassembly") { + + when { + process { + """ + input[0] = [ [id:"test", single_end:false], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true)] , + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.contigs[0][1]).linesGzip.toString().contains(">k") }, + { assert process.out.k_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.addi_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.local_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert process.out.kfinal_contigs[0][1].each{path(it).linesGzip.toString().contains(">k")}}, + { assert snapshot( + path(process.out.log[0][1]).readLines().last().contains("ALL DONE. 
Time elapsed"), + process.out.versions + ).match() + } + ) + } + + } + + test("sarscov2 - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [id:"test", single_end:true], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/megahit/tests/main.nf.test.snap b/modules/nf-core/megahit/tests/main.nf.test.snap new file mode 100644 index 0000000..4677cc3 --- /dev/null +++ b/modules/nf-core/megahit/tests/main.nf.test.snap @@ -0,0 +1,172 @@ +{ + "sarscov2 - fastq - se": { + "content": [ + true, + [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:45:42.387947698" + }, + "sarscov2 - fastq - pe": { + "content": [ + true, + [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:45:48.679485983" + }, + "sarscov2 - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "k21.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.addi.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.local.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ], + "addi_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.addi.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "test.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "k_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + [ + "k21.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "kfinal_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.final.contigs.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "local_contigs": [ + [ + { + "id": "test", + "single_end": true + }, + "k21.local.fa.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:44:35.245399991" + }, + "sarscov2 - fastq - pe - coassembly": { + "content": [ + true, + [ + "versions.yml:md5,e3c0731297c9abe2f495ab6d541ac0e6" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-12T16:45:56.23363342" + } +} \ No newline at end of file diff --git a/modules/nf-core/megahit/tests/tags.yml b/modules/nf-core/megahit/tests/tags.yml new file 
mode 100644 index 0000000..9e86584 --- /dev/null +++ b/modules/nf-core/megahit/tests/tags.yml @@ -0,0 +1,2 @@ +megahit: + - "modules/nf-core/megahit/**" diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml new file mode 100644 index 0000000..41e8fe9 --- /dev/null +++ b/modules/nf-core/minimap2/align/environment.yml @@ -0,0 +1,11 @@ +name: minimap2_align + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - bioconda::htslib=1.20 + - bioconda::minimap2=2.28 + - bioconda::samtools=1.20 diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf new file mode 100644 index 0000000..cbfc5bf --- /dev/null +++ b/modules/nf-core/minimap2/align/main.nf @@ -0,0 +1,81 @@ +process MINIMAP2_ALIGN { + tag "$meta.id" + label 'process_high' + + // Note: the versions here need to match the versions used in the mulled container below and minimap2/index + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' : + 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3161f532a5ea6f1dec9be5667c9efc2afdac6104-0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(reference) + val prefix2 + val bam_format + val bam_index_extension + val cigar_paf_format + val cigar_bam + + output: + tuple val(meta), path("*.minimap*") , optional: true, emit: filtered_fastq + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def args4 = task.ext.args4 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" + def map_mode = "${meta.platform}" ? "-x map-${meta.platform}" : '' + def bam_output = bam_format ? "-a | samtools fastq -f 4 | gzip > ${prefix}.${prefix2}.minimap.fastq.gz" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def query = bam_input ? "-" : reads + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + + """ + $samtools_reset_fastq \\ + minimap2 \\ + $args \\ + -t $task.cpus \\ + $map_mode \\ + $target \\ + $query \\ + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: c + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? 
"touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) + + """ + touch $output_file + ${bam_index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minimap2: \$(minimap2 --version 2>&1) + END_VERSIONS + """ +} diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml new file mode 100644 index 0000000..8996f88 --- /dev/null +++ b/modules/nf-core/minimap2/align/meta.yml @@ -0,0 +1,84 @@ +name: minimap2_align +description: A versatile pairwise aligner for genomic and spliced nucleotide sequences +keywords: + - align + - fasta + - fastq + - genome + - paf + - reference +tools: + - minimap2: + description: | + A versatile pairwise aligner for genomic and spliced nucleotide sequences. + homepage: https://github.com/lh3/minimap2 + documentation: https://github.com/lh3/minimap2#uguide + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FASTA or FASTQ files of size 1 and 2 for single-end + and paired-end data, respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test_ref'] + - reference: + type: file + description: | + Reference database in FASTA format. + - bam_format: + type: boolean + description: Specify that output should be in BAM format + - bam_index_extension: + type: string + description: BAM alignment index extension (e.g. "bai") + - cigar_paf_format: + type: boolean + description: Specify that output CIGAR should be in PAF format + - cigar_bam: + type: boolean + description: | + Write CIGAR with >65535 ops at the CG tag. This is recommended when + doing XYZ (https://github.com/lh3/minimap2#working-with-65535-cigar-operations) +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - paf: + type: file + description: Alignment in PAF format + pattern: "*.paf" + - bam: + type: file + description: Alignment in BAM format + pattern: "*.bam" + - index: + type: file + description: BAM alignment index + pattern: "*.bam.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" + - "@fellen31" +maintainers: + - "@heuermh" + - "@sofstam" + - "@sateeshperi" + - "@jfy133" + - "@fellen31" diff --git a/modules/nf-core/minimap2/align/minimap2-align.diff b/modules/nf-core/minimap2/align/minimap2-align.diff new file mode 100644 index 0000000..647611a --- /dev/null +++ b/modules/nf-core/minimap2/align/minimap2-align.diff @@ -0,0 +1,59 @@ +Changes in module 'nf-core/minimap2/align' +'modules/nf-core/minimap2/align/environment.yml' is unchanged +Changes in 'minimap2/align/main.nf': +--- modules/nf-core/minimap2/align/main.nf ++++ modules/nf-core/minimap2/align/main.nf +@@ -11,12 +11,14 @@ + input: + tuple val(meta), path(reads) + tuple val(meta2), path(reference) ++ val prefix2 + val bam_format + val bam_index_extension + val cigar_paf_format + val cigar_bam + + output: ++ tuple val(meta), path("*.minimap*") , optional: true, emit: filtered_fastq + tuple val(meta), path("*.paf") , optional: true, emit: paf + tuple val(meta), path("*.bam") , optional: true, emit: bam + tuple val(meta), path("*.bam.${bam_index_extension}"), optional: true, emit: index +@@ -32,7 +34,8 @@ + def args4 = task.ext.args4 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" +- def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" ++ def map_mode = "${meta.platform}" ? "-x map-${meta.platform}" : '' ++ def bam_output = bam_format ? "-a | samtools fastq -f 4 | gzip > ${prefix}.${prefix2}.minimap.fastq.gz" : "-o ${prefix}.paf" + def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' + def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' + def bam_input = "${reads.extension}".matches('sam|bam|cram') +@@ -45,12 +48,12 @@ + minimap2 \\ + $args \\ + -t $task.cpus \\ ++ $map_mode \\ + $target \\ + $query \\ + $cigar_paf \\ + $set_cigar_bam \\ + $bam_output +- + + cat <<-END_VERSIONS > versions.yml + "${task.process}": +@@ -60,7 +63,7 @@ + """ + + stub: +- def prefix = task.ext.prefix ?: "${meta.id}" ++ def prefix = task.ext.prefix ?: c + def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" + def bam_index = bam_index_extension ? 
"touch ${prefix}.bam.${bam_index_extension}" : "" + def bam_input = "${reads.extension}".matches('sam|bam|cram') + +'modules/nf-core/minimap2/align/meta.yml' is unchanged +'modules/nf-core/minimap2/align/tests/tags.yml' is unchanged +'modules/nf-core/minimap2/align/tests/main.nf.test.snap' is unchanged +'modules/nf-core/minimap2/align/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test new file mode 100644 index 0000000..4072c17 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test @@ -0,0 +1,441 @@ +nextflow_process { + + name "Test Process MINIMAP2_ALIGN" + script "../main.nf" + process "MINIMAP2_ALIGN" + + tag "modules" + tag "modules_nfcore" + tag "minimap2" + tag "minimap2/align" + + test("sarscov2 - fastq, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) + ] + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + 
input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + bam(process.out.bam[0][1]).getHeader(), + bam(process.out.bam[0][1]).getReadsMD5(), + file(process.out.index[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + 
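+        // To re-run only this module's tests locally, an nf-test invocation along these
+        // lines is typically used (the profile depends on the local container setup):
+        //   nf-test test modules/nf-core/minimap2/align --profile docker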
then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - fastq, fasta, false, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = false + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, [], false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, fasta, true, 'bai', false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + input[2] = true + input[3] = 'bai' + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bam, [], true, false, false - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test3.single_end.markduplicates.sorted.bam', checkIfExists: true) + ] + input[1] = [ + [ id:'test_ref' ], // meta map + [] + ] + input[2] = true + input[3] = [] + input[4] = false + input[5] = false + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap new file mode 100644 index 0000000..12264a8 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap @@ -0,0 +1,476 @@ +{ + "sarscov2 - bam, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:03:00.827260362" + }, + 
"sarscov2 - bam, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:37.92353539" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam.bai:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:29:44.669021368" + }, + "sarscov2 - fastq, fasta, false, [], false, false - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + + ], + "index": [ + + ], + "paf": [ + [ + { + "id": "test", + "single_end": true + }, + "test.paf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:52.738781039" + }, + "sarscov2 - fastq, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-03T11:15:23.033808223" + }, + "sarscov2 - [fastq1, fastq2], fasta, true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz test_2.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "1bc392244f228bf52cf0b5a8f6a654c9", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:18.964586894" + }, + "sarscov2 - fastq, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "f194745c0ccfcb2a9c0aee094a08750", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:17:48.667488325" + }, + "sarscov2 - fastq, fasta, true, 'bai', false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam##idx##test.bam.bai --write-index" + ], + "f194745c0ccfcb2a9c0aee094a08750", + "test.bam.bai", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:02.517416733" + }, + "sarscov2 - bam, fasta, true, [], false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:MT192765.1\tLN:29829", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a genome.fasta -", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "5d426b9a5f5b2c54f1d7f1e4c238ae94", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-25T09:02:49.64829488" + }, + "sarscov2 - bam, fasta, true, [], false, false - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ], + "bam": [ + [ + { + "id": "test", + "single_end": true + }, + "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "index": [ + + ], + "paf": [ + + ], + "versions": [ + "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:21:22.162291795" + }, + "sarscov2 - fastq, [], true, false, false": { + "content": [ + [ + "@HD\tVN:1.6\tSO:coordinate", + "@SQ\tSN:ERR5069949.2151832\tLN:150", + "@SQ\tSN:ERR5069949.576388\tLN:77", + "@SQ\tSN:ERR5069949.501486\tLN:146", + "@SQ\tSN:ERR5069949.1331889\tLN:132", + "@SQ\tSN:ERR5069949.2161340\tLN:80", + "@SQ\tSN:ERR5069949.973930\tLN:79", + "@SQ\tSN:ERR5069949.2417063\tLN:150", + "@SQ\tSN:ERR5069949.376959\tLN:151", + "@SQ\tSN:ERR5069949.1088785\tLN:149", + "@SQ\tSN:ERR5069949.1066259\tLN:147", + "@SQ\tSN:ERR5069949.2832676\tLN:139", + "@SQ\tSN:ERR5069949.2953930\tLN:151", + "@SQ\tSN:ERR5069949.324865\tLN:151", + "@SQ\tSN:ERR5069949.2185111\tLN:150", + "@SQ\tSN:ERR5069949.937422\tLN:151", + "@SQ\tSN:ERR5069949.2431709\tLN:150", + "@SQ\tSN:ERR5069949.1246538\tLN:148", + "@SQ\tSN:ERR5069949.1189252\tLN:98", + "@SQ\tSN:ERR5069949.2216307\tLN:147", + "@SQ\tSN:ERR5069949.3273002\tLN:148", + "@SQ\tSN:ERR5069949.3277445\tLN:151", + "@SQ\tSN:ERR5069949.3022231\tLN:147", + "@SQ\tSN:ERR5069949.184542\tLN:151", + "@SQ\tSN:ERR5069949.540529\tLN:149", + "@SQ\tSN:ERR5069949.686090\tLN:150", + 
"@SQ\tSN:ERR5069949.2787556\tLN:106", + "@SQ\tSN:ERR5069949.2650879\tLN:150", + "@SQ\tSN:ERR5069949.2064910\tLN:149", + "@SQ\tSN:ERR5069949.2328704\tLN:150", + "@SQ\tSN:ERR5069949.1067032\tLN:150", + "@SQ\tSN:ERR5069949.3338256\tLN:151", + "@SQ\tSN:ERR5069949.1412839\tLN:147", + "@SQ\tSN:ERR5069949.1538968\tLN:150", + "@SQ\tSN:ERR5069949.147998\tLN:94", + "@SQ\tSN:ERR5069949.366975\tLN:106", + "@SQ\tSN:ERR5069949.1372331\tLN:151", + "@SQ\tSN:ERR5069949.1709367\tLN:129", + "@SQ\tSN:ERR5069949.2388984\tLN:150", + "@SQ\tSN:ERR5069949.1132353\tLN:150", + "@SQ\tSN:ERR5069949.1151736\tLN:151", + "@SQ\tSN:ERR5069949.479807\tLN:150", + "@SQ\tSN:ERR5069949.2176303\tLN:151", + "@SQ\tSN:ERR5069949.2772897\tLN:151", + "@SQ\tSN:ERR5069949.1020777\tLN:122", + "@SQ\tSN:ERR5069949.465452\tLN:151", + "@SQ\tSN:ERR5069949.1704586\tLN:149", + "@SQ\tSN:ERR5069949.1258508\tLN:151", + "@SQ\tSN:ERR5069949.986441\tLN:119", + "@SQ\tSN:ERR5069949.2674295\tLN:148", + "@SQ\tSN:ERR5069949.885966\tLN:79", + "@SQ\tSN:ERR5069949.2342766\tLN:151", + "@SQ\tSN:ERR5069949.3122970\tLN:127", + "@SQ\tSN:ERR5069949.3279513\tLN:72", + "@SQ\tSN:ERR5069949.309410\tLN:151", + "@SQ\tSN:ERR5069949.532979\tLN:149", + "@SQ\tSN:ERR5069949.2888794\tLN:151", + "@SQ\tSN:ERR5069949.2205229\tLN:150", + "@SQ\tSN:ERR5069949.786562\tLN:151", + "@SQ\tSN:ERR5069949.919671\tLN:151", + "@SQ\tSN:ERR5069949.1328186\tLN:151", + "@SQ\tSN:ERR5069949.870926\tLN:149", + "@SQ\tSN:ERR5069949.2257580\tLN:151", + "@SQ\tSN:ERR5069949.3249622\tLN:77", + "@SQ\tSN:ERR5069949.611123\tLN:125", + "@SQ\tSN:ERR5069949.651338\tLN:142", + "@SQ\tSN:ERR5069949.169513\tLN:92", + "@SQ\tSN:ERR5069949.155944\tLN:150", + "@SQ\tSN:ERR5069949.2033605\tLN:150", + "@SQ\tSN:ERR5069949.2730382\tLN:142", + "@SQ\tSN:ERR5069949.2125592\tLN:150", + "@SQ\tSN:ERR5069949.1062611\tLN:151", + "@SQ\tSN:ERR5069949.1778133\tLN:151", + "@SQ\tSN:ERR5069949.3057020\tLN:95", + "@SQ\tSN:ERR5069949.2972968\tLN:141", + "@SQ\tSN:ERR5069949.2734474\tLN:149", + "@SQ\tSN:ERR5069949.856527\tLN:151", + "@SQ\tSN:ERR5069949.2098070\tLN:151", + "@SQ\tSN:ERR5069949.1552198\tLN:150", + "@SQ\tSN:ERR5069949.2385514\tLN:150", + "@SQ\tSN:ERR5069949.2270078\tLN:151", + "@SQ\tSN:ERR5069949.114870\tLN:150", + "@SQ\tSN:ERR5069949.2668880\tLN:147", + "@SQ\tSN:ERR5069949.257821\tLN:139", + "@SQ\tSN:ERR5069949.2243023\tLN:150", + "@SQ\tSN:ERR5069949.2605155\tLN:146", + "@SQ\tSN:ERR5069949.1340552\tLN:151", + "@SQ\tSN:ERR5069949.1561137\tLN:150", + "@SQ\tSN:ERR5069949.2361683\tLN:149", + "@SQ\tSN:ERR5069949.2521353\tLN:150", + "@SQ\tSN:ERR5069949.1261808\tLN:149", + "@SQ\tSN:ERR5069949.2734873\tLN:98", + "@SQ\tSN:ERR5069949.3017828\tLN:107", + "@SQ\tSN:ERR5069949.573706\tLN:150", + "@SQ\tSN:ERR5069949.1980512\tLN:151", + "@SQ\tSN:ERR5069949.1014693\tLN:150", + "@SQ\tSN:ERR5069949.3184655\tLN:150", + "@SQ\tSN:ERR5069949.29668\tLN:89", + "@SQ\tSN:ERR5069949.3258358\tLN:151", + "@SQ\tSN:ERR5069949.1476386\tLN:151", + "@SQ\tSN:ERR5069949.2415814\tLN:150", + "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -t 2 -a test_1.fastq.gz test_1.fastq.gz", + "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.20\tCL:samtools sort -@ 1 -o test.bam" + ], + "16c1c651f8ec67383bcdee3c55aed94f", + [ + "versions.yml:md5,3548eeba9066efbf8d78ea99f8d813fd" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-23T11:18:34.246998277" + } +} \ No newline at end of file diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml new file mode 100644 index 
0000000..39dba37 --- /dev/null +++ b/modules/nf-core/minimap2/align/tests/tags.yml @@ -0,0 +1,2 @@ +minimap2/align: + - "modules/nf-core/minimap2/align/**" diff --git a/modules/nf-core/multiqc/environment.yml b/modules/nf-core/multiqc/environment.yml index ecb7dd7..6f5b867 100644 --- a/modules/nf-core/multiqc/environment.yml +++ b/modules/nf-core/multiqc/environment.yml @@ -1,7 +1,5 @@ -name: multiqc channels: - conda-forge - bioconda - - defaults dependencies: - - bioconda::multiqc=1.22.3 + - bioconda::multiqc=1.25.1 diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 0c81a3b..8a816ac 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -3,15 +3,17 @@ process MULTIQC { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.22.3--pyhdfd78af_0' : - 'biocontainers/multiqc:1.22.3--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" input: - path(multiqc_base_files, stageAs: "?/*") - tuple val(meta), path(files, stageAs: "?/*") + path(multiqc_files, stageAs: "?/*") + tuple val(meta), path(pipeline_files, stageAs: "?/*") path(multiqc_config) path(extra_multiqc_config) path(multiqc_logo) + path(replace_names) + path(sample_names) output: path "*multiqc_report.html", emit: report @@ -24,16 +26,22 @@ process MULTIQC { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' def config = multiqc_config ? "--config $multiqc_config" : '' def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' - def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' + def replace = replace_names ? "--replace-names ${replace_names}" : '' + def samples = sample_names ? "--sample-names ${sample_names}" : '' """ multiqc \\ --force \\ $args \\ $config \\ + $prefix \\ $extra_config \\ $logo \\ + $replace \\ + $samples \\ . cat <<-END_VERSIONS > versions.yml @@ -45,7 +53,7 @@ process MULTIQC { stub: """ mkdir multiqc_data - touch multiqc_plots + mkdir multiqc_plots touch multiqc_report.html cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml index 45a9bc3..b16c187 100644 --- a/modules/nf-core/multiqc/meta.yml +++ b/modules/nf-core/multiqc/meta.yml @@ -1,5 +1,6 @@ name: multiqc -description: Aggregate results from bioinformatics analyses across many samples into a single report +description: Aggregate results from bioinformatics analyses across many samples into + a single report keywords: - QC - bioinformatics tools @@ -12,40 +13,59 @@ tools: homepage: https://multiqc.info/ documentation: https://multiqc.info/docs/ licence: ["GPL-3.0-or-later"] + identifier: biotools:multiqc input: - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections in multiqc_config. 
- pattern: "*.{yml,yaml}" - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" + - - multiqc_files: + type: file + description: | + List of reports / files recognised by MultiQC, for example the html and zip output of FastQC + - - multiqc_config: + type: file + description: Optional config yml for MultiQC + pattern: "*.{yml,yaml}" + - - extra_multiqc_config: + type: file + description: Second optional config yml for MultiQC. Will override common sections + in multiqc_config. + pattern: "*.{yml,yaml}" + - - multiqc_logo: + type: file + description: Optional logo file for MultiQC + pattern: "*.{png}" + - - replace_names: + type: file + description: | + Optional two-column sample renaming file. First column a set of + patterns, second column a set of corresponding replacements. Passed via + MultiQC's `--replace-names` option. + pattern: "*.{tsv}" + - - sample_names: + type: file + description: | + Optional TSV file with headers, passed to the MultiQC --sample_names + argument. + pattern: "*.{tsv}" output: - report: - type: file - description: MultiQC report file - pattern: "multiqc_report.html" + - "*multiqc_report.html": + type: file + description: MultiQC report file + pattern: "multiqc_report.html" - data: - type: directory - description: MultiQC data dir - pattern: "multiqc_data" + - "*_data": + type: directory + description: MultiQC data dir + pattern: "multiqc_data" - plots: - type: file - description: Plots created by MultiQC - pattern: "*_data" + - "*_plots": + type: file + description: Plots created by MultiQC + pattern: "*_data" - versions: - type: file - description: File containing software versions - pattern: "versions.yml" + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" authors: - "@abhi18av" - "@bunop" diff --git a/modules/nf-core/multiqc/multiqc.diff b/modules/nf-core/multiqc/multiqc.diff new file mode 100644 index 0000000..1f02e13 --- /dev/null +++ b/modules/nf-core/multiqc/multiqc.diff @@ -0,0 +1,22 @@ +Changes in module 'nf-core/multiqc' +'modules/nf-core/multiqc/environment.yml' is unchanged +Changes in 'multiqc/main.nf': +--- modules/nf-core/multiqc/main.nf ++++ modules/nf-core/multiqc/main.nf +@@ -7,7 +7,8 @@ + 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" + + input: +- path multiqc_files, stageAs: "?/*" ++ path(multiqc_files, stageAs: "?/*") ++ tuple val(meta), path(pipeline_files, stageAs: "?/*") + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + +'modules/nf-core/multiqc/meta.yml' is unchanged +'modules/nf-core/multiqc/tests/tags.yml' is unchanged +'modules/nf-core/multiqc/tests/nextflow.config' is unchanged +'modules/nf-core/multiqc/tests/main.nf.test.snap' is unchanged +'modules/nf-core/multiqc/tests/main.nf.test' is unchanged +************************************************************ diff --git a/modules/nf-core/multiqc/tests/main.nf.test b/modules/nf-core/multiqc/tests/main.nf.test index f1c4242..33316a7 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test +++ b/modules/nf-core/multiqc/tests/main.nf.test @@ -8,6 +8,8 @@ nextflow_process { tag "modules_nfcore" tag "multiqc" + config "./nextflow.config" + test("sarscov2 single-end [fastqc]") { when { @@ -17,6 +19,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -41,6 +45,8 @@ nextflow_process { input[1] = Channel.of(file("https://github.com/nf-core/tools/raw/dev/nf_core/pipeline-template/assets/multiqc_config.yml", 
checkIfExists: true)) input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } @@ -66,6 +72,8 @@ nextflow_process { input[1] = [] input[2] = [] input[3] = [] + input[4] = [] + input[5] = [] """ } } diff --git a/modules/nf-core/multiqc/tests/main.nf.test.snap b/modules/nf-core/multiqc/tests/main.nf.test.snap index 0a4760e..2fcbb5f 100644 --- a/modules/nf-core/multiqc/tests/main.nf.test.snap +++ b/modules/nf-core/multiqc/tests/main.nf.test.snap @@ -2,14 +2,14 @@ "multiqc_versions_single": { "content": [ [ - "versions.yml:md5,bf3b209659477254bb8fa5a9405f9984" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-25T12:31:21.878452033" + "timestamp": "2024-10-02T17:51:46.317523" }, "multiqc_stub": { "content": [ @@ -17,25 +17,25 @@ "multiqc_report.html", "multiqc_data", "multiqc_plots", - "versions.yml:md5,bf3b209659477254bb8fa5a9405f9984" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-25T12:32:02.322196503" + "timestamp": "2024-10-02T17:52:20.680978" }, "multiqc_versions_config": { "content": [ [ - "versions.yml:md5,bf3b209659477254bb8fa5a9405f9984" + "versions.yml:md5,41f391dcedce7f93ca188f3a3ffa0916" ] ], "meta": { - "nf-test": "0.8.4", - "nextflow": "24.04.2" + "nf-test": "0.9.0", + "nextflow": "24.04.4" }, - "timestamp": "2024-06-25T12:31:50.064227638" + "timestamp": "2024-10-02T17:52:09.185842" } } \ No newline at end of file diff --git a/modules/nf-core/multiqc/tests/nextflow.config b/modules/nf-core/multiqc/tests/nextflow.config new file mode 100644 index 0000000..c537a6a --- /dev/null +++ b/modules/nf-core/multiqc/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: 'MULTIQC' { + ext.prefix = null + } +} diff --git a/modules/nf-core/porechop/abi/environment.yml b/modules/nf-core/porechop/abi/environment.yml new file mode 100644 index 0000000..4dd2eab --- /dev/null +++ b/modules/nf-core/porechop/abi/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: porechop_abi +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::porechop_abi=0.5.0 diff --git a/modules/nf-core/porechop/abi/main.nf b/modules/nf-core/porechop/abi/main.nf new file mode 100644 index 0000000..88ec5bd --- /dev/null +++ b/modules/nf-core/porechop/abi/main.nf @@ -0,0 +1,50 @@ +process PORECHOP_ABI { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/porechop_abi:0.5.0--py310h590eda1_0': + 'biocontainers/porechop_abi:0.5.0--py310h590eda1_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path("*.fastq.gz") , emit: reads + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi" + if ("$reads" == "${prefix}.fastq.gz") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
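+    // The default prefix already appends ".porechop_abi"; if a different output name is
+    // needed, the usual nf-core mechanism is an ext.prefix closure in conf/modules.config,
+    // e.g. (hypothetical selector): withName: PORECHOP_ABI { ext.prefix = { "${meta.id}_porechop" } }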
+ """ + porechop_abi \\ + --input $reads \\ + --threads $task.cpus \\ + $args \\ + --output ${prefix}.fastq.gz \\ + | tee ${prefix}.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop_abi: \$( porechop_abi --version ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}.porechop_abi" + """ + echo "" | gzip > ${prefix}.fastq.gz + touch ${prefix}.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + porechop_abi: \$( porechop_abi --version ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/porechop/abi/meta.yml b/modules/nf-core/porechop/abi/meta.yml new file mode 100644 index 0000000..a856ffb --- /dev/null +++ b/modules/nf-core/porechop/abi/meta.yml @@ -0,0 +1,48 @@ +name: "porechop_abi" +description: Extension of Porechop whose purpose is to process adapter sequences in ONT reads. +keywords: + - porechop_abi + - adapter + - nanopore +tools: + - "porechop_abi": + description: Extension of Porechop whose purpose is to process adapter sequences in ONT reads. + homepage: "https://github.com/bonsai-team/Porechop_ABI" + documentation: "https://github.com/bonsai-team/Porechop_ABI" + tool_dev_url: "https://github.com/bonsai-team/Porechop_ABI" + doi: "10.1101/2022.07.07.499093" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: fastq/fastq.gz file + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads: + type: file + description: Adapter-trimmed fastq.gz file + pattern: "*.fastq.gz" + - log: + type: file + description: Log file containing stdout information + pattern: "*.log" +authors: + - "@sofstam" + - "LilyAnderssonLee" +maintainers: + - "@sofstam" + - "LilyAnderssonLee" diff --git a/modules/nf-core/porechop/abi/tests/main.nf.test b/modules/nf-core/porechop/abi/tests/main.nf.test new file mode 100644 index 0000000..b5a29f9 --- /dev/null +++ b/modules/nf-core/porechop/abi/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process PORECHOP_ABI" + script "../main.nf" + process "PORECHOP_ABI" + tag "modules" + tag "modules_nfcore" + tag "porechop" + tag "porechop/abi" + + test("sarscov2-nanopore") { + + when { + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.reads, + file(process.out.log.get(0).get(1)).readLines()[20..40], + process.out.versions).match() + } + ) + } + } + + test("sarscov2-nanopore - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = [ + [ id:'test'], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/nanopore/fastq/test.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/porechop/abi/tests/main.nf.test.snap b/modules/nf-core/porechop/abi/tests/main.nf.test.snap new file mode 100644 index 0000000..ad63f4e --- /dev/null +++ b/modules/nf-core/porechop/abi/tests/main.nf.test.snap @@ -0,0 +1,94 @@ +{ + 
"sarscov2-nanopore": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,886fdb859fb50e0dddd35007bcff043e" + ] + ], + [ + " Best \u001b[0m", + " read Best \u001b[0m", + " start read end\u001b[0m", + " \u001b[4mSet %ID %ID \u001b[0m", + " \u001b[32mSQK-NSK007 100.0 73.1\u001b[0m", + " Rapid 40.4 0.0", + " RBK004_upstream 77.5 0.0", + " SQK-MAP006 75.8 72.7", + " SQK-MAP006 short 65.5 66.7", + " PCR adapters 1 73.9 69.6", + " PCR adapters 2 80.0 72.7", + " PCR adapters 3 70.8 69.6", + " 1D^2 part 1 71.4 70.0", + " 1D^2 part 2 84.8 75.8", + " cDNA SSP 63.0 61.7", + " \u001b[32mBarcode 1 (reverse) 100.0 100.0\u001b[0m", + " Barcode 2 (reverse) 70.8 69.2", + " Barcode 3 (reverse) 76.0 70.4", + " Barcode 4 (reverse) 74.1 71.4", + " Barcode 5 (reverse) 77.8 80.8", + " Barcode 6 (reverse) 73.1 70.8" + ], + [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-29T13:50:49.318599" + }, + "sarscov2-nanopore - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test.porechop_abi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ], + "log": [ + [ + { + "id": "test" + }, + "test.porechop_abi.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test" + }, + "test.porechop_abi.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,0e9e5e0d35a68ff8e6490c949b257f98" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.1" + }, + "timestamp": "2024-07-29T13:50:54.425389" + } +} \ No newline at end of file diff --git a/modules/nf-core/porechop/abi/tests/tags.yml b/modules/nf-core/porechop/abi/tests/tags.yml new file mode 100644 index 0000000..e19350c --- /dev/null +++ b/modules/nf-core/porechop/abi/tests/tags.yml @@ -0,0 +1,2 @@ +porechop/abi: + - "modules/nf-core/porechop/abi/**" diff --git a/modules/nf-core/quast/main.nf b/modules/nf-core/quast/main.nf index ce9befd..da16b9f 100644 --- a/modules/nf-core/quast/main.nf +++ b/modules/nf-core/quast/main.nf @@ -26,9 +26,9 @@ process QUAST { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" - def min_contig_len = "--min-contig ${params.min_contig_length}" + def min_contig_len = "--min-contig ${params.short_reads_min_contig_length}" if ( meta.library_strategy == "metatranscriptomics" ) { - min_contig_len = "--min-contig ${params.min_contig_length_metatranscriptomics}" + min_contig_len = "--min-contig ${params.short_reads_min_contig_length_metat}" } def features = gff ? "--features $gff" : '' def reference = fasta ? "-r $fasta" : '' diff --git a/modules/nf-core/quast/quast.diff b/modules/nf-core/quast/quast.diff index 7d48832..bfaf013 100644 --- a/modules/nf-core/quast/quast.diff +++ b/modules/nf-core/quast/quast.diff @@ -14,9 +14,9 @@ Changes in module 'nf-core/quast' script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" -+ def min_contig_len = "--min-contig ${params.min_contig_length}" ++ def min_contig_len = "--min-contig ${params.short_reads_min_contig_length}" + if ( meta.library_strategy == "metatranscriptomics" ) { -+ min_contig_len = "--min-contig ${params.min_contig_length_metatranscriptomics}" ++ min_contig_len = "--min-contig ${params.short_reads_min_contig_length_metat}" + } def features = gff ? 
"--features $gff" : '' def reference = fasta ? "-r $fasta" : '' diff --git a/modules/nf-core/racon/environment.yml b/modules/nf-core/racon/environment.yml new file mode 100644 index 0000000..e5cd0b8 --- /dev/null +++ b/modules/nf-core/racon/environment.yml @@ -0,0 +1,6 @@ +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::racon=1.4.20 diff --git a/modules/nf-core/racon/main.nf b/modules/nf-core/racon/main.nf new file mode 100644 index 0000000..de29e35 --- /dev/null +++ b/modules/nf-core/racon/main.nf @@ -0,0 +1,38 @@ +process RACON { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/racon:1.4.20--h9a82719_1' : + 'biocontainers/racon:1.4.20--h9a82719_1' }" + + input: + tuple val(meta), path(reads), path(assembly), path(paf) + + output: + tuple val(meta), path('*_assembly_consensus.fasta.gz') , emit: improved_assembly + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + racon -t "$task.cpus" \\ + "${reads}" \\ + "${paf}" \\ + $args \\ + "${assembly}" > \\ + ${prefix}_assembly_consensus.fasta + + gzip -n ${prefix}_assembly_consensus.fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + racon: \$( racon --version 2>&1 | sed 's/^.*v//' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/racon/meta.yml b/modules/nf-core/racon/meta.yml new file mode 100644 index 0000000..9698c0a --- /dev/null +++ b/modules/nf-core/racon/meta.yml @@ -0,0 +1,51 @@ +name: racon +description: Consensus module for raw de novo DNA assembly of long uncorrected reads +keywords: + - assembly + - pacbio + - nanopore + - polish +tools: + - racon: + description: Ultrafast consensus module for raw de novo genome assembly of long uncorrected reads. + homepage: https://github.com/lbcb-sci/racon + documentation: https://github.com/lbcb-sci/racon + tool_dev_url: https://github.com/lbcb-sci/racon + doi: 10.1101/gr.214270.116 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: List of input FastQ files. Racon expects single end reads + pattern: "*.{fastq,fastq.gz,fq,fq.gz}" + - assembly: + type: file + description: Genome assembly to be improved + pattern: "*.{fasta,fa}" + - paf: + type: file + description: Alignment in PAF format + pattern: "*.paf" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - improved_assembly: + type: file + description: Improved genome assembly + pattern: "*_assembly_consensus.fasta.gz" +authors: + - "@avantonder" +maintainers: + - "@avantonder" diff --git a/modules/nf-core/seqkit/grep/seqkit-grep.diff b/modules/nf-core/seqkit/grep/seqkit-grep.diff index 30b7db0..60ca678 100644 --- a/modules/nf-core/seqkit/grep/seqkit-grep.diff +++ b/modules/nf-core/seqkit/grep/seqkit-grep.diff @@ -1,4 +1,6 @@ Changes in module 'nf-core/seqkit/grep' +'modules/nf-core/seqkit/grep/environment.yml' is unchanged +Changes in 'seqkit/grep/main.nf': --- modules/nf-core/seqkit/grep/main.nf +++ modules/nf-core/seqkit/grep/main.nf @@ -9,8 +9,7 @@ @@ -11,5 +13,21 @@ Changes in module 'nf-core/seqkit/grep' output: tuple val(meta), path("*.{fa,fq}.gz") , emit: filter +@@ -21,10 +20,13 @@ + + script: + def args = task.ext.args ?: '' +- def prefix = task.ext.prefix ?: "${meta.id}" + // fasta or fastq. Exact pattern match .fasta or .fa suffix with optional .gz (gzip) suffix + def suffix = task.ext.suffix ?: "${sequence}" ==~ /(.*f[astn]*a(.gz)?$)/ ? "fa" : "fq" + def pattern_file = pattern ? "-f ${pattern}" : "" ++ ++ def pattern_filename = pattern.getName() ++ def pattern_name = pattern_filename.split('\\.')[0] ++ def prefix = task.ext.prefix ?: "${meta.id}_${pattern_name}" + + """ + seqkit \\ +'modules/nf-core/seqkit/grep/meta.yml' is unchanged ************************************************************ diff --git a/modules/nf-core/seqkit/seq/main.nf b/modules/nf-core/seqkit/seq/main.nf index a6a05b7..4a1d0f3 100644 --- a/modules/nf-core/seqkit/seq/main.nf +++ b/modules/nf-core/seqkit/seq/main.nf @@ -19,9 +19,9 @@ process SEQKIT_SEQ { task.ext.when == null || task.ext.when script: - def min_len = params.min_contig_length + def min_len = params.short_reads_min_contig_length if ( meta.library_strategy == "metatranscriptomic" ) { - min_len = params.min_contig_length_metatranscriptomics + min_len = params.short_reads_min_contig_length_metat } def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' diff --git a/modules/nf-core/seqkit/seq/seqkit-seq.diff b/modules/nf-core/seqkit/seq/seqkit-seq.diff index 168ac0b..af070e2 100644 --- a/modules/nf-core/seqkit/seq/seqkit-seq.diff +++ b/modules/nf-core/seqkit/seq/seqkit-seq.diff @@ -5,9 +5,9 @@ Changes in module 'nf-core/seqkit/seq' task.ext.when == null || task.ext.when script: -+ def min_len = params.min_contig_length ++ def min_len = params.short_reads_min_contig_length + if ( meta.library_strategy == "metatranscriptomic" ) { -+ min_len = params.min_contig_length_metatranscriptomics ++ min_len = params.short_reads_min_contig_length_metat + } def args = task.ext.args ?: '' def args2 = task.ext.args2 ?: '' diff --git a/nextflow.config b/nextflow.config index d8ad734..02daa11 100644 --- a/nextflow.config +++ b/nextflow.config @@ -6,10 +6,6 @@ ---------------------------------------------------------------------------------------- */ -plugins { - id 'nf-schema' -} - // Global default params, used in configs params { @@ -20,24 +16,6 @@ params { // For already fetched data samplesheet = null - /* - * Assembler options, by default the pipeline will pick - * - metaspades for pair-end - * - megahit for single-end - * - * Setting --assembler will force the assembler - * - * - spades: Use for assembling single end reads - * - * - metaspades: Use for assembling paired end reads - * with moderate memory and runtime 
requirements - * - * - megahit: Use when memory or runtime requirements - * for metaspades are prohibitively high, such as: - * - Memory >1TB - * - Runtime >3-4 days - */ - assembler = null // The pipeline will use the metadata from ENA (obtained by the fetch_tool) // As the metadata can be incorrect, we provide the following parameters to @@ -45,26 +23,68 @@ params { single_end = null library_layout = null library_strategy = null + platform = null + + // QC FILTERING - // Reads QC filtering options - filter_ratio_threshold = 0.9 - low_reads_count_threshold = 1000 + // Short reads QC filtering options + short_reads_filter_ratio_threshold = 0.1 + short_reads_low_reads_count_threshold = 1000 - // Reference genome - reference_genome = null + // Long reads options + long_reads_min_read_length = 200 + // Short reads reference databases (name to be selected from list) + bwamem2_reference_genomes_folder = "" + blast_reference_genomes_folder = "" + + // Long reads reference genome + reference_genome = null + + // Short-read sequences and assemblies are + // automatically polished from human and phix seqs + // Both blast and bwa indices are needed remove_human_phix = true human_phix_blast_index_name = "human_phix" human_phix_bwamem2_index_name = "human_phix" - bwamem2_reference_genomes_folder = "" - blast_reference_genomes_folder = "" + // Long-read assemblies don't require phiX + // nor indices, just a fasta file + reference_genomes_folder = "" + remove_human = true + human_fasta_prefix = "human" + + // ASSEMBLY + + /* By default the pipeline will pick + * - metaspades for paired-end short reads + * - megahit for single-end short reads + * - flye for long reads + * + * Setting --assembler will force the assembler + * + * - spades: Use for assembling single end reads + * + * - metaspades: Use for assembling paired end reads + * with moderate memory and runtime requirements + * + * - megahit: Use when memory or runtime requirements + * for metaspades are prohibitively high, such as: + * - Memory >1TB + * - Runtime >3-4 days + * + * - flye: Use for any long-read assembly. 
long_reads_assembler_config + * should be selected depending on input data (if ONT or + * pacbio, and if data quality is high or low) + */ + assembler = null // Assembly options - spades_only_assembler = true - min_contig_length = 500 - min_contig_length_metatranscriptomics = 200 - assembly_memory = 100 + spades_only_assembler = true + short_reads_min_contig_length = 500 + short_reads_min_contig_length_metat = 200 + long_reads_assembler_config = null + assembly_memory = 100 // MultiQC options multiqc_config = null @@ -84,22 +104,24 @@ params { help = false version = false - // Max resource options - // Defaults only, expecting to be overwritten - max_memory = '1.TB' - max_cpus = 32 - max_time = '168.h' // 7 days + max_spades_retries = 3 + max_megahit_retries = 3 // Assembler versions spades_version = "3.15.5" megahit_version = "1.2.9" + flye_version = "2.9" } validation { failUnrecognisedParams = true lenientMode = false - showHiddenParams = false + help { + enabled = true + showHidden = false + command = "nextflow run ebi-metagenomics/miassembler --samplesheet samplesheet.csv --outdir output" + } } // Load base.config by default for all pipelines @@ -197,6 +219,9 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } + test { + includeConfig 'conf/test.config' + } codon_slurm { includeConfig 'conf/codon_slurm.config' } } @@ -213,7 +238,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-schema@2.0.0' + id 'nf-schema@2.2.0' } // Export these variables to prevent local Python/R libraries from conflicting with those in the container @@ -257,8 +282,8 @@ manifest { homePage = 'https://github.com/ebi-metagenomics/miassembler' description = """Microbiome Informatics metagenomes assembly pipeline""" mainScript = 'main.nf' - nextflowVersion = '!>=23.04.0' - version = '1.0dev' + nextflowVersion = '!>=24.04.0' + version = 'v1.0.0' doi = '' } @@ -284,36 +309,3 @@ def study_folder( meta = null ) { study_accession, ].join("/") } - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! 
Using default value: $obj" - return obj - } - } -} diff --git a/nextflow_schema.json b/nextflow_schema.json index ebfb512..4df56ed 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -4,19 +4,13 @@ "title": "ebi-metagenomics/miassembler pipeline parameters", "description": "Microbiome Informatics metagenomes assembly pipeline", "type": "object", - "defs": { + "$defs": { "input_output_options": { "title": "Input/output options", "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": [ - "blast_reference_genomes_folder", - "bwamem2_reference_genomes_folder", - "human_phix_blast_index_name", - "human_phix_bwamem2_index_name", - "outdir" - ], + "required": ["outdir"], "properties": { "samplesheet": { "type": "string", @@ -40,14 +34,25 @@ "fa_icon": "far fa-address-card", "minLength": 3 }, + "long_reads_min_read_length": { + "type": "integer", + "description": "Minimum read length for pre-assembly quality filtering", + "default": 200 + }, "private_study": { "type": "boolean", - "description": "To use if the ENA study is private" + "description": "To use if the ENA study is private, *this feature ony works on EBI infrastructure at the moment*" }, "assembler": { "type": "string", - "enum": ["spades", "metaspades", "megahit"], - "description": "The short reads assembler" + "enum": ["spades", "metaspades", "megahit", "flye"], + "description": "The short or long reads assembler" + }, + "long_reads_assembler_config": { + "type": "string", + "enum": ["nano-raw", "nano-corr", "nano-hq", "pacbio-raw", "pacbio-corr", "pacbio-hifi"], + "description": "Configuration to use flye with.", + "default": null }, "single_end": { "type": "boolean", @@ -63,6 +68,15 @@ "description": "Force the library_layout value for the study / reads", "enum": ["single", "paired"] }, + "platform": { + "type": "string", + "description": "Force the instrument_platform value for the study / reads", + "default": null + }, + "flye_version": { + "type": "string", + "default": "2.9" + }, "spades_version": { "type": "string", "default": "3.15.5" @@ -89,6 +103,11 @@ "zebrafish.fna" ] }, + "reference_genomes_folder": { + "type": "string", + "description": "The folder with the reference genomes, defaults to the Microbiome Informatics internal directory.", + "format": "directory-path" + }, "blast_reference_genomes_folder": { "type": "string", "description": "The folder with the reference genome blast indexes, defaults to the Microbiome Informatics internal directory.", @@ -104,6 +123,11 @@ "description": "Remove human and phiX reads pre assembly, and contigs matching those genomes.", "default": true }, + "remove_human": { + "type": "boolean", + "description": "Remove human reads pre assembly, and contigs matching those genomes.", + "default": true + }, "human_phix_blast_index_name": { "type": "string", "description": "Combined Human and phiX BLAST db.", @@ -114,18 +138,23 @@ "description": "Combined Human and phiX bwa-mem2 index.", "default": "human_phix" }, - "min_contig_length": { + "human_fasta_prefix": { + "type": "string", + "description": "Human prefix name.", + "default": "human" + }, + "short_reads_min_contig_length": { "type": "integer", "default": 500, - "description": "Minimum contig length filter." + "description": "Minimum contig length filter for short reads." 
}, - "min_contig_length_metatranscriptomics": { + "short_reads_min_contig_length_metat": { "type": "integer", "default": 200, - "description": "Minimum contig length filter for metaT." + "description": "Minimum contig length filter for short reads metaT." }, "assembly_memory": { - "type": "integer", + "type": "number", "default": 100, "description": "Default memory allocated for the assembly process." }, @@ -162,14 +191,14 @@ "description": "Set the thresholds for the reads QC/filtering steps. Reads that fail QC won't be assembled.", "help_text": "Use these options to define the quality control thresholds for your reads. You can specify the maximum allowed filtering ratio and the minimum acceptable read count. If the filtering ratio exceeds the set limit or the read count falls below the threshold, the reads will be flagged and excluded from further assembly. The information about those runs that failed are aggregated in the qc_failed_runs.csv file.", "properties": { - "filter_ratio_threshold": { + "short_reads_filter_ratio_threshold": { "type": "number", - "description": "The maximum fraction of reads that are allowed to be filtered out. If exceeded, it flags excessive filtering. The default value is 0.9, meaning that if more than 90% of the reads are filtered out, the threshold is considered exceeded, and the run is not assembled.", - "default": 0.9, + "description": "The maximum fraction of reads that are allowed to be filtered out. If exceeded, it flags excessive filtering. The default value is 0.1, meaning that if less than 10% of the reads are retained after filtering, the threshold is considered exceeded, and the run is not assembled.", + "default": 0.1, "minimum": 0.0, "maximum": 1.0 }, - "low_reads_count_threshold": { + "short_reads_low_reads_count_threshold": { "type": "number", "description": "The minimum number of reads required after filtering. If below, it flags a low read count and the run is not assembled.", "default": 1000 @@ -183,31 +212,21 @@ "description": "Set the top limit for requested resources for any single job.", "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause the Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.", "properties": { - "max_cpus": { + "max_spades_retries": { "type": "integer", - "description": "Maximum number of CPUs that can be requested for any single job.", - "default": 32, - "fa_icon": "fas fa-microchip", + "description": "Maximum number of task attempt retries for (meta)spades assembly steps only.", + "default": 3, + "fa_icon": "fas fa-repeat", "hidden": true, - "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`" + "help_text": "Each retry will increase the memory by 50%. Use to limit how many times this increase-and-retry happens." }, - "max_memory": { - "type": "string", - "description": "Maximum amount of memory that can be requested for any single job.", - "default": "1.TB", - "fa_icon": "fas fa-memory", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "hidden": true, - "help_text": "Use to set an upper-limit for the memory requirement for each process. 
Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`" - }, - "max_time": { - "type": "string", - "description": "Maximum amount of time that can be requested for any single job.", - "default": "168.h", - "fa_icon": "far fa-clock", - "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$", + "max_megahit_retries": { + "type": "integer", + "description": "Maximum number of task attempt retries for megahit assembly steps only.", + "default": 3, + "fa_icon": "fas fa-repeat", "hidden": true, - "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`" + "help_text": "Each retry will increase the memory by 50%. Use to limit how many times this increase-and-retry happens." } } }, @@ -297,16 +316,16 @@ }, "allOf": [ { - "$ref": "#/defs/input_output_options" + "$ref": "#/$defs/input_output_options" }, { - "$ref": "#/defs/reads_qc" + "$ref": "#/$defs/reads_qc" }, { - "$ref": "#/defs/max_job_request_options" + "$ref": "#/$defs/max_job_request_options" }, { - "$ref": "#/defs/generic_options" + "$ref": "#/$defs/generic_options" } ] } diff --git a/nf-test.config b/nf-test.config index 43c03a0..853c892 100644 --- a/nf-test.config +++ b/nf-test.config @@ -1,7 +1,6 @@ config { - testsDir "tests" workDir ".nf-test" configFile "tests/nextflow.config" - profile "test_ci,docker" + profile "test,docker" } diff --git a/subworkflows/local/long_reads_qc.nf b/subworkflows/local/long_reads_qc.nf new file mode 100644 index 0000000..da0b059 --- /dev/null +++ b/subworkflows/local/long_reads_qc.nf @@ -0,0 +1,101 @@ +import groovy.json.JsonSlurper + +include { FASTP as FASTP_LR } from '../../modules/nf-core/fastp/main' +include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_HUMAN } from '../../modules/nf-core/minimap2/align/main' +include { MINIMAP2_ALIGN as MINIMAP2_ALIGN_HOST } from '../../modules/nf-core/minimap2/align/main' + +workflow LONG_READS_QC { + + take: + reads // [ val(meta), path(reads) ] + reference_genome // [ val(meta2), path(reference_genome) ] + + main: + ch_versions = Channel.empty() + + FASTP_LR( + reads, + [], // no input adapters + false, // keep passing reads in the output + false, // omit trimmed reads in the output + false, // don't merge all reads in the output + false // don't trim for polyA + ) + + ch_versions = ch_versions.mix(FASTP_LR.out.versions) + + quality_levels_ch = FASTP_LR.out.json.map { meta, json -> { + json_txt = new JsonSlurper().parseText(json.text) + q20_bases = json_txt?.summary?.before_filtering?.q20_bases ?: 0; + total_bases = json_txt?.summary?.before_filtering?.total_bases ?: 0; + + q20_percentage = q20_bases / total_bases * 100 + + quality = [ + "high_quality": q20_percentage >= 80, + "low_quality": q20_percentage < 80, + ] + return [meta, quality] + } + } + + // TODO: add filter if too many reads are removed + + decontaminated_reads = channel.empty() + + if ( params.remove_human ) { + // TODO: make this consistent with short_reads + // can we use the same flag, even if one has phix but not the other? + // Check file extensions too + + human_reference = Channel.fromPath( "${params.reference_genomes_folder}/${params.human_fasta_prefix}.fna", checkIfExists: true) + .collect().map { + files -> [ ["id": params.human_fasta_prefix], files ] + } + + // TODO: can we change the way human/host are given via prefixes?
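+        // A hedged sketch of the shapes involved (illustration only, derived from the
+        // channel operations above): with the default human_fasta_prefix = "human",
+        // human_reference emits
+        //     [ [ "id": "human" ], [ "<reference_genomes_folder>/human.fna" ] ]
+        // and MINIMAP2_ALIGN_HUMAN.out.filtered_fastq, assigned below to
+        // decontaminated_reads, is treated as the non-human read fraction.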
+ + MINIMAP2_ALIGN_HUMAN( + FASTP_LR.out.reads, + human_reference, + "human", + true, // output bam format + "bai", // bam index extension + false, // no CIGAR in paf format + true // allow for long CIGAR + ) + + ch_versions = ch_versions.mix(MINIMAP2_ALIGN_HUMAN.out.versions) + + decontaminated_reads = MINIMAP2_ALIGN_HUMAN.out.filtered_fastq + + } else { + decontaminated_reads = FASTP_LR.out.reads + } + + if ( reference_genome != null ) { + + host_reference = Channel.fromPath( "${params.reference_genomes_folder}/${reference_genome}*", checkIfExists: true) + .collect().map { + files -> [ ["id": reference_genome], files ] + } + + MINIMAP2_ALIGN_HOST( + decontaminated_reads, + host_reference, + "host", + true, // output bam format + "bai", // bam index extension + false, // no CIGAR in paf format + true // allow for long CIGAR + ) + + ch_versions = ch_versions.mix(MINIMAP2_ALIGN_HOST.out.versions) + + decontaminated_reads = MINIMAP2_ALIGN_HOST.out.filtered_fastq + } + + emit: + qc_reads = decontaminated_reads + versions = ch_versions +} diff --git a/subworkflows/local/ont_hq.nf b/subworkflows/local/ont_hq.nf new file mode 100644 index 0000000..4537d46 --- /dev/null +++ b/subworkflows/local/ont_hq.nf @@ -0,0 +1,16 @@ +include { PORECHOP_ABI } from '../../modules/nf-core/porechop/abi/main' + +workflow ONT_HQ { + take: + reads // [ val(meta), path(reads) ] + + main: + PORECHOP_ABI( + reads + ) + PORECHOP_ABI.out.reads.view() + + // temporary just to test the module + emit: + contigs = PORECHOP_ABI.out.reads +} diff --git a/subworkflows/local/ont_lq.nf b/subworkflows/local/ont_lq.nf new file mode 100644 index 0000000..d53db8c --- /dev/null +++ b/subworkflows/local/ont_lq.nf @@ -0,0 +1,18 @@ +include { CANU as CANU_ONT } from '../../modules/nf-core/canu/main' + +workflow ONT_LQ { + take: + reads // [ val(meta), path(reads) ] + + main: + CANU_ONT( + reads, + "-nanopore", + "5m" + ) + CANU_ONT.out.corrected_trimmed_reads.view() + + // temporary just to test the module + emit: + contigs = CANU_ONT.out.corrected_trimmed_reads +} diff --git a/subworkflows/local/pacbio_hifi.nf b/subworkflows/local/pacbio_hifi.nf new file mode 100644 index 0000000..491bf28 --- /dev/null +++ b/subworkflows/local/pacbio_hifi.nf @@ -0,0 +1,3 @@ +workflow PACBIO_HIFI { + +} \ No newline at end of file diff --git a/subworkflows/local/pacbio_lq.nf b/subworkflows/local/pacbio_lq.nf new file mode 100644 index 0000000..0db719d --- /dev/null +++ b/subworkflows/local/pacbio_lq.nf @@ -0,0 +1,14 @@ +include { CANU as CANU_PACBIO } from '../../modules/nf-core/canu/main' + +workflow PACBIO_LQ { + take: + reads // [ val(meta), path(reads) ] + + main: + CANU_PACBIO( + reads, + "-pacbio", + "5m" + ) + CANU_PACBIO.out.corrected_reads.view() +} diff --git a/subworkflows/local/assembly_coverage.nf b/subworkflows/local/short_reads_assembly_coverage.nf similarity index 98% rename from subworkflows/local/assembly_coverage.nf rename to subworkflows/local/short_reads_assembly_coverage.nf index b8b3db6..9e28603 100644 --- a/subworkflows/local/assembly_coverage.nf +++ b/subworkflows/local/short_reads_assembly_coverage.nf @@ -4,7 +4,7 @@ include { BWAMEM2_INDEX } from '../../modules/nf-core/bwa include { SAMTOOLS_IDXSTATS } from '../../modules/nf-core/samtools/idxstats/main' include { METABAT2_JGISUMMARIZEBAMCONTIGDEPTHS } from '../../modules/nf-core/metabat2/jgisummarizebamcontigdepths/main' -workflow ASSEMBLY_COVERAGE { +workflow SHORT_READS_ASSEMBLY_COVERAGE { take: assembly_reads // [ val(meta), path(assembly_fasta), path(reads) ] diff --git 
a/subworkflows/local/assembly_qc.nf b/subworkflows/local/short_reads_assembly_qc.nf similarity index 86% rename from subworkflows/local/assembly_qc.nf rename to subworkflows/local/short_reads_assembly_qc.nf index f5bfa7d..5e273af 100644 --- a/subworkflows/local/assembly_qc.nf +++ b/subworkflows/local/short_reads_assembly_qc.nf @@ -18,17 +18,17 @@ process PUBLISH_CLEANED_CONTIGS { """ } -workflow ASSEMBLY_QC { +workflow SHORT_READS_ASSEMBLY_QC { take: assembly // [ val(meta), path(assembly_fasta) ] - host_reference_genome // [ val(meta2), path(reference_genome) ] | meta2 contains the name of the reference genome + reference_genome // [ val(meta2), path(reference_genome) ] | meta2 contains the name of the reference genome main: ch_versions = Channel.empty() - /* Len filter using the parameter "min_contig_length" */ + /* Len filter using the parameter "short_reads_min_contig_length" */ SEQKIT_SEQ( assembly ) @@ -60,11 +60,11 @@ workflow ASSEMBLY_QC { ch_versions = ch_versions.mix(SEQKIT_GREP_HUMAN_PHIX.out.versions) } - if ( host_reference_genome != null ) { + if ( reference_genome != null ) { - ch_blast_host_refs = Channel.fromPath( "${params.blast_reference_genomes_folder}/${host_reference_genome}*", checkIfExists: true) + ch_blast_host_refs = Channel.fromPath( "${params.blast_reference_genomes_folder}/${reference_genome}*", checkIfExists: true) .collect().map { - files -> [ ["id": host_reference_genome], files ] + files -> [ ["id": reference_genome], files ] } BLAST_BLASTN_HOST( diff --git a/subworkflows/local/reads_qc.nf b/subworkflows/local/short_reads_qc.nf similarity index 84% rename from subworkflows/local/reads_qc.nf rename to subworkflows/local/short_reads_qc.nf index a3e99af..5cbe55b 100644 --- a/subworkflows/local/reads_qc.nf +++ b/subworkflows/local/short_reads_qc.nf @@ -2,11 +2,11 @@ include { FASTP } from '../../module include { BWAMEM2DECONTNOBAMS as HUMAN_PHIX_DECONTAMINATION } from '../../modules/ebi-metagenomics/bwamem2decontnobams/main' include { BWAMEM2DECONTNOBAMS as HOST_DECONTAMINATION } from '../../modules/ebi-metagenomics/bwamem2decontnobams/main' -workflow READS_QC { +workflow SHORT_READS_QC { take: reads // [ val(meta), path(reads) ] - host_reference_genome // [ val(meta2), path(reference_genome) ] | meta2 contains the name of the reference genome + reference_genome // [ val(meta2), path(reference_genome) ] | meta2 contains the name of the reference genome main: ch_versions = Channel.empty() @@ -16,6 +16,7 @@ workflow READS_QC { [], false, false, + false, false ) @@ -43,11 +44,11 @@ workflow READS_QC { decontaminated_reads = FASTP.out.reads } - if ( host_reference_genome != null ) { + if ( reference_genome != null ) { - ch_bwamem2_host_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${host_reference_genome}*", checkIfExists: true) + ch_bwamem2_host_refs = Channel.fromPath( "${params.bwamem2_reference_genomes_folder}/${reference_genome}*", checkIfExists: true) .collect().map { - files -> [ ["id": host_reference_genome], files ] + files -> [ ["id": reference_genome], files ] } HOST_DECONTAMINATION( diff --git a/tests/human/human.fna b/tests/human/human.fna new file mode 100644 index 0000000..a4ebdb7 --- /dev/null +++ b/tests/human/human.fna @@ -0,0 +1,79 @@ +>NC_001422.1 Escherichia phage phiX174, complete genome +GAGTTTTATCGCTTCCATGACGCAGAAGTTAACACTTTCGGATATTTCTGATGAGTCGAAAAATTATCTT +GATAAAGCAGGAATTACTACTGCTTGTTTACGAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAA +ATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAACGATTCTG 
+TCAAAAACTGACGCGTTGGATGAGGAGAAGTGGCTTAATATGCTTGGCACGTTCGTCAAGGACTGGTTTA +GATATGAGTCACATTTTGTTCATGGTAGAGATTCTCTTGTTGACATTTTAAAAGAGCGTGGATTACTATC +TGAGTCCGATGCTGTTCAACCACTAATAGGTAAGAAATCATGAGTCAAGTTACTGAACAATCCGTACGTT +TCCAGACCGCTTTGGCCTCTATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGATTT +CGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGTTGAGGCT +TGCGTTTATGGTACGCTGGACTTTGTGGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCG +TCATTGCTTATTATGTTCATCCCGTCAACATTCAAACGGCCTGTCTCATCATGGAAGGCGCTGAATTTAC +GGAAAACATTATTAATGGCGTCGAGCGTCCGGTTAAAGCCGCTGAATTGTTCGCGTTTACCTTGCGTGTA +CGCGCAGGAAACACTGACGTTCTTACTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCGGAAGGAG +TGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGCCGTTGCGAGGTACT +AAAGGCAAGCGTAAAGGCGCTCGTCTTTGGTATGTAGGTGGTCAACAATTTTAATTGCAGGGGCTTCGGC +CCCTTACTTGAGGATAAATTATGTCTAATATTCAAACTGGCGCCGAGCGTATGCCGCATGACCTTTCCCA +TCTTGGCTTCCTTGCTGGTCAGATTGGTCGTCTTATTACCATTTCAACTACTCCGGTTATCGCTGGCGAC +TCCTTCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTA +CTGTAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAA +GGATGGTGTTAATGCCACTCCTCTCCCGACTGTTAACACTACTGGTTATATTGACCATGCCGCTTTTCTT +GGCACGATTAACCCTGATACCAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACA +ACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATGAGCTTAATCAAGATGATGC +TCGTTATGGTTTCCGTTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTT +TCTCGCCAAATGACGACTTCTACCACATCTATTGACATTATGGGTCTGCAAGCTGCTTATGCTAATTTGC +ATACTGACCAAGAACGTGATTACTTCATGCAGCGTTACCATGATGTTATTTCTTCATTTGGAGGTAAAAC +CTCTTATGACGCTGACAACCGTCCTTTACTTGTCATGCGCTCTAATCTCTGGGCATCTGGCTATGATGTT +GATGGAACTGACCAAACGTCGTTAGGCCAGTTTTCTGGTCGTGTTCAACAGACCTATAAACATTCTGTGC +CGCGTTTCTTTGTTCCTGAGCATGGCACTATGTTTACTCTTGCGCTTGTTCGTTTTCCGCCTACTGCGAC +TAAAGAGATTCAGTACCTTAACGCTAAAGGTGCTTTGACTTATACCGATATTGCTGGCGACCCTGTTTTG +TATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGT +TTAAGATTGCTGAGGGTCAGTGGTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCACCTTCTTGA +AGGCTTCCCATTCATTCAGGAACCGCCTTCTGGTGATTTGCAAGAACGCGTACTTATTCGCCACCATGAT +TATGACCAGTGTTTCCAGTCCGTTCAGTTGTTGCAGTGGAATAGTCAGGTTAAATTTAATGTGACCGTTT +ATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAAC +GCCGAAGCGGTAAAAATTTTAATTTTTGCCGCTGAGGGGTTGACCAAGCGAAGCGCGGTAGGTTTTCTGC +TTAGGAGTTTAATCATGTTTCAGACTTTTATTTCTCGCCATAATTCAAACTTTTTTTCTGATAAGCTGGT +TCTCACTTCTGTTACTCCAGCTTCTTCGGCACCTGTTTTACAGACACCTAAAGCTACATCGTCAACGTTA +TATTTTGATAGTTTGACGGTTAATGCTGGTAATGGTGGTTTTCTTCATTGCATTCAGATGGATACATCTG +TCAACGCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGC +CTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTG +AATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGTGTGACTATTGACGTCCTTCCCCGTACGC +CGGGCAATAACGTTTATGTTGGTTTCATGGTTTGGTCTAACTTTACCGCTACTAAATGCCGCGGATTGGT +TTCGCTGAATCAGGTTATTAAAGAGATTATTTGTCTCCAGCCACTTAAGTGAGGTGATTTATGTTTGGTG +CTATTGCTGGCGGTATTGCTTCTGCTCTTGCTGGTGGCGCCATGTCTAAATTGTTTGGAGGCGGTCAAAA +AGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGATAACAATACTGTAGGCATGGGTGATGCT +GGTATTAAATCTGCCATTCAAGGCTCTAATGTTCCTAACCCTGATGAGGCCGCCCCTAGTTTTGTTTCTG +GTGCTATGGCTAAAGCTGGTAAAGGACTTCTTGAAGGTACGTTGCAGGCTGGCACTTCTGCCGTTTCTGA +TAAGTTGCTTGATTTGGTTGGACTTGGTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTAT +CTTGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGG +TTGACGCCGGATTTGAGAATCAAAAAGAGCTTACTAAAATGCAACTGGACAATCAGAAAGAGATTGCCGA +GATGCAAAATGAGACTCAAAAAGAGATTGCTGGCATTCAGTCGGCGACTTCACGCCAGAATACGAAAGAC +CAGGTATATGCACAAAATGAGATGCTTGCTTATCAACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTA +TGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGATTATGCGCCAAATGCTTACTCAAGCTCA 
+AACGGCTGGTCAGTATTTTACCAATGACCAAATCAAAGAAATGACTCGCAAGGTTAGTGCTGAGGTTGAC +TTAGTTCATCAGCAAACGCAGAATCAGCGGTATGGCTCTTCTCATATTGGCGCTACTGCAAAGGATATTT +CTAATGTCGTCACTGATGCTGCTTCTGGTGTGGTTGATATTTTTCATGGTATTGATAAAGCTGTTGCCGA +TACTTGGAACAATTTCTGGAAAGACGGTAAAGCTGATGGTATTGGCTCTAATTTGTCTAGGAAATAACCG +TCAGGATTGACACCCTCCCAATTGTATGTTTTCATGCCTCCAAATCTTGGAGGCTTTTTTATGGTTCGTT +CTTATTACCCTTCTGAATGTCACGCTGATTATTTTGACTTTGAGCGTATCGAGGCTCTTAAACCTGCTAT +TGAGGCTTGTGGCATTTCTACTCTTTCTCAATCCCCAATGCTTGGCTTCCATAAGCAGATGGATAACCGC +ATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATG +TTGACGGCCATAAGGCTGCTTCTGACGTTCGTGATGAGTTTGTATCTGTTACTGAGAAGTTAATGGATGA +ATTGGCACAATGCTACAATGTGCTCCCCCAACTTGATATTAATAACACTATAGACCACCGCCCCGAAGGG +GACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCC +CTCTTAAGGATATTCGCGATGAGTATAATTACCCCAAAAAGAAAGGTATTAAGGATGAGTGTTCAAGATT +GCTGGAGGCCTCCACTATGAAATCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAG +GCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGACCGATTAGAGGCGTTTT +ATGATAATCCCAATGCTTTGCGTGACTATTTTCGTGATATTGGTCGTATGGTTCTTGCTGCCGAGGGTCG +CAAGGCTAATGATTCACACGCCGACTGCTATCAGTATTTTTGTGTGCCTGAGTATGGTACAGCTAATGGC +CGTCTTCATTTCCATGCGGTGCACTTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTC +GTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGGTTACAGTATGCCCAT +CGCAGTTCGCTACACGCAGGACGCTTTTTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAG +CCGCTTAAAGCTACCAGTTATATGGCTGTTGGTTTCTATGTGGCTAAATACGTTAACAAAAAGTCAGATA +TGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTCGCTACT +TCCCAAGAAGCTGTTCAGAATCAGAATGAGCCGCAACTTCGGGATGAAAATGCTCACAATGACAAATCTG +TCCACGGAGTGCTTAATCCAACTTACCAAGCTGGGTTACGACGCGACGCCGTTCAACCAGATATTGAAGC +AGAACGCAAAAAGAGAGATGAGATTGAGGCTGGGAAAAGTTACTGTAGCCGACGTTTTGGCGGCGCAACC +TGTGACGACAAATCTGCTCAAATTTATGCGCGCTTCGATAAAAATGATTGGCGTATCCAACCTGCA + diff --git a/tests/main.nf.test b/tests/main.nf.test index d4ceedf..80e5e0d 100644 --- a/tests/main.nf.test +++ b/tests/main.nf.test @@ -11,8 +11,7 @@ nextflow_pipeline { params { outdir = "tests/results" assembler = "spades" - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + samplesheet = "${projectDir}/tests/samplesheet/test.csv" } } @@ -20,7 +19,7 @@ nextflow_pipeline { then { with(workflow) { assert success - assert trace.succeeded().count{ task -> task.name.contains("SPADES") } == 1 + assert trace.succeeded().count{ task -> task.name.contains("SPADES") } == 2 assert trace.succeeded().count{ task -> task.name.contains("MEGAHIT") } == 1 } } @@ -36,12 +35,9 @@ nextflow_pipeline { outdir = "tests/results" assembler = "spades" - low_reads_count_threshold = 1000000 + short_reads_low_reads_count_threshold = 1000000 - samplesheet = "${projectDir}/tests/samplesheet/test.csv" - - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + samplesheet = "${projectDir}/tests/samplesheet/test.csv" } } @@ -56,6 +52,8 @@ nextflow_pipeline { assert trace.succeeded().count{ task -> task.name.contains("MULTIQC_STUDY") } == 2 assert trace.succeeded().count{ task -> task.name.contains("SPADES") } == 0 assert trace.succeeded().count{ task -> task.name.contains("MEGAHIT") } == 0 + // This process should not have been called + assert trace.succeeded().count{ task -> task.name.contains("DOWNLOAD_FROM_FIRE") } == 0 } } @@ -71,13 +69,10 @@ nextflow_pipeline { outdir = "tests/results" // Force the assembly - filter_ratio_threshold = 0.1 + 
short_reads_filter_ratio_threshold = 0.1 - study_accession = "SRP115494" - reads_accession = "SRR6180434" - - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + study_accession = "SRP115494" + reads_accession = "SRR6180434" } } @@ -98,15 +93,14 @@ nextflow_pipeline { when { params { - outdir = "tests/results" - assembler = "megahit" - study_accession = "SRP115494" - reads_accession = "SRR6180434" + outdir = "tests/results" + assembler = "megahit" + // Force the assembly - filter_ratio_threshold = 0.1 + short_reads_filter_ratio_threshold = 0.1 - bwamem2_reference_genomes_folder = "${projectDir}/tests/human_phix/bwa2mem" - blast_reference_genomes_folder = "${projectDir}/tests/human_phix/blast" + study_accession = "SRP115494" + reads_accession = "SRR6180434" } } @@ -148,7 +142,9 @@ nextflow_pipeline { } - test("MEGAHIT - single end - should fail") { + test("MEGAHIT - single end - should produce an empty contigs file") { + + // TODO: fix along with - https://github.com/EBI-Metagenomics/miassembler/pull/21 tag "ena-portal-api" @@ -166,8 +162,8 @@ nextflow_pipeline { then { with(workflow) { - assert !success - assert trace.failed().count{ task -> task.name.contains("MEGAHIT") } == 1 + assert success + assert trace.succeeded().count{ task -> task.name.contains("MEGAHIT") } == 1 } } } @@ -224,4 +220,62 @@ nextflow_pipeline { } } + + test("Samplesheet spades - retries") { + + tag "samplesheet" + tag "retries" + + when { + params { + outdir = "tests/results" + assembler = "spades" + samplesheet = "${projectDir}/tests/samplesheet/test_mem.csv" + assembly_memory = 0.5 + /* Memory jumping testing */ + /* will try with [0.5GB, 0.75GB, 1.13GB, ...] + /* which rounds down to [0, 0, 1, ...] + /* so should definitely fail twice before succeeding. after a few trys. 
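+                /* Assumed escalation, per the schema help text "each retry will
+                   increase the memory by 50%": attempt n requests roughly
+                       assembly_memory * (1.5 ** (n - 1)) GB
+                   i.e. 0.5 -> 0.75 -> ~1.13 -> ~1.69 GB, so the first two attempts
+                   round down to 0 GB and are expected to fail. */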
+ /* ~~~ */ + max_spades_retries = 5 + } + } + + then { + with(workflow) { + // eventual success: + assert success + assert trace.succeeded().count{ task -> task.name.contains("SPADES") } == 1 + + // but failed and therefore retried multiple times first: + assert trace.failed().count{ task -> task.name.contains("SPADES") } >= 2 + } + } + + } + + test("Private study reads - this one should fail") { + + tag "samplesheet" + tag "private" + + when { + params { + outdir = "tests/results" + assembler = "spades" + samplesheet = "${projectDir}/tests/samplesheet/test.csv" + private_study = true + } + } + + // Complete this test when secrets are implemented in nf-test https://github.com/askimed/nf-test/issues/145 + then { + with(workflow) { + assert !success + assert stdout.count{ line -> line.contains("Required secrets are missing") } == 1 + } + } + + } + } diff --git a/tests/samplesheet/test.csv b/tests/samplesheet/test.csv index b2c4b99..8137806 100644 --- a/tests/samplesheet/test.csv +++ b/tests/samplesheet/test.csv @@ -1,4 +1,4 @@ -study_accession,reads_accession,fastq_1,fastq_2,library_layout,library_strategy,assembler,assembly_memory -SRP115494,SRR6180434,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR6180434_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR6180434_2.fastq.gz,paired,metagenomic,, -SRP115494,SRR5949318,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_2.fastq.gz,paired,metagenomic,, -DRP007622,DRR280712,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/DRR280712.fastq.gz,,single,metatranscriptomic,megahit, +study_accession,reads_accession,fastq_1,fastq_2,library_layout,library_strategy,platform,assembler,assembly_memory,assembler_config +SRP115494,SRR6180434,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR6180434_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR6180434_2.fastq.gz,paired,metagenomic,,,,, +SRP115494,SRR5949318,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_2.fastq.gz,paired,metagenomic,,,,, +DRP007622,DRR280712,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/DRR280712.fastq.gz,,single,metatranscriptomic,,megahit,,, diff --git a/tests/samplesheet/test_mem.csv b/tests/samplesheet/test_mem.csv new file mode 100644 index 0000000..bad87ae --- /dev/null +++ b/tests/samplesheet/test_mem.csv @@ -0,0 +1,2 @@ +study_accession,reads_accession,fastq_1,fastq_2,library_layout,library_strategy,assembler,assembly_memory +SRP115494,SRR5949318,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_1.fastq.gz,https://github.com/EBI-Metagenomics/miassembler/raw/main/tests/test_reads/SRR5949318_2.fastq.gz,paired,metagenomic,, diff --git a/tests/samplesheet/test_minION_SRR10303629.csv b/tests/samplesheet/test_minION_SRR10303629.csv new file mode 100644 index 0000000..c6ac9e8 --- /dev/null +++ b/tests/samplesheet/test_minION_SRR10303629.csv @@ -0,0 +1,2 @@ +study_accession,reads_accession,fastq_1,library_layout,library_strategy,assembler,assembly_memory +SRP226117,SRR10303629,/home/germana/Desktop/EBI_root/Git/long-read-assembly/tests/test_reads/SRR10303629_1.fastq.gz,single,metagenomic,, \ No newline at end of 
file diff --git a/workflows/long_reads_assembler.nf b/workflows/long_reads_assembler.nf new file mode 100644 index 0000000..f796f54 --- /dev/null +++ b/workflows/long_reads_assembler.nf @@ -0,0 +1,182 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// + +include { FETCHTOOL_READS } from '../modules/local/fetchtool_reads' +include { LONG_READS_QC } from '../subworkflows/local/long_reads_qc' + +include { ONT_LQ } from '../subworkflows/local/ont_lq' +include { ONT_HQ } from '../subworkflows/local/ont_hq' +include { PACBIO_LQ } from '../subworkflows/local/pacbio_lq' +include { PACBIO_HIFI } from '../subworkflows/local/pacbio_hifi' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary + +workflow LONG_READS_ASSEMBLER { + + take: + reads // tuple(meta), path(reads) + + main: + + ch_versions = Channel.empty() + + LONG_READS_QC ( + reads, + params.reference_genome + ) + ch_versions = ch_versions.mix(LONG_READS_QC.out.versions) + + /*********************************************************************************/ + /* Selecting the combination of adapter trimming, assembler, and post-processing */ + /*********************************************************************************/ + /* + The selection process ensures that: + - The user selected assembler configuration is always used (either from the samplesheet assembler column (with precedence) or the params.assembler) + - Low-quality ONT reads are trimmed with canu and assembled with flye --nano-corr/raw), unless specified otherwise. + - High-quality ONT reads are trimmed with porechob_abi and assembled with flye --nano-hq), unless specified otherwise. + - Low-quality pacbio reads are trimmed with canu and assembled with flye --pacbio-corr/raw), unless specified otherwise. + - High-quality pacbio reads are trimmed with HiFiAdapterFilt and assembled with flye --pacbio-hifi), unless specified otherwise. + Extra polishing steps are applied to low-quality reads. All subworkflows also apply post-assembly host decontamination. 
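+      (For context: the "quality" flag referenced below is intended to come from the
+      fastp-based check in LONG_READS_QC, which classes a run as high quality when at
+      least 80% of its bases are Q20 or better, and low quality otherwise.)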
+ */ + + reads_assembler_config = LONG_READS_QC.out.qc_reads.map { meta, reads -> + if (meta.platform == "ont") { + if (params.long_reads_assembler_config == "nano-raw" || meta.quality == "low") { + return [meta + ["long_reads_assembler_config": "nano-raw"], reads] + } else if (params.long_reads_assembler_config == "nano-hq" || meta.quality == "high") { + return [meta + ["long_reads_assembler_config": "nano-hq"], reads] + } + } else if (meta.platform == "pacbio") { + if (params.long_reads_assembler_config == "pacbio-raw" || meta.quality == "low") { + return [meta + ["long_reads_assembler_config": "pacbio-raw"], reads] + } else if (params.long_reads_assembler_config == "pacbio-hifi" || meta.quality == "high") { + return [meta + ["long_reads_assembler_config": "pacbio-hifi"], reads] + } + } else { + error "Incompatible configuration" + } + } + + reads_assembler_config.branch { meta, reads -> + lq_ont: meta.long_reads_assembler_config == "nano-raw" + hq_ont: meta.long_reads_assembler_config == "pacbio-raw" + lq_pacbio: meta.long_reads_assembler_config == "nano-hq" + hq_pacbio: meta.long_reads_assembler_config == "pacbio-hifi" + }.set {subworkflow_platform_reads} + + ONT_LQ( + subworkflow_platform_reads.lq_ont + ) + + ONT_HQ( + subworkflow_platform_reads.hq_ont + ) + + // PACBIO_LQ( + // subworkflow_platform_reads.lq_pacbio.map { meta, reads -> [meta, reads] } + // ) + + // PACBIO_HIFI( + // subworkflow_platform_reads.hq_pacbio.map { meta, reads -> [meta, reads] } + // ) + + assembly = ONT_LQ.out.contigs.mix( ONT_HQ.out.contigs )//, PACBIO_LQ.out.contigs, PACBIO_HIFI.out.contigs ) + + /*************************************/ + /* Post-assembly: coverage and stats */ + /*************************************/ + + // + // MODULE: Run FastQC + // + // FASTQC ( + // INPUT_CHECK.out.reads + // ) + // ch_versions = ch_versions.mix(FASTQC.out.versions.first()) + + // + // MODULE: MultiQC + // + // workflow_summary = WorkflowLongreadsassembly.paramsSummaryMultiqc(workflow, summary_params) + // ch_workflow_summary = Channel.value(workflow_summary) + + // methods_description = WorkflowLongreadsassembly.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + // ch_methods_description = Channel.value(methods_description) + + // ch_multiqc_files = Channel.empty() + // ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + // ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) + // ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + // ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}.ifEmpty([])) + + // MULTIQC ( + // ch_multiqc_files.collect(), + // ch_multiqc_config.toList(), + // ch_multiqc_custom_config.toList(), + // ch_multiqc_logo.toList() + // ) + // multiqc_report = MULTIQC.out.report.toList() + + emit: + versions = ch_versions +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/miassembler.nf b/workflows/miassembler.nf index f3104a6..d896ebd 100644 --- a/workflows/miassembler.nf +++ b/workflows/miassembler.nf @@ -1,53 +1,36 @@ -// Groovy // -import groovy.json.JsonSlurper - /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PRINT PARAMS SUMMARY 
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { validateParameters; paramsSummaryLog; paramsSummaryMap; samplesheetToList } from 'plugin/nf-schema' - -def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) -def citation = '\n' + WorkflowMain.citation(workflow) + '\n' -def summary_params = paramsSummaryMap(workflow) - -// Print parameter summary log to screen -log.info logo + paramsSummaryLog(workflow) + citation - -validateParameters() - -if (params.help) { - log.info paramsHelp("nextflow run ebi-metagenomics/miassembler --help") - exit 0 -} - +include { paramsSummaryLog; paramsSummaryMap; samplesheetToList } from 'plugin/nf-schema' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - CONFIG FILES + IMPORT NF-CORE MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -ch_multiqc_config = file("$projectDir/assets/multiqc_config.yml", checkIfExists: true) -ch_multiqc_custom_config = params.multiqc_config ? file( params.multiqc_config, checkIfExists: true ) : [] -ch_multiqc_logo = params.multiqc_logo ? file( params.multiqc_logo, checkIfExists: true ) : file("$projectDir/assets/mgnify_logo.png", checkIfExists: true) -ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) +// +// MODULE: Installed directly from nf-core/modules +// + +include { MULTIQC as MULTIQC_STUDY } from '../modules/nf-core/multiqc/main' +include { MULTIQC as MULTIQC_RUN } from '../modules/nf-core/multiqc/main' +include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT LOCAL MODULES/SUBWORKFLOWS + IMPORT THE MAIN ENTRY POINT WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ // -// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// WORKFLOWS // -include { FETCHTOOL_READS } from '../modules/local/fetchtool_reads' -include { READS_QC } from '../subworkflows/local/reads_qc' -include { ASSEMBLY_QC } from '../subworkflows/local/assembly_qc' -include { ASSEMBLY_COVERAGE } from '../subworkflows/local/assembly_coverage' +include { SHORT_READS_ASSEMBLER } from '../workflows/short_reads_assembler' +include { LONG_READS_ASSEMBLER } from '../workflows/long_reads_assembler' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -55,17 +38,7 @@ include { ASSEMBLY_COVERAGE } from '../subworkflows/local/assembly_coverage' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// MODULE: Installed directly from nf-core/modules -// -include { FASTQC as FASTQC_BEFORE } from '../modules/nf-core/fastqc/main' -include { FASTQC as FASTQC_AFTER } from '../modules/nf-core/fastqc/main' -include { MULTIQC as MULTIQC_STUDY } from '../modules/nf-core/multiqc/main' -include { MULTIQC as MULTIQC_RUN } from '../modules/nf-core/multiqc/main' -include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main' -include { SPADES } from '../modules/nf-core/spades/main' -include { MEGAHIT } from '../modules/nf-core/megahit/main' -include { QUAST } from '../modules/nf-core/quast/main' +include { FETCHTOOL_READS } from 
'../modules/local/fetchtool_reads' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -73,217 +46,182 @@ include { QUAST } from '../modules/nf-core/quast/main' ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// Info required for completion email and summary -def multiqc_report = [] - workflow MIASSEMBLER { - ch_versions = Channel.empty() + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + INIT + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def summary_params = paramsSummaryMap(workflow) + + // Print parameter summary log to screen + log.info(logo + paramsSummaryLog(workflow) + citation) + + /* + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + + def ch_multiqc_config = file("${projectDir}/assets/multiqc_config.yml", checkIfExists: true) + def ch_multiqc_custom_config = params.multiqc_config ? file(params.multiqc_config, checkIfExists: true) : [] + def ch_multiqc_logo = params.multiqc_logo ? file(params.multiqc_logo, checkIfExists: true) : file("${projectDir}/assets/mgnify_logo.png", checkIfExists: true) + def ch_multiqc_custom_methods_description = params.multiqc_methods_description ? file(params.multiqc_methods_description, checkIfExists: true) : file("${projectDir}/assets/methods_description_template.yml", checkIfExists: true) - fetch_tool_metadata = Channel.empty() - if ( params.samplesheet ) { + def ch_versions = Channel.empty() + def fetch_tool_metadata = Channel.empty() + def fetch_reads_transformed = Channel.empty() - groupReads = { study_accession, reads_accession, fq1, fq2, library_layout, library_strategy, assembler, assembly_memory -> + if (params.samplesheet) { + + def groupReads = { study_accession, reads_accession, fq1, fq2, library_layout, library_strategy, platform, assembler, assembly_memory, assembler_config -> if (fq2 == []) { - return tuple(["id": reads_accession, - "study_accession": study_accession, - "library_strategy": library_strategy, - "library_layout": library_layout, - "single_end": true, - "assembler": assembler ?: params.assembler, - "assembly_memory": assembly_memory ?: params.assembly_memory - ], - [fq1] - ) - } else { - return tuple(["id": reads_accession, - "study_accession": study_accession, - "library_strategy": library_strategy, - "library_layout": library_layout, - "single_end": false, - "assembler": assembler ?: params.assembler, - "assembly_memory": assembly_memory ?: params.assembly_memory - ], - [fq1, fq2]) + return tuple( + [ + "id": reads_accession, + "study_accession": study_accession, + "library_layout": library_layout, + "library_strategy": library_strategy, + "platform": params.platform ?: platform, + "single_end": true, + "assembler": assembler ?: params.assembler, + "assembly_memory": assembly_memory ?: params.assembly_memory, + "assembler_config": assembler_config ?: params.long_reads_assembler_config + ], + [fq1] + ) + } + else { + return tuple( + [ + "id": reads_accession, + "study_accession": study_accession, + "library_strategy": library_strategy, + "library_layout": library_layout, + "single_end": false, + "assembler": assembler ?: params.assembler, + "assembly_memory": 
assembly_memory ?: params.assembly_memory, + "assembler_config": assembler_config ?: params.long_reads_assembler_config, + "platform": params.platform ?: platform + ], + [fq1, fq2] + ) } } - samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json")) + def samplesheet = Channel.fromList(samplesheetToList(params.samplesheet, "./assets/schema_input.json")) - // [ study, sample, read1, [read2], library_layout, library_strategy, assembly_memory ] + // [ study, sample, read1, [read2], library_layout, library_strategy, platform, assembly_memory] fetch_reads_transformed = samplesheet.map(groupReads) - - } else { + } + else { // TODO: remove when the fetch tools get's published on bioconda - fetch_tool_config = file("${projectDir}/assets/fetch_tool_anonymous.json", checkIfExists: true) + def fetch_tool_config = file("${projectDir}/assets/fetch_tool_anonymous.json", checkIfExists: true) - if ( params.private_study ) { + if (params.private_study) { fetch_tool_config = file("${projectDir}/assets/fetch_tool_credentials.json", checkIfExists: true) } FETCHTOOL_READS( - [ [id: params.reads_accession], params.study_accession, params.reads_accession ], + [[id: params.reads_accession], params.study_accession, params.reads_accession], fetch_tool_config ) ch_versions = ch_versions.mix(FETCHTOOL_READS.out.versions) // Push the library strategy into the meta of the reads, this is to make it easier to handle downstream - fetch_reads_transformed = FETCHTOOL_READS.out.reads.map { meta, reads, library_strategy, library_layout -> { - [ meta + [ - // -- The metadata will be overriden by the parameters -- // - "assembler": params.assembler, - "assembly_memory": params.assembly_memory, - "library_strategy": params.library_strategy ?: library_strategy, - "library_layout": params.library_layout ?: library_layout, - "single_end": params.single_end ?: library_layout == "single" - ], reads ] + fetch_reads_transformed = FETCHTOOL_READS.out.reads.map { meta, reads, library_strategy, library_layout, platform -> + { + [ + meta + [ + "assembler": params.assembler, + "assembler_config": params.long_reads_assembler_config, + "assembly_memory": params.assembly_memory, + "library_strategy": params.library_strategy ?: library_strategy, + "library_layout": params.library_layout ?: library_layout, + "single_end": params.single_end ?: library_layout == "single", + "platform": params.platform ?: platform + ], + reads + ] } } // Metadata for MultiQC - fetch_tool_metadata = FETCHTOOL_READS.out.metadata_tsv.map { it[1] }.collectFile( - name: 'fetch_tool_mqc.tsv', - newLine: true, - keepHeader: true, - skip: 1 - ) + fetch_tool_metadata = FETCHTOOL_READS.out.metadata_tsv + .map { it[1] } + .collectFile( + name: 'fetch_tool_mqc.tsv', + newLine: true, + keepHeader: true, + skip: 1 + ) } - /***************************/ - /* Selecting the assembler */ - /***************************/ - /* - The selection process ensures that: - - The user selected assembler is always used (either from the samplesheet assembler column (with precedesnse) or the params.assembler) - - Single-end reads are assembled with MEGAHIT, unless specified otherwise. - - Paired-end reads are assembled with MetaSPAdes, unless specified otherwise - - An error is raised if the assembler and read layout are incompatible (shouldn't happen...) 
- */ - fetch_reads_transformed = fetch_reads_transformed.map { meta, reads -> - def selected_assembler = meta.assembler; - if ( selected_assembler == "megahit" || ( meta.single_end && selected_assembler == null ) ) { - return [ meta + [assembler: "megahit", assembler_version: params.megahit_version], reads] - } else if ( ["metaspades", "spades"].contains(selected_assembler) || ( !meta.single_end && selected_assembler == null ) ) { - def xspades_assembler = selected_assembler ?: "metaspades" // Default to "metaspades" if the user didn't select one - return [ meta + [assembler: xspades_assembler, assembler_version: params.spades_version], reads] - } else { - error "Incompatible assembler and/or reads layout. We can't assembly data that is. Reads - single end value: ${meta.single_end}." + /********************************************/ + /* Selecting the assembly pipeline flavour */ + /*******************************************/ + def classified_reads = fetch_reads_transformed.map { meta, reads -> + // Long reads // + if (["ont", "pacbio"].contains(meta.platform)) { + return [meta + [long_reads: true], reads] } - } - - FASTQC_BEFORE ( - fetch_reads_transformed - ) - - ch_versions = ch_versions.mix(FASTQC_BEFORE.out.versions) - - READS_QC( - fetch_reads_transformed, - params.reference_genome - ) - - FASTQC_AFTER ( - READS_QC.out.qc_reads - ) - - /******************************************/ - /* Reads that fail the following rules: */ - /* - Reads discarded by fastp > 90% (default value) */ - /* - Less than 1k reads */ - /******************************************/ - extended_qc = READS_QC.out.fastp_json.map { meta, json -> { - json_txt = new JsonSlurper().parseText(json.text) - bf_total_reads = json_txt?.summary?.before_filtering?.total_reads ?: 0; - af_total_reads = json_txt?.summary?.after_filtering?.total_reads ?: 0; - reads_qc_meta = [ - "low_reads_count": af_total_reads <= params.low_reads_count_threshold, - "filter_ratio_threshold_exceeded": af_total_reads == 0 || ((af_total_reads / bf_total_reads) <= params.filter_ratio_threshold ) - ] - return [meta, reads_qc_meta] + else { + return [meta + [short_reads: true], reads] } } - extended_reads_qc = READS_QC.out.qc_reads.join( extended_qc ) - - extended_reads_qc.branch { meta, reads, reads_qc_meta -> - // Filter out failed reads // - qc_failed: reads_qc_meta.low_reads_count || reads_qc_meta.filter_ratio_threshold_exceeded - megahit: meta.assembler == "megahit" - xspades: ["metaspades", "spades"].contains(meta.assembler) - }.set { qc_filtered_reads } - - ch_versions = ch_versions.mix(READS_QC.out.versions) - - /*********************/ - /* Assembly */ - /********************/ - SPADES( - qc_filtered_reads.xspades.map { meta, reads, _ -> [meta, reads, [], []] }, - [], // yml input parameters, which we don't use - [] // hmm, not used - ) - - ch_versions = ch_versions.mix(SPADES.out.versions) - - MEGAHIT( - qc_filtered_reads.megahit.map { meta, reads, _ -> [meta, reads] } - ) - - assembly = SPADES.out.contigs.mix( MEGAHIT.out.contigs ) - - ch_versions = ch_versions.mix(MEGAHIT.out.versions) + classified_reads + .branch { meta, _reads -> + short_reads: meta.short_reads + long_reads: meta.long_reads + } + .set { reads_to_assemble } - // Clean the assembly contigs // - ASSEMBLY_QC( - assembly, - params.reference_genome + /***************************************/ + /* Assemble short reads and long reads */ + /***************************************/ + SHORT_READS_ASSEMBLER( + reads_to_assemble.short_reads ) - ch_versions = 
ch_versions.mix(ASSEMBLY_QC.out.versions) + ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLER.out.versions) - // Coverage // - ASSEMBLY_COVERAGE( - ASSEMBLY_QC.out.filtered_contigs.join( READS_QC.out.qc_reads, remainder: false ), - READS_QC.out.fastp_json + LONG_READS_ASSEMBLER( + reads_to_assemble.long_reads ) - ch_versions = ch_versions.mix(ASSEMBLY_COVERAGE.out.versions) + ch_versions = ch_versions.mix(LONG_READS_ASSEMBLER.out.versions) - // Stats // - /* The QUAST module was modified to run metaQUAST instead */ - QUAST( - ASSEMBLY_QC.out.filtered_contigs, - [ [], [] ], // reference - [ [], [] ] // gff - ) - - CUSTOM_DUMPSOFTWAREVERSIONS ( + CUSTOM_DUMPSOFTWAREVERSIONS( ch_versions.unique().collectFile(name: 'collated_versions.yml') ) // // MODULE: MultiQC // - workflow_summary = WorkflowMiassembler.paramsSummaryMultiqc(workflow, summary_params) - ch_workflow_summary = Channel.value(workflow_summary) + def workflow_summary = WorkflowMiassembler.paramsSummaryMultiqc(workflow, summary_params) + def ch_workflow_summary = Channel.value(workflow_summary) - methods_description = WorkflowMiassembler.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) - ch_methods_description = Channel.value(methods_description) + def methods_description = WorkflowMiassembler.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params) + def ch_methods_description = Channel.value(methods_description) - ch_multiqc_base_files = Channel.empty() - ch_multiqc_base_files = ch_multiqc_base_files.mix( CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect() ) - ch_multiqc_base_files = ch_multiqc_base_files.mix( ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml') ) - ch_multiqc_base_files = ch_multiqc_base_files.mix( ch_methods_description.collectFile(name: 'methods_description_mqc.yaml') ) + def ch_multiqc_base_files = Channel.empty() + ch_multiqc_base_files = ch_multiqc_base_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect()) + ch_multiqc_base_files = ch_multiqc_base_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_base_files = ch_multiqc_base_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml')) /**************************************/ /* MultiQC report for the whole study */ /**************************************/ def meta_by_study = { meta, result_artifact -> - [ meta.subMap("study_accession"), result_artifact ] + [meta.subMap("study_accession"), result_artifact] } // Helper method for the MultiQC aggregation by study and runs // @@ -306,22 +244,25 @@ workflow MIASSEMBLER { } } - ch_multiqc_study_tools_files = Channel.empty() + def ch_multiqc_study_tools_files = Channel.empty() - ch_multiqc_study_tools_files = FASTQC_BEFORE.out.zip.map(meta_by_study) - .join( FASTQC_AFTER.out.zip.map(meta_by_study) ) - .join( ASSEMBLY_COVERAGE.out.samtools_idxstats.map(meta_by_study), remainder: true ) // the assembly step could fail - .join( QUAST.out.results.map(meta_by_study), remainder: true ) // the assembly step could fail + def study_multiqc_files = SHORT_READS_ASSEMBLER.out.fastqc_before_zip.map(meta_by_study) \ + .join(SHORT_READS_ASSEMBLER.out.fastqc_after_zip.map(meta_by_study)) \ + .join(SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats.map(meta_by_study), remainder: true) \ + .join(SHORT_READS_ASSEMBLER.out.quast_results.map(meta_by_study), remainder: true) - ch_multiqc_study_tools_files = ch_multiqc_study_tools_files.flatMap( combineFiles ).groupTuple() + 
ch_multiqc_study_tools_files = study_multiqc_files.flatMap(combineFiles).groupTuple() // TODO: add the fetch tool log file - MULTIQC_STUDY ( + + MULTIQC_STUDY( ch_multiqc_base_files.collect(), ch_multiqc_study_tools_files, ch_multiqc_config, ch_multiqc_custom_config, - ch_multiqc_logo + ch_multiqc_logo, + [], + [] ) /**************************/ @@ -329,60 +270,57 @@ workflow MIASSEMBLER { /*************************/ def meta_by_run = { meta, result_artifact -> - [ meta.subMap("study_accession", "id", "assembler", "assembler_version"), result_artifact ] + [meta.subMap("study_accession", "id", "assembler", "assembler_version"), result_artifact] } - ch_multiqc_run_tools_files = Channel.empty() - - ch_multiqc_run_tools_files = FASTQC_BEFORE.out.zip.map(meta_by_run) - .join( FASTQC_AFTER.out.zip.map(meta_by_run) ) - .join( ASSEMBLY_COVERAGE.out.samtools_idxstats.map(meta_by_run), remainder: true ) // the assembly step could fail - .join( QUAST.out.results.map(meta_by_run), remainder: true ) // the assembly step could fail + def run_multiqc_files = SHORT_READS_ASSEMBLER.out.fastqc_before_zip.map(meta_by_run).join(SHORT_READS_ASSEMBLER.out.fastqc_after_zip.map(meta_by_run)).join(SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats.map(meta_by_run), remainder: true).join(SHORT_READS_ASSEMBLER.out.quast_results.map(meta_by_run), remainder: true) + // the assembly step could fail // Filter out the non-assembled runs // - ch_multiqc_run_tools_files = ch_multiqc_run_tools_files.filter { meta, fastqc_before, fastqc_after, assembly_coverage, quast -> { + def ch_multiqc_run_tools_files = run_multiqc_files.filter { _meta, _fastqc_before, _fastqc_after, assembly_coverage, quast -> + { return assembly_coverage != null && quast != null } - } .flatMap( combineFiles ).groupTuple() + }.flatMap(combineFiles).groupTuple() // TODO: add the fetch tool log file - MULTIQC_RUN ( + MULTIQC_RUN( ch_multiqc_base_files.collect(), ch_multiqc_run_tools_files, ch_multiqc_config, ch_multiqc_custom_config, - ch_multiqc_logo + ch_multiqc_logo, + [], + [] ) /*****************************/ /* End of execution reports */ /****************************/ - // Asssembled runs // - ASSEMBLY_COVERAGE.out.samtools_idxstats.map { - meta, _ -> { - return "${meta.id},${meta.assembler},${meta.assembler_version}" + // TODO: we need to add LR end-of-run reports + + // Short reads asssembled runs // + SHORT_READS_ASSEMBLER.out.assembly_coverage_samtools_idxstats + .map { meta, __ -> + { + return "${meta.id},${meta.assembler},${meta.assembler_version}" + } } - }.collectFile(name: "assembled_runs.csv", storeDir: "${params.outdir}", newLine: true, cache: false) + .collectFile(name: "assembled_runs.csv", storeDir: "${params.outdir}", newLine: true, cache: false) - // Reads QC failed // - qc_failed_entries = qc_filtered_reads.qc_failed.map { - meta, _, extended_meta -> { - if ( extended_meta.low_reads_count ) { + // Short reads QC failed // + def short_reads_qc_failed_entries = SHORT_READS_ASSEMBLER.out.qc_failed.map { meta, __, extended_meta -> + { + if (extended_meta.low_reads_count) { return "${meta.id},low_reads_count" } - if ( extended_meta.filter_ratio_threshold_exceeded ) { + if (extended_meta.filter_ratio_threshold_exceeded) { return "${meta.id},filter_ratio_threshold_exceeded" } - error "Unexpected. meta: ${meta}, extended_meta: ${extended_meta}" + error("Unexpected. 
meta: ${meta}, extended_meta: ${extended_meta}") } } - qc_failed_entries.collectFile(name: "qc_failed_runs.csv", storeDir: "${params.outdir}", newLine: true, cache: false) + short_reads_qc_failed_entries.collectFile(name: "qc_failed_runs.csv", storeDir: "${params.outdir}", newLine: true, cache: false) } - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ diff --git a/workflows/short_reads_assembler.nf b/workflows/short_reads_assembler.nf new file mode 100644 index 0000000..2beb545 --- /dev/null +++ b/workflows/short_reads_assembler.nf @@ -0,0 +1,179 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// + +include { DOWNLOAD_FROM_FIRE } from '../modules/local/download_from_fire.nf' + +include { SHORT_READS_QC } from '../subworkflows/local/short_reads_qc' +include { SHORT_READS_ASSEMBLY_QC } from '../subworkflows/local/short_reads_assembly_qc' +include { SHORT_READS_ASSEMBLY_COVERAGE } from '../subworkflows/local/short_reads_assembly_coverage' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { FASTQC as FASTQC_BEFORE } from '../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_AFTER } from '../modules/nf-core/fastqc/main' +include { SPADES } from '../modules/nf-core/spades/main' +include { MEGAHIT } from '../modules/nf-core/megahit/main' +include { QUAST } from '../modules/nf-core/quast/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +workflow SHORT_READS_ASSEMBLER { + take: + input_reads // tuple(meta), path(reads) + + main: + + def ch_versions = Channel.empty() + def reads_to_assemble = input_reads + + // If running for a private study on EBI infrastructure // + if (params.private_study) { + /* + * For private studies we need to bypass Nextflow S3 integration until https://github.com/nextflow-io/nextflow/issues/4873 is fixed + * The EBI parameter is needed as this only works on EBI network, FIRE is not accessible otherwise + */ + DOWNLOAD_FROM_FIRE( + input_reads + ) + + ch_versions = ch_versions.mix(DOWNLOAD_FROM_FIRE.out.versions.first()) + + reads_to_assemble = DOWNLOAD_FROM_FIRE.out.reads + } + + /***************************/ + /* Selecting the assembler */ + /***************************/ + /* + The selection process ensures that: + - The user selected assembler is always used (either from the samplesheet assembler column (with precedesnse) or the params.assembler) + - Single-end reads are assembled with MEGAHIT, unless specified otherwise. + - Paired-end reads are assembled with MetaSPAdes, unless specified otherwise + - An error is raised if the assembler and read layout are incompatible (shouldn't happen...) 
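+        For example, assuming neither the samplesheet nor --assembler sets a value:
+          paired-end reads -> metaspades (params.spades_version)
+          single-end reads -> megahit    (params.megahit_version)
+        while an explicit choice (e.g. assembler = "spades") is used for either layout.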
+
+    FASTQC_BEFORE(
+        reads_by_assembler
+    )
+    ch_versions = ch_versions.mix(FASTQC_BEFORE.out.versions)
+
+    SHORT_READS_QC(
+        reads_by_assembler,
+        params.reference_genome
+    )
+    ch_versions = ch_versions.mix(SHORT_READS_QC.out.versions)
+
+    FASTQC_AFTER(
+        SHORT_READS_QC.out.qc_reads
+    )
+
+    /******************************************/
+    /* Reads that fail the following rules:   */
+    /* - Reads kept by fastp < 10% (default value) */
+    /* - Less than 1k reads                   */
+    /******************************************/
+    def extended_qc = SHORT_READS_QC.out.fastp_json.map { meta, json ->
+        {
+            def json_txt = new groovy.json.JsonSlurper().parseText(json.text)
+            def bf_total_reads = json_txt.summary.before_filtering.total_reads ?: 0
+            def af_total_reads = json_txt.summary.after_filtering.total_reads ?: 0
+            def reads_qc_meta = [
+                "low_reads_count": af_total_reads <= params.short_reads_low_reads_count_threshold,
+                "filter_ratio_threshold_exceeded": af_total_reads == 0 || ((af_total_reads / bf_total_reads) <= params.short_reads_filter_ratio_threshold)
+            ]
+            return [meta, reads_qc_meta]
+        }
+    }
+
+    def extended_reads_qc = SHORT_READS_QC.out.qc_reads.join(extended_qc)
+
+    extended_reads_qc
+        .branch { meta, _reads, reads_qc_meta ->
+            qc_failed: reads_qc_meta.low_reads_count || reads_qc_meta.filter_ratio_threshold_exceeded
+            megahit: meta.assembler == "megahit"
+            xspades: ["metaspades", "spades"].contains(meta.assembler)
+        }
+        .set { qc_filtered_reads }
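The two flags computed from the fastp JSON are what route a run into the `qc_failed` branch. A small sketch of the same arithmetic on made-up read counts, with the documented defaults (1k reads, 10% kept) hard-coded in place of the pipeline params, and the `counts` maps standing in for fastp's before/after-filtering totals:

```nextflow
// Sketch of the QC flags, using hard-coded defaults (1000 reads, 0.1 ratio) instead of params.
workflow {
    Channel
        .of(
            [ [id: 'run1'], [before: 100000, after: 90000] ],  // passes both rules
            [ [id: 'run2'], [before: 100000, after: 500] ]     // fails both rules
        )
        .map { meta, counts ->
            def flags = [
                low_reads_count                : counts.after <= 1000,
                filter_ratio_threshold_exceeded: counts.after == 0 || (counts.after / counts.before) <= 0.1
            ]
            return [meta, flags]
        }
        .view()
}
```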
+
+    /*********************/
+    /* Assembly          */
+    /*********************/
+    SPADES(
+        qc_filtered_reads.xspades.map { meta, reads, __ -> [meta, reads, [], []] },
+        [],
+        []
+    )
+    ch_versions = ch_versions.mix(SPADES.out.versions)
+
+    MEGAHIT(
+        qc_filtered_reads.megahit.map { meta, reads, __ -> [meta, reads] }
+    )
+    ch_versions = ch_versions.mix(MEGAHIT.out.versions)
+
+    assembly = SPADES.out.contigs.mix(MEGAHIT.out.contigs)
+
+    // Clean the assembly contigs //
+    SHORT_READS_ASSEMBLY_QC(
+        assembly,
+        params.reference_genome
+    )
+    ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLY_QC.out.versions)
+
+    // Coverage //
+    SHORT_READS_ASSEMBLY_COVERAGE(
+        SHORT_READS_ASSEMBLY_QC.out.filtered_contigs.join(SHORT_READS_QC.out.qc_reads, remainder: false),
+        SHORT_READS_QC.out.fastp_json
+    )
+
+    ch_versions = ch_versions.mix(SHORT_READS_ASSEMBLY_COVERAGE.out.versions)
+
+    // Stats //
+    /* The QUAST module was modified to run metaQUAST instead */
+    QUAST(
+        SHORT_READS_ASSEMBLY_QC.out.filtered_contigs,
+        [[], []],
+        [[], []]
+    )
+
+    ch_versions = ch_versions.mix(QUAST.out.versions)
+
+    emit:
+    fastqc_before_zip = FASTQC_BEFORE.out.zip // tuple(meta)
+    qc_failed = qc_filtered_reads.qc_failed // tuple(meta)
+    fastqc_after_zip = FASTQC_AFTER.out.zip // tuple(meta)
+    assembly_coverage_samtools_idxstats = SHORT_READS_ASSEMBLY_COVERAGE.out.samtools_idxstats // tuple(meta)
+    quast_results = QUAST.out.results // tuple(meta)
+    versions = ch_versions
+}
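For orientation, this is roughly how a caller could wire the subworkflow's take/emit interface; the include path assumes a script at the repository root, and the meta keys and read paths are placeholders rather than real test data:

```nextflow
// Hypothetical caller: not part of the diff, just an illustration of the take/emit interface.
include { SHORT_READS_ASSEMBLER } from './workflows/short_reads_assembler'

workflow {
    // One paired-end run; meta keys mirror the ones used by the subworkflow.
    def reads = Channel.of([
        [ id: 'runX', single_end: false, assembler: null ],
        [ file('runX_R1.fastq.gz'), file('runX_R2.fastq.gz') ]   // placeholder paths
    ])

    SHORT_READS_ASSEMBLER(reads)

    // e.g. inspect the per-run QUAST results emitted above
    SHORT_READS_ASSEMBLER.out.quast_results.view()
}
```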