diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index 0d4faddf..2e5133de 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -17,6 +17,10 @@ jobs: # run only if the PR is approved by at least 2 reviewers and against the master branch or manually triggered if: github.repository == 'nf-core/viralrecon' && github.event.review.state == 'approved' && github.event.pull_request.base.ref == 'master' || github.event_name == 'workflow_dispatch' runs-on: ubuntu-latest + # Do a full-scale run with data from each sequencing platform + strategy: + matrix: + platform: ["illumina", "nanopore"] steps: - uses: octokit/request-action@v2.x id: check_approvals @@ -32,9 +36,6 @@ jobs: test $CURRENT_APPROVALS_COUNT -ge 2 || exit 1 # At least 2 approvals are required - name: Launch workflow via Seqera Platform uses: seqeralabs/action-tower-launch@v2 - # TODO nf-core: You can customise AWS full pipeline tests as required - # Add full size test data (but still relatively small datasets for few samples) - # on the `test_full.config` test runs with only one set of parameters with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} @@ -44,10 +45,9 @@ jobs: parameters: | { "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-${{ github.sha }}" + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/viralrecon/results-${{ github.sha }}/platform_${{ matrix.platform }}" } - profiles: test_full - + profiles: test_full_${{ matrix.platform }},aws_tower - uses: actions/upload-artifact@v4 with: name: Seqera Platform debug log file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f8294d2b..b3d17948 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -83,3 +83,236 @@ jobs: - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.test_name }} | ${{ matrix.profile }}" 
run: | nextflow run ${GITHUB_WORKSPACE} -profile ${{ matrix.test_name }},${{ matrix.profile }} --outdir ./results + + test_parameters: + name: "Test parameters (${{ matrix.NXF_VER }} | ${{ matrix.parameters }} | ${{ matrix.profile }})" + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "24.04.2" + - "latest-everything" + profile: + - "conda" + - "docker" + - "singularity" + parameters: + - "--consensus_caller ivar" + - "--variant_caller bcftools --consensus_caller ivar" + - "--skip_fastp --skip_pangolin" + - "--skip_variants" + - "--skip_cutadapt --skip_snpeff" + - "--skip_kraken2" + - "--skip_assembly" + - "--spades_mode corona" + - "--spades_mode metaviral" + - "--skip_plasmidid false --skip_asciigenome" + - "--additional_annotation ./GCA_009858895.3_ASM985889v3_genomic.gtf.gz" + - "--bowtie2_index ./GCA_009858895.3_ASM985889v3_genomic.200409.bt2.index.tar.gz" + isMaster: + - ${{ github.base_ref == 'master' }} + # Exclude conda and singularity on dev + exclude: + - isMaster: false + profile: "conda" + - isMaster: false + profile: "singularity" + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Set up Apptainer + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: matrix.profile == 'singularity' + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Set up Miniconda + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 + with: + miniconda-version: "latest" + auto-update-conda: true + conda-solver: libmamba + channels: 
conda-forge,bioconda + + - name: Set up Conda + if: matrix.profile == 'conda' + run: | + echo $(realpath $CONDA)/condabin >> $GITHUB_PATH + echo $(realpath python) >> $GITHUB_PATH + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Download GTF for additional annotation + if: contains(matrix.parameters, 'additional_annotation') + run: | + wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/858/895/GCA_009858895.3_ASM985889v3/GCA_009858895.3_ASM985889v3_genomic.gtf.gz + + - name: Download prebuild bowtie2 index + if: contains(matrix.parameters, 'bowtie2_index') + run: | + wget https://github.com/nf-core/test-datasets/raw/viralrecon/genome/MN908947.3/GCA_009858895.3_ASM985889v3_genomic.200409.bt2.index.tar.gz + + - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.parameters }} | ${{ matrix.profile }}" + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test,${{ matrix.profile }} ${{ matrix.parameters }} --outdir ./results + + test_sispa: + name: "Test SISPA (${{ matrix.NXF_VER }} | ${{ matrix.parameters }} | ${{ matrix.profile }})" + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "24.04.2" + - "latest-everything" + profile: + - "conda" + - "docker" + - "singularity" + parameters: + - "--gff false" + - "--genome 'NC_045512.2'" + isMaster: + - ${{ github.base_ref == 'master' }} + # Exclude conda and singularity on dev + exclude: + - isMaster: false + profile: "conda" + - isMaster: false + profile: "singularity" + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Set up Apptainer + if: 
matrix.profile == 'singularity' + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: matrix.profile == 'singularity' + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Set up Miniconda + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 + with: + miniconda-version: "latest" + auto-update-conda: true + conda-solver: libmamba + channels: conda-forge,bioconda + + - name: Set up Conda + if: matrix.profile == 'conda' + run: | + echo $(realpath $CONDA)/condabin >> $GITHUB_PATH + echo $(realpath python) >> $GITHUB_PATH + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.parameters }} | ${{ matrix.profile }}" + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_sispa,${{ matrix.profile }} ${{ matrix.parameters }} --outdir ./results + + test_nanopore: + name: "Test nanopore (${{ matrix.NXF_VER }} | ${{ matrix.parameters }} | ${{ matrix.profile }})" + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/viralrecon') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "24.04.2" + - "latest-everything" + profile: + - "conda" + - "docker" + - "singularity" + parameters: + - "--gff false --freyja_depthcutoff 1" + - "--additional_annotation ./GCA_009858895.3_ASM985889v3_genomic.gtf.gz --freyja_depthcutoff 1" + - "--input false --freyja_depthcutoff 1" + - "--min_barcode_reads 10000" + - "--min_guppyplex_reads 10000" + - "--artic_minion_caller medaka --sequencing_summary false --fast5_dir false --freyja_depthcutoff 1" + - "--artic_minion_caller medaka --sequencing_summary false --fast5_dir false --artic_minion_medaka_model ./r941_min_high_g360_model.hdf5 --freyja_depthcutoff 
1" + isMaster: + - ${{ github.base_ref == 'master' }} + # Exclude conda and singularity on dev + exclude: + - isMaster: false + profile: "conda" + - isMaster: false + profile: "singularity" + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Set up Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Set up Apptainer + if: matrix.profile == 'singularity' + uses: eWaterCycle/setup-apptainer@main + + - name: Set up Singularity + if: matrix.profile == 'singularity' + run: | + mkdir -p $NXF_SINGULARITY_CACHEDIR + mkdir -p $NXF_SINGULARITY_LIBRARYDIR + + - name: Set up Miniconda + if: matrix.profile == 'conda' + uses: conda-incubator/setup-miniconda@a4260408e20b96e80095f42ff7f1a15b27dd94ca # v3 + with: + miniconda-version: "latest" + auto-update-conda: true + conda-solver: libmamba + channels: conda-forge,bioconda + + - name: Set up Conda + if: matrix.profile == 'conda' + run: | + echo $(realpath $CONDA)/condabin >> $GITHUB_PATH + echo $(realpath python) >> $GITHUB_PATH + + - name: Clean up Disk space + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Download GTF for additional annotation + if: contains(matrix.parameters, 'additional_annotation') + run: | + wget https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/009/858/895/GCA_009858895.3_ASM985889v3/GCA_009858895.3_ASM985889v3_genomic.gtf.gz + + - name: Download medaka model + if: contains(matrix.parameters, 'r941_min_high_g360_model.hdf5') + run: | + wget https://github.com/nanoporetech/medaka/raw/master/medaka/data/r941_min_high_g360_model.hdf5 + + - name: "Run pipeline with test data ${{ matrix.NXF_VER }} | ${{ matrix.parameters }} | ${{ matrix.profile }}" + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_nanopore,${{ matrix.profile }} ${{ matrix.parameters }} --outdir ./results diff --git a/.nf-core.yml b/.nf-core.yml index a928217a..8b54fe9f 100644 
--- a/.nf-core.yml +++ b/.nf-core.yml @@ -14,6 +14,7 @@ template: name: viralrecon org: nf-core outdir: . - skip_features: null + skip_features: + - igenomes version: 2.7.0dev update: null diff --git a/CHANGELOG.md b/CHANGELOG.md index 7589b6b4..cc3fda4d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,14 +3,585 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.7.0dev - [date] +## [Unpublished Version / DEV] -Initial release of nf-core/viralrecon, created with the [nf-core](https://nf-co.re/) template. +### Credits -### `Added` +Special thanks to the following for their code contributions to the release: -### `Fixed` +- [Adam Talbot](https://github.com/adamrtalbot) +- [Joon Klaps](https://github.com/Joon-Klaps) +- [Sarai Varona](https://github.com/svarona) -### `Dependencies` +Thank you to everyone else that has contributed by reporting bugs, enhancements or in any other way, shape or form. 
-### `Deprecated` +### Enhancements & fixes + +- [[#299](https://github.com/nf-core/viralrecon/issues/299)] - Add the freyja pipeline as a subworkflow +- [[PR #387](https://github.com/nf-core/viralrecon/pull/387)] - Software closes gracefully when encountering an error +- [[PR #395](https://github.com/nf-core/viralrecon/pull/395)] - Remove minia from default assemblers because it is unreliable +- [[PR #393](https://github.com/nf-core/viralrecon/pull/393)] - Changed primer set to params +- [[PR #412](https://github.com/nf-core/viralrecon/pull/412)] - Including parameter `depthcutoff` to freyja demix and boot +- [[PR #413](https://github.com/nf-core/viralrecon/pull/413)] - Update multiqc module & include freyja in report +- [[PR #401](https://github.com/nf-core/viralrecon/pull/401)] - Added option to add a custom annotation +- [[PR #417](https://github.com/nf-core/viralrecon/pull/417)] - Allow skipping of Freyja bootstrapping module & freyja module update +- [[PR #434](https://github.com/nf-core/viralrecon/pull/434)] - Add blast result filtering through `min_contig_length` and `min_perc_contig_aligned`. +- [[PR #438](https://github.com/nf-core/viralrecon/pull/438)] - Update fastp container to 0.23.4 +- [[PR #439](https://github.com/nf-core/viralrecon/pull/439)] - Fix cardinality issue when using `--bowtie2_index` + +### Parameters + +| Old parameter | New parameter | +| ------------- | --------------------------- | +| | `--skip_freyja` | +| | `--freyja_repeats` | +| | `--freyja_db_name` | +| | `--freyja_barcodes` | +| | `--freyja_lineages` | +| | `--skip_freyja_boot` | +| | `--additional_annotation` | +| | `--min_contig_length` | +| | `--min_perc_contig_aligned` | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> **NB:** Parameter has been **added** if just the new parameter information is present. +> **NB:** Parameter has been **removed** if new parameter information isn't present. 
+ +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `freyja` | | 1.5.0 | +| `multiqc` | 1.14 | 1.19 | +| `fastp` | 0.23.2 | 0.23.4 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. + +## [[2.6.0](https://github.com/nf-core/viralrecon/releases/tag/2.6.0)] - 2023-03-23 + +### Credits + +Special thanks to the following for their code contributions to the release: + +- [Friederike Hanssen](https://github.com/FriederikeHanssen) +- [Hugo Tavares](https://github.com/tavareshugo) +- [James Fellows Yates](https://github.com/jfy133) +- [Jessica Wu](https://github.com/wutron) +- [Matthew Wells](https://github.com/mattheww95) +- [Maxime Garcia](https://github.com/maxulysse) +- [Phil Ewels](https://github.com/ewels) +- [Sara Monzón](https://github.com/saramonzon) + +Thank you to everyone else that has contributed by reporting bugs, enhancements or in any other way, shape or form. 
+ +### Enhancements & fixes + +- [[#297](https://github.com/nf-core/viralrecon/issues/297)] - Add tube map for pipeline +- [[#316](https://github.com/nf-core/viralrecon/issues/316)] - Variant calling isn't run when using `--skip_asciigenome` with metagenomic data +- [[#317](https://github.com/nf-core/viralrecon/issues/317)] - `ivar_variants_to_vcf`: Ignore lines without annotation in ivar tsv file +- [[#320](https://github.com/nf-core/viralrecon/issues/320)] - Pipeline fails at email step: Failed to invoke `workflow.onComplete` event handler +- [[#321](https://github.com/nf-core/viralrecon/issues/321)] - `ivar_variants_to_vcf` script: Duplicated positions in tsv file due to overlapping annotations +- [[#334](https://github.com/nf-core/viralrecon/issues/334)] - Longshot thread 'main' panicked at 'assertion failed: p <= 0.0' error +- [[#341](https://github.com/nf-core/viralrecon/issues/341)] - `artic/minion` and `artic/guppyplex`: Update module version 1.2.2 -> 1.2.3 +- [[#348](https://github.com/nf-core/viralrecon/issues/348)] - Document full parameters of iVar consensus +- [[#349](https://github.com/nf-core/viralrecon/issues/349)] - ERROR in Script plasmidID +- [[#356](https://github.com/nf-core/viralrecon/issues/356)] - Add NEB SARS-CoV-2 primers +- [[#368](https://github.com/nf-core/viralrecon/issues/368)] - Incorrect depth from ivar variants reported in variants long table +- Updated pipeline template to [nf-core/tools 2.7.2](https://github.com/nf-core/tools/releases/tag/2.7.2) +- Add `tower.yml` for Report rendering in Nextflow Tower +- Use `--skip_plasmidid` by default + +### Parameters + +| Old parameter | New parameter | +| ------------- | ------------- | +| `--tracedir` | | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> **NB:** Parameter has been **added** if just the new parameter information is present. +> **NB:** Parameter has been **removed** if new parameter information isn't present. 
+ +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. + +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| `artic` | 1.2.2 | 1.2.3 | +| `bcftools` | 1.15.1 | 1.16 | +| `blast` | 2.12.0 | 2.13.0 | +| `cutadapt` | 3.5 | 4.2 | +| `ivar` | 1.3.1 | 1.4 | +| `multiqc` | 1.13a | 1.14 | +| `nanoplot` | 1.40.0 | 1.41.0 | +| `nextclade` | 2.2.0 | 2.12.0 | +| `pangolin` | 4.1.1 | 4.2 | +| `picard` | 2.27.4 | 3.0.0 | +| `samtools` | 1.15.1 | 1.16.1 | +| `spades` | 3.15.4 | 3.15.5 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. 
+ +## [[2.5](https://github.com/nf-core/viralrecon/releases/tag/2.5)] - 2022-07-13 + +### Enhancements & fixes + +- Default Nextclade dataset shipped with the pipeline has been bumped from `2022-01-18T12:00:00Z` -> `2022-06-14T12:00:00Z` +- [[#234](https://github.com/nf-core/viralrecon/issues/234)] - Remove replacement of dashes in sample name with underscores +- [[#292](https://github.com/nf-core/viralrecon/issues/292)] - Filter empty FastQ files after adapter trimming +- [[#303](https://github.com/nf-core/viralrecon/pull/303)] - New pangolin dbs (4.0.x) not assigning lineages to Sars-CoV-2 samples in MultiQC report correctly +- [[#304](https://github.com/nf-core/viralrecon/pull/304)] - Re-factor code of `ivar_variants_to_vcf` script +- [[#306](https://github.com/nf-core/viralrecon/issues/306)] - Add contig field information in vcf header in ivar_variants_to_vcf and use bcftools sort +- [[#311](https://github.com/nf-core/viralrecon/issues/311)] - Invalid declaration val medaka_model_string +- [[#316](https://github.com/nf-core/viralrecon/issues/316)] - Variant calling isn't run when using --skip_asciigenome with metagenomic data +- [[nf-core/rnaseq#764](https://github.com/nf-core/rnaseq/issues/764)] - Test fails when using GCP due to missing tools in the basic biocontainer +- Updated pipeline template to [nf-core/tools 2.4.1](https://github.com/nf-core/tools/releases/tag/2.4.1) + +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
+ +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| `artic` | 1.2.1 | 1.2.2 | +| `bcftools` | 1.14 | 1.15.1 | +| `multiqc` | 1.11 | 1.13a | +| `nanoplot` | 1.39.0 | 1.40.0 | +| `nextclade` | 1.10.2 | 2.2.0 | +| `pangolin` | 3.1.20 | 4.1.1 | +| `picard` | 2.26.10 | 2.27.4 | +| `quast` | 5.0.2 | 5.2.0 | +| `samtools` | 1.14 | 1.15.1 | +| `spades` | 3.15.3 | 3.15.4 | +| `vcflib` | 1.0.2 | 1.0.3 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. + +### Parameters + +## [[2.4.1](https://github.com/nf-core/viralrecon/releases/tag/2.4.1)] - 2022-03-01 + +### Enhancements & fixes + +- [[#288](https://github.com/nf-core/viralrecon/issues/288)] - `--primer_set_version` only accepts Integers (incompatible with "4.1" Artic primers set) + +## [[2.4](https://github.com/nf-core/viralrecon/releases/tag/2.4)] - 2022-02-22 + +### Enhancements & fixes + +- [nf-core/tools#1415](https://github.com/nf-core/tools/issues/1415) - Make `--outdir` a mandatory parameter +- [[#281](https://github.com/nf-core/viralrecon/issues/281)] - Nanopore medaka processing fails with error if model name, not model file, provided +- [[#286](https://github.com/nf-core/viralrecon/issues/286)] - IVAR_VARIANTS silently failing when FAI index is missing + +### Parameters + +| Old parameter | New parameter | +| ------------- | -------------------- | +| | `--publish_dir_mode` | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> +> **NB:** Parameter has been **added** if just the new parameter information is present. +> +> **NB:** Parameter has been **removed** if new parameter information isn't present. 
+ +## [[2.3.1](https://github.com/nf-core/viralrecon/releases/tag/2.3.1)] - 2022-02-15 + +### Enhancements & fixes + +- [[#277](https://github.com/nf-core/viralrecon/issues/277)] - Misuse of rstrip in make_variants_long_table.py script + +### Software dependencies + +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `mosdepth` | 0.3.2 | 0.3.3 | +| `pangolin` | 3.1.19 | 3.1.20 | + +## [[2.3](https://github.com/nf-core/viralrecon/releases/tag/2.3)] - 2022-02-04 + +### :warning: Major enhancements + +- Please see [Major updates in v2.3](https://github.com/nf-core/viralrecon/issues/271) for a more detailed list of changes added in this version. +- When using `--protocol amplicon`, in the previous release, iVar was used for both the variant calling and consensus sequence generation. The pipeline will now perform the variant calling and consensus sequence generation with iVar and BCFTools/BEDTools, respectively. +- Bump minimum Nextflow version from `21.04.0` -> `21.10.3` + +### Enhancements & fixes + +- Port pipeline to the updated Nextflow DSL2 syntax adopted on nf-core/modules +- Updated pipeline template to [nf-core/tools 2.2](https://github.com/nf-core/tools/releases/tag/2.2) +- [[#209](https://github.com/nf-core/viralrecon/issues/209)] - Check that contig in primer BED and genome fasta match +- [[#218](https://github.com/nf-core/viralrecon/issues/218)] - Support for compressed FastQ files for Nanopore data +- [[#232](https://github.com/nf-core/viralrecon/issues/232)] - Remove duplicate variants called by ARTIC ONT pipeline +- [[#235](https://github.com/nf-core/viralrecon/issues/235)] - Nextclade version bump +- [[#244](https://github.com/nf-core/viralrecon/issues/244)] - Fix BCFtools consensus generation and masking +- [[#245](https://github.com/nf-core/viralrecon/issues/245)] - Mpileup file as output +- [[#246](https://github.com/nf-core/viralrecon/issues/246)] - Option to generate consensus with BCFTools / BEDTools using 
iVar variants +- [[#247](https://github.com/nf-core/viralrecon/issues/247)] - Add strand-bias filtering option and codon fix in consecutive positions in ivar tsv conversion to vcf +- [[#248](https://github.com/nf-core/viralrecon/issues/248)] - New variants reporting table + +### Parameters + +| Old parameter | New parameter | +| ------------- | ------------------------------- | +| | `--nextclade_dataset` | +| | `--nextclade_dataset_name` | +| | `--nextclade_dataset_reference` | +| | `--nextclade_dataset_tag` | +| | `--skip_consensus_plots` | +| | `--skip_variants_long_table` | +| | `--consensus_caller` | +| `--callers` | `--variant_caller` | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> +> **NB:** Parameter has been **added** if just the new parameter information is present. +> +> **NB:** Parameter has been **removed** if new parameter information isn't present. + +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
+ +| Dependency | Old version | New version | +| ----------- | ----------- | ----------- | +| `bcftools` | 1.11 | 1.14 | +| `blast` | 2.10.1 | 2.12.0 | +| `bowtie2` | 2.4.2 | 2.4.4 | +| `cutadapt` | 3.2 | 3.5 | +| `fastp` | 0.20.1 | 0.23.2 | +| `kraken2` | 2.1.1 | 2.1.2 | +| `minia` | 3.2.4 | 3.2.6 | +| `mosdepth` | 0.3.1 | 0.3.2 | +| `nanoplot` | 1.36.1 | 1.39.0 | +| `nextclade` | | 1.10.2 | +| `pangolin` | 3.1.7 | 3.1.19 | +| `picard` | 2.23.9 | 2.26.10 | +| `python` | 3.8.3 | 3.9.5 | +| `samtools` | 1.10 | 1.14 | +| `spades` | 3.15.2 | 3.15.3 | +| `tabix` | 0.2.6 | 1.11 | +| `vcflib` | | 1.0.2 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. + +## [[2.2](https://github.com/nf-core/viralrecon/releases/tag/2.2)] - 2021-07-29 + +### Enhancements & fixes + +- Updated pipeline template to [nf-core/tools 2.1](https://github.com/nf-core/tools/releases/tag/2.1) +- Remove custom content to render Pangolin report in MultiQC as it was officially added as a module in [v1.11](https://github.com/ewels/MultiQC/pull/1458) +- [[#212](https://github.com/nf-core/viralrecon/issues/212)] - Access to `PYCOQC.out` is undefined +- [[#229](https://github.com/nf-core/viralrecon/issues/229)] - ARTIC Guppyplex settings for 1200bp ARTIC primers with Nanopore data + +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
+ +| Dependency | Old version | New version | +| ---------- | ----------- | ----------- | +| `multiqc` | 1.10.1 | 1.11 | +| `pangolin` | 3.0.5 | 3.1.7 | +| `samtools` | 1.10 | 1.12 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. + +## [[2.1](https://github.com/nf-core/viralrecon/releases/tag/2.1)] - 2021-06-15 + +### Enhancements & fixes + +- Removed workflow to download data from public databases in favour of using [nf-core/fetchngs](https://nf-co.re/fetchngs) +- Added Pangolin results to MultiQC report +- Added warning to MultiQC report for samples that have no reads after adapter trimming +- Added docs about structure of data required for running Nanopore data +- Added docs about using other primer sets for Illumina data +- Added docs about overwriting default container definitions to use latest versions e.g. 
Pangolin +- Dashes and spaces in sample names will be converted to underscores to avoid issues when creating the summary metrics +- [[#196](https://github.com/nf-core/viralrecon/issues/196)] - Add mosdepth heatmap to MultiQC report +- [[#197](https://github.com/nf-core/viralrecon/issues/197)] - Output a .tsv comprising the Nextclade and Pangolin results for all samples processed +- [[#198](https://github.com/nf-core/viralrecon/issues/198)] - ASCIIGenome failing during analysis +- [[#201](https://github.com/nf-core/viralrecon/issues/201)] - Conditional include are not expected to work +- [[#204](https://github.com/nf-core/viralrecon/issues/204)] - Memory errors for SNP_EFF step + +### Parameters + +| Old parameter | New parameter | +| --------------------------- | ------------- | +| `--public_data_ids` | | +| `--skip_sra_fastq_download` | | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> +> **NB:** Parameter has been **added** if just the new parameter information is present. +> +> **NB:** Parameter has been **removed** if new parameter information isn't present. + +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. + +| Dependency | Old version | New version | +| -------------- | ----------- | ----------- | +| `nextclade_js` | 0.14.2 | 0.14.4 | +| `pangolin` | 2.4.2 | 3.0.5 | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. 
+ +## [[2.0](https://github.com/nf-core/viralrecon/releases/tag/2.0)] - 2021-05-13 + +### :warning: Major enhancements + +- Pipeline has been re-implemented in [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) +- All software containers are now exclusively obtained from [Biocontainers](https://biocontainers.pro/#/registry) +- Updated minimum Nextflow version to `v21.04.0` (see [nextflow#572](https://github.com/nextflow-io/nextflow/issues/1964)) +- [BCFtools](http://samtools.github.io/bcftools/bcftools.html) and [iVar](https://github.com/andersen-lab/ivar) will be run by default for Illumina metagenomics and amplicon data, respectively. However, this behaviour can be customised with the `--callers` parameter. +- Variant graph processes to call variants relative to the reference genome directly from _de novo_ assemblies have been deprecated and removed +- Variant calling with Varscan 2 has been deprecated and removed due to [licensing restrictions](https://github.com/dkoboldt/varscan/issues/12) +- New tools: + - [Pangolin](https://github.com/cov-lineages/pangolin) for lineage analysis + - [Nextclade](https://github.com/nextstrain/nextclade) for clade assignment, mutation calling and consensus sequence quality checks + - [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) for individual variant screenshots with annotation tracks + +### Other enhancements & fixes + +- Illumina and Nanopore runs containing the same 48 samples sequenced on both platforms have been uploaded to the nf-core AWS account for full-sized tests on release +- Initial implementation of a standardised samplesheet JSON schema to use with user interfaces and for validation +- Default human `--kraken2_db` link has been changed from Zenodo to an AWS S3 bucket for more reliable downloads +- Updated pipeline template to nf-core/tools `1.14` +- Optimise MultiQC configuration and input files for faster run-time on huge sample numbers +- 
[[#122](https://github.com/nf-core/viralrecon/issues/122)] - Single SPAdes command to rule them all +- [[#138](https://github.com/nf-core/viralrecon/issues/138)] - Problem masking the consensus sequence +- [[#142](https://github.com/nf-core/viralrecon/issues/142)] - Unknown method invocation `toBytes` on String type +- [[#169](https://github.com/nf-core/viralrecon/issues/169)] - ggplot2 error when generating mosdepth amplicon plot with Swift v2 primers +- [[#170](https://github.com/nf-core/viralrecon/issues/170)] - ivar trimming of Swift libraries new offset feature +- [[#175](https://github.com/nf-core/viralrecon/issues/175)] - MultiQC report does not include all the metrics +- [[#188](https://github.com/nf-core/viralrecon/pull/188)] - Add and fix EditorConfig linting in entire pipeline + +### Parameters + +| Old parameter | New parameter | +| ----------------------------- | ------------------------------------- | +| `--amplicon_bed` | `--primer_bed` | +| `--amplicon_fasta` | `--primer_fasta` | +| `--amplicon_left_suffix` | `--primer_left_suffix` | +| `--amplicon_right_suffix` | `--primer_right_suffix` | +| `--filter_dups` | `--filter_duplicates` | +| `--skip_adapter_trimming` | `--skip_fastp` | +| `--skip_amplicon_trimming` | `--skip_cutadapt` | +| | `--artic_minion_aligner` | +| | `--artic_minion_caller` | +| | `--artic_minion_medaka_model` | +| | `--asciigenome_read_depth` | +| | `--asciigenome_window_size` | +| | `--blast_db` | +| | `--enable_conda` | +| | `--fast5_dir` | +| | `--fastq_dir` | +| | `--ivar_trim_offset` | +| | `--kraken2_assembly_host_filter` | +| | `--kraken2_variants_host_filter` | +| | `--min_barcode_reads` | +| | `--min_guppyplex_reads` | +| | `--multiqc_title` | +| | `--platform` | +| | `--primer_set` | +| | `--primer_set_version` | +| | `--public_data_ids` | +| | `--save_trimmed_fail` | +| | `--save_unaligned` | +| | `--sequencing_summary` | +| | `--singularity_pull_docker_container` | +| | `--skip_asciigenome` | +| | `--skip_bandage` | +| 
| `--skip_consensus` | +| | `--skip_ivar_trim` | +| | `--skip_nanoplot` | +| | `--skip_pangolin` | +| | `--skip_pycoqc` | +| | `--skip_nextclade` | +| | `--skip_sra_fastq_download` | +| | `--spades_hmm` | +| | `--spades_mode` | +| `--cut_mean_quality` | | +| `--filter_unmapped` | | +| `--ivar_trim_min_len` | | +| `--ivar_trim_min_qual` | | +| `--ivar_trim_window_width` | | +| `--kraken2_use_ftp` | | +| `--max_allele_freq` | | +| `--min_allele_freq` | | +| `--min_base_qual` | | +| `--min_coverage` | | +| `--min_trim_length` | | +| `--minia_kmer` | | +| `--mpileup_depth` | | +| `--name` | | +| `--qualified_quality_phred` | | +| `--save_align_intermeds` | | +| `--save_kraken2_fastq` | | +| `--save_sra_fastq` | | +| `--skip_sra` | | +| `--skip_vg` | | +| `--unqualified_percent_limit` | | +| `--varscan2_strand_filter` | | + +> **NB:** Parameter has been **updated** if both old and new parameter information is present. +> +> **NB:** Parameter has been **added** if just the new parameter information is present. +> +> **NB:** Parameter has been **removed** if new parameter information isn't present. + +### Software dependencies + +Note, since the pipeline is now using Nextflow DSL2, each process will be run with its own [Biocontainer](https://biocontainers.pro/#/registry). This means that on occasion it is entirely possible for the pipeline to be using different versions of the same tool. However, the overall software dependency changes compared to the last release have been listed below for reference. 
+ +| Dependency | Old version | New version | +| ----------------------------- | ----------- | ----------- | +| `artic` | | 1.2.1 | +| `asciigenome` | | 1.16.0 | +| `bc` | 1.07.1 | | +| `bcftools` | 1.9 | 1.11 | +| `bedtools` | 2.29.2 | 2.30.0 | +| `bioconductor-biostrings` | 2.54.0 | 2.58.0 | +| `bioconductor-complexheatmap` | 2.2.0 | 2.6.2 | +| `blast` | 2.9.0 | 2.10.1 | +| `bowtie2` | 2.4.1 | 2.4.2 | +| `cutadapt` | 2.10 | 3.2 | +| `ivar` | 1.2.2 | 1.3.1 | +| `kraken2` | 2.0.9beta | 2.1.1 | +| `markdown` | 3.2.2 | | +| `minimap2` | 2.17 | | +| `mosdepth` | 0.2.6 | 0.3.1 | +| `multiqc` | 1.9 | 1.10.1 | +| `nanoplot` | | 1.36.1 | +| `nextclade_js` | | 0.14.2 | +| `pangolin` | | 2.4.2 | +| `parallel-fastq-dump` | 0.6.6 | | +| `picard` | 2.23.0 | 2.23.9 | +| `pigz` | 2.3.4 | | +| `plasmidid` | 1.6.3 | 1.6.4 | +| `pycoqc` | | 2.5.2 | +| `pygments` | 2.6.1 | | +| `pymdown-extensions` | 7.1 | | +| `python` | 3.6.10 | 3.8.3 | +| `r-base` | 3.6.2 | 4.0.3 | +| `r-ggplot2` | 3.3.1 | 3.3.3 | +| `r-tidyr` | 1.1.0 | | +| `requests` | | 2.24.0 | +| `samtools` | 1.9 | 1.10 | +| `seqwish` | 0.4.1 | | +| `snpeff` | 4.5covid19 | 5.0 | +| `spades` | 3.14.0 | 3.15.2 | +| `sra-tools` | 2.10.7 | | +| `tabix` | | 0.2.6 | +| `unicycler` | 0.4.7 | 0.4.8 | +| `varscan` | 2.4.4 | | +| `vg` | 1.24.0 | | + +> **NB:** Dependency has been **updated** if both old and new version information is present. +> +> **NB:** Dependency has been **added** if just the new version information is present. +> +> **NB:** Dependency has been **removed** if new version information isn't present. 
+ +## [[1.1.0](https://github.com/nf-core/viralrecon/releases/tag/1.1.0)] - 2020-06-23 + +### Added + +- [#112](https://github.com/nf-core/viralrecon/issues/112) - Per-amplicon coverage plot +- [#124](https://github.com/nf-core/viralrecon/issues/124) - Intersect variants across callers +- [nf-core/tools#616](https://github.com/nf-core/tools/pull/616) - Updated GitHub Actions to build Docker image and push to Docker Hub +- Parameters: + - `--min_mapped_reads` to circumvent failures for samples with low number of mapped reads + - `--varscan2_strand_filter` to toggle the default Varscan 2 strand filter + - `--skip_mosdepth` - skip genome-wide and amplicon coverage plot generation from mosdepth output + - `--amplicon_left_suffix` - to provide left primer suffix used in name field of `--amplicon_bed` + - `--amplicon_right_suffix` - to provide right primer suffix used in name field of `--amplicon_bed` + - Unify parameter specification with COG-UK pipeline: + - `--min_allele_freq` - minimum allele frequency threshold for calling variants + - `--mpileup_depth` - SAMTools mpileup max per-file depth + - `--ivar_exclude_reads` renamed to `--ivar_trim_noprimer` + - `--ivar_trim_min_len` - minimum length of read to retain after primer trimming + - `--ivar_trim_min_qual` - minimum quality threshold for sliding window to pass + - `--ivar_trim_window_width` - width of sliding window +- [#118](https://github.com/nf-core/viralrecon/issues/118) - Updated GitHub Actions AWS workflow for small and full size tests.
+ +### Removed + +- `--skip_qc` parameter + +### Dependencies + +- Add mosdepth `0.2.6` +- Add bioconductor-complexheatmap `2.2.0` +- Add bioconductor-biostrings `2.54.0` +- Add r-optparse `1.6.6` +- Add r-tidyr `1.1.0` +- Add r-tidyverse `1.3.0` +- Add r-ggplot2 `3.3.1` +- Add r-reshape2 `1.4.4` +- Add r-viridis `0.5.1` +- Update sra-tools `2.10.3` -> `2.10.7` +- Update bowtie2 `2.3.5.1` -> `2.4.1` +- Update picard `2.22.8` -> `2.23.0` +- Update minia `3.2.3` -> `3.2.4` +- Update plasmidid `1.5.2` -> `1.6.3` + +## [[1.0.0](https://github.com/nf-core/viralrecon/releases/tag/1.0.0)] - 2020-06-01 + +Initial release of nf-core/viralrecon, created with the [nf-core](http://nf-co.re/) template. + +This pipeline is a re-implementation of the [SARS_Cov2_consensus-nf](https://github.com/BU-ISCIII/SARS_Cov2_consensus-nf) and [SARS_Cov2_assembly-nf](https://github.com/BU-ISCIII/SARS_Cov2_assembly-nf) pipelines initially developed by [Sarai Varona](https://github.com/svarona) and [Sara Monzon](https://github.com/saramonzon) from [BU-ISCIII](https://github.com/BU-ISCIII). Porting both of these pipelines to nf-core was an international collaboration between numerous contributors and developers, led by [Harshil Patel](https://github.com/drpatelh) from the [The Bioinformatics & Biostatistics Group](https://www.crick.ac.uk/research/science-technology-platforms/bioinformatics-and-biostatistics/) at [The Francis Crick Institute](https://www.crick.ac.uk/), London. We appreciated the need to have a portable, reproducible and scalable pipeline for the analysis of COVID-19 sequencing samples and so the Avengers Assembled! + +### Pipeline summary + +1. Download samples via SRA, ENA or GEO ids ([`ENA FTP`](https://ena-docs.readthedocs.io/en/latest/retrieval/file-download.html), [`parallel-fastq-dump`](https://github.com/rvalieris/parallel-fastq-dump); _if required_) +2. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html); _if required_) +3. 
Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +4. Adapter trimming ([`fastp`](https://github.com/OpenGene/fastp)) +5. Variant calling + 1. Read alignment ([`Bowtie 2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) + 2. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 3. Primer sequence removal ([`iVar`](https://github.com/andersen-lab/ivar); _amplicon data only_) + 4. Duplicate read marking ([`picard`](https://broadinstitute.github.io/picard/); _removal optional_) + 5. Alignment-level QC ([`picard`](https://broadinstitute.github.io/picard/), [`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 6. Choice of multiple variant calling and consensus sequence generation routes ([`VarScan 2`](http://dkoboldt.github.io/varscan/), [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/) _||_ [`iVar variants and consensus`](https://github.com/andersen-lab/ivar) _||_ [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/)) + - Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) + - Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) +6. _De novo_ assembly + 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); _amplicon data only_) + 2. Removal of host reads ([`Kraken 2`](http://ccb.jhu.edu/software/kraken2/)) + 3. 
Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) _||_ [`metaSPAdes`](http://cab.spbu.ru/software/meta-spades/) _||_ [`Unicycler`](https://github.com/rrwick/Unicycler) _||_ [`minia`](https://github.com/GATB/minia)) + - Blast to reference genome ([`blastn`](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch)) + - Contiguate assembly ([`ABACAS`](https://www.sanger.ac.uk/science/tools/pagit)) + - Assembly report ([`PlasmidID`](https://github.com/BU-ISCIII/plasmidID)) + - Assembly assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) + - Call variants relative to reference ([`Minimap2`](https://github.com/lh3/minimap2), [`seqwish`](https://github.com/ekg/seqwish), [`vg`](https://github.com/vgteam/vg), [`Bandage`](https://github.com/rrwick/Bandage)) + - Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) +7. Present QC and visualisation for raw read, alignment, assembly and variant calling results ([`MultiQC`](http://multiqc.info/)) diff --git a/CITATIONS.md b/CITATIONS.md index 8d5f2e68..19850506 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -10,13 +10,117 @@ ## Pipeline tools +- [ABACAS](https://www.ncbi.nlm.nih.gov/pubmed/19497936/) + + > Assefa S, Keane TM, Otto TD, Newbold C, Berriman M. ABACAS: algorithm-based automatic contiguation of assembled sequences. Bioinformatics. 2009 Aug 1;25(15):1968-9. doi: 10.1093/bioinformatics/btp347. Epub 2009 Jun 3. PubMed PMID: 19497936; PubMed Central PMCID: PMC2712343. + +- [ASCIIGenome](https://www.ncbi.nlm.nih.gov/pubmed/28119307/) + + > Beraldi D. ASCIIGenome: a command line genome browser for console terminals. Bioinformatics. 2017 May 15;33(10):1568-1569. doi: 10.1093/bioinformatics/btx007. PubMed PMID: 28119307; PubMed Central PMCID: PMC5423454. 
+ +- [ARTIC network](https://github.com/artic-network) + +- [Bandage](https://www.ncbi.nlm.nih.gov/pubmed/26099265) + + > Wick R.R., Schultz M.B., Zobel J. & Holt K.E. Bandage: interactive visualisation of de novo genome assemblies. Bioinformatics, 31(20), 3350-3352. doi: 10.1093/bioinformatics/btv383. PubMed PMID: 26099265; PubMed Central PMCID: PMC4595904. + +- [BCFtools](https://www.ncbi.nlm.nih.gov/pubmed/21903627/) + + > Li H. A statistical framework for SNP calling, mutation discovery, association mapping and population genetical parameter estimation from sequencing data. Bioinformatics. 2011 Nov 1;27(21):2987-93. doi: 10.1093/bioinformatics/btr509. Epub 2011 Sep 8. PubMed PMID: 21903627; PubMed Central PMCID: PMC3198575. + +- [BEDTools](https://www.ncbi.nlm.nih.gov/pubmed/20110278/) + + > Quinlan AR, Hall IM. BEDTools: a flexible suite of utilities for comparing genomic features. Bioinformatics. 2010 Mar 15;26(6):841-2. doi: 10.1093/bioinformatics/btq033. Epub 2010 Jan 28. PubMed PMID: 20110278; PubMed Central PMCID: PMC2832824. + +- [BLAST](https://www.ncbi.nlm.nih.gov/pubmed/20003500/) + + > Camacho C, Coulouris G, Avagyan V, Ma N, Papadopoulos J, Bealer K, Madden TL. BLAST+: architecture and applications. BMC Bioinformatics. 2009 Dec 15;10:421. doi: 10.1186/1471-2105-10-421. PubMed PMID: 20003500; PubMed Central PMCID: PMC2803857. + +- [Bowtie 2](https://www.ncbi.nlm.nih.gov/pubmed/22388286/) + + > Langmead B, Salzberg SL. Fast gapped-read alignment with Bowtie 2. Nat Methods. 2012 Mar 4;9(4):357-9. doi: 10.1038/nmeth.1923. PubMed PMID: 22388286; PubMed Central PMCID: PMC3322381. + +- [Cutadapt](http://dx.doi.org/10.14806/ej.17.1.200) + + > Martin, M. Cutadapt removes adapter sequences from high-throughput sequencing reads. EMBnet.journal, [S.l.], v. 17, n. 1, p. pp. 10-12, may 2011. ISSN 2226-6089. doi: 10.14806/ej.17.1.200. + +- [fastp](https://www.ncbi.nlm.nih.gov/pubmed/30423086/) + + > Chen S, Zhou Y, Chen Y, Gu J.
fastp: an ultra-fast all-in-one FASTQ preprocessor. Bioinformatics. 2018 Sep 1;34(17):i884-i890. doi: 10.1093/bioinformatics/bty560. PubMed PMID: 30423086; PubMed Central PMCID: PMC6129281. + - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) -> Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. + +- [iVar](https://www.ncbi.nlm.nih.gov/pubmed/30621750/) + + > Grubaugh ND, Gangavarapu K, Quick J, Matteson NL, De Jesus JG, Main BJ, Tan AL, Paul LM, Brackney DE, Grewal S, Gurfield N, Van Rompay KKA, Isern S, Michael SF, Coffey LL, Loman NJ, Andersen KG. An amplicon-based sequencing framework for accurately measuring intrahost virus diversity using PrimalSeq and iVar. Genome Biol. 2019 Jan 8;20(1):8. doi: 10.1186/s13059-018-1618-7. PubMed PMID: 30621750; PubMed Central PMCID: PMC6325816. + +- [Kraken 2](https://www.ncbi.nlm.nih.gov/pubmed/31779668/) + + > Wood DE, Lu J, Langmead B. Improved metagenomic analysis with Kraken 2. Genome Biol. 2019 Nov 28;20(1):257. doi: 10.1186/s13059-019-1891-0. PubMed PMID: 31779668; PubMed Central PMCID: PMC6883579. + +- [minia](https://www.ncbi.nlm.nih.gov/pubmed/24040893/) + + > Chikhi R, Rizk G. Space-efficient and exact de Bruijn graph representation based on a Bloom filter. Algorithms Mol Biol. 2013 Sep 16;8(1):22. doi: 10.1186/1748-7188-8-22. PubMed PMID: 24040893; PubMed Central PMCID: PMC3848682. + +- [mosdepth](https://www.ncbi.nlm.nih.gov/pubmed/29096012) + + > Pedersen BS, Quinlan AR. Mosdepth: Quick Coverage Calculation for Genomes and Exomes. Bioinformatics. 2018 Mar 1;34(5):867-868. doi: 10.1093/bioinformatics/btx699. PMID: 29096012 PMCID: PMC6030888. + +- [MultiQC](https://www.ncbi.nlm.nih.gov/pubmed/27312411/) + + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 
2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + +- [NanoPlot](https://pubmed.ncbi.nlm.nih.gov/29547981/) + + > De Coster W, D'Hert S, Schultz DT, Cruts M, Van Broeckhoven C. NanoPack: visualizing and processing long-read sequencing data. Bioinformatics. 2018 Aug 1;34(15):2666-2669. doi: 10.1093/bioinformatics/bty149. PubMed PMID: 29547981; PubMed Central PMCID: PMC6061794. + +- [Nextstrain](https://pubmed.ncbi.nlm.nih.gov/29790939/) + + > Hadfield J, Megill C, Bell SM, Huddleston J, Potter B, Callender C, Sagulenko P, Bedford T, Neher RA. Nextstrain: real-time tracking of pathogen evolution. Bioinformatics. 2018 Dec 1;34(23):4121-4123. doi: 10.1093/bioinformatics/bty407. PubMed PMID: 29790939; PubMed Central PMCID: PMC6247931. + +- [pangolin](https://github.com/cov-lineages/pangolin) + + > Áine O'Toole, Emily Scher, Anthony Underwood, Ben Jackson, Verity Hill, JT McCrone, Chris Ruis, Khali Abu-Dahab, Ben Taylor, Corin Yeats, Louis du Plessis, David Aanensen, Eddie Holmes, Oliver Pybus, Andrew Rambaut. pangolin: lineage assignment in an emerging pandemic as an epidemiological tool. Publication in preparation. + +- [picard-tools](http://broadinstitute.github.io/picard) + +- [pycoQC](https://doi.org/10.21105/joss.01236) + + > Leger A, Leonardi T, (2019). pycoQC, interactive quality control for Oxford Nanopore Sequencing. Journal of Open Source Software, 4(34), 1236. + +- [QUAST](https://www.ncbi.nlm.nih.gov/pubmed/23422339/) + + > Gurevich A, Saveliev V, Vyahhi N, Tesler G. QUAST: quality assessment tool for genome assemblies. Bioinformatics. 2013 Apr 15;29(8):1072-5. doi: 10.1093/bioinformatics/btt086. Epub 2013 Feb 19. PubMed PMID: 23422339; PubMed Central PMCID: PMC3624806. + +- [R](https://www.R-project.org/) + + > R Core Team (2017). R: A language and environment for statistical computing. R Foundation for Statistical Computing, Vienna, Austria. 
+ +- [SAMtools](https://www.ncbi.nlm.nih.gov/pubmed/19505943/) + + > Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. doi: 10.1093/bioinformatics/btp352. Epub 2009 Jun 8. PubMed PMID: 19505943; PubMed Central PMCID: PMC2723002. + +- [SnpEff](https://www.ncbi.nlm.nih.gov/pubmed/22728672/) + + > Cingolani P, Platts A, Wang le L, Coon M, Nguyen T, Wang L, Land SJ, Lu X, Ruden DM. A program for annotating and predicting the effects of single nucleotide polymorphisms, SnpEff: SNPs in the genome of Drosophila melanogaster strain w1118; iso-2; iso-3. Fly (Austin). 2012 Apr-Jun;6(2):80-92. doi: 10.4161/fly.19695. PubMed PMID: 22728672; PubMed Central PMCID: PMC3679285. + +- [SnpSift](https://www.ncbi.nlm.nih.gov/pubmed/22435069/) + + > Cingolani P, Patel VM, Coon M, Nguyen T, Land SJ, Ruden DM, Lu X. Using Drosophila melanogaster as a Model for Genotoxic Chemical Mutational Studies with a New Program, SnpSift. Front Genet. 2012 Mar 15;3:35. doi: 10.3389/fgene.2012.00035. eCollection 2012. PubMed PMID: 22435069; PubMed Central PMCID: PMC3304048. + +- [SPAdes](https://www.ncbi.nlm.nih.gov/pubmed/24093227/) + + > Nurk S, Bankevich A, Antipov D, Gurevich AA, Korobeynikov A, Lapidus A, Prjibelski AD, Pyshkin A, Sirotkin A, Sirotkin Y, Stepanauskas R, Clingenpeel SR, Woyke T, McLean JS, Lasken R, Tesler G, Alekseyev MA, Pevzner PA. Assembling single-cell genomes and mini-metagenomes from chimeric MDA products. J Comput Biol. 2013 Oct;20(10):714-37. doi: 10.1089/cmb.2013.0084. PubMed PMID: 24093227; PubMed Central PMCID: PMC3791033. + +- [Unicycler](https://www.ncbi.nlm.nih.gov/pubmed/28594827/) + + > Wick RR, Judd LM, Gorrie CL, Holt KE. Unicycler: Resolving bacterial genome assemblies from short and long sequencing reads. PLoS Comput Biol. 2017 Jun 8;13(6):e1005595. doi: 10.1371/journal.pcbi.1005595. 
eCollection 2017 Jun. PubMed PMID: 28594827; PubMed Central PMCID: PMC5481147. -- [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) +- [Vcflib](https://www.biorxiv.org/content/early/2021/05/23/2021.05.21.445151) -> Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. + > Garrison E, Kronenberg ZN, Dawson ET, Pedersen BS, Prins P. Vcflib and tools for processing the VCF variant call format. bioRxiv 2021 May. doi: 10.1101/2021.05.21.445151. ## Software packaging/containerisation tools diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md old mode 100644 new mode 100755 diff --git a/README.md b/README.md index 80f33ba0..ef4b57b3 100644 --- a/README.md +++ b/README.md @@ -19,28 +19,80 @@ ## Introduction -**nf-core/viralrecon** is a bioinformatics pipeline that ... - - - - - - -1. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) -2. Present QC for raw reads ([`MultiQC`](http://multiqc.info/)) +**nf-core/viralrecon** is a bioinformatics analysis pipeline used to perform assembly and intra-host/low-frequency variant calling for viral samples. The pipeline supports both Illumina and Nanopore sequencing data. For Illumina short-reads the pipeline is able to analyse metagenomics data typically obtained from shotgun sequencing (e.g. directly from clinical samples) and enrichment-based library preparation methods (e.g. amplicon-based: [ARTIC SARS-CoV-2 enrichment protocol](https://artic.network/ncov-2019); or probe-capture-based). For Nanopore data the pipeline only supports amplicon-based analysis obtained from primer sets created and maintained by the [ARTIC Network](https://artic.network/). + +On release, automated continuous integration tests run the pipeline on a full-sized dataset on the AWS cloud infrastructure.
This ensures that the pipeline runs on AWS, has sensible resource allocation defaults set to run on real-world datasets, and permits the persistent storage of results to benchmark between pipeline releases and other analysis sources. The results obtained from running the full-sized tests individually for each `--platform` option can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results) and the output directories will be named accordingly i.e. `platform_illumina/` and `platform_nanopore/`. + +The pipeline is built using [Nextflow](https://www.nextflow.io), a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. Where possible, these processes have been submitted to and installed from [nf-core/modules](https://github.com/nf-core/modules) in order to make them available to all nf-core pipelines, and to everyone within the Nextflow community! + +## Pipeline summary + +The pipeline has numerous options to allow you to run only specific aspects of the workflow if you so wish. For example, for Illumina data you can skip the host read filtering step with Kraken 2 with `--skip_kraken2` or you can skip all of the assembly steps with the `--skip_assembly` parameter. See the [usage](https://nf-co.re/viralrecon/usage) and [parameter](https://nf-co.re/viralrecon/parameters) docs for all of the available options when running the pipeline. + +The SRA download functionality has been removed from the pipeline (`>=2.1`) and ported to an independent workflow called [nf-core/fetchngs](https://nf-co.re/fetchngs). 
You can provide `--nf_core_pipeline viralrecon` when running nf-core/fetchngs to download and auto-create a samplesheet containing publicly available samples that can be accepted directly by the Illumina processing mode of nf-core/viralrecon. + +A number of improvements were made to the pipeline recently, mainly with regard to the variant calling. Please see [Major updates in v2.3](https://github.com/nf-core/viralrecon/issues/271) for a more detailed description. + +### Illumina + +![nf-core/viralrecon Illumina metro map](docs/images/nf-core-viralrecon_metro_map_illumina.png) + +1. Merge re-sequenced FastQ files ([`cat`](http://www.linfo.org/cat.html)) +2. Read QC ([`FastQC`](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)) +3. Adapter trimming ([`fastp`](https://github.com/OpenGene/fastp)) +4. Removal of host reads ([`Kraken 2`](http://ccb.jhu.edu/software/kraken2/); _optional_) +5. Variant calling + 1. Read alignment ([`Bowtie 2`](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml)) + 2. Sort and index alignments ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 3. Primer sequence removal ([`iVar`](https://github.com/andersen-lab/ivar); _amplicon data only_) + 4. Duplicate read marking ([`picard`](https://broadinstitute.github.io/picard/); _optional_) + 5. Alignment-level QC ([`picard`](https://broadinstitute.github.io/picard/), [`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) + 6. Genome-wide and amplicon coverage QC plots ([`mosdepth`](https://github.com/brentp/mosdepth/)) + 7. 
Choice of multiple variant callers ([`iVar variants`](https://github.com/andersen-lab/ivar); _default for amplicon data_ _||_ [`BCFTools`](http://samtools.github.io/bcftools/bcftools.html); _default for metagenomics data_) + - Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) + - Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) + 8. Choice of multiple consensus callers ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html), [`BEDTools`](https://github.com/arq5x/bedtools2/); _default for both amplicon and metagenomics data_ _||_ [`iVar consensus`](https://github.com/andersen-lab/ivar)) + - Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) + - Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) + - Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) + 9. Relative lineage abundance analysis from mixed SARS-CoV-2 samples ([`Freyja`](https://github.com/andersen-lab/Freyja)) + 10. Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) +6. _De novo_ assembly + 1. Primer trimming ([`Cutadapt`](https://cutadapt.readthedocs.io/en/stable/guide.html); _amplicon data only_) + 2. 
Choice of multiple assembly tools ([`SPAdes`](http://cab.spbu.ru/software/spades/) _||_ [`Unicycler`](https://github.com/rrwick/Unicycler) _||_ [`minia`](https://github.com/GATB/minia)) + - Blast to reference genome ([`blastn`](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch)) + - Contiguate assembly ([`ABACAS`](https://www.sanger.ac.uk/science/tools/pagit)) + - Assembly report ([`PlasmidID`](https://github.com/BU-ISCIII/plasmidID)) + - Assembly assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) +7. Present QC and visualisation for raw read, alignment, assembly and variant calling results ([`MultiQC`](http://multiqc.info/)) + +### Nanopore + +![nf-core/viralrecon Nanopore metro map](docs/images/nf-core-viralrecon_metro_map_nanopore.png) + +1. Sequencing QC ([`pycoQC`](https://github.com/a-slide/pycoQC)) +2. Aggregate pre-demultiplexed reads from MinKNOW/Guppy ([`artic guppyplex`](https://artic.readthedocs.io/en/latest/commands/)) +3. Read QC ([`NanoPlot`](https://github.com/wdecoster/NanoPlot)) +4. Align reads, call variants and generate consensus sequence ([`artic minion`](https://artic.readthedocs.io/en/latest/commands/)) +5. Remove unmapped reads and obtain alignment metrics ([`SAMtools`](https://sourceforge.net/projects/samtools/files/samtools/)) +6. Genome-wide and amplicon coverage QC plots ([`mosdepth`](https://github.com/brentp/mosdepth/)) +7. 
Downstream variant analysis: + - Count metrics ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)) + - Variant annotation ([`SnpEff`](http://snpeff.sourceforge.net/SnpEff.html), [`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) + - Consensus assessment report ([`QUAST`](http://quast.sourceforge.net/quast)) + - Lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) + - Clade assignment, mutation calling and sequence quality checks ([`Nextclade`](https://github.com/nextstrain/nextclade)) + - Individual variant screenshots with annotation tracks ([`ASCIIGenome`](https://asciigenome.readthedocs.io/en/latest/)) + - Recover relative lineage abundances from mixed SARS-CoV-2 samples ([`Freyja`](https://github.com/andersen-lab/Freyja)) + - Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)) +8. Present QC, visualisation and custom reporting for sequencing, raw reads, alignment and variant calling results ([`MultiQC`](http://multiqc.info/)) ## Usage > [!NOTE] -> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data. 
- - +### Typical commands -Now, you can run the pipeline using: +#### Illumina shotgun analysis + +```bash +nextflow run nf-core/viralrecon \ + --input samplesheet.csv \ + --outdir <OUTDIR> \ + --platform illumina \ + --protocol metagenomic \ + --genome 'MN908947.3' \ + -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> +``` - +#### Illumina amplicon analysis ```bash nextflow run nf-core/viralrecon \ - -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> \ --input samplesheet.csv \ - --outdir <OUTDIR> + --outdir <OUTDIR> \ + --platform illumina \ + --protocol amplicon \ + --genome 'MN908947.3' \ + --primer_set artic \ + --primer_set_version 3 \ + --skip_assembly \ + -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> +``` + +#### Nanopore amplicon analysis + +```bash +nextflow run nf-core/viralrecon \ + --input samplesheet.csv \ + --outdir <OUTDIR> \ + --platform nanopore \ + --genome 'MN908947.3' \ + --primer_set 'artic' \ + --primer_set_version 3 \ + --fastq_dir fastq_pass/ \ + --fast5_dir fast5_pass/ \ + --sequencing_summary sequencing_summary.txt \ + -profile <docker/singularity/podman/shifter/charliecloud/conda/institute> ``` > [!WARNING] @@ -71,6 +155,27 @@ nextflow run nf-core/viralrecon \ For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/viralrecon/usage) and the [parameter documentation](https://nf-co.re/viralrecon/parameters). +### Automatic samplesheet generation + +An executable Python script called [`fastq_dir_to_samplesheet.py`](https://github.com/nf-core/viralrecon/blob/master/bin/fastq_dir_to_samplesheet.py) has been provided if you are using `--platform illumina` and would like to auto-create an input samplesheet based on a directory containing FastQ files **before** you run the pipeline (requires Python 3 installed locally) e.g. + +```console +wget -L https://raw.githubusercontent.com/nf-core/viralrecon/master/bin/fastq_dir_to_samplesheet.py +./fastq_dir_to_samplesheet.py <FASTQ_DIR> samplesheet.csv +``` + +### Reference genomes + +You can find the default keys used to specify `--genome` in the [genomes config file](https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config).
This provides default options for: + +- Reference genomes (including SARS-CoV-2) +- Genome-associated primer sets +- [Nextclade datasets](https://docs.nextstrain.org/projects/nextclade/en/latest/user/datasets.html) + +The Pangolin and Nextclade lineage and clade definitions change regularly as new SARS-CoV-2 lineages are discovered. For instructions to use more recent versions of lineage analysis tools like Pangolin and Nextclade please refer to the [updating containers](https://nf-co.re/viralrecon/usage#updating-containers) section in the usage docs. + +Where possible we are trying to collate links and settings for standard primer sets to make it easier to run the pipeline with standard keys; see [usage docs](https://nf-co.re/viralrecon/usage#illumina-primer-sets). + ## Pipeline output To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/viralrecon/results) tab on the nf-core website pipeline page. @@ -79,11 +184,41 @@ For more details about the output files and reports, please refer to the ## Credits -nf-core/viralrecon was originally written by Patel H, Varona S and Monzon S. - -We thank the following people for their extensive assistance in the development of this pipeline: - - +These scripts were originally written by [Sarai Varona](https://github.com/svarona), [Miguel Juliá](https://github.com/MiguelJulia), [Erika Kvalem](https://github.com/ErikaKvalem) and [Sara Monzon](https://github.com/saramonzon) from [BU-ISCIII](https://github.com/BU-ISCIII) and co-ordinated by Isabel Cuesta for the [Institute of Health Carlos III](https://eng.isciii.es/eng.isciii.es/Paginas/Inicio.html), Spain.
Through collaboration with the nf-core community the pipeline has now been updated substantially to include additional processing steps, to standardise inputs/outputs and to improve pipeline reporting; implemented and maintained primarily by Harshil Patel ([@drpatelh](https://github.com/drpatelh)) from [Seqera Labs, Spain](https://seqera.io/). + +The key steps in the Nanopore implementation of the pipeline are carried out using the [ARTIC Network's field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) and were inspired by the amazing work carried out by contributors to the [connor-lab/ncov2019-artic-nf pipeline](https://github.com/connor-lab/ncov2019-artic-nf) originally written by [Matt Bull](https://github.com/m-bull) for use by the [COG-UK](https://github.com/COG-UK) project. Thank you for all of your incredible efforts during this pandemic! + +Many thanks to others who have helped out and contributed along the way too, including (but not limited to)\*: + +| Name | Affiliation | +| --------------------------------------------------------- | ------------------------------------------------------------------------------------- | +| [Aengus Stewart](https://github.com/stewarta) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | +| [Alexander Peltzer](https://github.com/apeltzer) | [Boehringer Ingelheim, Germany](https://www.boehringer-ingelheim.de/) | +| [Alison Meynert](https://github.com/ameynert) | [University of Edinburgh, Scotland](https://www.ed.ac.uk/) | +| [Anthony Underwood](https://github.com/antunderwood) | [Centre for Genomic Pathogen Surveillance](https://www.pathogensurveillance.net) | +| [Anton Korobeynikov](https://github.com/asl) | [Saint Petersburg State University, Russia](https://english.spbu.ru/) | +| [Artem Babaian](https://github.com/ababaian) | [University of British Columbia, Canada](https://www.ubc.ca/) | +| [Dmitry Meleshko](https://github.com/1dayac) | [Saint Petersburg State University, 
Russia](https://english.spbu.ru/) | +| [Edgar Garriga Nogales](https://github.com/edgano) | [Centre for Genomic Regulation, Spain](https://www.crg.eu/) | +| [Erik Garrison](https://github.com/ekg) | [UCSC, USA](https://www.ucsc.edu/) | +| [Gisela Gabernet](https://github.com/ggabernet) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | +| [Joao Curado](https://github.com/jcurado-flomics) | [Flomics Biotech, Spain](https://www.flomics.com/) | +| [Jerome Nicod](https://github.com/Jeromics) | [The Francis Crick Institute, UK](https://www.crick.ac.uk) | +| [Jose Espinosa-Carrasco](https://github.com/JoseEspinosa) | [Centre for Genomic Regulation, Spain](https://www.crg.eu/) | +| [Katrin Sameith](https://github.com/ktrns) | [DRESDEN-concept Genome Center, Germany](https://genomecenter.tu-dresden.de) | +| [Kevin Menden](https://github.com/KevinMenden) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | +| [Lluc Cabus](https://github.com/lcabus-flomics) | [Flomics Biotech, Spain](https://www.flomics.com/) | +| [Marta Pozuelo](https://github.com/mpozuelo-flomics) | [Flomics Biotech, Spain](https://www.flomics.com/) | +| [Maxime Garcia](https://github.com/maxulysse) | [Seqera Labs, Spain](https://seqera.io/) | +| [Michael Heuer](https://github.com/heuermh) | [UC Berkeley, USA](https://rise.cs.berkeley.edu)          | +| [Phil Ewels](https://github.com/ewels) | [SciLifeLab, Sweden](https://www.scilifelab.se/) | +| [Richard Mitter](https://github.com/rjmitter) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | +| [Robert Goldstone](https://github.com/rjgoldstone) | [The Francis Crick Institute, UK](https://www.crick.ac.uk/) | +| [Simon Heumos](https://github.com/subwaystation) | [QBiC, University of Tübingen, Germany](https://portal.qbic.uni-tuebingen.de/portal/) | +| [Stephen Kelly](https://github.com/stevekm) | [Memorial Sloan Kettering Cancer Center, USA](https://www.mskcc.org/) | +| 
[Thanh Le Viet](https://github.com/thanhleviet) | [Quadram Institute, UK](https://quadram.ac.uk/) | + +> \* Listed in alphabetical order ## Contributions and Support @@ -93,10 +228,7 @@ For further information or help, don't hesitate to get in touch on the [Slack `# ## Citations - - - - +If you use nf-core/viralrecon for your analysis, please cite it using the following doi: [10.5281/zenodo.3901628](https://doi.org/10.5281/zenodo.3901628) An extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file. diff --git a/assets/email_template.html b/assets/email_template.html index 455d0263..fbf28653 100644 --- a/assets/email_template.html +++ b/assets/email_template.html @@ -12,16 +12,98 @@ -

nf-core/viralrecon ${version}

-

Run Name: $runName

+

nf-core/viralrecon v${version}

+

Run Name: $runName

-<% if (!success){ - out << """ -
-

nf-core/viralrecon execution completed unsuccessfully!

+ <% if (!success) { out << """ +
+

nf-core/viralrecon execution completed unsuccessfully!

The exit status of the task that caused the workflow execution to fail was: $exitStatus.

The full error message was:

-
${errorReport}
+
${errorReport}
+
+ """ } else if (fail_mapped_reads.size() > 0) { out << """ +
+

nf-core/viralrecon execution completed with warnings!

+

+ The pipeline finished successfully, but the following samples were skipped due to failing the minimum mapped + read threshold (< ${min_mapped_reads}): +

+
    +
  • ${fail_mapped_reads.sort().join('
  • ')}
  • +
+

+
+ + """ } else { out << """ +
+ nf-core/viralrecon execution completed successfully! +
+ """ } %> + +

The workflow was completed at $dateComplete (duration: $duration)

+

The command used to launch the workflow was as follows:

+
+$commandLine
+ +

Pipeline Configuration:

+ + + <% out << summary.collect{ k,v -> "" }.join("\n") %> + +
$k
$v
+ +

nf-core/viralrecon

+

https://github.com/nf-core/viralrecon

""" } else { diff --git a/assets/email_template.txt b/assets/email_template.txt index bc26cee5..2ae75f0c 100644 --- a/assets/email_template.txt +++ b/assets/email_template.txt @@ -8,19 +8,31 @@ ---------------------------------------------------- Run Name: $runName -<% if (success){ - out << "## nf-core/viralrecon execution completed successfully! ##" -} else { +<% if (!success){ out << """#################################################### -## nf-core/viralrecon execution completed unsuccessfully! ## -#################################################### +## nf-core/viralrecon completed unsuccessfully! ## +####################################################\n The exit status of the task that caused the workflow execution to fail was: $exitStatus. The full error message was: ${errorReport} """ -} %> +} else if (fail_mapped_reads.size() > 0) { + out << """#################################################### +## nf-core/viralrecon completed with warnings! ## +####################################################\n +The pipeline finished successfully, but the following samples were skipped +due to failing the minimum mapped read threshold (less than ${min_mapped_reads}): + - ${fail_mapped_reads.sort().join("\n - ")} +""" +} else { + out << """#################################################### +## nf-core/viralrecon completed successfully! 
## +####################################################\n +""" +} +%> The workflow was completed at $dateComplete (duration: $duration) @@ -29,11 +41,11 @@ The command used to launch the workflow was as follows: $commandLine - Pipeline Configuration: ----------------------- <% out << summary.collect{ k,v -> " - $k: $v" }.join("\n") %> + -- nf-core/viralrecon https://github.com/nf-core/viralrecon diff --git a/assets/headers/blast_filtered_outfmt6_header.txt b/assets/headers/blast_filtered_outfmt6_header.txt new file mode 100644 index 00000000..e9584e74 --- /dev/null +++ b/assets/headers/blast_filtered_outfmt6_header.txt @@ -0,0 +1 @@ +stitle staxids qaccver saccver pident length mismatch gapopen qstart qend sstart send evalue bitscore slen qlen qcovs %cgAligned %refCovered diff --git a/assets/headers/blast_outfmt6_header.txt b/assets/headers/blast_outfmt6_header.txt new file mode 100644 index 00000000..57a9bc2d --- /dev/null +++ b/assets/headers/blast_outfmt6_header.txt @@ -0,0 +1 @@ +stitle staxids qaccver saccver pident length mismatch gapopen qstart qend sstart send evalue bitscore slen qlen qcovs diff --git a/assets/headers/ivar_variants_header_mqc.txt b/assets/headers/ivar_variants_header_mqc.txt new file mode 100644 index 00000000..34e5ddf5 --- /dev/null +++ b/assets/headers/ivar_variants_header_mqc.txt @@ -0,0 +1,8 @@ +#id: 'ivar_variants' +#section_name: 'VARIANTS: Total variants (iVar)' +#description: "Is calculated from the total number of variants called by +# iVar (Defaults: 0.25 allele frequency, minimum quality score = 20 and minimum position depth = 10)." +#plot_type: 'bargraph' +#anchor: 'ivar_variants' +#pconfig: +# title: 'iVar variant counts' diff --git a/assets/multiqc_config_illumina.yml b/assets/multiqc_config_illumina.yml new file mode 100644 index 00000000..5d0c1f08 --- /dev/null +++ b/assets/multiqc_config_illumina.yml @@ -0,0 +1,314 @@ +report_comment: > + This report has been generated by the nf-core/viralrecon + analysis pipeline. 
For information about how to interpret these results, please see the + documentation. + +data_format: "yaml" + +max_table_rows: 10000 + +run_modules: + - custom_content + - fastqc + - fastp + - kraken + - bowtie2 + - samtools + - mosdepth + - bcftools + - snpeff + - quast + - pangolin + - cutadapt + - freyja + +module_order: + - fastqc: + name: "PREPROCESS: FastQC (raw reads)" + info: "This section of the report shows FastQC results for the raw reads before adapter trimming." + path_filters: + - "./fastqc/*.zip" + - fastp: + name: "PREPROCESS: fastp (adapter trimming)" + info: "This section of the report shows fastp results for reads after adapter and quality trimming." + - kraken: + name: "PREPROCESS: Kraken 2" + info: "This section of the report shows Kraken 2 classification results for reads after adapter trimming with fastp." + - bowtie2: + name: "VARIANTS: Bowtie 2" + info: "This section of the report shows Bowtie 2 mapping results for reads after adapter trimming and quality trimming." + - samtools: + name: "VARIANTS: SAMTools (raw)" + anchor: "samtools_bowtie2" + info: "This section of the report shows SAMTools counts/statistics after mapping with Bowtie 2." + path_filters: + - "./bowtie2/*" + - samtools: + name: "VARIANTS: SAMTools (iVar)" + anchor: "samtools_ivar" + info: "This section of the report shows SAMTools counts/statistics after primer sequence removal with iVar." + path_filters: + - "./ivar_trim/*" + - samtools: + name: "VARIANTS: SAMTools (MarkDuplicates)" + anchor: "samtools_markduplicates" + info: "This section of the report shows SAMTools counts/statistics after duplicate removal with picard MarkDuplicates." + path_filters: + - "./picard_markduplicates/*" + - mosdepth: + name: "VARIANTS: mosdepth" + info: "This section of the report shows genome-wide coverage metrics generated by mosdepth." + - pangolin: + name: "VARIANTS: Pangolin" + info: "This section of the report shows Pangolin lineage analysis results for the called variants." 
+ path_filters: + - "./variants/*.pangolin.csv" + - freyja: + name: "VARIANTS: Freyja" + info: "This section of the report shows relative lineage abundances from mixed SARS-CoV-2 samples from Freyja demix." + path_filters: + - "./freyja_demix/*.tsv" + - bcftools: + name: "VARIANTS: BCFTools" + info: "This section of the report shows BCFTools stats results for the called variants." + path_filters: + - "./variants/*.txt" + - snpeff: + name: "VARIANTS: SnpEff" + info: "This section of the report shows SnpEff results for the called variants passing filters (Defaults: 0.25 allele frequency, minimum quality score = 20 and minimum position depth = 10). Some variants may have more than one annotation respect to genomic region, impact or effect, leading to differences in the number of variants respect to the vcf file." + path_filters: + - "./variants/*.csv" + - quast: + name: "VARIANTS: QUAST" + anchor: "quast_variants" + info: "This section of the report shows QUAST QC results for the consensus sequence." + path_filters: + - "./variants/*.tsv" + - cutadapt: + name: "ASSEMBLY: Cutadapt (primer trimming)" + info: "This section of the report shows Cutadapt results for reads after primer sequence trimming." + - quast: + name: "ASSEMBLY: QUAST (SPAdes)" + anchor: "quast_spades" + info: "This section of the report shows QUAST results from SPAdes de novo assembly." + path_filters: + - "./assembly_spades/*.tsv" + - quast: + name: "ASSEMBLY: QUAST (Unicycler)" + anchor: "quast_unicycler" + info: "This section of the report shows QUAST results from Unicycler de novo assembly." + path_filters: + - "./assembly_unicycler/*.tsv" + - quast: + name: "ASSEMBLY: QUAST (minia)" + anchor: "quast_minia" + info: "This section of the report shows QUAST results from minia de novo assembly." 
+ path_filters: + - "./assembly_minia/*.tsv" + +report_section_order: + fail_mapped_reads: + after: summary_variants_metrics + fail_mapped_samples: + after: summary_variants_metrics + summary_assembly_metrics: + before: summary_variants_metrics + amplicon_heatmap: + before: summary_assembly_metrics + ivar_variants: + before: mosdepth + software_versions: + order: -1001 + nf-core-viralrecon-summary: + order: -1002 + +bcftools: + collapse_complementary_changes: true + +# See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml +custom_data: + amplicon_heatmap: + section_name: "Amplicon coverage heatmap" + description: "Heatmap to show median log10(coverage+1) per amplicon across samples." + plot_type: "heatmap" + pconfig: + id: "amplicon_heatmap" + xTitle: "Amplicon" + namespace: "Heatmap to show median log10(coverage+1) per amplicon across samples" + square: False + colstops: + [ + [0, "#440154"], + [0.05, "#471365"], + [0.1, "#482475"], + [0.15, "#463480"], + [0.2, "#414487"], + [0.25, "#3b528b"], + [0.3, "#355f8d"], + [0.35, "#2f6c8e"], + [0.4, "#2a788e"], + [0.45, "#25848e"], + [0.5, "#21918c"], + [0.55, "#1e9c89"], + [0.6, "#22a884"], + [0.65, "#2fb47c"], + [0.7, "#44bf70"], + [0.75, "#5ec962"], + [0.8, "#7ad151"], + [0.85, "#9bd93c"], + [0.9, "#bddf26"], + [0.95, "#dfe318"], + [1, "#fde725"], + ] + summary_variants_metrics: + section_name: "Variant calling metrics" + description: "generated by the nf-core/viralrecon pipeline" + plot_type: "table" + headers: + "# Input reads": + description: "Total number of reads in raw fastq file" + format: "{:,.0f}" + "% Non-host reads (Kraken 2)": + description: "Total number of non-host reads identified by Kraken2" + format: "{:,.2f}" + "# Trimmed reads (fastp)": + description: "Total number of reads remaining after adapter/quality trimming with fastp" + format: "{:,.0f}" + "# Mapped reads": + description: "Total number of Bowtie2 mapped reads relative 
to the viral genome" + format: "{:,.0f}" + "% Mapped reads": + description: "Percentage of Bowtie2 mapped reads relative to the viral genome" + format: "{:,.2f}" + "# Trimmed reads (iVar)": + description: "Total number of reads remaining after primer trimming with iVar" + format: "{:,.0f}" + "Coverage median": + description: "Median coverage calculated by mosdepth" + format: "{:,.2f}" + "% Coverage > 1x": + description: "Coverage > 1x calculated by mosdepth" + format: "{:,.2f}" + "% Coverage > 10x": + description: "Coverage > 10x calculated by mosdepth" + format: "{:,.2f}" + "# SNPs": + description: "Total number of SNPs" + format: "{:,.0f}" + "# INDELs": + description: "Total number of INDELs" + format: "{:,.0f}" + "# Missense variants": + description: "Total number of variants identified as missense mutations with SnpEff" + format: "{:,.0f}" + "# Ns per 100kb consensus": + description: "Number of N bases per 100kb in consensus sequence" + format: "{:,.2f}" + "Pangolin lineage": + description: "Pangolin lineage inferred from the consensus sequence" + "Nextclade clade": + description: "Nextclade clade inferred from the consensus sequence" + pconfig: + id: "summary_variants_metrics_plot" + table_title: "Variant calling metrics" + namespace: "Variant calling metrics" + only_defined_headers: False + format: "{:.0f}" + summary_assembly_metrics: + section_name: "De novo assembly metrics" + description: "generated by the nf-core/viralrecon pipeline" + plot_type: "table" + headers: + "# Input reads": + description: "Total number of reads in raw fastq file" + format: "{:,.0f}" + "# Trimmed reads (Cutadapt)": + description: "Total number of reads remaining after adapter/quality trimming with fastp" + format: "{:,.0f}" + "% Non-host reads (Kraken 2)": + description: "Total number of non-host reads identified by Kraken2" + format: "{:,.2f}" + "# Contigs (SPAdes)": + description: "Total number of contigs in SPAdes assembly as calculated by QUAST" + format: "{:,.0f}" + "Largest 
contig (SPAdes)": + description: "Size of largest contig in SPAdes assembly as calculated by QUAST" + format: "{:,.0f}" + "% Genome fraction (SPAdes)": + description: "% genome fraction for SPAdes assembly as calculated by QUAST" + format: "{:,.2f}" + "N50 (SPAdes)": + description: "N50 metric for SPAdes assembly as calculated by QUAST" + format: "{:,.2f}" + "# Contigs (Unicycler)": + description: "Total number of contigs in Unicycler assembly as calculated by QUAST" + format: "{:,.0f}" + "Largest contig (Unicycler)": + description: "Size of largest contig in Unicycler assembly as calculated by QUAST" + format: "{:,.0f}" + "% Genome fraction (Unicycler)": + description: "% genome fraction for Unicycler assembly as calculated by QUAST" + format: "{:,.2f}" + "N50 (Unicycler)": + description: "N50 metric for Unicycler assembly as calculated by QUAST" + format: "{:,.2f}" + "# Contigs (minia)": + description: "Total number of contigs in minia assembly as calculated by QUAST" + format: "{:,.0f}" + "Largest contig (minia)": + description: "Size of largest contig in minia assembly as calculated by QUAST" + format: "{:,.0f}" + "% Genome fraction (minia)": + description: "% genome fraction for minia assembly as calculated by QUAST" + format: "{:,.2f}" + "N50 (minia)": + description: "N50 metric for minia assembly as calculated by QUAST" + format: "{:,.2f}" + pconfig: + id: "summary_assembly_metrics_plot" + table_title: "De novo assembly metrics" + namespace: "De novo assembly metrics" + only_defined_headers: False + format: "{:.0f}" + fail_mapped_reads: + section_name: "WARNING: Fail Reads Check" + description: "List of samples that had no reads after adapter trimming, and hence were ignored for the downstream processing steps." 
+ plot_type: "table" + pconfig: + id: "fail_mapped_reads_table" + table_title: "Samples failed read threshold" + namespace: "Samples failed read threshold" + format: "{:,.0f}" + fail_mapped_samples: + section_name: "WARNING: Fail Alignment Check" + description: "List of samples that failed the Bowtie2 minimum mapped reads threshold specified via the '--min_mapped_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "table" + pconfig: + id: "fail_mapped_samples_table" + table_title: "Samples failed mapped read threshold" + namespace: "Samples failed mapping read threshold" + format: "{:,.0f}" + +extra_fn_clean_exts: + - ".markduplicates" + - ".unclassified" + - "_MN908947.3" + - " MN908947.3" + +extra_fn_clean_trim: + - "Consensus_" + +# # Customise the module search patterns to speed up execution time +# # - Skip module sub-tools that we are not interested in +# # - Replace file-content searching with filename pattern searching +# # - Don't add anything that is the same as the MultiQC default +# # See https://multiqc.info/docs/#optimise-file-search-patterns for details +sp: + fastp: + fn: "*.fastp.json" + bowtie2: + fn: "*.bowtie2.log" + mosdepth/global_dist: + fn: "*.global.dist.txt" + cutadapt: + fn: "*.cutadapt.log" diff --git a/assets/multiqc_config_nanopore.yml b/assets/multiqc_config_nanopore.yml new file mode 100644 index 00000000..ad155d94 --- /dev/null +++ b/assets/multiqc_config_nanopore.yml @@ -0,0 +1,183 @@ +report_comment: > + This report has been generated by the nf-core/viralrecon + analysis pipeline. For information about how to interpret these results, please see the + documentation. 
+ +data_format: "yaml" + +max_table_rows: 10000 + +run_modules: + - custom_content + - pycoqc + - samtools + - bcftools + - mosdepth + - snpeff + - quast + - pangolin + - freyja + +module_order: + - pangolin: + path_filters: + - "*.pangolin.csv" + - pycoqc + - samtools: + path_filters: + - "./samtools_stats/*" + - mosdepth + - freyja: + path_filters: + - "./freyja_demix/*.tsv" + - bcftools: + path_filters: + - "./bcftools_stats/*.txt" + - snpeff: + path_filters: + - "./snpeff/*.csv" + - quast: + path_filters: + - "./quast/*.tsv" + +report_section_order: + fail_barcodes_no_sample: + after: summary_variants_metrics + fail_no_barcode_samples: + after: summary_variants_metrics + fail_barcode_count_samples: + after: summary_variants_metrics + fail_guppyplex_count_samples: + after: summary_variants_metrics + amplicon_heatmap: + before: summary_variants_metrics + software_versions: + order: -1001 + nf-core-viralrecon-summary: + order: -1002 + +bcftools: + collapse_complementary_changes: true + +# See https://github.com/ewels/MultiQC_TestData/blob/master/data/custom_content/with_config/table_headerconfig/multiqc_config.yaml +custom_data: + fail_barcodes_no_sample: + section_name: "WARNING: Barcodes without sample id" + description: "List of barcodes that appear to have reads in the '--fastq_dir' folder but were not specified in mappings samplesheet via '--input'." + plot_type: "table" + pconfig: + id: "fail_barcodes_no_sample_table" + table_title: "Barcodes without sample id" + namespace: "Barcodes without sample id" + format: "{:,.0f}" + fail_no_barcode_samples: + section_name: "WARNING: No barcode" + description: "List of samples that were specified in mappings samplesheet via '--input' but didn't have an associated barcode in the '--fastq_dir' folder." 
+ plot_type: "table" + pconfig: + id: "fail_no_barcode_samples_table" + table_title: "Sample ids without barcode" + namespace: "Sample ids without barcode" + fail_barcode_count_samples: + section_name: "WARNING: Fail barcode read count" + description: "Samples that failed the minimum number of reads required per barcode specified via the '--min_barcode_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "bargraph" + pconfig: + id: "fail_barcode_count_samples_table" + table_title: "Samples failed barcode read count threshold" + namespace: "Samples failed barcode read count threshold" + format: "{:,.0f}" + fail_guppyplex_count_samples: + section_name: "WARNING: Fail guppyplex read count" + description: "Samples that failed the minimum number of reads required per sample specified via the '--min_guppyplex_reads' parameter, and hence were ignored for the downstream processing steps." + plot_type: "bargraph" + pconfig: + id: "fail_guppyplex_count_samples_table" + table_title: "Samples failed artic guppyplex read count threshold" + namespace: "Samples failed artic guppyplex read count threshold" + format: "{:,.0f}" + amplicon_heatmap: + section_name: "Amplicon coverage heatmap" + description: "Heatmap to show median log10(coverage+1) per amplicon across samples." 
+ plot_type: "heatmap" + pconfig: + id: "amplicon_heatmap" + xTitle: "Amplicon" + namespace: "Heatmap to show median log10(coverage+1) per amplicon across samples" + square: False + colstops: + [ + [0, "#440154"], + [0.05, "#471365"], + [0.1, "#482475"], + [0.15, "#463480"], + [0.2, "#414487"], + [0.25, "#3b528b"], + [0.3, "#355f8d"], + [0.35, "#2f6c8e"], + [0.4, "#2a788e"], + [0.45, "#25848e"], + [0.5, "#21918c"], + [0.55, "#1e9c89"], + [0.6, "#22a884"], + [0.65, "#2fb47c"], + [0.7, "#44bf70"], + [0.75, "#5ec962"], + [0.8, "#7ad151"], + [0.85, "#9bd93c"], + [0.9, "#bddf26"], + [0.95, "#dfe318"], + [1, "#fde725"], + ] + summary_variants_metrics: + section_name: "Variant calling metrics" + description: "generated by the nf-core/viralrecon pipeline" + plot_type: "table" + headers: + "# Mapped reads": + description: "Total number of mapped reads relative to the viral genome" + format: "{:,.0f}" + "Coverage median": + description: "Median coverage calculated by mosdepth" + format: "{:,.2f}" + "% Coverage > 1x": + description: "Coverage > 1x calculated by mosdepth" + format: "{:,.2f}" + "% Coverage > 10x": + description: "Coverage > 10x calculated by mosdepth" + format: "{:,.2f}" + "# SNPs": + description: "Total number of SNPs called by artic minion that pass quality filters" + format: "{:,.0f}" + "# INDELs": + description: "Total number of INDELs called by artic minion that pass quality filters" + format: "{:,.0f}" + "# Missense variants": + description: "Total number of missense mutations identified by variant annotation with SnpEff" + format: "{:,.0f}" + "# Ns per 100kb consensus": + description: "Number of N bases per 100kb in consensus sequence generated by artic minion" + format: "{:,.2f}" + "Pangolin lineage": + description: "Pangolin lineage inferred from the consensus sequence generated by artic minion" + "Nextclade clade": + description: "Nextclade clade inferred from the consensus sequence generated by artic minion" + pconfig: + id: 
"summary_variants_metrics_plot_table" + table_title: "Variant calling metrics" + namespace: "Variant calling metrics" + only_defined_headers: False + format: "{:,.0f}" + +extra_fn_clean_exts: + - ".pass" + - "_ARTIC" + +# # Customise the module search patterns to speed up execution time +# # - Skip module sub-tools that we are not interested in +# # - Replace file-content searching with filename pattern searching +# # - Don't add anything that is the same as the MultiQC default +# # See https://multiqc.info/docs/#optimise-file-search-patterns for details +sp: + mosdepth/global_dist: + fn: "*.global.dist.txt" diff --git a/assets/samplesheet.csv b/assets/samplesheet.csv deleted file mode 100644 index 5f653ab7..00000000 --- a/assets/samplesheet.csv +++ /dev/null @@ -1,3 +0,0 @@ -sample,fastq_1,fastq_2 -SAMPLE_PAIRED_END,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz -SAMPLE_SINGLE_END,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/samplesheet_illumina.csv b/assets/samplesheet_illumina.csv new file mode 100755 index 00000000..dc09508c --- /dev/null +++ b/assets/samplesheet_illumina.csv @@ -0,0 +1,3 @@ +sample,fastq_1,fastq_2 +SAMPLE_PE,/path/to/fastq/files/AEG588A1_S1_L002_R1_001.fastq.gz,/path/to/fastq/files/AEG588A1_S1_L002_R2_001.fastq.gz +SAMPLE_SE,/path/to/fastq/files/AEG588A4_S4_L003_R1_001.fastq.gz, diff --git a/assets/samplesheet_nanopore.csv b/assets/samplesheet_nanopore.csv new file mode 100755 index 00000000..28f5d7aa --- /dev/null +++ b/assets/samplesheet_nanopore.csv @@ -0,0 +1,6 @@ +sample,barcode +21X983255,1 +70H209408,2 +49Y807476,3 +70N209581,4 +CONTROL,5 diff --git a/assets/schema_input.json b/assets/schema_input.json index 445430dd..55c9333c 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -26,8 +26,13 @@ "exists": true, "pattern": "^\\S+\\.f(ast)?q\\.gz$", "errorMessage": "FastQ file for reads 2 cannot contain spaces and must have 
extension '.fq.gz' or '.fastq.gz'" + }, + "barcode": { + "type": "integer", + "pattern": "^\\d+$", + "errorMessage": "Barcode must be provided and must be an integer" } }, - "required": ["sample", "fastq_1"] + "required": ["sample"] } } diff --git a/bin/collapse_primer_bed.py b/bin/collapse_primer_bed.py new file mode 100755 index 00000000..d04d7744 --- /dev/null +++ b/bin/collapse_primer_bed.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +import os +import sys +import re +import errno +import argparse + + +def parse_args(args=None): + Description = "Collapse LEFT/RIGHT primers in primer BED to single intervals." + Epilog = """Example usage: python collapse_primer_bed.py """ + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("FILE_IN", help="Input BED file.") + parser.add_argument("FILE_OUT", help="Output BED file.") + parser.add_argument( + "-lp", + "--left_primer_suffix", + type=str, + dest="LEFT_PRIMER_SUFFIX", + default="_LEFT", + help="Suffix for left primer in name column of BED file (default: '_LEFT').", + ) + parser.add_argument( + "-rp", + "--right_primer_suffix", + type=str, + dest="RIGHT_PRIMER_SUFFIX", + default="_RIGHT", + help="Suffix for right primer in name column of BED file (default: '_RIGHT').", + ) + return parser.parse_args(args) + + +def make_dir(path): + if not len(path) == 0: + try: + os.makedirs(path) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise + + +## See https://stackoverflow.com/a/480227 +def uniqify(seq): + seen = set() + seen_add = seen.add + return [x for x in seq if not (x in seen or seen_add(x))] + + +def collapse_primer_bed(file_in, file_out, left_primer_suffix, right_primer_suffix): + start_pos_list = [] + interval_dict = {} + fin = open(file_in, "r") + while True: + line = fin.readline() + if line: + chrom, start, end, name, score, strand = line.strip().split("\t") + primer = re.sub(r"(?:{}|{}).*".format(left_primer_suffix, right_primer_suffix), "", 
name) + if primer not in interval_dict: + interval_dict[primer] = [] + interval_dict[primer].append((chrom, int(start), int(end), score)) + start_pos_list.append((int(start), primer)) + else: + fin.close() + break + + fout = open(file_out, "w") + for primer in uniqify([x[1] for x in sorted(start_pos_list)]): + pos_list = [item for elem in interval_dict[primer] for item in elem[1:3]] + chrom = interval_dict[primer][0][0] + start = min(pos_list) + end = max(pos_list) + strand = "+" + score = interval_dict[primer][0][3] + fout.write(f"{chrom}\t{start}\t{end}\t{primer}\t{score}\t{strand}\n") + fout.close() + + +def main(args=None): + args = parse_args(args) + collapse_primer_bed(args.FILE_IN, args.FILE_OUT, args.LEFT_PRIMER_SUFFIX, args.RIGHT_PRIMER_SUFFIX) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/fastq_dir_to_samplesheet.py b/bin/fastq_dir_to_samplesheet.py new file mode 100755 index 00000000..b2e08eed --- /dev/null +++ b/bin/fastq_dir_to_samplesheet.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python + +import os +import sys +import glob +import argparse + + +def parse_args(args=None): + Description = "Generate nf-core/viralrecon samplesheet from a directory of FastQ files." 
+ Epilog = "Example usage: python fastq_dir_to_samplesheet.py " + + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument("FASTQ_DIR", help="Folder containing raw FastQ files.") + parser.add_argument("SAMPLESHEET_FILE", help="Output samplesheet file.") + parser.add_argument( + "-r1", + "--read1_extension", + type=str, + dest="READ1_EXTENSION", + default="_R1_001.fastq.gz", + help="File extension for read 1.", + ) + parser.add_argument( + "-r2", + "--read2_extension", + type=str, + dest="READ2_EXTENSION", + default="_R2_001.fastq.gz", + help="File extension for read 2.", + ) + parser.add_argument( + "-se", + "--single_end", + dest="SINGLE_END", + action="store_true", + help="Single-end information will be auto-detected but this option forces paired-end FastQ files to be treated as single-end so only read 1 information is included in the samplesheet.", + ) + parser.add_argument( + "-sn", + "--sanitise_name", + dest="SANITISE_NAME", + action="store_true", + help="Whether to further sanitise FastQ file name to get sample id. 
Used in conjunction with --sanitise_name_delimiter and --sanitise_name_index.", + ) + parser.add_argument( + "-sd", + "--sanitise_name_delimiter", + type=str, + dest="SANITISE_NAME_DELIMITER", + default="_", + help="Delimiter to use to sanitise sample name.", + ) + parser.add_argument( + "-si", + "--sanitise_name_index", + type=int, + dest="SANITISE_NAME_INDEX", + default=1, + help="After splitting FastQ file name by --sanitise_name_delimiter all elements before this index (1-based) will be joined to create final sample name.", + ) + return parser.parse_args(args) + + +def fastq_dir_to_samplesheet( + fastq_dir, + samplesheet_file, + read1_extension="_R1_001.fastq.gz", + read2_extension="_R2_001.fastq.gz", + single_end=False, + sanitise_name=False, + sanitise_name_delimiter="_", + sanitise_name_index=1, +): + def sanitize_sample(path, extension): + """Retrieve sample id from filename""" + sample = os.path.basename(path).replace(extension, "") + if sanitise_name: + sample = sanitise_name_delimiter.join( + os.path.basename(path).split(sanitise_name_delimiter)[:sanitise_name_index] + ) + return sample + + def get_fastqs(extension): + """ + Needs to be sorted to ensure R1 and R2 are in the same order + when merging technical replicates. Glob is not guaranteed to produce + sorted results. 
+ See also https://stackoverflow.com/questions/6773584/how-is-pythons-glob-glob-ordered + """ + return sorted(glob.glob(os.path.join(fastq_dir, f"*{extension}"), recursive=False)) + + read_dict = {} + + ## Get read 1 files + for read1_file in get_fastqs(read1_extension): + sample = sanitize_sample(read1_file, read1_extension) + if sample not in read_dict: + read_dict[sample] = {"R1": [], "R2": []} + read_dict[sample]["R1"].append(read1_file) + + ## Get read 2 files + if not single_end: + for read2_file in get_fastqs(read2_extension): + sample = sanitize_sample(read2_file, read2_extension) + read_dict[sample]["R2"].append(read2_file) + + ## Write to file + if len(read_dict) > 0: + out_dir = os.path.dirname(samplesheet_file) + if out_dir and not os.path.exists(out_dir): + os.makedirs(out_dir) + + with open(samplesheet_file, "w") as fout: + header = ["sample", "fastq_1", "fastq_2"] + fout.write(",".join(header) + "\n") + for sample, reads in sorted(read_dict.items()): + for idx, read_1 in enumerate(reads["R1"]): + read_2 = "" + if idx < len(reads["R2"]): + read_2 = reads["R2"][idx] + sample_info = ",".join([sample, read_1, read_2]) + fout.write(f"{sample_info}\n") + else: + error_str = "\nWARNING: No FastQ files found so samplesheet has not been created!\n\n" + error_str += "Please check the values provided for the:\n" + error_str += " - Path to the directory containing the FastQ files\n" + error_str += " - '--read1_extension' parameter\n" + error_str += " - '--read2_extension' parameter\n" + print(error_str) + sys.exit(1) + + +def main(args=None): + args = parse_args(args) + + fastq_dir_to_samplesheet( + fastq_dir=args.FASTQ_DIR, + samplesheet_file=args.SAMPLESHEET_FILE, + read1_extension=args.READ1_EXTENSION, + read2_extension=args.READ2_EXTENSION, + single_end=args.SINGLE_END, + sanitise_name=args.SANITISE_NAME, + sanitise_name_delimiter=args.SANITISE_NAME_DELIMITER, + sanitise_name_index=args.SANITISE_NAME_INDEX, + ) + + +if __name__ == "__main__": + 
def parse_args(args=None):
    """Parse command-line arguments for the iVar TSV -> VCF converter.

    Args:
        args: optional list of argument strings (defaults to sys.argv[1:]).

    Returns:
        argparse.Namespace with the parsed options.
    """
    Description = "Convert iVar variants TSV file to VCF format."
    Epilog = """Example usage: python ivar_variants_to_vcf.py """

    parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
    parser.add_argument("file_in", help="Input iVar TSV file.")
    parser.add_argument("file_out", help="Full path to output VCF file.")
    parser.add_argument(
        "-po",
        "--pass_only",
        help="Only output variants that PASS filters.",
        action="store_true",
    )
    parser.add_argument(
        "-af",
        "--allele_freq_threshold",
        type=float,
        default=0,
        help="Only output variants where allele frequency is greater than this number (default: 0).",
    )
    # note: `action="store_true"` already defaults to False; the redundant
    # explicit `default=False` was dropped.
    parser.add_argument(
        "-is",
        "--ignore_strand_bias",
        help="Does not take strand bias into account, use this option when not using amplicon sequencing.",
        action="store_true",
    )
    parser.add_argument(
        "-ic",
        "--ignore_merge_codons",
        help="Output variants without taking into account if consecutive positions belong to the same codon.",
        action="store_true",
    )
    parser.add_argument(
        "-f",
        "--fasta",
        type=str,
        default=None,
        # fix: typo "lenght" -> "length" in user-facing help text
        help="Fasta file used in mapping and variant calling for vcf header reference genome length info.",
    )
    return parser.parse_args(args)
def parse_ivar_line(line):
    """Parse one record from an iVar variants TSV into VCF-ready fields.

    Args:
        line: raw tab-separated iVar line (trailing newline allowed).

    Returns:
        Tuple (CHROM, POS, ID, REF, ALT, QUAL, INFO, FORMAT, REF_CODON,
        ALT_CODON, pass_test, var_type) where FORMAT is the list
        [REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ]
        and var_type is one of "SNP", "INS", "DEL".
    """
    line = line.strip("\n").split("\t")

    ## Assign initial fields to variables
    CHROM = line[0]
    POS = line[1]
    ID = "."
    REF = line[2]
    ALT = line[3]

    ## REF/ALT depths and quals
    try:
        REF_DP = int(line[4])
    except ValueError:
        # fix: previously printed the raw line and called exit(-1); fail with
        # a single clear message (sys.exit writes to stderr, exits non-zero).
        sys.exit(f"ERROR: invalid REF_DP value '{line[4]}' in iVar line: {line}")
    REF_RV = int(line[5])
    REF_QUAL = int(line[6])
    ALT_DP = int(line[7])
    ALT_RV = int(line[8])
    ALT_QUAL = int(line[9])
    ALT_FREQ = float(line[10])
    # note: the original also computed REF_FW/ALT_FW (forward depths) but
    # never used them; they were removed as dead locals.
    FORMAT = [REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ]

    ## Codon annotation
    REF_CODON = line[15]
    ALT_CODON = line[17]

    ## Determine variant type from the iVar ALT notation:
    ## '+XXX' = insertion, '-XXX' = deletion, otherwise SNP.
    var_type = "SNP"
    if ALT[0] == "+":
        ALT = REF + ALT[1:]
        var_type = "INS"
    elif ALT[0] == "-":
        REF += ALT[1:]
        ALT = line[2]
        var_type = "DEL"

    QUAL = "."

    ## Total depth for INFO (column 11 may be float-formatted, e.g. "642.0").
    INFO = f"DP={int(float(line[11]))}"
    pass_test = line[13]

    return (
        CHROM,
        POS,
        ID,
        REF,
        ALT,
        QUAL,
        INFO,
        FORMAT,
        REF_CODON,
        ALT_CODON,
        pass_test,
        var_type,
    )


def ivar_filter(pass_test):
    """Translate iVar's Fisher-test PASS flag into a VCF FILTER token.

    Returns False when the variant passed ("TRUE"), otherwise the filter
    code "ft".
    """
    return False if pass_test == "TRUE" else "ft"
def write_vcf_header(ref, ignore_strand_bias, file_out, filename):
    """
    Description:
        Write vcf header for VCFv4.2
    input:
        ref - (optional), ref in fasta format
        ignore_strand_bias - if no strand-bias is calculated [True, False]
        file_out - output file_in
        filename - name of the output file
    return:
        Nothing.
    """
    ## Define VCF header
    header_source = ["##fileformat=VCFv4.2", "##source=iVar"]
    if ref:
        header_contig = []
        # NOTE(review): the contig entry below has no per-record payload and the
        # loop variable `record` is unused — upstream this line carries
        # angle-bracket metadata (e.g. <ID=...,length=...>) that appears to
        # have been stripped during extraction; confirm against the upstream
        # ivar_variants_to_vcf.py before relying on this header.
        for record in SeqIO.parse(ref, "fasta"):
            header_contig += ["##contig="]

        header_source += header_contig

    # NOTE(review): the INFO/FILTER/FORMAT entries below likewise look
    # truncated (VCF meta-lines normally contain <ID=...,Number=...,Type=...>
    # payloads) — TODO confirm and restore from the upstream script.
    header_info = ['##INFO=']
    header_filter = [
        '##FILTER=',
        '##FILTER= 0.05">',
    ]
    header_format = [
        '##FORMAT=',
        '##FORMAT=',
        '##FORMAT=',
        '##FORMAT=',
        '##FORMAT=',
        '##FORMAT=',
        '##FORMAT=',
        '##FORMAT=',
    ]
    # NOTE(review): the sample column below reads "(unknown)" — presumably the
    # original f-string interpolated {filename} here (which is otherwise
    # unused in this function); verify against upstream.
    header_cols = [f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t(unknown)"]
    if not ignore_strand_bias:
        # Strand-bias FILTER entry is only emitted when the sb filter is active.
        header_filter += ['##FILTER=']

    header = header_source + header_info + header_filter + header_format + header_cols
    fout = open(file_out, "w")
    fout.write("\n".join(header) + "\n")
    fout.close()
+ """ + sample = f'1:{":".join(str(x) for x in format)}' + format = "GT:REF_DP:REF_RV:REF_QUAL:ALT_DP:ALT_RV:ALT_QUAL:ALT_FREQ" + + oline = ( + chrom + + "\t" + + pos + + "\t" + + id + + "\t" + + ref + + "\t" + + alt + + "\t" + + qual + + "\t" + + filter + + "\t" + + info + + "\t" + + format + + "\t" + + sample + + "\n" + ) + fout = open(file_out, "a") + fout.write(oline) + fout.close() + + +############################ +## MERGE CODONS FUNCTIONS ## +############################ + + +def check_consecutive(mylist): + """ + Description: + This function checks a list of numbers and returns how many items are consecutive. + input: + my_list - A list of integers + return: + Number of items consecutive in the list - [False, 2, 3,..] + """ + # getting first index of tuple for consecutive checking + my_list = list(map(int, [i[0] for i in mylist])) + ## Check if the list contains consecutive numbers + if len(my_list) == 1: + return False + elif sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): + return len(my_list) + else: + ## If not, and the list is > 1, remove the last item and reevaluate. + if len(my_list) > 2: + my_list.pop() + if sorted(my_list) == list(range(min(my_list), max(my_list) + 1)): + return len(my_list) + else: + return False + return False + + +def get_diff_position(seq1, seq2): + """ + Description: + Function to compare two codon nucleotide sequences (size 3) and retuns the position where it differs. + Input: + seq1 - string size 3 [A,T,C,G]. Ex. "ATC" + seq2 - string size 3 [A,T,C,G]. Ex. 
"ACC" + Returns: + Returns position where seq1 != seq2 + """ + # If codon is NA treat as not same codon + if seq1 == "NA": + return 2 + + ind_diff = [i for i in range(len(seq1)) if seq1[i] != seq2[i]] + if len(ind_diff) > 1: + print("There has been an issue, more than one difference between the seqs.") + return False + else: + return ind_diff[0] + + +def check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt): + """ + Description: + Logic for determine if variant lines need to be collapsed into one determining + if they are consecutive and belong to the same codon. + Input: + qpos - list of positions. Ex. [4441, 4442, 4443] + fe_codon_ref - first position codon annotation for ref. Ex. "ATG" + fe_codon_alt - first position codon annotation for alt. Ex. "AGG" + Returns: + Returns num_collapse. Number of lines that need to be collapsed into one. + """ + # Are two positions in the queue consecutive? + # q_pos = [4441, 4442, 5067] + num_collapse = 0 + if check_consecutive(list(q_pos)) == 2: + ## If the first position is not on the third position of the codon they are in the same codon. + if get_diff_position(fe_codon_ref, fe_codon_alt) != 2: + num_collapse = 2 + else: + num_collapse = 1 + # Are the three positions in the queue consecutive? + # q_pos = [4441, 4442, 4443] + elif check_consecutive(list(q_pos)) == 3: + ## we check the first position in which codon position is to process it acordingly. + # If first position is in the first codon position all three positions belong to the same codon. + if get_diff_position(fe_codon_ref, fe_codon_alt) == 0: + num_collapse = 3 + # If first position is in the second codon position, we have the two first positions belonging to the same codon and the last one independent. + elif get_diff_position(fe_codon_ref, fe_codon_alt) == 1: + num_collapse = 2 + ## Finally if we have the first position in the last codon position, we write first position and left the remaining two to be evaluated in the next iteration. 
+ elif get_diff_position(fe_codon_ref, fe_codon_alt) == 2: + num_collapse = 1 + # If no consecutive process only one line. + elif check_consecutive(list(q_pos)) == False: + num_collapse = 1 + + return num_collapse + + +def process_variants(variants, num_collapse): + """ + Description: + The function set the variables acordingly to the lines to collapse do to consecutive variants. + Input: + variants - Dict with var lines. + num_collapse - number of lines to collapse [2,3] + Returns:: + Vars fixed: chrom, pos, id, ref, alt, qual, filter, info, format + """ + # Collapsed variant parameters equal to first variant + key_list = ["chrom", "pos", "id", "qual", "filter", "info", "format"] + chrom, pos, id, qual, filter, info, format = [variants[next(iter(variants))][key] for key in key_list] + + # If no consecutive, process one variant line + # If two consecutive, process two variant lines into one + # If three consecutive process three variant lines and write one + ref = "" + alt = "" + iter_variants = iter(variants) + for _ in range(num_collapse): # fixed notation + var = next(iter_variants) + ref += variants[var]["ref"] + alt += variants[var]["alt"] + + return chrom, pos, id, ref, alt, qual, filter, info, format + + +def main(args=None): + # Process args + args = parse_args(args) + + # Initialize vars + filename = os.path.splitext(args.file_in)[0] + out_dir = os.path.dirname(args.file_out) + var_list = [] # store variants + var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} # variant counts + variants = OrderedDict() # variant dict (merge codon) + q_pos = deque([], maxlen=3) # pos fifo queue (merge codon) + last_pos = "" + + # Create output directory + make_dir(out_dir) + + ############################## + ## Write vcf header to file ## + ############################## + write_vcf_header(args.fasta, args.ignore_strand_bias, args.file_out, filename) + + ################################# + ## Read and process input file ## + ################################# + with 
open(args.file_in, "r") as fin: + for line in fin: + if "REGION" not in line: + ################ + ## Parse line ## + ################ + ## format= + # [REF_DP, REF_RV, REF_QUAL, ALT_DP, ALT_RV, ALT_QUAL, ALT_FREQ] + write_line = True + ( + chrom, + pos, + id, + ref, + alt, + qual, + info, + format, + ref_codon, + alt_codon, + pass_test, + var_type, + ) = parse_ivar_line(line) + + ## If pos is duplicated due to annotation skip lines + if pos == last_pos: + continue + + last_pos = pos + ##################### + ## Process filters ## + ##################### + ## ivar fisher test + filter = "" + if ivar_filter(pass_test): + filter = ivar_filter(pass_test) + ## strand-bias fisher test + if not args.ignore_strand_bias: + if strand_bias_filter(format): + if filter: + filter += ";" + strand_bias_filter(format) + else: + filter = strand_bias_filter(format) + + if not filter: + filter = "PASS" + + ##################### + ## Filter variants ## + ##################### + if args.pass_only and filter != "PASS": + write_line = False + ### AF filtering. 
ALT_DP/(ALT_DP+REF_DP) + if float(format[3] / (format[0] + format[3])) < args.allele_freq_threshold: + write_line = False + ### Duplication filter + if (chrom, pos, ref, alt) in var_list: + write_line = False + else: + var_list.append((chrom, pos, ref, alt)) + + ############################################################ + ## MERGE_CODONS ## + ## Merge consecutive variants belonging to the same codon ## + ############################################################ + if not args.ignore_merge_codons and var_type == "SNP": + ## re-fill queue and dict accordingly + q_pos.append((pos, var_type)) # adding type information + variants[(chrom, pos, ref, alt)] = { + "chrom": chrom, + "pos": pos, + "id": id, + "ref": ref, + "alt": alt, + "qual": qual, + "filter": filter, + "info": info, + "format": format, + "ref_codon": ref_codon, + "alt_codon": alt_codon, + } + + if len(q_pos) == q_pos.maxlen: + fe_codon_ref = variants[next(iter(variants))]["ref_codon"] + fe_codon_alt = variants[next(iter(variants))]["alt_codon"] + num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt) + ( + chrom, + pos, + id, + ref, + alt, + qual, + filter, + info, + format, + ) = process_variants(variants, num_collapse) + + ## Empty variants dict and queue accordingly + for _ in range(num_collapse): + variants.popitem(last=False) + q_pos.popleft() + else: + write_line = False + + ############################## + ## Write output to vcf file ## + ############################## + if write_line: + var_count_dict[var_type] += 1 + write_vcf_line( + chrom, + pos, + id, + ref, + alt, + filter, + qual, + info, + format, + args.file_out, + ) + + if not args.ignore_merge_codons: + ####################### + ## handle last lines ## + ####################### + while len(q_pos) > 0: + try: + fe_codon_ref = variants[next(iter(variants))]["ref_codon"] + fe_codon_alt = variants[next(iter(variants))]["alt_codon"] + except StopIteration: + break + else: + num_collapse = check_merge_codons(q_pos, 
def parse_args(args=None):
    """Parse command-line arguments for the BED-mask generator."""
    Description = "Find indels positions in bed file"
    Epilog = "Example usage: python make_bed_mask.py "
    parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
    parser.add_argument("VCF_IN", help="Input vcf file.")
    parser.add_argument("BED_IN", help="Input bed file.")
    parser.add_argument("BED_OUT", help="Name of the output new bed file.")
    return parser.parse_args(args)


def find_indels_vcf(vcf_in):
    """Scan a gzip-compressed VCF and collect indel positions.

    Args:
        vcf_in: path to a gzipped VCF file.

    Returns:
        Dict mapping POS (string) -> len(REF) for every record where REF and
        ALT lengths differ (i.e. an indel).
    """
    indels_pos_len = {}
    # "rt" yields decoded text lines directly (the original opened in binary
    # mode and decoded each line by hand).
    with gzip.open(vcf_in, "rt", encoding="utf-8") as f:
        for line in f:
            # Skip header lines; data lines contain no '#'.
            if "#" not in line:
                fields = re.split("\t", line)
                var_pos = fields[1]
                ref = fields[3]
                alt = fields[4]
                if len(ref) != len(alt):
                    indels_pos_len[var_pos] = len(ref)
    return indels_pos_len


def make_bed_mask(bed_in, bed_out, indels_pos_len):
    """Write a copy of *bed_in* keeping only intervals unaffected by indels.

    An interval is dropped when either of its endpoints falls inside an
    indel's span.

    Args:
        bed_in: input BED path (chrom, start, end per line, tab-separated).
        bed_out: output BED path.
        indels_pos_len: dict of indel position (string) -> REF length.
    """
    # fix: file handles are now closed via context managers (the output
    # handle was previously never closed).
    with open(bed_out, "w") as fout, open(bed_in) as b:
        for line in b:
            # fix: strip the trailing newline before splitting — previously it
            # stayed attached to end_pos and every kept line was written with
            # a blank line after it.
            fields = re.split("\t", line.rstrip("\n"))
            ref_genome, init_pos, end_pos = fields[0], fields[1], fields[2]
            keep = True
            for position, indel_len in indels_pos_len.items():
                indel_init = int(position)
                indel_end = indel_init + int(indel_len) - 1
                # NOTE(review): range() excludes indel_end itself, and an
                # interval that fully contains an indel (both endpoints
                # outside it) is NOT masked — both behaviours are preserved
                # from the original; confirm they are intended.
                if int(init_pos) in range(indel_init, indel_end) or int(end_pos) in range(
                    indel_init, indel_end
                ):
                    keep = False
                    break
            if keep:
                fout.write(f"{ref_genome}\t{init_pos}\t{end_pos}\n")


def main(args=None):
    """Entry point: read indels from the VCF and mask the BED accordingly."""
    args = parse_args(args)
    indels_pos_len = find_indels_vcf(args.VCF_IN)
    make_bed_mask(args.BED_IN, args.BED_OUT, indels_pos_len)


if __name__ == "__main__":
    sys.exit(main())
+ Epilog = """Example usage: python make_variants_long_table.py --bcftools_query_dir ./bcftools_query/ --snpsift_dir ./snpsift/ --pangolin_dir ./pangolin/""" + parser = argparse.ArgumentParser(description=Description, epilog=Epilog) + parser.add_argument( + "-bd", + "--bcftools_query_dir", + type=str, + default="./bcftools_query", + help="Directory containing output of BCFTools query for each sample (default: './bcftools_query').", + ) + parser.add_argument( + "-sd", + "--snpsift_dir", + type=str, + default="./snpsift", + help="Directory containing output of SnpSift for each sample (default: './snpsift').", + ) + parser.add_argument( + "-pd", + "--pangolin_dir", + type=str, + default="./pangolin", + help="Directory containing output of Pangolin for each sample (default: './pangolin').", + ) + parser.add_argument( + "-bs", + "--bcftools_file_suffix", + type=str, + default=".bcftools_query.txt", + help="Suffix to trim off BCFTools query file name to obtain sample name (default: '.bcftools_query.txt').", + ) + parser.add_argument( + "-ss", + "--snpsift_file_suffix", + type=str, + default=".snpsift.txt", + help="Suffix to trim off SnpSift file name to obtain sample name (default: '.snpsift.txt').", + ) + parser.add_argument( + "-ps", + "--pangolin_file_suffix", + type=str, + default=".pangolin.csv", + help="Suffix to trim off Pangolin file name to obtain sample name (default: '.pangolin.csv').", + ) + parser.add_argument( + "-of", + "--output_file", + type=str, + default="variants_long_table.csv", + help="Full path to output file (default: 'variants_long_table.csv').", + ) + parser.add_argument( + "-vc", "--variant_caller", type=str, default="ivar", help="Tool used to call the variants (default: 'ivar')." 
def make_dir(path):
    """Create the directory *path* if needed; an empty path is a no-op."""
    if path:
        try:
            os.makedirs(path)
        except OSError as err:
            # Only "already exists" is tolerated; anything else propagates.
            if err.errno != errno.EEXIST:
                raise


def get_file_dict(file_dir, file_suffix):
    """Map sample name -> file path for files in *file_dir* ending in *file_suffix*."""
    matches = glob.glob(os.path.join(file_dir, f"*{file_suffix}"))
    return {os.path.basename(path).removesuffix(file_suffix): path for path in matches}


def three_letter_aa_to_one(hgvs_three):
    """Rewrite an HGVS protein string from three-letter to one-letter amino-acid codes."""
    aa_codes = (
        ("Ala", "A"), ("Arg", "R"), ("Asn", "N"), ("Asp", "D"), ("Cys", "C"),
        ("Gln", "Q"), ("Glu", "E"), ("Gly", "G"), ("His", "H"), ("Ile", "I"),
        ("Leu", "L"), ("Lys", "K"), ("Met", "M"), ("Phe", "F"), ("Pro", "P"),
        ("Pyl", "O"), ("Ser", "S"), ("Sec", "U"), ("Thr", "T"), ("Trp", "W"),
        ("Tyr", "Y"), ("Val", "V"), ("Asx", "B"), ("Glx", "Z"), ("Xaa", "X"),
        ("Xle", "J"), ("Ter", "*"),
    )
    hgvs_one = hgvs_three
    for three, one in aa_codes:
        if three in hgvs_one:
            hgvs_one = hgvs_one.replace(three, one)
    return hgvs_one


## Returns a pandas dataframe in the format:
#        CHROM  POS REF ALT FILTER   DP  REF_DP  ALT_DP    AF
# 0  MN908947.3  241   C   T   PASS  642     375     266  0.41
# 1  MN908947.3 1875   C   T   PASS   99      63      34  0.34
def ivar_bcftools_query_to_table(bcftools_query_file):
    """Load an iVar bcftools-query table and append an allele-frequency column.

    Column names like "[1]CHROM" or "SAMPLE:DP" are reduced to their final
    component. AF = ALT_DP / DP, rounded to two decimals. An empty table is
    returned unchanged.
    """
    table = pd.read_table(bcftools_query_file, header="infer").dropna(how="all", axis=1)
    renamed = {col: col.split("]")[-1].split(":")[-1] for col in table.columns}
    table = table.rename(columns=renamed)

    if not table.empty:
        table[["ALT_DP", "DP"]] = table[["ALT_DP", "DP"]].apply(pd.to_numeric)
        table["AF"] = (table["ALT_DP"] / table["DP"]).round(2)

    return table
## Returns a pandas dataframe in the format:
#        CHROM  POS REF ALT FILTER  DP REF_DP  ALT_DP    AF
# 0  MN908947.3  241   C   T      .  24      8      16  0.67
# 1  MN908947.3 3037   C   T      .  17      5      12  0.71
def bcftools_bcftools_query_to_table(bcftools_query_file):
    """Load a BCFTools-caller query table and derive depth/AF columns.

    The "AD" column ("<ref_depth>,<alt_depth>") is split into REF_DP/ALT_DP
    and then dropped; AF = ALT_DP / DP rounded to two decimals. An empty
    table is returned untouched.
    """
    table = pd.read_table(bcftools_query_file, header="infer").dropna(how="all", axis=1)
    table = table.rename(columns={col: col.split("]")[-1].split(":")[-1] for col in table.columns})

    if table.empty:
        return table

    table[["REF_DP", "ALT_DP"]] = table["AD"].str.split(",", expand=True)
    # NOTE(review): only ALT_DP and DP are cast to numeric here — REF_DP stays
    # a string, mirroring the original behaviour; confirm whether REF_DP
    # should be numeric as in the nanopolish variant of this function.
    table[["ALT_DP", "DP"]] = table[["ALT_DP", "DP"]].apply(pd.to_numeric)
    table["AF"] = (table["ALT_DP"] / table["DP"]).round(2)
    return table.drop(columns="AD")
## Returns a pandas dataframe in the format:
#        CHROM  POS REF ALT FILTER  DP  REF_DP  ALT_DP    AF
# 0  MN908947.3  241   C   T   PASS  21       0      21  1.00
# 1  MN908947.3 3037   C   T   PASS  28       0      25  0.89
def medaka_bcftools_query_to_table(bcftools_query_file):
    """Load a medaka bcftools-query table and derive depth/AF columns.

    The "AC" column ("<ref_count>,<alt_count>") is split into REF_DP/ALT_DP
    and dropped; AF = ALT_DP / DP rounded to two decimals.
    """
    table = pd.read_table(bcftools_query_file, header="infer")
    table = table.dropna(how="all", axis=1)
    old_colnames = list(table.columns)
    new_colnames = [x.split("]")[-1].split(":")[-1] for x in old_colnames]
    table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True)

    if not table.empty:
        table[["REF_DP", "ALT_DP"]] = table["AC"].str.split(",", expand=True)
        # NOTE(review): REF_DP is left as a string here (only ALT_DP/DP are
        # cast), matching the bcftools variant of this function — confirm.
        table[["ALT_DP", "DP"]] = table[["ALT_DP", "DP"]].apply(pd.to_numeric)
        table["AF"] = table["ALT_DP"] / table["DP"]
        table["AF"] = table["AF"].round(2)
        table.drop("AC", axis=1, inplace=True)

    return table


def get_pangolin_lineage(pangolin_file):
    """Return the lineage assigned to the first record of a Pangolin CSV."""
    table = pd.read_csv(pangolin_file, sep=",", header="infer")
    return table["lineage"][0]


def snpsift_to_table(snpsift_file):
    """Load a SnpSift extraction table and normalise its annotation columns.

    Keeps CHROM/POS/REF/ALT plus gene/effect/HGVS columns, expands
    comma-separated multi-effect annotations to one row per effect, and adds
    a one-letter-code copy of HGVS_P.
    """
    table = pd.read_table(snpsift_file, sep="\t", header="infer")
    table = table.loc[:, ~table.columns.str.contains("^Unnamed")]
    old_colnames = list(table.columns)
    new_colnames = [x.replace("ANN[*].", "") for x in old_colnames]
    table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True)
    table = table.loc[:, ["CHROM", "POS", "REF", "ALT", "GENE", "EFFECT", "HGVS_C", "HGVS_P"]]
    table = one_effect_per_line(table)

    ## Amino acid substitution
    # fix: Series.iteritems() was removed in pandas 2.0; items() is the
    # long-standing equivalent.
    aa = []
    for index, item in table["HGVS_P"].items():
        hgvs_p = three_letter_aa_to_one(str(item))
        aa.append(hgvs_p)
    table["HGVS_P_1LETTER"] = pd.Series(aa)

    return table
range(len(gene_list)): + if "upstream" in effect_list[j] or "downstream" in effect_list[j]: + count += 1 + for j in range(len(gene_list)): + if len(effect_list) == count: + row = { + "CHROM": table.iloc[i, 0], + "POS": table.iloc[i, 1], + "REF": table.iloc[i, 2], + "ALT": table.iloc[i, 3], + "GENE": gene_list[0], + "EFFECT": effect_list[0], + "HGVS_C": hgvs_c_list[0], + "HGVS_P": hgvs_p_list[0], + } + one_effect_per_line_table = pd.concat( + [one_effect_per_line_table, pd.DataFrame([row])], ignore_index=True + ) + else: + if not "upstream" in effect_list[j] and not "downstream" in effect_list[j]: + row = { + "CHROM": table.iloc[i, 0], + "POS": table.iloc[i, 1], + "REF": table.iloc[i, 2], + "ALT": table.iloc[i, 3], + "GENE": gene_list[j], + "EFFECT": effect_list[j], + "HGVS_C": hgvs_c_list[j], + "HGVS_P": hgvs_p_list[j], + } + one_effect_per_line_table = pd.concat( + [one_effect_per_line_table, pd.DataFrame([row])], ignore_index=True + ) + return one_effect_per_line_table + + +def main(args=None): + args = parser_args(args) + + ## Create output directory if it doesn't exist + out_dir = os.path.dirname(args.output_file) + make_dir(out_dir) + + ## Check correct variant caller has been provided + variant_callers = ["ivar", "bcftools", "nanopolish", "medaka"] + if args.variant_caller not in variant_callers: + logger.error( + f"Invalid option '--variant caller {args.variant_caller}'. Valid options: " + ", ".join(variant_callers) + ) + sys.exit(1) + + ## Find files and create a dictionary {'sample': '/path/to/file'} + bcftools_files = get_file_dict(args.bcftools_query_dir, args.bcftools_file_suffix) + snpsift_files = get_file_dict(args.snpsift_dir, args.snpsift_file_suffix) + pangolin_files = get_file_dict(args.pangolin_dir, args.pangolin_file_suffix) + + ## Check all files are provided for each sample + if set(bcftools_files) != set(snpsift_files): + logger.error( + f"Number of BCFTools ({len(bcftools_files)}) and SnpSift ({len(snpsift_files)}) files do not match!" 
+ ) + sys.exit(1) + else: + if pangolin_files: + if set(bcftools_files) != set(pangolin_files): + logger.error( + f"Number of BCFTools ({len(bcftools_files)}) and Pangolin ({len(pangolin_files)}) files do not match!" + ) + sys.exit(1) + + ## Create per-sample table and write to file + sample_tables = [] + for sample in sorted(bcftools_files): + ## Read in BCFTools query file + bcftools_table = None + if args.variant_caller == "ivar": + bcftools_table = ivar_bcftools_query_to_table(bcftools_files[sample]) + elif args.variant_caller == "bcftools": + bcftools_table = bcftools_bcftools_query_to_table(bcftools_files[sample]) + elif args.variant_caller == "nanopolish": + bcftools_table = nanopolish_bcftools_query_to_table(bcftools_files[sample]) + elif args.variant_caller == "medaka": + bcftools_table = medaka_bcftools_query_to_table(bcftools_files[sample]) + + if not bcftools_table.empty: + ## Read in SnpSift file + snpsift_table = snpsift_to_table(snpsift_files[sample]) + + merged_table = pd.DataFrame(data=bcftools_table) + merged_table.insert(0, "SAMPLE", sample) + merged_table = pd.merge(merged_table, snpsift_table, how="outer") + merged_table["CALLER"] = args.variant_caller + + ## Read in Pangolin lineage file + if pangolin_files: + merged_table["LINEAGE"] = get_pangolin_lineage(pangolin_files[sample]) + + sample_tables.append(merged_table) + + ## Merge table across samples + if sample_tables: + merged_tables = pd.concat(sample_tables) + merged_tables.to_csv(args.output_file, index=False, encoding="utf-8-sig") + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/multiqc_to_custom_csv.py b/bin/multiqc_to_custom_csv.py new file mode 100755 index 00000000..5ce8c36d --- /dev/null +++ b/bin/multiqc_to_custom_csv.py @@ -0,0 +1,367 @@ +#!/usr/bin/env python + +import os +import sys +import errno +import argparse +import yaml + + +def parse_args(args=None): + Description = ( + "Create custom spreadsheet for pertinent MultiQC metrics generated by the 
# Find key in dictionary created from YAML file recursively
# From https://stackoverflow.com/a/37626981
def find_tag(d, tag):
    """Yield every value stored under *tag* anywhere in a nested dict.

    Traversal is pre-order: the current mapping's own entry is yielded
    first, then nested dicts are searched in insertion order.
    """
    if tag in d:
        yield d[tag]
    for value in d.values():
        if isinstance(value, dict):
            yield from find_tag(value, tag)
if len(valid_sample_list) != 0 and key not in valid_sample_list: + include_sample = False + if include_sample: + if key not in append_dict: + append_dict[key] = {} + if field_mapping_list != []: + for i, j in field_mapping_list: + val = list(find_tag(yaml_dict[k], j[0])) + ## Fix for Cutadapt reporting reads/pairs as separate values + if j[0] == "r_written" and len(val) == 0: + val = [list(find_tag(yaml_dict[k], "pairs_written"))[0] * 2] + if len(val) != 0: + val = val[0] + if len(j) == 2: + val = list(find_tag(val, j[1]))[0] + if j[0] in integer_fields: + val = int(val) + if i not in append_dict[key]: + append_dict[key][i] = val + else: + print( + "WARNING: {} key already exists in dictionary so will be overwritten. YAML file {}.".format( + i, yaml_file + ) + ) + else: + append_dict[key] = yaml_dict[k] + else: + print("WARNING: File does not exist: {}".format(yaml_file)) + if len(valid_sample_list) != 0: + for key in valid_sample_list: + if key not in append_dict: + append_dict[key] = {} + if field_mapping_list != []: + for i, j in field_mapping_list: + if i not in append_dict[key]: + append_dict[key][i] = "NA" + else: + print( + "WARNING: {} key already exists in dictionary so will be overwritten. 
YAML file {}.".format( + i, yaml_file + ) + ) + else: + append_dict[key] = "NA" + return append_dict + + +def metrics_dict_to_file(file_field_list, multiqc_data_dir, out_file, valid_sample_list=[]): + metrics_dict = {} + field_list = [] + for yaml_file, mapping_list in file_field_list: + yaml_file = os.path.join(multiqc_data_dir, yaml_file) + metrics_dict = yaml_fields_to_dict( + yaml_file=yaml_file, + append_dict=metrics_dict, + field_mapping_list=mapping_list, + valid_sample_list=valid_sample_list, + ) + field_list += [x[0] for x in mapping_list] + + if metrics_dict != {}: + make_dir(os.path.dirname(out_file)) + fout = open(out_file, "w") + header = ["Sample"] + field_list + fout.write("{}\n".format(",".join(header))) + for k in sorted(metrics_dict.keys()): + row_list = [k] + for field in field_list: + if field in metrics_dict[k]: + if metrics_dict[k][field]: + row_list.append(str(metrics_dict[k][field]).replace(",", ";")) + else: + row_list.append("NA") + else: + row_list.append("NA") + fout.write("{}\n".format(",".join(row_list))) + fout.close() + return metrics_dict + + +def main(args=None): + args = parse_args(args) + + ## File names for MultiQC YAML along with fields to fetch from each file + illumina_variant_files = [ + ( + "multiqc_fastp.yaml", + [ + ("# Input reads", ["before_filtering", "total_reads"]), + ("# Trimmed reads (fastp)", ["after_filtering", "total_reads"]), + ], + ), + ( + "multiqc_general_stats.yaml", + [ + ( + "% Non-host reads (Kraken 2)", + ["PREPROCESS: Kraken 2_mqc-generalstats-preprocess_kraken_2-Unclassified"], + ) + ], + ), + ("multiqc_bowtie2.yaml", [("% Mapped reads", ["overall_alignment_rate"])]), + ( + "multiqc_samtools_flagstat_samtools_bowtie2.yaml", + [("# Mapped reads", ["mapped_passed"])], + ), + ( + "multiqc_samtools_flagstat_samtools_ivar.yaml", + [("# Trimmed reads (iVar)", ["flagstat_total"])], + ), + ( + "multiqc_general_stats.yaml", + [ + ( + "Coverage median", + ["VARIANTS: 
mosdepth_mqc-generalstats-variants_mosdepth-median_coverage"], + ), + ( + "% Coverage > 1x", + ["VARIANTS: mosdepth_mqc-generalstats-variants_mosdepth-1_x_pc"], + ), + ( + "% Coverage > 10x", + ["VARIANTS: mosdepth_mqc-generalstats-variants_mosdepth-10_x_pc"], + ), + ], + ), + ( + "multiqc_bcftools_stats.yaml", + [ + ("# SNPs", ["number_of_SNPs"]), + ("# INDELs", ["number_of_indels"]), + ], + ), + ( + "multiqc_snpeff.yaml", + [("# Missense variants", ["MISSENSE"])], + ), + ( + "multiqc_quast_quast_variants.yaml", + [("# Ns per 100kb consensus", ["# N's per 100 kbp"])], + ), + ( + "multiqc_pangolin.yaml", + [("Pangolin lineage", ["lineage"])], + ), + ("multiqc_nextclade_clade-plot.yaml", [("Nextclade clade", ["clade"])]), + ] + + illumina_assembly_files = [ + ( + "multiqc_fastp.yaml", + [("# Input reads", ["before_filtering", "total_reads"])], + ), + ("multiqc_cutadapt.yaml", [("# Trimmed reads (Cutadapt)", ["r_written"])]), + ( + "multiqc_general_stats.yaml", + [ + ( + "% Non-host reads (Kraken 2)", + ["PREPROCESS: Kraken 2_mqc-generalstats-preprocess_kraken_2-Unclassified"], + ) + ], + ), + ( + "multiqc_quast_quast_spades.yaml", + [ + ("# Contigs (SPAdes)", ["# contigs (>= 0 bp)"]), + ("Largest contig (SPAdes)", ["Largest contig"]), + ("% Genome fraction (SPAdes)", ["Genome fraction (%)"]), + ("N50 (SPAdes)", ["N50"]), + ], + ), + ( + "multiqc_quast_quast_unicycler.yaml", + [ + ("# Contigs (Unicycler)", ["# contigs (>= 0 bp)"]), + ("Largest contig (Unicycler)", ["Largest contig"]), + ("% Genome fraction (Unicycler)", ["Genome fraction (%)"]), + ("N50 (Unicycler)", ["N50"]), + ], + ), + ( + "multiqc_quast_quast_minia.yaml", + [ + ("# Contigs (minia)", ["# contigs (>= 0 bp)"]), + ("Largest contig (minia)", ["Largest contig"]), + ("% Genome fraction (minia)", ["Genome fraction (%)"]), + ("N50 (minia)", ["N50"]), + ], + ), + ] + + nanopore_variant_files = [ + ("multiqc_samtools_flagstat.yaml", [("# Mapped reads", ["mapped_passed"])]), + ( + 
"multiqc_general_stats.yaml", + [ + ( + "Coverage median", + ["mosdepth_mqc-generalstats-mosdepth-median_coverage"], + ), + ("% Coverage > 1x", ["mosdepth_mqc-generalstats-mosdepth-1_x_pc"]), + ("% Coverage > 10x", ["mosdepth_mqc-generalstats-mosdepth-10_x_pc"]), + ], + ), + ( + "multiqc_bcftools_stats.yaml", + [("# SNPs", ["number_of_SNPs"]), ("# INDELs", ["number_of_indels"])], + ), + ("multiqc_snpeff.yaml", [("# Missense variants", ["MISSENSE"])]), + ("multiqc_quast.yaml", [("# Ns per 100kb consensus", ["# N's per 100 kbp"])]), + ("multiqc_pangolin.yaml", [("Pangolin lineage", ["lineage"])]), + ("multiqc_nextclade_clade-plot.yaml", [("Nextclade clade", ["clade"])]), + ] + + if args.PLATFORM == "illumina": + ## Dictionary of samples being single-end/paired-end + is_pe_dict = {} + yaml_file = os.path.join(args.MULTIQC_DATA_DIR, "multiqc_fastp.yaml") + if os.path.exists(yaml_file): + metrics_dict = yaml_fields_to_dict( + yaml_file=yaml_file, + append_dict={}, + field_mapping_list=[("command", ["command"])], + valid_sample_list=[], + ) + for sample, val in metrics_dict.items(): + if metrics_dict[sample]["command"].find("--out2") != -1: + is_pe_dict[sample] = True + else: + is_pe_dict[sample] = False + + ## Write variant calling metrics to file + metrics_dict_to_file( + file_field_list=illumina_variant_files, + multiqc_data_dir=args.MULTIQC_DATA_DIR, + out_file=args.OUT_PREFIX + "_variants_metrics_mqc.csv", + valid_sample_list=is_pe_dict.keys(), + ) + + ## Write de novo assembly metrics to file + metrics_dict_to_file( + file_field_list=illumina_assembly_files, + multiqc_data_dir=args.MULTIQC_DATA_DIR, + out_file=args.OUT_PREFIX + "_assembly_metrics_mqc.csv", + valid_sample_list=is_pe_dict.keys(), + ) + + elif args.PLATFORM == "nanopore": + ## List of real samples to output in report + sample_list = [] + yaml_file = os.path.join(args.MULTIQC_DATA_DIR, "multiqc_samtools_flagstat.yaml") + if os.path.exists(yaml_file): + metrics_dict = yaml_fields_to_dict( + 
yaml_file=yaml_file, + append_dict={}, + field_mapping_list=[("# Mapped reads", ["mapped_passed"])], + valid_sample_list=[], + ) + sample_list = metrics_dict.keys() + + metrics_dict_to_file( + file_field_list=nanopore_variant_files, + multiqc_data_dir=args.MULTIQC_DATA_DIR, + out_file=args.OUT_PREFIX + "_variants_metrics_mqc.csv", + valid_sample_list=sample_list, + ) + + else: + print( + "Unrecognised option passed to --platform: {}. Accepted values = 'illumina' or 'nanopore'".format( + args.PLATFORM + ) + ) + sys.exit(1) + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/bin/plot_base_density.r b/bin/plot_base_density.r new file mode 100755 index 00000000..993b8fb2 --- /dev/null +++ b/bin/plot_base_density.r @@ -0,0 +1,164 @@ +#!/usr/bin/env Rscript + +################################################ +################################################ +## LOAD LIBRARIES ## +################################################ +################################################ + +library(optparse) +library(ggplot2) +library(scales) +library(reshape2) +library(Biostrings) + +################################################ +################################################ +## VALIDATE COMMAND-LINE PARAMETERS ## +################################################ +################################################ + +option_list <- list(make_option(c("-i", "--fasta_files"), type="character", default=NULL, help="Comma-separated list of fasta files", metavar="fasta_files"), + make_option(c("-s", "--prefixes"), type="character", default=NULL, help="Comma-separated list of prefixes associated with fasta files to add to plots. 
Must be unique and in same order as fasta file input.", metavar="prefixes"),
                    make_option(c("-o", "--output_dir"), type="character", default='./', help="Output directory", metavar="path"))

opt_parser <- OptionParser(option_list=option_list)
opt <- parse_args(opt_parser)

## Check input files
INPUT_FILES <- unique(unlist(strsplit(opt$fasta_files,",")))
if (length(INPUT_FILES) == 0) {
    print_help(opt_parser)
    stop("At least one input file must be supplied", call.=FALSE)
}
if (!all(file.exists(INPUT_FILES))) {
    stop(paste("The following input files don't exist:",paste(INPUT_FILES[!file.exists(INPUT_FILES)], sep='', collapse=' '), sep=' '), call.=FALSE)
}

## Check prefixes for input files
PREFIXES <- basename(INPUT_FILES)
if (!is.null(opt$prefixes)){
    PREFIXES <- unique(unlist(strsplit(opt$prefixes,",")))
    if (length(INPUT_FILES) != length(PREFIXES)) {
        print_help(opt_parser)
        stop("Please provide a unique prefix for each fasta file.", call.=FALSE)
    }
}

## Check the output directory has a trailing slash, if not add one
OUTDIR <- opt$output_dir
if (tail(strsplit(OUTDIR,"")[[1]],1)!="/") {
    OUTDIR <- paste(OUTDIR,"/",sep='')
}
## Create the directory if it doesn't already exist.
if (!file.exists(OUTDIR)) {
    dir.create(OUTDIR, recursive=TRUE)
}

################################################
################################################
##         READ IN DATA                       ##
################################################
################################################

## Only the first sequence of each fasta file is used.
dat <- NULL
for (input_file in INPUT_FILES) {
    dat <- c(dat,readDNAStringSet(input_file)[1])
}

################################################
################################################
##         PLOTS                              ##
################################################
################################################

bases_std <- c("A","C","T","G")
base_cols <- c("A" = "#009E73",
               "C" = "#0072B2",
               "T" = "#D55E00",
               "G" = "#000000",
               "N" = "#E69F00",
               "X" = "#999999")

for (idx in 1:length(dat)) {

    ## Table of base counts
    base_seq <- strsplit(toString(dat[[idx]]), "")[[1]]
    base_tab <- data.frame(table(base_seq), stringsAsFactors=FALSE)
    colnames(base_tab) <- c("base","freq")
    for (base in 1:length(bases_std)) {
        if (!any(base_tab$base %in% bases_std[base])) {
            ## FIX: the original rbind()ed a character vector, which coerced
            ## the numeric 'freq' column to character and broke the percentage
            ## arithmetic below; bind a typed one-row data.frame instead.
            base_tab <- rbind(base_tab, data.frame(base=bases_std[base], freq=0))
        }
    }
    ## FIX: set rownames after padding so rows added above are indexable by
    ## base letter in the reordering step (the original set them beforehand).
    rownames(base_tab) <- base_tab$base
    base_tab$perc <- 100 * base_tab$freq / sum(base_tab$freq)
    base_tab <- base_tab[order(base_tab$base, decreasing=FALSE),]
    ## NOTE(review): indexing by "N" assumes the consensus contains at least one
    ## N (or that an NA row is tolerated downstream) - confirm against inputs.
    base_tab <- rbind(base_tab[c(bases_std, "N"),], base_tab[!rownames(base_tab) %in% c(bases_std, "N"),])
    base_tab$base <- factor(base_tab$base, levels=rownames(base_tab))
    outfile <- paste(OUTDIR, PREFIXES[idx], ".base_counts.tsv", sep='')
    write.table(base_tab, file=outfile, col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)

    ## Barplot of base frequencies
    barplot <- ggplot(base_tab, aes(x=base,y=perc)) +
        geom_bar(stat="identity") +
        theme_classic() +
        scale_y_continuous(limits=c(0,100),breaks=c(0,25,50,75,100)) +
        ylab("% Observed") +
        xlab("Base") +
        ggtitle(PREFIXES[idx])
    outfile <- paste(OUTDIR, PREFIXES[idx], ".base_counts.pdf", sep='')
    ggsave(file=outfile, barplot, width=12, height=10, units="cm")

    ## Create a data frame of base coverage (one indicator column per base)
    bases <- unique(c(bases_std,"N",unique(base_seq)))
    base_dat <- data.frame(sample=names(dat[[idx]])[1], position=1:length(base_seq), stringsAsFactors=FALSE)
    for (base in 1:length(bases)) {
        base_dat[,bases[base]] <- as.numeric(base_seq==bases[base])
    }

    ## Stretches of N's (run-length encoding of the N indicator column)
    N_rle <- Rle(base_dat[,"N"])
    N_dat <- data.frame(start=cumsum(runLength(N_rle))[runValue(N_rle)==1], width=runLength(N_rle)[runValue(N_rle)==1])
    outfile <- paste(OUTDIR, PREFIXES[idx], ".N_run.tsv", sep='')
    write.table(N_dat, file=outfile, col.names=TRUE, row.names=FALSE, sep="\t", quote=FALSE)

    ## Running mean of bp density for standard bases
    run_k <- 1001
    run_dat <- base_dat[,c("sample", "position", bases_std)]
    for (base in bases_std) {
        run_dat[,base] <- as.numeric(runmean(Rle(base_dat[,base]), k=run_k, endrule="constant"))
    }
    run_dat <- melt(run_dat, c(1,2))
    colnames(run_dat)[3] <- "base"
    run_dat$position <- run_dat$position/1000
    lineplot <- ggplot(run_dat,aes(x=position, y=value, colour=base)) +
        geom_line() +
        theme_classic() +
        theme(panel.border=element_rect(colour="black", fill=NA, size=1)) +
        scale_y_continuous(breaks=c(0,0.25,0.50,0.75,1)) +
        xlab("Position (Kb)") +
        ylab(paste("Base density (running mean k=",run_k,")", sep='')) +
        ggtitle(PREFIXES[idx]) +
        scale_colour_manual(values=base_cols)
    outfile <- paste(OUTDIR, PREFIXES[idx], ".ACTG_density.pdf", sep='')
    ggsave(file=outfile, lineplot, width=18, height=10, units="cm")

    ## Single base density plots, nucleotide resolution.
    bases_other <- bases[!bases %in% bases_std]
    for (obase in bases_other) {
        plot_dat <- base_dat[,c("sample", "position", obase)]
        colnames(plot_dat)[3] <- "base"
        plot_col <- ifelse(obase=="N", base_cols[["N"]], base_cols[["X"]])
        lineplot <- ggplot(plot_dat, aes(x=position/1000, y=base)) +
            geom_line(colour=plot_col) +
            theme_classic() +
            theme(legend.position="none", panel.border=element_rect(colour="black", fill=NA, size=1)) +
            scale_y_continuous(breaks=c(0,1), labels=c(0,1)) +
            xlab("Position (Kb)") +
            ylab(paste(obase,"density", sep=' ')) +
            ggtitle(PREFIXES[idx])
        outfile <- paste(OUTDIR, PREFIXES[idx], ".", obase, "_density.pdf", sep='')
        ggsave(file=outfile, lineplot, width=18, height=10, units="cm")
    }
}
diff --git a/bin/plot_mosdepth_regions.r b/bin/plot_mosdepth_regions.r
new file mode 100755
index 00000000..7db55824
--- /dev/null
+++ b/bin/plot_mosdepth_regions.r
@@ -0,0 +1,183 @@
#!/usr/bin/env Rscript

################################################
################################################
##         LOAD LIBRARIES                     ##
################################################
################################################

library(optparse)
library(ggplot2)
library(scales)
library(ComplexHeatmap)
library(viridis)
library(tidyverse)

################################################
################################################
##      VALIDATE COMMAND-LINE PARAMETERS      ##
################################################
################################################

option_list <- list(make_option(c("-i", "--input_files"), type="character", default=NULL, help="Comma-separated list of mosdepth regions output file (typically end in *.regions.bed.gz)", metavar="input_files"),
                    make_option(c("-s", "--input_suffix"), type="character", default='.regions.bed.gz', help="Portion of filename after sample name to trim for plot title e.g.
'.regions.bed.gz' if 'SAMPLE1.regions.bed.gz'", metavar="input_suffix"),
                    make_option(c("-o", "--output_dir"), type="character", default='./', help="Output directory", metavar="path"),
                    make_option(c("-p", "--output_suffix"), type="character", default='regions', help="Output suffix", metavar="output_suffix"),
                    make_option(c("-r", "--regions_prefix"), type="character", default=NULL, help="Replace this prefix from region names before plotting", metavar="regions_prefix"))

opt_parser <- OptionParser(option_list=option_list)
opt <- parse_args(opt_parser)

## Check input files
INPUT_FILES <- unique(unlist(strsplit(opt$input_files,",")))
if (length(INPUT_FILES) == 0) {
    print_help(opt_parser)
    stop("At least one input file must be supplied", call.=FALSE)
}
if (!all(file.exists(INPUT_FILES))) {
    stop(paste("The following input files don't exist:",paste(INPUT_FILES[!file.exists(INPUT_FILES)], sep='', collapse=' '), sep=' '), call.=FALSE)
}

## Check the output directory has a trailing slash, if not add one
OUTDIR <- opt$output_dir
if (tail(strsplit(OUTDIR,"")[[1]],1)!="/") {
    OUTDIR <- paste(OUTDIR,"/",sep='')
}
## Create the directory if it doesn't already exist.
if (!file.exists(OUTDIR)) {
    dir.create(OUTDIR,recursive=TRUE)
}

## Strip any leading/trailing dots from the output suffix.
OUTSUFFIX <- trimws(opt$output_suffix, "both", whitespace = "\\.")

################################################
################################################
##         READ IN DATA                       ##
################################################
################################################

## Read in data; sample name is derived from the filename minus the suffix.
dat <- NULL
for (input_file in INPUT_FILES) {
    sample = gsub(opt$input_suffix,'',basename(input_file))
    dat <- rbind(dat, cbind(read.delim(input_file, header=FALSE, sep='\t', stringsAsFactors=FALSE, check.names=FALSE)[,-6], sample, stringsAsFactors=F))
}

## Reformat table: 6 columns means an amplicon/region BED, 5 means plain windows.
if (ncol(dat) == 6) {
    colnames(dat) <- c('chrom', 'start','end', 'region', 'coverage', 'sample')
    if (!is.null(opt$regions_prefix)) {
        dat$region <- as.character(gsub(opt$regions_prefix, '', dat$region))
    }
    dat$region <- factor(dat$region, levels=unique(dat$region[order(dat$start)]))
} else {
    colnames(dat) <- c('chrom', 'start','end', 'coverage', 'sample')
}
dat$sample <- factor(dat$sample, levels=sort(unique(dat$sample)))

## Write merged coverage data for all samples to file
outfile <- paste(OUTDIR,"all_samples.",OUTSUFFIX,".coverage.tsv", sep='')
write.table(dat, file=outfile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE)

################################################
################################################
##        PER-SAMPLE COVERAGE PLOTS           ##
################################################
################################################

for (sample in unique(dat$sample)) {
    sample_dat <- dat[dat$sample == sample,]
    outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.tsv", sep='')
    write.table(sample_dat,file=outfile, col.names=TRUE, row.names=FALSE, sep='\t', quote=FALSE)
    ## +1 so zero-coverage positions survive the log10 transform below.
    sample_dat$coverage <- sample_dat$coverage + 1

    if (ncol(sample_dat) == 6) {
        plot <- ggplot(sample_dat,aes(x=region,y=coverage)) +
            geom_bar(stat="identity", fill="#D55E00", width=0.6) +
            theme_bw() +
            theme(
                plot.title=element_text(size=10),
                axis.text.x=element_text(size=10),
                axis.text.y=element_text(size=6)) +
            coord_flip() +
            scale_x_discrete(expand=c(0, 0)) +
            scale_y_continuous(
                trans=log10_trans(),
                breaks=10^c(0:10),
                labels=trans_format('log10', math_format(10^.x)),
                expand=c(0, 0)) +
            expand_limits(y=1) +
            ylab(bquote('log'[10]~'(Coverage+1)')) +
            xlab('Amplicon') +
            ggtitle(paste(sample,'median coverage per amplicon'))

        outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.pdf", sep='')
        ggsave(file=outfile, plot, height=3+(0.2*length(unique(sample_dat$region))), width=16, units="cm", limitsize=FALSE)
    } else {
        plot <- ggplot(sample_dat,aes(x=end,y=coverage)) +
            ## FIX: removed the dangling empty `data=` argument the original
            ## passed to geom_ribbon (a missing argument).
            geom_ribbon(aes(ymin=0, ymax=coverage), fill="#D55E00") +
            theme_bw() +
            scale_x_continuous(expand=c(0, 0)) +
            scale_y_continuous(
                trans=log10_trans(),
                breaks=10^c(0:10),
                labels=trans_format('log10', math_format(10^.x)),
                expand=c(0, 0)) +
            expand_limits(y=1) +
            ylab(bquote('log'[10]~'(Coverage+1)')) +
            xlab('Position (bp)') +
            ggtitle(paste(sample,'coverage'))

        outfile <- paste(OUTDIR,sample,".",OUTSUFFIX,".coverage.pdf", sep='')
        ggsave(file=outfile, plot, height=6, width=12, units="in")
    }
}

################################################
################################################
##   REGION-BASED HEATMAP ACROSS ALL SAMPLES  ##
################################################
################################################

if (ncol(dat) == 6 && length(INPUT_FILES) > 1) {
    mat <- spread(dat[,c("sample", "region", "coverage")], sample, coverage, fill=NA, convert=FALSE)
    rownames(mat) <- mat[,1]
    mat <- t(as.matrix(log10(mat[,-1] + 1)))
    heatmap <- Heatmap(mat,
                       column_title = "Heatmap to show median amplicon coverage across samples",
                       name = "log10(Coverage+1)",
                       cluster_rows = TRUE,
                       cluster_columns = FALSE,
                       show_row_names = TRUE,
                       show_column_names = TRUE,
                       column_title_side = "top",
                       column_names_side = "bottom",
                       row_names_side = "right",
                       rect_gp = gpar(col="white", lwd=1),
                       show_heatmap_legend = TRUE,
                       heatmap_legend_param = list(title_gp=gpar(fontsize=12, fontface="bold"), labels_gp=gpar(fontsize=10), direction="horizontal"),
                       column_title_gp = gpar(fontsize=14, fontface="bold"),
                       row_names_gp = gpar(fontsize=10, fontface="bold"),
                       column_names_gp = gpar(fontsize=10, fontface="bold"),
                       height = unit(5, "mm")*nrow(mat),
                       width = unit(5, "mm")*ncol(mat),
                       col = viridis(50))

    ## Size of heatmaps scaled based on matrix dimensions: https://jokergoo.github.io/ComplexHeatmap-reference/book/other-tricks.html#set-the-same-cell-size-for-different-heatmaps-with-different-dimensions
    height = 0.1969*nrow(mat) + (2*1.5)
    width = 0.1969*ncol(mat) + (2*1.5)
    outfile <- paste(OUTDIR,"all_samples.",OUTSUFFIX,".heatmap.pdf", sep='')
    pdf(file=outfile, height=height, width=width)
    draw(heatmap, heatmap_legend_side="bottom")
    dev.off()

    ## Write heatmap to file (rows in clustered display order)
    mat <- mat[row_order(heatmap),]
    outfile <- paste(OUTDIR,"all_samples.",OUTSUFFIX,".heatmap.tsv", sep='')
    write.table(cbind(sample = rownames(mat), mat), file=outfile, row.names=FALSE, col.names=TRUE, sep="\t", quote=FALSE)
}

################################################
################################################
################################################
################################################
diff --git a/conf/base.config b/conf/base.config
index 46b66dcd..fbfe9964 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -24,7 +24,6 @@ process {
     // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
     // If possible, it would be nice to keep the same label naming convention when
     // adding in your local modules too.
-    // TODO nf-core: Customise requirements for specific processes.
// See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { cpus = { 1 } diff --git a/conf/igenomes.config b/conf/igenomes.config deleted file mode 100644 index 3f114377..00000000 --- a/conf/igenomes.config +++ /dev/null @@ -1,440 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Defines reference genomes using iGenome paths. - Can be used by any config that customises the base path using: - $params.igenomes_base / --igenomes_base ----------------------------------------------------------------------------------------- -*/ - -params { - // illumina iGenomes reference file paths - genomes { - 'GRCh37' { - fasta = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/Ensembl/GRCh37/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/GRCh37-blacklist.bed" - } - 'GRCh38' { - fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/Bowtie2Index/" - star = 
"${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'CHM13' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAIndex/" - bwamem2 = "${params.igenomes_base}/Homo_sapiens/UCSC/CHM13/Sequence/BWAmem2Index/" - gtf = "${params.igenomes_base}/Homo_sapiens/NCBI/CHM13/Annotation/Genes/genes.gtf" - gff = "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF/009/914/755/GCF_009914755.1_T2T-CHM13v2.0/GCF_009914755.1_T2T-CHM13v2.0_genomic.gff.gz" - mito_name = "chrM" - } - 'GRCm38' { - fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/GRCm38-blacklist.bed" - } - 'TAIR10' { - fasta = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Arabidopsis_thaliana/Ensembl/TAIR10/Annotation/README.txt" - mito_name = "Mt" - } - 'EB2' { - fasta = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bacillus_subtilis_168/Ensembl/EB2/Annotation/README.txt" - } - 'UMD3.1' { - fasta = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Bos_taurus/Ensembl/UMD3.1/Annotation/README.txt" - mito_name = "MT" - } - 'WBcel235' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/Ensembl/WBcel235/Annotation/Genes/genes.bed" - mito_name = "MtDNA" - macs_gsize = "9e7" - } - 'CanFam3.1' { - fasta = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/Ensembl/CanFam3.1/Annotation/README.txt" - mito_name = "MT" - } - 'GRCz10' { - fasta = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/Ensembl/GRCz10/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'BDGP6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/Ensembl/BDGP6/Annotation/Genes/genes.bed" - mito_name = "M" - macs_gsize = "1.2e8" - } - 'EquCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/Genes/genes.bed" - readme = 
"${params.igenomes_base}/Equus_caballus/Ensembl/EquCab2/Annotation/README.txt" - mito_name = "MT" - } - 'EB1' { - fasta = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Escherichia_coli_K_12_DH10B/Ensembl/EB1/Annotation/README.txt" - } - 'Galgal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/Ensembl/Galgal4/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Gm01' { - fasta = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/STARIndex/" - bismark = 
"${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Glycine_max/Ensembl/Gm01/Annotation/README.txt" - } - 'Mmul_1' { - fasta = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Macaca_mulatta/Ensembl/Mmul_1/Annotation/README.txt" - mito_name = "MT" - } - 'IRGSP-1.0' { - fasta = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Oryza_sativa_japonica/Ensembl/IRGSP-1.0/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'CHIMP2.1.4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/Ensembl/CHIMP2.1.4/Annotation/README.txt" - mito_name = "MT" - } - 'Rnor_5.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_5.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'Rnor_6.0' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/Ensembl/Rnor_6.0/Annotation/Genes/genes.bed" - mito_name = "MT" - } - 'R64-1-1' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Saccharomyces_cerevisiae/Ensembl/R64-1-1/Annotation/Genes/genes.bed" - mito_name = "MT" - macs_gsize = "1.2e7" - } - 'EF2' { - fasta = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Schizosaccharomyces_pombe/Ensembl/EF2/Annotation/README.txt" - mito_name = "MT" - macs_gsize = "1.21e7" - } - 'Sbi1' { - fasta = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sorghum_bicolor/Ensembl/Sbi1/Annotation/README.txt" - } - 'Sscrofa10.2' { - fasta = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/Ensembl/Sscrofa10.2/Annotation/README.txt" - mito_name = "MT" - } - 'AGPv3' { - fasta = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.gtf" - bed12 = 
"${params.igenomes_base}/Zea_mays/Ensembl/AGPv3/Annotation/Genes/genes.bed" - mito_name = "Mt" - } - 'hg38' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg38/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg38-blacklist.bed" - } - 'hg19' { - fasta = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Homo_sapiens/UCSC/hg19/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "2.7e9" - blacklist = "${projectDir}/assets/blacklists/hg19-blacklist.bed" - } - 'mm10' { - fasta = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/STARIndex/" - bismark 
= "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Mus_musculus/UCSC/mm10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.87e9" - blacklist = "${projectDir}/assets/blacklists/mm10-blacklist.bed" - } - 'bosTau8' { - fasta = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Bos_taurus/UCSC/bosTau8/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'ce10' { - fasta = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Caenorhabditis_elegans/UCSC/ce10/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "9e7" - } - 'canFam3' { - fasta = 
"${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Canis_familiaris/UCSC/canFam3/Annotation/README.txt" - mito_name = "chrM" - } - 'danRer10' { - fasta = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Danio_rerio/UCSC/danRer10/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.37e9" - } - 'dm6' { - fasta = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Sequence/BismarkIndex/" - gtf = 
"${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Drosophila_melanogaster/UCSC/dm6/Annotation/Genes/genes.bed" - mito_name = "chrM" - macs_gsize = "1.2e8" - } - 'equCab2' { - fasta = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Equus_caballus/UCSC/equCab2/Annotation/README.txt" - mito_name = "chrM" - } - 'galGal4' { - fasta = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Gallus_gallus/UCSC/galGal4/Annotation/README.txt" - mito_name = "chrM" - } - 'panTro4' { - fasta = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BWAIndex/version0.6.0/" - bowtie2 = 
"${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Pan_troglodytes/UCSC/panTro4/Annotation/README.txt" - mito_name = "chrM" - } - 'rn6' { - fasta = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Rattus_norvegicus/UCSC/rn6/Annotation/Genes/genes.bed" - mito_name = "chrM" - } - 'sacCer3' { - fasta = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/WholeGenomeFasta/genome.fa" - bwa = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Sequence/BismarkIndex/" - readme = "${params.igenomes_base}/Saccharomyces_cerevisiae/UCSC/sacCer3/Annotation/README.txt" - mito_name = "chrM" - macs_gsize = "1.2e7" - } - 'susScr3' { - fasta = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/WholeGenomeFasta/genome.fa" - bwa = 
"${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BWAIndex/version0.6.0/" - bowtie2 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/Bowtie2Index/" - star = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/STARIndex/" - bismark = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Sequence/BismarkIndex/" - gtf = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.gtf" - bed12 = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/Genes/genes.bed" - readme = "${params.igenomes_base}/Sus_scrofa/UCSC/susScr3/Annotation/README.txt" - mito_name = "chrM" - } - } -} diff --git a/conf/igenomes_ignored.config b/conf/igenomes_ignored.config deleted file mode 100644 index b4034d82..00000000 --- a/conf/igenomes_ignored.config +++ /dev/null @@ -1,9 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Nextflow config file for iGenomes paths -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Empty genomes dictionary to use when igenomes is ignored. ----------------------------------------------------------------------------------------- -*/ - -params.genomes = [:] diff --git a/conf/modules.config b/conf/modules.config index d266a387..c7b907d6 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -10,17 +10,17 @@ ---------------------------------------------------------------------------------------- */ -process { +// +// General configuration options +// +process { publishDir = [ path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] - withName: FASTQC { - ext.args = '--quiet' - } withName: 'MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } publishDir = [ @@ -29,5 +29,10 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } +} +if (params.platform == 'nanopore') { + includeConfig 'modules_nanopore.config' +} else if (params.platform == 'illumina') { + includeConfig 'modules_illumina.config' } diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config new file mode 100644 index 00000000..1756cb48 --- /dev/null +++ b/conf/modules_illumina.config @@ -0,0 +1,1099 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +def variant_caller = params.variant_caller +if (!variant_caller) { variant_caller = params.protocol == 'amplicon' ? 'ivar' : 'bcftools' } + +def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : [] + +// +// Pre-processing and general configuration options +// + +process { + withName: '.*:.*:PREPARE_GENOME:GUNZIP_.*' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: '.*:.*:PREPARE_GENOME:UNTAR_.*' { + ext.args2 = '--no-same-owner' + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + + withName: 'CUSTOM_GETCHROMSIZES' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'CAT_FASTQ' { + publishDir = [ + path: { "${params.outdir}/fastq" }, + enabled: false + ] + } +} + +if (!params.skip_fastqc) { + process { + withName: '.*:.*:FASTQ_TRIM_FASTP_FASTQC:FASTQC_RAW' { + ext.args = '--quiet' + publishDir = [ + path: { "${params.outdir}/fastqc/raw" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +if (!params.skip_fastp) { + process { + withName: 'FASTP' { + ext.args = '--cut_front --cut_tail --trim_poly_x --cut_mean_quality 30 --qualified_quality_phred 30 --unqualified_percent_limit 10 --length_required 50' + publishDir = [ + [ + path: { "${params.outdir}/fastp" }, + mode: params.publish_dir_mode, + pattern: "*.{json,html}" + ], + [ + path: { "${params.outdir}/fastp/log" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/fastp" }, + mode: params.publish_dir_mode, + pattern: "*.fail.fastq.gz", + enabled: params.save_trimmed_fail + ] + ] + } + } + + if (!params.skip_fastqc) { + process { + withName: '.*:.*:FASTQ_TRIM_FASTP_FASTQC:FASTQC_TRIM' { + ext.args = '--quiet' + publishDir = [ + path: { "${params.outdir}/fastqc/trim" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } +} + +if (!params.skip_kraken2) { + process { + withName: 'KRAKEN2_BUILD' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + + withName: 'KRAKEN2_KRAKEN2' { + ext.args = '--report-zero-counts' + publishDir = [ + path: { "${params.outdir}/kraken2" }, + mode: params.publish_dir_mode, + pattern: "*report.txt" + ] + } + } +} + +// +// Variant calling configuration options +// + +if (!params.skip_variants) { + process { + withName: 'BOWTIE2_BUILD' { + ext.args = '--seed 1' + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'BOWTIE2_ALIGN' { + ext.args = '--local --very-sensitive-local --seed 1' + ext.args2 = '-F4 -bhS' + publishDir = [ + [ + path: { "${params.outdir}/variants/bowtie2/log" }, + mode: params.publish_dir_mode, + pattern: "*.log" + ], + [ + path: { "${params.outdir}/variants/bowtie2/unmapped" }, + mode: params.publish_dir_mode, + pattern: "*.fastq.gz", + enabled: params.save_unaligned + ] + ] + } + + withName: '.*:.*:FASTQ_ALIGN_BOWTIE2:.*:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.sorted" } + publishDir = [ + path: { "${params.outdir}/variants/bowtie2" }, + mode: params.publish_dir_mode, + pattern: "*.bam" + ] + } + + withName: '.*:.*:FASTQ_ALIGN_BOWTIE2:.*:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/variants/bowtie2" }, + mode: params.publish_dir_mode, + pattern: "*.bai" + ] + } + + withName: '.*:.*:FASTQ_ALIGN_BOWTIE2:.*:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + } + + if (!params.skip_freyja) { + process { + withName: 'FREYJA_VARIANTS' { + ext.args = {} + publishDir = [ + path: { "${params.outdir}/variants/freyja/variants" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + withName: 'FREYJA_DEMIX' { + ext.args = [ + 
params.freyja_depthcutoff ? "--depthcutoff ${params.freyja_depthcutoff}" : '', + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/variants/freyja/demix" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + withName: 'FREYJA_BOOT' { + ext.args = [ + params.freyja_depthcutoff ? "--depthcutoff ${params.freyja_depthcutoff}" : '', + '--boxplot pdf', + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/variants/freyja/bootstrap" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv,pdf}" + ] + } + withName: 'FREYJA_UPDATE' { + publishDir = [ + path: { "${params.outdir}/variants/freyja/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + } + } + + if (!params.skip_ivar_trim && params.protocol == 'amplicon') { + process { + withName: 'IVAR_TRIM' { + ext.args = [ + '-m 30 -q 20', + params.ivar_trim_noprimer ? '' : '-e', + params.ivar_trim_offset ? "-x ${params.ivar_trim_offset}" : '' + ].join(' ').trim() + ext.prefix = { "${meta.id}.ivar_trim" } + publishDir = [ + path: { "${params.outdir}/variants/bowtie2/log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] + } + + withName: '.*:.*:BAM_TRIM_PRIMERS_IVAR:.*:SAMTOOLS_SORT' { + ext.prefix = { "${meta.id}.ivar_trim.sorted" } + publishDir = [ + path: { "${params.outdir}/variants/bowtie2" }, + mode: params.publish_dir_mode, + pattern: "*.bam", + enabled: params.skip_markduplicates + ] + } + + withName: '.*:.*:BAM_TRIM_PRIMERS_IVAR:.*:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/variants/bowtie2" }, + mode: params.publish_dir_mode, + pattern: "*.bai", + enabled: params.skip_markduplicates + ] + } + + withName: '.*:.*:BAM_TRIM_PRIMERS_IVAR:.*:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.ivar_trim.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + 
} + } + + if (!params.skip_markduplicates) { + process { + withName: 'PICARD_MARKDUPLICATES' { + ext.args = [ + '--ASSUME_SORTED true --VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp', + params.filter_duplicates ? 'REMOVE_DUPLICATES=true' : '' + ].join(' ').trim() + ext.prefix = { "${meta.id}.markduplicates.sorted" } + publishDir = [ + [ + path: { "${params.outdir}/variants/bowtie2/picard_metrics" }, + mode: params.publish_dir_mode, + pattern: '*metrics.txt' + ], + [ + path: { "${params.outdir}/variants/bowtie2" }, + mode: params.publish_dir_mode, + pattern: '*.bam' + ] + ] + } + + withName: '.*:BAM_MARKDUPLICATES_PICARD:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/variants/bowtie2" }, + mode: params.publish_dir_mode, + pattern: '*.bai' + ] + } + + withName: '.*:BAM_MARKDUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.markduplicates.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/variants/bowtie2/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: '*.{stats,flagstat,idxstats}' + ] + } + } + } + + if (!params.skip_picard_metrics) { + process { + withName: 'PICARD_COLLECTMULTIPLEMETRICS' { + ext.args = '--VALIDATION_STRINGENCY LENIENT --TMP_DIR tmp' + publishDir = [ + [ + path: { "${params.outdir}/variants/bowtie2/picard_metrics" }, + mode: params.publish_dir_mode, + pattern: '*metrics' + ], + [ + path: { "${params.outdir}/variants/bowtie2/picard_metrics/pdf" }, + mode: params.publish_dir_mode, + pattern: '*.pdf' + ] + ] + } + } + } + + if (!params.skip_mosdepth) { + process { + withName: 'MOSDEPTH_GENOME' { + ext.args = '--fast-mode --by 200' + publishDir = [ + path: { "${params.outdir}/variants/bowtie2/mosdepth/genome" }, + mode: params.publish_dir_mode, + pattern: "*.summary.txt" + ] + } + + withName: 'PLOT_MOSDEPTH_REGIONS_GENOME' { + ext.args = '--input_suffix .regions.bed.gz' + publishDir = [ + path: { "${params.outdir}/variants/bowtie2/mosdepth/genome" }, + mode: params.publish_dir_mode, + pattern: 
"*.{tsv,pdf}" + ] + } + } + + if (params.protocol == 'amplicon') { + process { + withName: 'COLLAPSE_PRIMERS' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'MOSDEPTH_AMPLICON' { + ext.args = '--fast-mode --use-median --thresholds 0,1,10,50,100,500' + publishDir = [ + path: { "${params.outdir}/variants/bowtie2/mosdepth/amplicon" }, + mode: params.publish_dir_mode, + pattern: "*.summary.txt" + ] + } + + withName: 'PLOT_MOSDEPTH_REGIONS_AMPLICON' { + ext.args = '--input_suffix .regions.bed.gz' + publishDir = [ + path: { "${params.outdir}/variants/bowtie2/mosdepth/amplicon" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,pdf}" + ] + } + } + } + } + + if (variant_caller == 'ivar') { + process { + withName: 'IVAR_VARIANTS' { + ext.args = '-t 0.25 -q 20 -m 10' + ext.args2 = '--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0' + publishDir = [ + path: { "${params.outdir}/variants/ivar" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'IVAR_VARIANTS_TO_VCF' { + ext.args = params.protocol == 'amplicon' ? '--ignore_strand_bias' : '' + publishDir = [ + path: { "${params.outdir}/variants/ivar/log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] + } + + withName: '.*:.*:VARIANTS_IVAR:BCFTOOLS_SORT' { + publishDir = [ + path: { "${params.outdir}/variants/ivar" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:VARIANTS_IVAR:.*:TABIX_TABIX' { + ext.args = '-p vcf -f' + publishDir = [ + path: { "${params.outdir}/variants/ivar" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*:.*:VARIANTS_IVAR:.*:BCFTOOLS_STATS' { + publishDir = [ + path: { "${params.outdir}/variants/ivar/bcftools_stats" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (variant_caller == 'bcftools') { + process { + withName: 'BCFTOOLS_MPILEUP' { + ext.args = '--ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 20 --annotate FORMAT/AD,FORMAT/ADF,FORMAT/ADR,FORMAT/DP,FORMAT/SP,INFO/AD,INFO/ADF,INFO/ADR' + ext.args2 = '--ploidy 1 --keep-alts --keep-masked-ref --multiallelic-caller --variants-only' + ext.args3 = "--include 'INFO/DP>=10'" + ext.prefix = { "${meta.id}.orig" } + publishDir = [ + path: { "${params.outdir}/variants/bcftools" }, + mode: params.publish_dir_mode, + pattern: '*.mpileup', + enabled: params.save_mpileup + ] + } + + withName: 'BCFTOOLS_NORM' { + ext.args = '--do-not-normalize --output-type z --multiallelics -any' + publishDir = [ + path: { "${params.outdir}/variants/bcftools" }, + mode: params.publish_dir_mode, + pattern: "*.vcf.gz" + ] + } + + withName: '.*:.*:VARIANTS_BCFTOOLS:.*:TABIX_TABIX' { + ext.args = '-p vcf -f' + publishDir = [ + path: { "${params.outdir}/variants/bcftools" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:VARIANTS_BCFTOOLS:.*:BCFTOOLS_STATS' { + publishDir = [ + path: { "${params.outdir}/variants/bcftools/bcftools_stats" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_asciigenome) { + process { + withName: 'ASCIIGENOME' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/asciigenome/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } + + if (!params.skip_snpeff) { + process { + withName: 'SNPEFF_BUILD' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'SNPEFF_ANN' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: params.publish_dir_mode, + pattern: "*.{csv,txt,html}" + ] + } + + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:.*:TABIX_BGZIP' { + ext.prefix = { "${meta.id}.snpeff" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:.*:.*:TABIX_TABIX' { + ext.args = '-p vcf -f' + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:.*:.*:BCFTOOLS_STATS' { + ext.prefix = { "${meta.id}.snpeff" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff/bcftools_stats" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:.*:.*:SNPEFF_SNPSIFT:SNPSIFT_EXTRACTFIELDS' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/snpeff" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_variants_long_table) { + process { + withName: 'BCFTOOLS_QUERY' { + ext.args = [ + variant_caller == 'ivar' ? "-H -f '%CHROM\\t%POS\\t%REF\\t%ALT\\t%FILTER\\t[%DP\\t]\\t[%REF_DP\\t]\\t[%ALT_DP\\t]\\n'" : '', + variant_caller == 'bcftools' ? 
"-H -f '%CHROM\\t%POS\\t%REF\\t%ALT\\t%FILTER\\t[%DP\\t]\\t[%AD\\t]\\n'" : '', + ].join(' ').trim() + ext.prefix = { "${meta.id}.bcftools_query" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}" }, + enabled: false + ] + } + + withName: 'MAKE_VARIANTS_LONG_TABLE' { + ext.args = "--variant_caller ${variant_caller}" + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'MAKE_VARIANTS_LONG_TABLE_ADDITIONAL' { + ext.args = "--variant_caller ${variant_caller} --output_file 'additional_variants_long_table.csv'" + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + } + + if (!params.skip_consensus && params.consensus_caller == 'ivar') { + process { + withName: 'IVAR_CONSENSUS' { + ext.args = '-t 0.75 -q 20 -m 10 -n N' + ext.args2 = '--count-orphans --no-BAQ --max-depth 0 --min-BQ 0 -aa' + ext.prefix = { "${meta.id}.consensus" } + publishDir = [ + [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/ivar" }, + mode: params.publish_dir_mode, + pattern: "*.{fa,txt}", + ], + [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/ivar" }, + mode: params.publish_dir_mode, + pattern: "*.mpileup", + enabled: params.save_mpileup + ] + ] + } + } + } + + if (!params.skip_consensus && params.consensus_caller == 'bcftools') { + process { + withName: 'BCFTOOLS_FILTER' { + ext.args = [ + '--output-type z', + variant_caller == 'ivar' ? "--include 'FORMAT/ALT_FREQ >= 0.75'" : '', + variant_caller == 'bcftools' ? 
"--include 'FORMAT/AD[:1] / FORMAT/DP >= 0.75'" : '', + ].join(' ').trim() + ext.prefix = { "${meta.id}.filtered" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:CONSENSUS_BCFTOOLS:TABIX_TABIX' { + ext.args = '-p vcf -f' + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'MAKE_BED_MASK' { + ext.args = "-a --ignore-overlaps --count-orphans --no-BAQ --max-depth 0 --min-BQ 0" + ext.args2 = 10 + ext.prefix = { "${meta.id}.coverage.masked" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + mode: params.publish_dir_mode, + pattern: "*.mpileup", + enabled: params.save_mpileup + ] + } + + withName: 'BEDTOOLS_MERGE' { + ext.prefix = { "${meta.id}.coverage.merged" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + enabled: false + ] + } + + withName: 'BEDTOOLS_MASKFASTA' { + ext.prefix = { "${meta.id}.masked" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + enabled: false + ] + } + + withName: 'BCFTOOLS_CONSENSUS' { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + enabled: false + ] + } + + withName: 'RENAME_FASTA_HEADER' { + ext.prefix = { "${meta.id}.consensus" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/bcftools" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } + + if (!params.skip_consensus) { + if (!params.skip_pangolin) { + process { + withName: 'PANGOLIN' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/pangolin" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_nextclade) { + process { + withName: 'NEXTCLADE_RUN' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/nextclade" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.endsWith(".csv") && !filename.endsWith("errors.csv") && !filename.endsWith("insertions.csv") ? filename : null } + ] + } + } + } + + if (!params.skip_variants_quast) { + process { + withName: '.*:.*:CONSENSUS_.*:.*:QUAST' { + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}" }, + mode: params.publish_dir_mode, + pattern: "quast" + ] + } + } + } + + if (!params.skip_consensus_plots) { + process { + withName: 'PLOT_BASE_DENSITY' { + ext.prefix = { "${meta.id}.consensus" } + publishDir = [ + path: { "${params.outdir}/variants/${variant_caller}/consensus/${params.consensus_caller}/base_qc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + } +} + +if (!params.skip_assembly) { + if (!params.skip_blast) { + process { + withName: 'BLAST_MAKEBLASTDB' { + ext.args = '-parse_seqids -dbtype nucl' + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + } + } + + if (params.protocol == 'amplicon' && !params.skip_cutadapt) { + process { + withName: 'BEDTOOLS_GETFASTA' { + ext.args = '-s -nameOnly' + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'CUTADAPT' { + ext.args = '--overlap 5 --minimum-length 30 --error-rate 0.1' + ext.prefix = { "${meta.id}.primer_trim" } + publishDir = [ + path: { "${params.outdir}/assembly/cutadapt/log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] + } + } + + if (!params.skip_fastqc) { + process { + withName: '.*:.*:FASTQC' { + ext.args = '--quiet' + ext.prefix = { "${meta.id}.primer_trim" } + publishDir = [ + path: { "${params.outdir}/assembly/cutadapt/fastqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + } + + if ('spades' in assemblers) { + process { + withName: 'SPADES' { + ext.args = params.spades_mode ? 
"--${params.spades_mode}" : '' + publishDir = [ + [ + path: { "${params.outdir}/assembly/spades/${params.spades_mode}" }, + mode: params.publish_dir_mode, + pattern: '*.{fa.gz,gfa.gz}' + ], + [ + path: { "${params.outdir}/assembly/spades/${params.spades_mode}/log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] + ] + } + + withName: '.*:.*:ASSEMBLY_SPADES:GUNZIP_SCAFFOLDS' { + publishDir = [ + path: { "${params.outdir}/assembly/spades/${params.spades_mode}" }, + enabled: false + ] + } + + withName: '.*:.*:ASSEMBLY_SPADES:GUNZIP_GFA' { + publishDir = [ + path: { "${params.outdir}/assembly/spades/${params.spades_mode}" }, + enabled: false + ] + } + } + + if (!params.skip_bandage) { + process { + withName: '.*:.*:ASSEMBLY_SPADES:BANDAGE_IMAGE' { + ext.args = '--height 1000' + publishDir = [ + path: { "${params.outdir}/assembly/spades/${params.spades_mode}/bandage" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_blast) { + process { + withName: '.*:.*:ASSEMBLY_SPADES:.*:BLAST_BLASTN' { + ext.args = "-outfmt '6 stitle staxids std slen qlen qcovs'" + publishDir = [ + path: { "${params.outdir}/assembly/spades/${params.spades_mode}/blastn" }, + enabled: false + ] + } + + withName: '.*:.*:ASSEMBLY_SPADES:.*:FILTER_BLASTN' { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/assembly/spades/${params.spades_mode}/blastn" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } + + if (!params.skip_assembly_quast) { + process { + withName: '.*:.*:ASSEMBLY_SPADES:.*:QUAST' { + publishDir = [ + path: { "${params.outdir}/assembly/spades/${params.spades_mode}" }, + mode: params.publish_dir_mode, + pattern: "quast" + ] + } + } + } + + if (!params.skip_abacas) { + process { + withName: '.*:.*:ASSEMBLY_SPADES:.*:ABACAS' { + ext.args = '-m -p nucmer' + publishDir = [ + path: { "${params.outdir}/assembly/spades/${params.spades_mode}/abacas" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_plasmidid) { + process { + withName: '.*:.*:ASSEMBLY_SPADES:.*:PLASMIDID' { + ext.args = '--only-reconstruct -C 47 -S 47 -i 60 --no-trim -k 0.80' + publishDir = [ + path: { "${params.outdir}/assembly/spades/${params.spades_mode}/plasmidid" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + } + + if ('unicycler' in assemblers) { + process { + withName: 'UNICYCLER' { + publishDir = [ + [ + path: { "${params.outdir}/assembly/unicycler" }, + mode: params.publish_dir_mode, + pattern: '*.{fa.gz,gfa.gz}' + ], + [ + path: { "${params.outdir}/assembly/unicycler/log" }, + mode: params.publish_dir_mode, + pattern: '*.log' + ] + ] + } + + withName: '.*:.*:ASSEMBLY_UNICYCLER:GUNZIP_SCAFFOLDS' { + publishDir = [ + path: { "${params.outdir}/assembly/unicycler" }, + enabled: false + ] + } + + withName: '.*:.*:ASSEMBLY_UNICYCLER:GUNZIP_GFA' { + publishDir = [ + path: { "${params.outdir}/assembly/unicycler" }, + enabled: false + ] + } + } + + if (!params.skip_bandage) { + process { + withName: '.*:.*:ASSEMBLY_UNICYCLER:BANDAGE_IMAGE' { + ext.args = '--height 1000' + publishDir = [ + path: { "${params.outdir}/assembly/unicycler/bandage" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + } + + if (!params.skip_blast) { + process { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:BLAST_BLASTN' { + ext.args = "-outfmt '6 stitle staxids std slen qlen qcovs'" + publishDir = [ + path: { "${params.outdir}/assembly/unicycler/blastn" }, + enabled: false + ] + } + + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:FILTER_BLASTN' { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/assembly/unicycler/blastn" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_assembly_quast) { + process { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:QUAST' { + publishDir = [ + path: { "${params.outdir}/assembly/unicycler" }, + mode: params.publish_dir_mode, + pattern: "quast" + ] + } + } + } + + if (!params.skip_abacas) { + process { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:ABACAS' { + ext.args = '-m -p nucmer' + publishDir = [ + path: { "${params.outdir}/assembly/unicycler/abacas" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_plasmidid) { + process { + withName: '.*:.*:ASSEMBLY_UNICYCLER:.*:PLASMIDID' { + ext.args = '--only-reconstruct -C 47 -S 47 -i 60 --no-trim -k 0.80' + publishDir = [ + path: { "${params.outdir}/assembly/unicycler/plasmidid" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + } + + if ('minia' in assemblers) { + process { + withName: 'MINIA' { + ext.args = '-kmer-size 31 -abundance-min 20' + publishDir = [ + path: { "${params.outdir}/assembly/minia" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } + + if (!params.skip_blast) { + process { + withName: '.*:.*:ASSEMBLY_MINIA:.*:BLAST_BLASTN' { + ext.args = "-outfmt '6 stitle staxids std slen qlen qcovs'" + publishDir = [ + path: { "${params.outdir}/assembly/minia/blastn" }, + enabled: false + ] + } + + withName: '.*:.*:ASSEMBLY_MINIA:.*:FILTER_BLASTN' { + ext.prefix = { "${meta.id}" } + publishDir = [ + path: { "${params.outdir}/assembly/minia/blastn" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_assembly_quast) { + process { + withName: '.*:.*:ASSEMBLY_MINIA:.*:QUAST' { + publishDir = [ + path: { "${params.outdir}/assembly/minia" }, + mode: params.publish_dir_mode, + pattern: "quast" + ] + } + } + } + + if (!params.skip_abacas) { + process { + withName: '.*:.*:ASSEMBLY_MINIA:.*:ABACAS' { + ext.args = '-m -p nucmer' + publishDir = [ + path: { "${params.outdir}/assembly/minia/abacas" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + + if (!params.skip_plasmidid) { + process { + withName: '.*:.*:ASSEMBLY_MINIA:.*:PLASMIDID' { + ext.args = '--only-reconstruct -C 47 -S 47 -i 60 --no-trim -k 0.80' + publishDir = [ + path: { "${params.outdir}/assembly/minia/plasmidid" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } + } +} + +if (!params.skip_multiqc) { + process { + withName: 'MULTIQC' { + ext.args = [ + '-k yaml', + params.multiqc_title ? 
"--title \"$params.multiqc_title\"" : '' + ].join(' ').trim() + publishDir = [ + [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + pattern: 'multiqc*' + ], + [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + pattern: '*variants_metrics_mqc.csv', + enabled: !params.skip_variants + ], + [ + path: { "${params.outdir}/multiqc" }, + mode: params.publish_dir_mode, + pattern: '*assembly_metrics_mqc.csv', + enabled: !params.skip_assembly + ] + ] + } + } +} diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config new file mode 100644 index 00000000..f877ef44 --- /dev/null +++ b/conf/modules_nanopore.config @@ -0,0 +1,402 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Config file for defining DSL2 per module options and publishing paths +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Available keys to override module options: + ext.args = Additional arguments appended to command in module. + ext.args2 = Second set of arguments appended to command in module (multi-tool modules). + ext.args3 = Third set of arguments appended to command in module (multi-tool modules). + ext.prefix = File name prefix for output files. +---------------------------------------------------------------------------------------- +*/ + +// +// General configuration options +// + +process { + withName: 'GUNZIP_.*' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'CUSTOM_GETCHROMSIZES' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + + withName: 'ARTIC_GUPPYPLEX' { + ext.args = params.primer_set_version == 1200 ? '--min-length 250 --max-length 1500' : '--min-length 400 --max-length 700' + publishDir = [ + path: { "${params.outdir}/guppyplex" }, + enabled: false + ] + } + + withName: 'ARTIC_MINION' { + ext.args = [ + '--normalise 500', + params.artic_minion_caller == 'medaka' ? '--medaka' : '', + params.artic_minion_aligner == 'bwa' ? '--bwa' : '--minimap2' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}" }, + mode: params.publish_dir_mode, + pattern: "*.{sorted.bam,sorted.bam.bai,fail.vcf,merged.vcf,primers.vcf,gz,tbi,consensus.fasta}" + ] + } + + withName: 'VCFLIB_VCFUNIQ' { + ext.args = '-f' + ext.prefix = { "${meta.id}.pass.unique" } + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:TABIX_TABIX' { + ext.args = '-p vcf -f' + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:.*:SAMTOOLS_VIEW' { + ext.args = '-b -F 4' + ext.prefix = { "${meta.id}.mapped.sorted" } + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:.*:SAMTOOLS_INDEX' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*:.*:.*:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.mapped.sorted.bam" } + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/samtools_stats" }, + mode: params.publish_dir_mode, + pattern: "*.{stats,flagstat,idxstats}" + ] + } + + withName: '.*:.*:BCFTOOLS_STATS' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/bcftools_stats" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } +} + +// +// Optional configuration options +// + +if (params.sequencing_summary && !params.skip_pycoqc) { + process { + withName: 'PYCOQC' { + ext.prefix = 'pycoqc' + publishDir = [ + path: { "${params.outdir}/pycoqc" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +if (!params.skip_nanoplot) { + process { + withName: 'NANOPLOT' { + publishDir = [ + path: { "${params.outdir}/nanoplot/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +if (!params.skip_mosdepth) { + process { + withName: 'COLLAPSE_PRIMERS' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + + withName: 'MOSDEPTH_GENOME' { + ext.args = '--fast-mode --by 200' + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/mosdepth/genome" }, + mode: params.publish_dir_mode, + pattern: "*.summary.txt" + ] + } + + withName: 'PLOT_MOSDEPTH_REGIONS_GENOME' { + ext.args = '--input_suffix .regions.bed.gz' + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/mosdepth/genome" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,pdf}" + ] + } + + withName: 'MOSDEPTH_AMPLICON' { + ext.args = '--fast-mode --use-median --thresholds 0,1,10,50,100,500' + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/mosdepth/amplicon" }, + mode: params.publish_dir_mode, + pattern: "*.summary.txt" + ] + } + + withName: 'PLOT_MOSDEPTH_REGIONS_AMPLICON' { + ext.args = '--input_suffix .regions.bed.gz' + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/mosdepth/amplicon" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,pdf}" + ] + } + } +} + +if (!params.skip_pangolin) { + process { + withName: 'PANGOLIN' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/pangolin" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +if (!params.skip_nextclade) { + process { + withName: 'UNTAR' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + enabled: params.save_reference + ] + } + + withName: 'NEXTCLADE_DATASETGET' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + + withName: 'NEXTCLADE_RUN' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/nextclade" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.endsWith(".csv") && !filename.endsWith("errors.csv") && !filename.endsWith("insertions.csv") ? filename : null } + ] + } + } +} + +if (!params.skip_freyja) { + process { + withName: 'FREYJA_VARIANTS' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/variants" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + + withName: 'FREYJA_DEMIX' { + ext.args = [ + params.freyja_depthcutoff ? "--depthcutoff ${params.freyja_depthcutoff}" : '', + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/variants/freyja/demix" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv}" + ] + } + + withName: 'FREYJA_BOOT' { + ext.args = [ + params.freyja_depthcutoff ? "--depthcutoff ${params.freyja_depthcutoff}" : '', + '--boxplot pdf', + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/variants/freyja/bootstrap" }, + mode: params.publish_dir_mode, + pattern: "*.{tsv,csv,pdf}" + ] + } + + withName: 'FREYJA_UPDATE' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/freyja/" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, + ] + } + } +} + +if (!params.skip_variants_quast) { + process { + withName: 'QUAST' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}" }, + mode: params.publish_dir_mode, + pattern: "quast" + ] + } + } +} + +if (!params.skip_snpeff) { + process { + withName: 'SNPEFF_BUILD' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, + enabled: params.save_reference + ] + } + + withName: 'SNPEFF_ANN' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/snpeff" }, + mode: params.publish_dir_mode, + pattern: "*.{csv,txt,html}" + ] + } + + withName: '.*:.*:.*:.*:TABIX_BGZIP' { + ext.prefix = { "${meta.id}.snpeff" } + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/snpeff" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:.*:.*:.*:TABIX_TABIX' { + ext.args = '-p vcf -f' + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/snpeff" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*:.*:.*:.*:.*:BCFTOOLS_STATS' { + ext.prefix = { "${meta.id}.snpeff" } + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/snpeff/bcftools_stats" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: 'SNPSIFT_EXTRACTFIELDS' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/snpeff" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + + if (!params.skip_variants_long_table) { + process { + withName: 'BCFTOOLS_QUERY' { + ext.args = [ + params.artic_minion_caller == 'nanopolish' ? "-H -f '%CHROM\\t%POS\\t%REF\\t%ALT\\t%FILTER\\t%StrandSupport\\n'" : '', + params.artic_minion_caller == 'medaka' ? 
"-H -f '%CHROM\\t%POS\\t%REF\\t%ALT\\t%FILTER\\t%DP\\t%AC\\n'" : '' + ].join(' ').trim() + ext.prefix = { "${meta.id}.bcftools_query" } + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}" }, + enabled: false + ] + } + + withName: 'MAKE_VARIANTS_LONG_TABLE' { + ext.args = "--variant_caller ${params.artic_minion_caller}" + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: 'MAKE_VARIANTS_LONG_TABLE_ADDITIONAL' { + ext.args = "--variant_caller ${params.artic_minion_caller} --output_file 'additional_variants_long_table.csv'" + publishDir = [ + path: { "${params.outdir}/variants/${params.artic_minion_caller}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } + } +} + +if (!params.skip_asciigenome) { + process { + withName: 'ASCIIGENOME' { + publishDir = [ + path: { "${params.outdir}/${params.artic_minion_caller}/asciigenome/${meta.id}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + } +} + +if (!params.skip_multiqc) { + process { + withName: 'MULTIQC' { + ext.args = [ + '-k yaml', + params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' + ].join(' ').trim() + publishDir = [ + path: { "${params.outdir}/multiqc/${params.artic_minion_caller}" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + } +} diff --git a/conf/test.config b/conf/test.config index 430b4e5f..aa66c213 100644 --- a/conf/test.config +++ b/conf/test.config @@ -22,11 +22,21 @@ params { config_profile_name = 'Test profile' config_profile_description = 'Minimal test dataset to check pipeline function' - // Input data - // TODO nf-core: Specify the paths to your test data on nf-core/test-datasets - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv' + // Input data to test amplicon analysis + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/v2.6/samplesheet_test_amplicon_illumina.csv' + platform = 'illumina' + protocol = 'amplicon' + primer_set = 'artic' + primer_set_version = 1 // Genome references - genome = 'R64-1-1' + genome = 'MN908947.3' + kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' + + // Variant calling options + variant_caller = 'ivar' + freyja_repeats = 10 + + // Assembly options + assemblers = 'spades,unicycler' } diff --git a/conf/test_full.config b/conf/test_full.config index 3aa3ddb6..5382ff51 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -14,11 +14,26 @@ params { config_profile_name = 'Full test profile' config_profile_description = 'Full test dataset to check pipeline function' - // Input data for full size test - // TODO nf-core: Specify the paths to your full test data ( on nf-core/test-datasets or directly in repositories, e.g. 
SRA) - // TODO nf-core: Give any required params for the test so that command line flags are not needed - input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/samplesheet_full_illumina_amplicon.csv' + // Input data for full test of amplicon analysis + input = params.pipelines_testdata_base_path + 'viralrecon/samplesheet/v2.6/samplesheet_full_amplicon_illumina.csv' + platform = 'illumina' + protocol = 'amplicon' + primer_set = 'artic' + primer_set_version = 3 // Genome references - genome = 'R64-1-1' + genome = 'MN908947.3' + + // Variant calling options + variant_caller = 'ivar' + freyja_repeats = 10 + + // Assembly options + assemblers = 'spades,unicycler,minia' +} + +process { + withName: 'PLASMIDID' { + errorStrategy = 'ignore' + } } diff --git a/conf/test_full_nanopore.config b/conf/test_full_nanopore.config new file mode 100644 index 00000000..43fceae1 --- /dev/null +++ b/conf/test_full_nanopore.config @@ -0,0 +1,34 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running full-size tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a full size pipeline test. 
+
+    Use as follows:
+        nextflow run nf-core/viralrecon -profile test_full_nanopore,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Full test profile for nanopore data'
+    config_profile_description = 'Full test dataset to check pipeline function'
+
+    // Input data for full test of amplicon analysis
+    platform           = 'nanopore'
+    input              = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_full_amplicon_nanopore.csv'
+    fastq_dir          = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/fastq_pass/'
+    fast5_dir          = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/fast5_pass/'
+    sequencing_summary = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/sequencing_summary.txt'
+
+    // Genome references
+    genome             = 'MN908947.3'
+    primer_set         = 'artic'
+    primer_set_version = 3
+
+    // variant calling options
+    freyja_repeats     = 10
+
+    // Other parameters
+    artic_minion_medaka_model = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5'
+}
diff --git a/conf/test_full_sispa.config b/conf/test_full_sispa.config
new file mode 100644
index 00000000..54e0235f
--- /dev/null
+++ b/conf/test_full_sispa.config
@@ -0,0 +1,37 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running full-size tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a full size pipeline test.
+
+    Use as follows:
+        nextflow run nf-core/viralrecon -profile test_full_sispa,<docker/singularity> --outdir <OUTDIR>
+
+----------------------------------------------------------------------------------------
+*/
+
+params {
+    config_profile_name        = 'Full test profile'
+    config_profile_description = 'Full test dataset to check pipeline function'
+
+    // Input data for full test of SISPA/metagenomics analysis
+    input    = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_full_metagenomic_illumina.csv'
+    platform = 'illumina'
+    protocol = 'metagenomic'
+
+    // Genome references
+    genome = 'MN908947.3'
+
+    // Variant calling options
+    variant_caller = 'bcftools'
+    freyja_repeats = 10
+
+    // Assembly options
+    assemblers = 'spades,unicycler,minia'
+}
+
+process {
+    withName: 'PLASMIDID' {
+        errorStrategy = 'ignore'
+    }
+}
diff --git a/conf/test_nanopore.config b/conf/test_nanopore.config
new file mode 100644
index 00000000..13009cee
--- /dev/null
+++ b/conf/test_nanopore.config
@@ -0,0 +1,42 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Nextflow config file for running minimal tests
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    Defines input files and everything required to run a fast and simple pipeline test.
+ + Use as follows: + nextflow run nf-core/viralrecon -profile test_nanopore, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function for Nanopore data' + + // Input data to test nanopore analysis + platform = 'nanopore' + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_test_amplicon_nanopore.csv' + fastq_dir = 's3://ngi-igenomes/test-data/viralrecon/minion_test/fastq_pass/' + fast5_dir = 's3://ngi-igenomes/test-data/viralrecon/minion_test/fast5_pass/' + sequencing_summary = 's3://ngi-igenomes/test-data/viralrecon/minion_test/sequencing_summary.txt' + + // Genome references + genome = 'MN908947.3' + primer_set = 'artic' + primer_set_version = 3 + + // variant calling options + freyja_repeats = 10 + + // Other parameters + artic_minion_medaka_model = 's3://ngi-igenomes/test-data/viralrecon/minion_test/r941_min_high_g360_model.hdf5' +} diff --git a/conf/test_sispa.config b/conf/test_sispa.config new file mode 100644 index 00000000..00bd8438 --- /dev/null +++ b/conf/test_sispa.config @@ -0,0 +1,40 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. 
+ + Use as follows: + nextflow run nf-core/viralrecon -profile test_sispa, --outdir + +---------------------------------------------------------------------------------------- +*/ + +process { + resourceLimits = [ + cpus: 4, + memory: '15.GB', + time: '1.h' + ] +} + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Input data to test SISPA/metagenomics analysis + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_test_metagenomic_illumina.csv' + platform = 'illumina' + protocol = 'metagenomic' + + // Genome references + genome = 'MN908947.3' + kraken2_db = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/genome/kraken2/kraken2_hs22.tar.gz' + + // Variant calling options + variant_caller = 'bcftools' + freyja_repeats = 10 + + // Assembly options + assemblers = 'spades,unicycler,minia' +} diff --git a/docs/images/asciigenome_screenshot.png b/docs/images/asciigenome_screenshot.png new file mode 100755 index 00000000..55b7f071 Binary files /dev/null and b/docs/images/asciigenome_screenshot.png differ diff --git a/docs/images/freyja_screenshot.png b/docs/images/freyja_screenshot.png new file mode 100644 index 00000000..cae764d4 Binary files /dev/null and b/docs/images/freyja_screenshot.png differ diff --git a/docs/images/mqc_bcftools_stats_plot.png b/docs/images/mqc_bcftools_stats_plot.png new file mode 100755 index 00000000..dcdab9c3 Binary files /dev/null and b/docs/images/mqc_bcftools_stats_plot.png differ diff --git a/docs/images/mqc_bowtie2_plot.png b/docs/images/mqc_bowtie2_plot.png new file mode 100755 index 00000000..edb2f472 Binary files /dev/null and b/docs/images/mqc_bowtie2_plot.png differ diff --git a/docs/images/mqc_cutadapt_plot.png b/docs/images/mqc_cutadapt_plot.png new file mode 100755 index 00000000..1cbdea48 Binary files /dev/null and b/docs/images/mqc_cutadapt_plot.png differ diff --git 
a/docs/images/mqc_fastp_plot.png b/docs/images/mqc_fastp_plot.png new file mode 100755 index 00000000..798539ca Binary files /dev/null and b/docs/images/mqc_fastp_plot.png differ diff --git a/docs/images/mqc_fastqc_plot.png b/docs/images/mqc_fastqc_plot.png new file mode 100755 index 00000000..25401cff Binary files /dev/null and b/docs/images/mqc_fastqc_plot.png differ diff --git a/docs/images/mqc_ivar_trim_plot.png b/docs/images/mqc_ivar_trim_plot.png new file mode 100755 index 00000000..ca054905 Binary files /dev/null and b/docs/images/mqc_ivar_trim_plot.png differ diff --git a/docs/images/mqc_ivar_variants_plot.png b/docs/images/mqc_ivar_variants_plot.png new file mode 100755 index 00000000..1ff6884f Binary files /dev/null and b/docs/images/mqc_ivar_variants_plot.png differ diff --git a/docs/images/mqc_kraken2_plot.png b/docs/images/mqc_kraken2_plot.png new file mode 100755 index 00000000..6837fb18 Binary files /dev/null and b/docs/images/mqc_kraken2_plot.png differ diff --git a/docs/images/mqc_mosdepth_plot.png b/docs/images/mqc_mosdepth_plot.png new file mode 100755 index 00000000..614870b2 Binary files /dev/null and b/docs/images/mqc_mosdepth_plot.png differ diff --git a/docs/images/mqc_picard_duplicates_plot.png b/docs/images/mqc_picard_duplicates_plot.png new file mode 100755 index 00000000..57e51e1e Binary files /dev/null and b/docs/images/mqc_picard_duplicates_plot.png differ diff --git a/docs/images/mqc_picard_insert_size_plot.png b/docs/images/mqc_picard_insert_size_plot.png new file mode 100755 index 00000000..7614aa29 Binary files /dev/null and b/docs/images/mqc_picard_insert_size_plot.png differ diff --git a/docs/images/mqc_quast_plot.png b/docs/images/mqc_quast_plot.png new file mode 100755 index 00000000..afcb6d09 Binary files /dev/null and b/docs/images/mqc_quast_plot.png differ diff --git a/docs/images/mqc_samtools_stats_plot.png b/docs/images/mqc_samtools_stats_plot.png new file mode 100755 index 00000000..3d926e74 Binary files /dev/null and 
b/docs/images/mqc_samtools_stats_plot.png differ diff --git a/docs/images/mqc_snpeff_plot.png b/docs/images/mqc_snpeff_plot.png new file mode 100755 index 00000000..79313e5c Binary files /dev/null and b/docs/images/mqc_snpeff_plot.png differ diff --git a/docs/images/mqc_varscan2_plot.png b/docs/images/mqc_varscan2_plot.png new file mode 100755 index 00000000..98ed977f Binary files /dev/null and b/docs/images/mqc_varscan2_plot.png differ diff --git a/docs/images/nanoplot_readlengthquality.png b/docs/images/nanoplot_readlengthquality.png new file mode 100755 index 00000000..11eb7d37 Binary files /dev/null and b/docs/images/nanoplot_readlengthquality.png differ diff --git a/docs/images/nextclade_tag_example.png b/docs/images/nextclade_tag_example.png new file mode 100644 index 00000000..1d29b64e Binary files /dev/null and b/docs/images/nextclade_tag_example.png differ diff --git a/docs/images/nf-core-viralrecon_metro_map.svg b/docs/images/nf-core-viralrecon_metro_map.svg new file mode 100644 index 00000000..38e6792c --- /dev/null +++ b/docs/images/nf-core-viralrecon_metro_map.svg @@ -0,0 +1,7257 @@ + + + +image/svg+xmlviralreconSTAGE1. Pre-processing2. Alignment & BAM post-processing3. Variant calling4. Consensus calling5. De novo assembly6. Final QCFASTQcatfastqFastQCfastpKraken2FastQCilluminapicardCollectMultipleMetricsmosdepthBowtie2BCFToolsiVarvariantsBCFToolsPangolinNextcladeQUASTSnpSiftTSVVariantslong tableMultiQCHTMLSPAdesBlastABACAScutadaptminiaUnicyclerQUASTPlasmidIDBandageimageMultiQCHTMLMETHODVariant calling - Variants: iVar, Consensus: iVarVariant calling - Variants: iVar, Consensus: BCFToolsVariant calling - Variants: BCFTools, Consensus: iVarVariant calling - Variants: BCFTools, Consensus: BCFToolsDe novo assemblyASCIIGenomeSAMtoolsiVartrimpicardMarkDuplicatesSnpEffiVarconsensusLicense:125346nanoporeviralreconSTAGE1. Pre-processing2. Alignment, variant & consensus calling3. Consensus analysis4. Variant analysis5. 
Final QC43125License:NanoPlotpycoQCarticguppyplexFAST5articminionvcflibvcfuniqASCIIGenomeQUASTSnpEffPangolinSnpSiftNextclademosdepthVariantslong tableMultiQCTSVHTMLVCFFASTASamtoolsview diff --git a/docs/images/nf-core-viralrecon_metro_map_illumina.png b/docs/images/nf-core-viralrecon_metro_map_illumina.png new file mode 100644 index 00000000..fd5f2928 Binary files /dev/null and b/docs/images/nf-core-viralrecon_metro_map_illumina.png differ diff --git a/docs/images/nf-core-viralrecon_metro_map_nanopore.png b/docs/images/nf-core-viralrecon_metro_map_nanopore.png new file mode 100644 index 00000000..5fe0cdf1 Binary files /dev/null and b/docs/images/nf-core-viralrecon_metro_map_nanopore.png differ diff --git a/docs/images/pycoqc_readsperbarcode.png b/docs/images/pycoqc_readsperbarcode.png new file mode 100755 index 00000000..d62bd3b7 Binary files /dev/null and b/docs/images/pycoqc_readsperbarcode.png differ diff --git a/docs/images/r_amplicon_barplot.png b/docs/images/r_amplicon_barplot.png new file mode 100755 index 00000000..9d35f298 Binary files /dev/null and b/docs/images/r_amplicon_barplot.png differ diff --git a/docs/images/r_amplicon_heatmap.png b/docs/images/r_amplicon_heatmap.png new file mode 100755 index 00000000..3111119d Binary files /dev/null and b/docs/images/r_amplicon_heatmap.png differ diff --git a/docs/images/r_genome_coverage.png b/docs/images/r_genome_coverage.png new file mode 100755 index 00000000..e9782600 Binary files /dev/null and b/docs/images/r_genome_coverage.png differ diff --git a/docs/output.md b/docs/output.md index 3cd8d227..e28c02cf 100644 --- a/docs/output.md +++ b/docs/output.md @@ -1,34 +1,1069 @@ -# nf-core/viralrecon: Output - -## Introduction +# Introduction This document describes the output produced by the pipeline. Most of the plots are taken from the MultiQC report, which summarises results at the end of the pipeline. The directories listed below will be created in the results directory after the pipeline has finished. 
All paths are relative to the top-level results directory. - +# Nanopore: Pipeline overview + +- [Introduction](#introduction) +- [Nanopore: Pipeline overview](#nanopore-pipeline-overview) + - [Nanopore: Preprocessing](#nanopore-preprocessing) + - [Nanopore: pycoQC](#nanopore-pycoqc) + - [Nanopore: artic guppyplex](#nanopore-artic-guppyplex) + - [Nanopore: NanoPlot](#nanopore-nanoplot) + - [Nanopore: Variant calling](#nanopore-variant-calling) + - [Nanopore: artic minion](#nanopore-artic-minion) + - [Nanopore: Downstream analysis](#nanopore-downstream-analysis) + - [Nanopore: SAMtools](#nanopore-samtools) + - [Nanopore: mosdepth](#nanopore-mosdepth) + - [Nanopore: BCFTools](#nanopore-bcftools) + - [Nanopore: SnpEff and SnpSift](#nanopore-snpeff-and-snpsift) + - [Nanopore: QUAST](#nanopore-quast) + - [Nanopore: Pangolin](#nanopore-pangolin) + - [Nanopore: Nextclade](#nanopore-nextclade) + - [Nanopore: Freyja](#nanopore-freyja) + - [Nanopore: ASCIIGenome](#nanopore-asciigenome) + - [Nanopore: Variants long table](#nanopore-variants-long-table) + - [Nanopore: Workflow reporting](#nanopore-workflow-reporting) + - [Nanopore: MultiQC](#nanopore-multiqc) +- [Illumina: Pipeline overview](#illumina-pipeline-overview) + - [Illumina: Preprocessing](#illumina-preprocessing) + - [cat](#cat) + - [FastQC](#fastqc) + - [fastp](#fastp) + - [Kraken 2](#kraken-2) + - [Illumina: Variant calling](#illumina-variant-calling) + - [Bowtie 2](#bowtie-2) + - [SAMtools](#samtools) + - [iVar trim](#ivar-trim) + - [picard MarkDuplicates](#picard-markduplicates) + - [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) + - [mosdepth](#mosdepth) + - [iVar variants](#ivar-variants) + - [BCFTools call](#bcftools-call) + - [SnpEff and SnpSift](#snpeff-and-snpsift) + - [Freyja](#freyja) + - [ASCIIGenome](#asciigenome) + - [iVar consensus](#ivar-consensus) + - [BCFTools and BEDTools](#bcftools-and-bedtools) + - [QUAST](#quast) + - [Pangolin](#pangolin) + - [Nextclade](#nextclade) + - 
[Variants long table](#variants-long-table) + - [Illumina: De novo assembly](#illumina-de-novo-assembly) + - [Cutadapt](#cutadapt) + - [SPAdes](#spades) + - [Unicycler](#unicycler) + - [minia](#minia) + - [BLAST](#blast) + - [ABACAS](#abacas) + - [PlasmidID](#plasmidid) + - [Assembly QUAST](#assembly-quast) + - [Illumina: Workflow reporting and genomes](#illumina-workflow-reporting-and-genomes) + - [MultiQC](#multiqc) + - [Reference genome files](#reference-genome-files) +- [Pipeline information](#pipeline-information) + +## Nanopore: Preprocessing + +A file called `summary_variants_metrics_mqc.csv` containing a selection of read alignment and variant calling metrics will be saved in the `multiqc//` output directory which is determined by the `--artic_minion_caller` parameter (Default: `nanopolish/`). The same metrics will also be added to the top of the MultiQC report. + +### Nanopore: pycoQC + +
+Output files + +- `pycoqc/` + - `*.html` and `.json` file that includes a run summary and graphical representation of various QC metrics including distribution of read length, distribution of read quality scores, mean read quality per sequence length, output per channel over experiment time and percentage of reads per barcode. + +
 + +[PycoQC](https://github.com/a-slide/pycoQC) computes metrics and generates QC plots using the sequencing summary information generated by basecalling/demultiplexing tools such as Guppy e.g. distribution of read length, read length over time, number of reads per barcode and other general stats. + +

PycoQC - Number of reads per barcode

+ +### Nanopore: artic guppyplex + +
+Output files + +- `guppyplex/` + - `*.fastq.gz` files generated by aggregate pre-demultiplexed reads from MinKNOW/Guppy. These files are not saved by default but can be via a custom config file such as the one below. + +```nextflow +params { + modules { + 'nanopore_artic_guppyplex' { + publish_files = ['fastq.gz':''] + } + } +} +``` + +
+ +The [artic guppyplex](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to perform length filtering of the demultiplexed Nanopore reads obtained per barcode. This essentially filters out chimeric reads that may be generated by the ARTIC protocol. The pipeline uses a default minimum and maximum read length of 400 and 700, respectively as tailored for the [nCoV-2019 primer set](https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html). However, you may need to adjust these for different primer schemes e.g. by using the minimum length of the amplicons (`--min-length`) as well as the maximum length plus 200 (`--max-length`). + +### Nanopore: NanoPlot + +
+Output files + +- `nanoplot//` + - Per-sample `*.html` files for QC metrics and individual `*.png` image files for plots. + +
 + +[NanoPlot](https://github.com/wdecoster/NanoPlot) is a tool that can be used to produce general quality metrics from various Nanopore-based input files including fastq files e.g. quality score distribution, read lengths and other general stats. + +

Nanoplot - Read quality vs read length

+ +## Nanopore: Variant calling + +### Nanopore: artic minion + +
+Output files + +- `/` + - `*.consensus.fasta`: Consensus fasta file generated by artic minion. + - `*.pass.unique.vcf.gz`: VCF file containing unique variants passing quality filters. + - `*.pass.unique.vcf.gz.tbi`: VCF index file containing unique variants passing quality filters. + - `*.pass.vcf.gz`: VCF file containing variants passing quality filters. + - `*.pass.vcf.gz.tbi`: VCF index file containing variants passing quality filters. + - `*.primers.vcf`: VCF file containing variants found in primer-binding regions. + - `*.merged.vcf`: VCF file containing all detected variants. + - `*.fail.vcf`: VCF file containing variants failing quality filters. + - `*.sorted.bam`: BAM file generated by initial alignment. + - `*.sorted.bam.bai`: BAM index file generated by initial alignment. + - `*.trimmed.rg.sorted.bam`: BAM file without primer-binding site trimming. + - `*.trimmed.rg.sorted.bam.bai`: BAM index file without primer-binding site trimming. + - `*.primertrimmed.rg.sorted.bam`: BAM file generated after primer-binding site trimming. + - `*.primertrimmed.rg.sorted.bam.bai`: BAM index file generated after primer-binding site trimming. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +The [artic minion](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to align reads, call variants and to generate the consensus sequence. By default, artic minion uses [Minimap2](https://github.com/lh3/minimap2) to align the reads to the viral genome, however you can use [BWA](https://github.com/lh3/bwa) instead using the `--artic_minion_aligner bwa` parameter. Similarly, the default variant caller used by artic minion is [Nanopolish](https://github.com/jts/nanopolish), however, you can use [Medaka](https://github.com/nanoporetech/medaka) instead via the `--artic_minion_caller medaka` parameter. Medaka is faster than Nanopolish, performs mostly the same and can be run directly from `fastq` input files as opposed to requiring the `fastq`, `fast5` and `sequencing_summary.txt` files required to run Nanopolish. You must provide the appropriate [Medaka model](https://github.com/nanoporetech/medaka#models) via the `--artic_minion_medaka_model` parameter if using `--artic_minion_caller medaka`. + +## Nanopore: Downstream analysis + +### Nanopore: SAMtools + +
+Output files + +- `/` + - `*.mapped.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + - `*.mapped.sorted.bam.bai`: Index file for coordinate sorted BAM file. +- `/samtools_stats/` + - SAMtools `*.mapped.sorted.bam.flagstat`, `*.mapped.sorted.bam.idxstats` and `*.mapped.sorted.bam.stats` files generated from the alignment files. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +BAM files containing the original alignments from either Minimap2 or BWA are further processed with [SAMtools](http://samtools.sourceforge.net/) to remove unmapped reads as well as to generate read mapping statistics. + +![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) + +### Nanopore: mosdepth + +
+Output files + +- `/mosdepth/genome/` + - `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + - `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. + - `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. + - `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. +- `/mosdepth/amplicon/` + - `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + - `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + - `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + - `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + - `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
 + +[mosdepth](https://github.com/brentp/mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. + +![R - Samples amplicon coverage heatmap](images/r_amplicon_heatmap.png) + +![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) + +

R - Sample per-amplicon coverage plot

+ +### Nanopore: BCFTools + +
+Output files + +- `/bcftools_stats/` + - `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
 + +[BCFtools](http://samtools.github.io/bcftools/bcftools.html) is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. It can also be used to generate statistics and counts obtained from VCF files as used here. + +![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) + +### Nanopore: SnpEff and SnpSift + +
+Output files + +- `/snpeff/` + - `*.snpeff.csv`: Variant annotation csv file. + - `*.snpeff.genes.txt`: Gene table for annotated variants. + - `*.snpeff.summary.html`: Summary html file for variants. + - `*.snpeff.vcf.gz`: VCF file with variant annotations. + - `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. + - `*.snpsift.txt`: SnpSift summary table. +- `/snpeff/bcftools_stats/` + - `*.snpeff.bcftools_stats.txt`: Statistics and counts obtained from SnpEff VCF file. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + +[SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. -## Pipeline overview +![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) -The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes data using the following steps: +### Nanopore: QUAST -- [FastQC](#fastqc) - Raw read QC -- [MultiQC](#multiqc) - Aggregate report describing results and QC from the whole pipeline -- [Pipeline information](#pipeline-information) - Report metrics generated during the workflow execution +
+Output files + +- `/quast/` + - `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. + +### Nanopore: Pangolin + +
+Output files + +- `/pangolin/` + - `*.pangolin.csv`: Lineage analysis results from Pangolin. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). + +### Nanopore: Nextclade + +
 +Output files + +- `/nextclade/` + - `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. + +### Nanopore: Freyja + +
 +Output files + +- `/freyja/demix` + - `*.tsv`: Analysis results including the lineages present, their corresponding abundances, and summarization by constellation +- `/freyja/freyja_db` + - `.json`: dataset containing lineage metadata that correspond to barcodes. + - `.yml`: dataset containing the lineage topology. + - `.csv`: dataset containing lineage defining barcodes. +- `/freyja/variants` + - `*.variants.tsv`: Analysis results including identified variants in a gff-like format + - `*.depth.tsv`: Analysis results including the depth of the identified variants +- `/freyja/boot` + - `*lineages.csv` Analysis results including lineages present and their corresponding abundances with variation identified through bootstrapping + - `*summarized.csv` Analysis results including lineages present but summarized by constellation and their corresponding abundances with variation identified through bootstrapping + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +[Freyja](https://github.com/andersen-lab/Freyja) is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the [UShER](https://usher-wiki.readthedocs.io/en/latest/#) global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + +

Freyja screenshot

+ +### Nanopore: ASCIIGenome + +
+Output files + +- `/asciigenome//` + - `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
 + +As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol. + +

ASCIIGenome screenshot

+ +### Nanopore: Variants long table + +
+Output files + +- `/` + - `variants_long_table.csv`: Long format table collating per-sample information for individual variants, functional effect prediction and lineage analysis. + - `additional_variants_long_table.csv`: Long format table similar to `variants_long_table.csv` for additional annotation file with overlapping annotation features. + +**NB:** The value of `` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish'). + +
+ +Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)). The variants used for this table are the ones passing artic minion quality filters (`*.pass.unique.vcf.gz`) explained before in [Nanopore: artic minion](#nanopore-artic-minion) output files. + +The more pertinent variant information is summarised in this table to make it easier for researchers to assess the impact of variants found amongst the sequenced sample(s). An example of the fields included in the table are shown below: + +```bash +SAMPLE,CHROM,POS,REF,ALT,FILTER,DP,REF_DP,ALT_DP,AF,GENE,EFFECT,HGVS_C,HGVS_P,HGVS_P_1LETTER,CALLER,LINEAGE +SAMPLE1_PE,MN908947.3,241,C,T,PASS,489,4,483,0.99,orf1ab,upstream_gene_variant,c.-25C>T,.,.,ivar,B.1 +SAMPLE1_PE,MN908947.3,1875,C,T,PASS,92,62,29,0.32,orf1ab,missense_variant,c.1610C>T,p.Ala537Val,p.A537V,ivar,B.1 +SAMPLE1_PE,MN908947.3,3037,C,T,PASS,213,0,213,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,B.1 +SAMPLE1_PE,MN908947.3,11719,G,A,PASS,195,9,186,0.95,orf1ab,synonymous_variant,c.11454G>A,p.Gln3818Gln,p.Q3818Q,ivar,B.1 +``` + +Table columns: + +- SAMPLE: sample name +- CHROM: Reference/fragment ID +- POS: Position of the variant respect to the reference genome +- REF: Reference allele +- ALT: Alternative allele +- FILTER: Column indicating if the variant passed the filters. If PASS the variant passed all the filters. If not, the name of the filter that wasn't passed will appear. 
+- DP: Position read depth +- REF_DP: Reference allele depth +- ALT_DP: Alternative allele depth +- AF: Alternative allele frequency +- GENE: Gene name in annotation file​ +- EFFECT: Effect of the variant +- HGVS_C: Position annotation at CDS level +- HGVS_P: Position annotation at protein level +- HGVS_P_1LETTER: Position annotation at protein level with the aminoacid annotation in 1 letter format +- Caller: Variant caller used​​ + +## Nanopore: Workflow reporting + +### Nanopore: MultiQC + +
 +Output files + +- `multiqc//` + - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. + - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. + - `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. + +
+ +![MultiQC - FastQC adapter content plot](images/mqc_fastqc_adapter.png) + +Results generated by MultiQC collate pipeline QC from pycoQC, samtools, mosdepth, BCFTools, SnpEff and QUAST. + +The default [`multiqc config file`](https://github.com/nf-core/viralrecon/blob/master/assets/multiqc_config_nanopore.yaml) has been written in a way in which to structure these QC metrics to make them more interpretable in the final report. + +The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . + +An example MultiQC report generated from a full-sized dataset can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). + +# Illumina: Pipeline overview + +- [Introduction](#introduction) +- [Nanopore: Pipeline overview](#nanopore-pipeline-overview) + - [Nanopore: Preprocessing](#nanopore-preprocessing) + - [Nanopore: pycoQC](#nanopore-pycoqc) + - [Nanopore: artic guppyplex](#nanopore-artic-guppyplex) + - [Nanopore: NanoPlot](#nanopore-nanoplot) + - [Nanopore: Variant calling](#nanopore-variant-calling) + - [Nanopore: artic minion](#nanopore-artic-minion) + - [Nanopore: Downstream analysis](#nanopore-downstream-analysis) + - [Nanopore: SAMtools](#nanopore-samtools) + - [Nanopore: mosdepth](#nanopore-mosdepth) + - [Nanopore: BCFTools](#nanopore-bcftools) + - [Nanopore: SnpEff and SnpSift](#nanopore-snpeff-and-snpsift) + - [Nanopore: QUAST](#nanopore-quast) + - [Nanopore: Pangolin](#nanopore-pangolin) + - [Nanopore: Nextclade](#nanopore-nextclade) + - [Nanopore: Freyja](#nanopore-freyja) + - [Nanopore: ASCIIGenome](#nanopore-asciigenome) + - [Nanopore: Variants long table](#nanopore-variants-long-table) + - [Nanopore: Workflow reporting](#nanopore-workflow-reporting) + - [Nanopore: MultiQC](#nanopore-multiqc) +- [Illumina: Pipeline overview](#illumina-pipeline-overview) + - [Illumina: Preprocessing](#illumina-preprocessing) + - 
[cat](#cat) + - [FastQC](#fastqc) + - [fastp](#fastp) + - [Kraken 2](#kraken-2) + - [Illumina: Variant calling](#illumina-variant-calling) + - [Bowtie 2](#bowtie-2) + - [SAMtools](#samtools) + - [iVar trim](#ivar-trim) + - [picard MarkDuplicates](#picard-markduplicates) + - [picard CollectMultipleMetrics](#picard-collectmultiplemetrics) + - [mosdepth](#mosdepth) + - [iVar variants](#ivar-variants) + - [BCFTools call](#bcftools-call) + - [SnpEff and SnpSift](#snpeff-and-snpsift) + - [Freyja](#freyja) + - [ASCIIGenome](#asciigenome) + - [iVar consensus](#ivar-consensus) + - [BCFTools and BEDTools](#bcftools-and-bedtools) + - [QUAST](#quast) + - [Pangolin](#pangolin) + - [Nextclade](#nextclade) + - [Variants long table](#variants-long-table) + - [Illumina: De novo assembly](#illumina-de-novo-assembly) + - [Cutadapt](#cutadapt) + - [SPAdes](#spades) + - [Unicycler](#unicycler) + - [minia](#minia) + - [BLAST](#blast) + - [ABACAS](#abacas) + - [PlasmidID](#plasmidid) + - [Assembly QUAST](#assembly-quast) + - [Illumina: Workflow reporting and genomes](#illumina-workflow-reporting-and-genomes) + - [MultiQC](#multiqc) + - [Reference genome files](#reference-genome-files) +- [Pipeline information](#pipeline-information) + +## Illumina: Preprocessing + +### cat + +
+Output files + +- `fastq/` + - `*.merged.fastq.gz`: These files are not saved by default but can be via a custom config file such as the one below. + +```nextflow +params { + modules { + 'illumina_cat_fastq' { + publish_files = null + } + } +} +``` + +
+ +If multiple libraries/runs have been provided for the same sample in the input samplesheet (e.g. to increase sequencing depth) then these will be merged at the very beginning of the pipeline in order to have consistent sample naming throughout the pipeline. Please refer to the [usage documentation](https://nf-co.re/viralrecon/usage#illumina-samplesheet-format) to see how to specify these samples in the input samplesheet.

### FastQC
Output files - `fastqc/raw/` - `*_fastqc.html`: FastQC report containing quality metrics. - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. +**NB:** The FastQC plots in this directory are generated relative to the raw, input reads. They may contain adapter sequence and regions of low quality. To see how your reads look after trimming please refer to the FastQC reports in the `fastqc/trim/` directory. +
[FastQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/) gives general quality metrics about your sequenced reads. It provides information about the quality score distribution across your reads, per base sequence content (%A/T/G/C), adapter contamination and overrepresented sequences. For further reading and documentation see the [FastQC help pages](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/). +![MultiQC - FastQC per base sequence plot](images/mqc_fastqc_plot.png) + +### fastp + +
+Output files + +- `fastp/` + - `*.fastp.html`: Trimming report in html format. + - `*.fastp.json`: Trimming report in json format. +- `fastp/log/` + - `*.fastp.log`: Trimming log file. +- `fastqc/trim/` + - `*_fastqc.html`: FastQC report of the trimmed reads. + - `*_fastqc.zip`: Zip archive containing the FastQC report, tab-delimited data file and plot images. + +
+ +[fastp](https://github.com/OpenGene/fastp) is a tool designed to provide fast, all-in-one preprocessing for FastQ files. It has been developed in C++ with multithreading support to achieve higher performance. fastp is used in this pipeline for standard adapter trimming and quality filtering. + +![MultiQC - fastp filtered reads plot](images/mqc_fastp_plot.png) + +### Kraken 2 + +
+Output files + +- `kraken2/` + - `*.kraken2.report.txt`: Kraken 2 taxonomic report. See [here](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual#sample-report-output-format) for a detailed description of the format. + +
+ +[Kraken 2](https://ccb.jhu.edu/software/kraken2/index.shtml?t=manual) is a sequence classifier that assigns taxonomic labels to DNA sequences. Kraken 2 examines the k-mers within a query sequence and uses the information within those k-mers to query a database. That database maps k-mers to the lowest common ancestor (LCA) of all genomes known to contain a given k-mer. + +We use a Kraken 2 database in this workflow to filter out reads specific to the host genome before performing the _de novo_ assembly steps in the pipeline. This filtering is not performed in the variant calling arm of the pipeline by default but Kraken 2 is still run to obtain an estimate of host reads, however, the filtering can be amended via the `--kraken2_variants_host_filter` parameter. + +![MultiQC - Kraken 2 classification plot](images/mqc_kraken2_plot.png) + +## Illumina: Variant calling + +A file called `summary_variants_metrics_mqc.csv` containing a selection of read alignment and variant calling metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. + +### Bowtie 2 + +
+Output files + +- `variants/bowtie2/log/` + - `*.bowtie2.log`: Bowtie 2 mapping log file. + +
+ +[Bowtie 2](http://bowtie-bio.sourceforge.net/bowtie2/) is an ultrafast and memory-efficient tool for aligning sequencing reads to long reference sequences. Bowtie 2 supports gapped, local, and paired-end alignment modes. + +![MultiQC - Bowtie2 alignment score plot](images/mqc_bowtie2_plot.png) + +### SAMtools + +
+Output files + +- `variants/bowtie2/` + - `.sorted.bam`: Coordinate sorted BAM file containing read alignment information. + - `.sorted.bam.bai`: Index file for coordinate sorted BAM file. +- `variants/bowtie2/samtools_stats/` + - SAMtools `.sorted.bam.flagstat`, `.sorted.bam.idxstats` and `.sorted.bam.stats` files generated from the alignment files. + +
+ +Bowtie 2 BAM files are further processed with [SAMtools](http://samtools.sourceforge.net/) to sort them by coordinate, for indexing, as well as to generate read mapping statistics. + +![MultiQC - SAMtools alignment scores plot](images/mqc_samtools_stats_plot.png) + +### iVar trim + +
+Output files + +- `variants/bowtie2/` + - `*.ivar_trim.sorted.bam`: Coordinate sorted BAM file after primer trimming. + - `*.ivar_trim.sorted.bam.bai`: Index file for coordinate sorted BAM file after primer trimming. +- `variants/bowtie2/samtools_stats/` + - SAMtools `*.ivar_trim.sorted.bam.flagstat`, `*.ivar_trim.sorted.bam.idxstats` and `*.ivar_trim.sorted.bam.stats` files generated from the primer trimmed alignment files. +- `variants/bowtie2/log/` + - `*.ivar_trim.ivar.log`: iVar trim log file obtained from stdout. + +
+ +If the `--protocol amplicon` parameter is provided then [iVar](http://gensoft.pasteur.fr/docs/ivar/1.0/manualpage.html) is used to trim amplicon primer sequences from the aligned reads. iVar uses the primer positions supplied in `--primer_bed` to soft clip primer sequences from a coordinate sorted BAM file. + +### picard MarkDuplicates + +
+Output files + +- `variants/bowtie2/` + - `*.markduplicates.sorted.bam`: Coordinate sorted BAM file after duplicate marking. + - `*.markduplicates.sorted.bam.bai`: Index file for coordinate sorted BAM file after duplicate marking. +- `variants/bowtie2/samtools_stats/` + - SAMtools `*.markduplicates.sorted.bam.flagstat`, `*.markduplicates.sorted.bam.idxstats` and `*.markduplicates.sorted.bam.stats` files generated from the duplicate marked alignment files. +- `variants/bowtie2/picard_metrics/` + - `*.markduplicates.sorted.MarkDuplicates.metrics.txt`: Metrics file from MarkDuplicates. + +
+ +Unless you are using [UMIs](https://emea.illumina.com/science/sequencing-method-explorer/kits-and-arrays/umi.html) it is not possible to establish whether the fragments you have sequenced from your sample were derived via true biological duplication (i.e. sequencing independent template fragments) or as a result of PCR biases introduced during the library preparation. [picard MarkDuplicates](https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-) isn't run by default because you anticipate high levels of duplication with viral data due to the size of the genome, however, you can activate it by adding `--skip_markduplicates false` to the command you use to run the pipeline. This will only _mark_ the duplicate reads identified amongst the alignments to allow you to gauge the overall level of duplication in your samples. You can also choose to remove any reads identified as duplicates via the `--filter_duplicates` parameter. + +![MultiQC - Picard MarkDuplicates metrics plot](images/mqc_picard_duplicates_plot.png) + +### picard CollectMultipleMetrics + +
+Output files + +- `variants/bowtie2/picard_metrics/` + - `*.CollectMultipleMetrics.*`: Alignment QC files from picard CollectMultipleMetrics in `*_metrics` textual format. +- `variants/bowtie2/picard_metrics/pdf/` + - `*.pdf` plots for metrics obtained from CollectMultipleMetrics. + +
+ +[picard-tools](https://broadinstitute.github.io/picard/command-line-overview.html) is a set of command-line tools for manipulating high-throughput sequencing data. We use picard-tools in this pipeline to obtain mapping and coverage metrics. + +![MultiQC - Picard insert size plot](images/mqc_picard_insert_size_plot.png) + +### mosdepth + +
+Output files + +- `variants/bowtie2/mosdepth/genome/` + - `all_samples.mosdepth.coverage.tsv`: File aggregating genome-wide coverage values across all samples used for plotting. + - `*.mosdepth.coverage.pdf`: Whole-genome coverage plot. + - `*.mosdepth.coverage.tsv`: File containing coverage values for the above plot. + - `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. +- `variants/bowtie2/mosdepth/amplicon/` + - `all_samples.mosdepth.coverage.tsv`: File aggregating per-amplicon coverage values across all samples used for plotting. + - `all_samples.mosdepth.heatmap.pdf`: Heatmap showing per-amplicon coverage across all samples. + - `*.mosdepth.coverage.pdf`: Bar plot showing per-amplicon coverage for an individual sample. + - `*.mosdepth.coverage.tsv`: File containing per-amplicon coverage values for the above plot. + - `*.mosdepth.summary.txt`: Summary metrics including mean, min and max coverage values. + +
+ +[mosdepth](https://github.com/brentp/mosdepth) is a fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. mosdepth is used in this pipeline to obtain genome-wide coverage values in 200bp windows and for `--protocol amplicon` to obtain amplicon/region-specific coverage metrics. The results are then either rendered in MultiQC (genome-wide coverage) or are plotted using custom `R` scripts. + +![R - Samples amplicon coverage heatmap ](images/r_amplicon_heatmap.png) + +![R - Sample genome-wide coverage plot](images/r_genome_coverage.png) + +

R - Sample per-amplicon coverage plot

+ +### iVar variants + +
+Output files + +- `variants/ivar/` + - `*.tsv`: Original iVar variants in TSV format. + - `*.vcf.gz`: iVar variants in VCF format. Converted using custom `ivar_variants_to_vcf.py` python script. + - `*.vcf.gz.tbi`: iVar variants VCF index file. +- `variants/ivar/log/` + - `*.variant_counts.log`: Counts for type of variants called by iVar. +- `variants/ivar/bcftools_stats/` + - `*.bcftools_stats.txt`: Statistics and counts obtained from iVar variants VCF file. + +
+ +[iVar](https://github.com/andersen-lab/ivar/blob/master/docs/MANUAL.md) is a computational package that contains functions broadly useful for viral amplicon-based sequencing. We use iVar in this pipeline to [trim primer sequences](#ivar-trim) for amplicon input data as well as to call variants. + +iVar outputs a tsv format which is not compatible with downstream analysis such as annotation using SnpEff. Moreover some issues need to be addressed such as [strand-bias filtering](https://github.com/andersen-lab/ivar/issues/5) and [the consecutive reporting of variants belonging to the same codon](https://github.com/andersen-lab/ivar/issues/92). This pipeline uses a custom Python script [ivar_variants_to_vcf.py](https://github.com/nf-core/viralrecon/blob/master/bin/ivar_variants_to_vcf.py) to convert the default iVar output to VCF whilst also addressing both of these issues. + +![MultiQC - iVar variants called plot](images/mqc_ivar_variants_plot.png) + +### BCFTools call + +
+Output files + +- `variants/bcftools/` + - `*.vcf.gz`: Variants VCF file. + - `*.vcf.gz.tbi`: Variants VCF index file. +- `variants/bcftools/bcftools_stats/` + - `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + +
+ +[BCFtools](http://samtools.github.io/bcftools/bcftools.html) can be used to call variants directly from BAM alignment files. It is a set of utilities that manipulate variant calls in [VCF](https://vcftools.github.io/specs.html) and its binary counterpart BCF format. BCFTools is used in the variant calling and _de novo_ assembly steps of this pipeline to obtain basic statistics from the VCF output. + +![MultiQC - BCFTools variant counts](images/mqc_bcftools_stats_plot.png) + +### SnpEff and SnpSift + +
+Output files + +- `variants/<caller>/snpeff/` + - `*.snpeff.csv`: Variant annotation csv file. + - `*.snpeff.genes.txt`: Gene table for annotated variants. + - `*.snpeff.summary.html`: Summary html file for variants. + - `*.snpeff.vcf.gz`: VCF file with variant annotations. + - `*.snpeff.vcf.gz.tbi`: Index for VCF file with variant annotations. + - `*.snpsift.txt`: SnpSift summary table. +- `variants/<caller>/snpeff/bcftools_stats/` + - `*.bcftools_stats.txt`: Statistics and counts obtained from VCF file. + +**NB:** The value of `<caller>` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +[SnpEff](http://snpeff.sourceforge.net/SnpEff.html) is a genetic variant annotation and functional effect prediction toolbox. It annotates and predicts the effects of genetic variants on genes and proteins (such as amino acid changes). + +[SnpSift](http://snpeff.sourceforge.net/SnpSift.html) annotates genomic variants using databases, filters, and manipulates genomic annotated variants. After annotation with SnpEff, you can use SnpSift to help filter large genomic datasets in order to find the most significant variants. + +![MultiQC - SnpEff annotation counts](images/mqc_snpeff_plot.png) + +### Freyja + +
+Output files + +- `/freyja/demix` + - `*.tsv`: Analysis results including the lineages present, their corresponding abundances, and summarization by constellation +- `/freyja/freyja_db` + - `.json`: dataset containing lineage metadata that correspond to barcodes. + - `.yml`: dataset containing the lineage topology. + - `.csv`: dataset containing lineage defining barcodes. +- `/freyja/variants` + - `*.variants.tsv`: Analysis results including identified variants in a gff-like format + - `*.depth.tsv`: Analysis results including the depth of the identified variants +- `/freyja/boot` + - `*lineages.csv` Analysis results including lineages present and their corresponding abundances with variation identified through bootstrapping + - `*summarized.csv` Analysis results including lineages present but summarized by constellation and their corresponding abundances with variation identified through bootstrapping + +
+ +[Freyja](https://github.com/andersen-lab/Freyja) is a tool to recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing dataset (BAM aligned to the Hu-1 reference). The method uses lineage-determining mutational "barcodes" derived from the [UShER](https://usher-wiki.readthedocs.io/en/latest/#) global phylogenetic tree as a basis set to solve the constrained (unit sum, non-negative) de-mixing problem. + +

Freyja screenshot

+ +### ASCIIGenome + +
+Output files + +- `variants//asciigenome//` + - `*.pdf`: Individual variant screenshots with annotation tracks in PDF format. + +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs.io/en/latest/) is a command-line genome browser that can be run from a terminal window and is solely based on ASCII characters. The closest program to ASCIIGenome is probably [samtools tview](http://www.htslib.org/doc/samtools-tview.html) but ASCIIGenome offers much more flexibility, similar to popular GUI viewers like the [IGV](https://software.broadinstitute.org/software/igv/) browser. We are using the batch processing mode of ASCIIGenome in this pipeline to generate individual screenshots for all of the variant sites reported for each sample in the VCF files. This is incredibly useful to be able to quickly QC the variants called by the pipeline without having to tediously load all of the relevant tracks into a conventional genome browser. Where possible, the BAM read alignments, VCF variant file, primer BED file and GFF annotation track will be represented in the screenshot for contextual purposes. The screenshot below shows a SNP called relative to the MN908947.3 SARS-CoV-2 reference genome that overlaps the ORF7a protein and the nCoV-2019_91_LEFT primer from the ARTIC v3 protocol. + +

ASCIIGenome screenshot

+ +### iVar consensus + +
+Output files + +- `variants//consensus/ivar/` + - `*.consensus.fa`: Consensus Fasta file generated by iVar. + - `*.consensus.qual.txt`: File with the average quality of each base in the consensus sequence. +- `variants//consensus/ivar/base_qc/` + - `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + - `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + - `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + - `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + - `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. + +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +As described in the [iVar variants](#ivar-variants) section, iVar can be used in this pipeline to call variants and for the consensus sequence generation. + +### BCFTools and BEDTools + +
+Output files + +- `variants//consensus/bcftools/` + - `*.consensus.fa`: Consensus fasta file generated by integrating the high allele-frequency variants called by iVar/BCFTools into the reference genome. + - `*.filtered.vcf.gz`: VCF file containing high allele-frequency variants (default: `>= 0.75`) that were integrated into the consensus sequence. + - `*.filtered.vcf.gz.tbi`: Variants VCF index file for high allele frequency variants. +- `variants//consensus/bcftools/base_qc/` + - `*.ACTG_density.pdf`: Plot showing density of ACGT bases within the consensus sequence. + - `*.base_counts.pdf`: Plot showing frequency and percentages of all bases in consensus sequence. + - `*.base_counts.tsv`: File containing frequency and percentages of all bases in consensus sequence. + - `*.N_density.pdf`: Plot showing density of N bases within the consensus sequence. + - `*.N_run.tsv`: File containing start positions and width of N bases in consensus sequence. + +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +[BCFTools](http://samtools.github.io/bcftools/bcftools.html) is used in the variant calling and _de novo_ assembly steps of this pipeline to obtain basic statistics from the VCF output. It can also be used to generate a consensus sequence by integrating variant calls into the reference genome. In this pipeline, we use `samtools mpileup` to create a mask using low coverage positions, and `bedtools maskfasta` to mask the genome sequences based on these intervals. Finally, `bcftools consensus` is used to generate the consensus by projecting the high allele frequency variants onto the masked genome reference sequence. + +### QUAST + +
+Output files + +- `variants//consensus//quast/` + - `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +**NB:** The value of `` in the output directory name above is determined by the `--consensus_caller` parameter (Default: 'bcftools' for both '--protocol amplicon' and '--protocol metagenomic'). + +
+ +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the consensus sequence across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. + +### Pangolin + +
+Output files + +- `variants//consensus//pangolin/` + - `*.pangolin.csv`: Lineage analysis results from Pangolin. + +**NB:** The value of `` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +**NB:** The value of `` in the output directory name above is determined by the `--consensus_caller` parameter (Default: 'bcftools' for both '--protocol amplicon' and '--protocol metagenomic'). + +
+ +Phylogenetic Assignment of Named Global Outbreak LINeages ([Pangolin](https://github.com/cov-lineages/pangolin)) has been used extensively during the COVID-19 pandemic in order to assign lineages to SARS-CoV-2 genome sequenced samples. A [web application](https://pangolin.cog-uk.io/) also exists that allows users to upload genome sequences via a web browser to assign lineages to genome sequences of SARS-CoV-2, view descriptive characteristics of the assigned lineage(s), view the placement of the lineage in a phylogeny of global samples, and view the temporal and geographic distribution of the assigned lineage(s). + +### Nextclade + +
+Output files + +- `variants/<caller>/consensus/<consensus_caller>/nextclade/` + - `*.csv`: Analysis results from Nextclade containing genome clade assignment, mutation calling and sequence quality checks. + +**NB:** The value of `<caller>` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). +**NB:** The value of `<consensus_caller>` in the output directory name above is determined by the `--consensus_caller` parameter (Default: 'bcftools' for both '--protocol amplicon' and '--protocol metagenomic'). + +
+ +[Nextclade](https://github.com/nextstrain/nextclade) performs viral genome clade assignment, mutation calling and sequence quality checks for the consensus sequences generated in this pipeline. Similar to Pangolin, it has been used extensively during the COVID-19 pandemic. A [web application](https://clades.nextstrain.org/) also exists that allows users to upload genome sequences via a web browser. + +### Variants long table + +
+Output files + +- `variants/<caller>/` + - `variants_long_table.csv`: Long format table collating per-sample information for individual variants, functional effect prediction and lineage analysis. + - `additional_variants_long_table.csv`: Long format table similar to `variants_long_table.csv` for additional annotation file with overlapping annotation features. + +**NB:** The value of `<caller>` in the output directory name above is determined by the `--variant_caller` parameter (Default: 'ivar' for '--protocol amplicon' and 'bcftools' for '--protocol metagenomic'). + +
+ +Create variants long format table collating per-sample information for individual variants ([`BCFTools`](http://samtools.github.io/bcftools/bcftools.html)), functional effect prediction ([`SnpSift`](http://snpeff.sourceforge.net/SnpSift.html)) and lineage analysis ([`Pangolin`](https://github.com/cov-lineages/pangolin)). The variants used for this table are the ones passing variant caller filters (`variants/<caller>/*.vcf.gz`): + +- For ivar by default filters are: + - Allele frequency threshold >= 0.25 + - Minimum quality score threshold = 20 + - Minimum position depth = 10 + - If using metagenomics protocol, strand bias filter also is applied in `ivar_variants_to_vcf.py` +- For bcftools default filters are: + - Minimum quality score threshold = 20 + - Minimum position depth = 10 + +To filter variants included in the consensus genome from the variants long table file, the following filters should be applied: + +- AF >= 0.75 + +Additionally, to filter variants included in the consensus genome that are missense variants from the variants long table file, the following filters should be applied: + +- AF >= 0.75 +- EFFECT == missense_variant + +The more pertinent variant information is summarised in this table to make it easier for researchers to assess the impact of variants found amongst the sequenced sample(s). 
An example of the fields included in the table are shown below: + +```bash +SAMPLE,CHROM,POS,REF,ALT,FILTER,DP,REF_DP,ALT_DP,AF,GENE,EFFECT,HGVS_C,HGVS_P,HGVS_P_1LETTER,CALLER,LINEAGE +SAMPLE1_PE,MN908947.3,241,C,T,PASS,489,4,483,0.99,orf1ab,upstream_gene_variant,c.-25C>T,.,.,ivar,B.1 +SAMPLE1_PE,MN908947.3,1875,C,T,PASS,92,62,29,0.32,orf1ab,missense_variant,c.1610C>T,p.Ala537Val,p.A537V,ivar,B.1 +SAMPLE1_PE,MN908947.3,3037,C,T,PASS,213,0,213,1.0,orf1ab,synonymous_variant,c.2772C>T,p.Phe924Phe,p.F924F,ivar,B.1 +SAMPLE1_PE,MN908947.3,11719,G,A,PASS,195,9,186,0.95,orf1ab,synonymous_variant,c.11454G>A,p.Gln3818Gln,p.Q3818Q,ivar,B.1 +``` + +Table columns: + +- SAMPLE: sample name +- CHROM: Reference/fragment ID +- POS: Position of the variant respect to the reference genome +- REF: Reference allele +- ALT: Alternative allele +- FILTER: Column indicating if the variant passed the filters. If PASS the variant passed all the filters. If not, the name of the filter that wasn't passed will appear. +- DP: Position read depth +- REF_DP: Reference allele depth +- ALT_DP: Alternative allele depth +- AF: Alternative allele frequency +- GENE: Gene name in annotation file +- EFFECT: Effect of the variant +- HGVS_C: Position annotation at CDS level +- HGVS_P: Position annotation at protein level +- HGVS_P_1LETTER: Position annotation at protein level with the aminoacid annotation in 1 letter format +- CALLER: Variant caller used +- LINEAGE: Lineage assigned to the sample by Pangolin + +## Illumina: De novo assembly + +A file called `summary_assembly_metrics_mqc.csv` containing a selection of read alignment and _de novo_ assembly related metrics will be saved in the `multiqc/` results directory. The same metrics will also be added to the top of the MultiQC report. + +### Cutadapt + +
+Output files + +- `assembly/cutadapt/log/` + - `*.cutadapt.log`: Cutadapt log file generated from stdout. +- `assembly/cutadapt/fastqc/` + - `*_fastqc.html`: FastQC report of the trimmed reads. + - `*_fastqc.zip`: Zip archive containing the FastQC report. + +
+ +In the variant calling branch of the pipeline we are using [iVar trim](#ivar-trim) to remove primer sequences from the aligned BAM files for amplicon data. Since in the _de novo_ assembly branch we don't align the reads, we use [Cutadapt](https://cutadapt.readthedocs.io/en/stable/guide.html) as an alternative option to remove and clean the primer sequences directly from FastQ files. + +![MultiQC - Cutadapt filtered reads plot](images/mqc_cutadapt_plot.png) + +### SPAdes + +
+Output files + +- `assembly/spades/<mode>/` + - `*.scaffolds.fa.gz`: SPAdes scaffold assembly. + - `*.contigs.fa.gz`: SPAdes assembly contigs. + - `*.assembly.gfa.gz`: SPAdes assembly graph in [GFA](https://github.com/GFA-spec/GFA-spec/blob/master/GFA1.md) format. +- `assembly/spades/<mode>/bandage/` + - `*.png`: Bandage visualisation for SPAdes assembly graph in PNG format. + - `*.svg`: Bandage visualisation for SPAdes assembly graph in SVG format. + +**NB:** The value of `<mode>` in the output directory name above is determined by the `--spades_mode` parameter (Default: 'rnaviral'). + +
+ +[SPAdes](http://cab.spbu.ru/software/spades/) is an assembly toolkit containing various assembly pipelines. Generically speaking, SPAdes is one of the most popular de Bruijn graph-based assembly algorithms used for bacterial/viral genome reconstruction. + +[Bandage](https://rrwick.github.io/Bandage/) is a program for visualising _de novo_ assembly graphs. By displaying connections which are not present in the contigs file, Bandage opens up new possibilities for analysing _de novo_ assemblies. + +### Unicycler + +
+Output files + +- `assembly/unicycler/` + - `*.scaffolds.fa.gz`: Unicycler scaffold assembly. + - `*.assembly.gfa.gz`: Unicycler assembly graph in GFA format. +- `assembly/unicycler/bandage/` + - `*.png`: Bandage visualisation for Unicycler assembly graph in PNG format. + - `*.svg`: Bandage visualisation for Unicycler assembly graph in SVG format. + +
+ +[Unicycler](https://github.com/rrwick/Unicycler) is an assembly pipeline for bacterial genomes. It can assemble Illumina-only read sets where it functions as a SPAdes-optimiser. + +### minia + +
+Output files + +- `assembly/minia/` + - `*.contigs.fa`: Minia scaffold assembly. + - `*.unitigs.fa`: Minia unitigs fasta file. + - `*.h5`: Minia h5 output file. + +
+ +[Minia](https://github.com/GATB/minia) is a short-read assembler based on a de Bruijn graph, capable of assembling a human genome on a desktop computer in a day. The output of Minia is a set of contigs. Minia produces results of similar contiguity and accuracy to other de Bruijn assemblers. + +### BLAST + +
+Output files + +- `assembly/<assembler>/blastn/` + - `*results.blastn.txt`: BLAST results against the target virus. + - `*.filter.blastn.txt`: Filtered BLAST results. + - Applied filters by default are: + - `qlen` (contig length) > 200 nt + - `%cgAligned` (percentage of contig aligned) > 0.7 (70%) + - Columns description (for more information see: https://www.metagenomics.wiki/tools/blast/blastn-output-format-6): + - stitle: Subject Title. Name of the reference genome. + - staxids: Subject Taxonomy ID(s), separated by a ';'. When the blast database is not annotated with taxids, 0 will appear. + - qaccver: Query accession version. Contig name. + - saccver: Subject accession version. Reference genome accession version. + - pident: Percentage of identical matches. + - length: Alignment length. + - mismatch: Number of mismatches. + - gapopen: Number of gap openings. + - qstart: Start of alignment in query (sample's contig). + - qend: End of alignment in query (sample's contig). + - sstart: Start of alignment in subject (reference genome). + - send: End of alignment in subject (reference genome). + - evalue: Expect value. + - bitscore: Bit score. The bit-score is the required size of a sequence database in which the current match could be found just by chance. The bit-score is a log2 scaled and normalized raw-score. Each increase by one doubles the required database size (2^bit-score). + - slen: Subject (reference genome) sequence length. + - qlen: Query (contig) sequence length. + - qcovs: Query Coverage Per Subject. + - %cgAligned: Percentage of contig covered in the alignment. It is calculated dividing `length/qlen`. + - %refCovered: Percentage of reference genome covered in the alignment. It is calculated dividing `length/slen`. + +**NB:** The value of `<assembler>` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[blastn](https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastSearch) is used to align the assembled contigs against the virus reference genome. + +### ABACAS + +
+Output files + +- `assembly//abacas/` + - `*.abacas.bin`: Bin file that contains contigs that are not used in ordering. + - `*.abacas.crunch`: Comparison file. + - `*.abacas.fasta`: Ordered and orientated sequence file. + - `*.abacas.gaps`: Gap information. + - `*.abacas.gaps.tab`: Gap information in tab-delimited format. + - `*.abacas.MULTIFASTA.fa`: A list of ordered and orientated contigs in a multi-fasta format. + - `*.abacas.tab`: Feature file + - `*.unused_contigs.out`: Information on contigs that have a mapping information but could not be used in the ordering. +- `assembly//abacas/nucmer/`: Folder containing the files generated by the NUCmer algorithm used by ABACAS. + +**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[ABACAS](https://www.sanger.ac.uk/science/tools/pagit) was developed to rapidly contiguate (align, order, orientate), visualize and design primers to close gaps on shotgun assembled contigs based on a reference sequence. + +### PlasmidID + +
+Output files + +- `assembly//plasmidid//` + - `*_final_results.html`: Summary file with reference coverage stats and contigs for visualization. + - `*_final_results.tab`: Summary file with reference coverage stats and contigs. + - `images/_.png`: PNG file with the visualization of the alignment between the viral assembly and the reference viral genome. + - `logs/`: Log files. + +**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[PlasmidID](https://github.com/BU-ISCIII/plasmidID) was used to graphically represent the alignment of the reference genome relative to a given assembly. This helps to visualize the coverage of the reference genome in the assembly. To find more information about the output files refer to the [documentation](https://github.com/BU-ISCIII/plasmidID/wiki/Understanding-the-image:-track-by-track). + +### Assembly QUAST + +
+Output files + +- `assembly//quast/` + - `report.html`: Results report in HTML format. Also available in various other file formats i.e. `report.pdf`, `report.tex`, `report.tsv` and `report.txt`. + +**NB:** The value of `` in the output directory name above is determined by the `--assemblers` parameter (Default: 'spades'). + +
+ +[QUAST](http://bioinf.spbau.ru/quast) is used to generate a single report with which to evaluate the quality of the _de novo_ assemblies across all of the samples provided to the pipeline. The HTML results can be opened within any browser (we recommend using Google Chrome). Please see the [QUAST output docs](http://quast.sourceforge.net/docs/manual.html#sec3) for more detailed information regarding the output files. + +![MultiQC - QUAST contig counts](images/mqc_quast_plot.png) + +## Illumina: Workflow reporting and genomes + ### MultiQC
@@ -37,15 +1072,40 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d - `multiqc/` - `multiqc_report.html`: a standalone HTML file that can be viewed in your web browser. - `multiqc_data/`: directory containing parsed statistics from the different tools used in the pipeline. - - `multiqc_plots/`: directory containing static images from the report in various formats. + - `summary_variants_metrics_mqc.csv`: file containing a selection of read alignment and variant calling metrics. The same metrics will also be added to the top of the MultiQC report. + - `summary_assembly_metrics_mqc.csv`: file containing a selection of read alignment and _de novo_ assembly related metrics. The same metrics will also be added to the top of the MultiQC report.
-[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarising all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. +[MultiQC](http://multiqc.info) is a visualization tool that generates a single HTML report summarizing all samples in your project. Most of the pipeline QC results are visualised in the report and further statistics are available in the report data directory. + +Results generated by MultiQC collate pipeline QC from FastQC, fastp, Cutadapt, Bowtie 2, Kraken 2, samtools, picard CollectMultipleMetrics, BCFTools, SnpEff and QUAST. + +The default [`multiqc config file`](https://github.com/nf-core/viralrecon/blob/master/assets/multiqc_config_illumina.yaml) has been written in a way in which to structure these QC metrics to make them more interpretable in the final report. + +The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . + +An example MultiQC report generated from a full-sized dataset can be viewed on the [nf-core website](https://nf-co.re/viralrecon/results). + +### Reference genome files + +
+Output files + +- `genome/` + - `bowtie2/`: Bowtie 2 index for viral genome. + - `blast_db/`: BLAST database for viral genome. + - `kraken2_db/`: Kraken 2 database for host genome. + - `snpeff_db/`: SnpEff database for viral genome. + - `snpeff.config`: SnpEff config file for viral genome. + - Unzipped genome fasta file for viral genome + - Unzipped genome annotation GFF file for viral genome + +
-Results generated by MultiQC collate pipeline QC from supported tools e.g. FastQC. The pipeline has special steps which also allow the software versions to be reported in the MultiQC output for future traceability. For more information about how to use MultiQC reports, see . +A number of genome-specific files are generated by the pipeline because they are required for the downstream processing of the results. If the `--save_reference` parameter is provided then the Bowtie 2 alignment indices, BLAST and Kraken 2 databases downloaded/generated by the pipeline will be saved in the `genome/` directory. It is recommended to use the `--save_reference` parameter if you are using the pipeline to build a Kraken 2 database for the host genome. This can be quite a time-consuming process and it permits their reuse for future runs of the pipeline or for other purposes. -### Pipeline information +## Pipeline information
Output files diff --git a/docs/usage.md b/docs/usage.md index 7263e3f3..a626bd95 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -4,11 +4,13 @@ > _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._ -## Introduction +## Pipeline parameters - +Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration except for parameters; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -## Samplesheet input +## Samplesheet format + +### Illumina You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below. @@ -22,42 +24,166 @@ The `sample` identifiers have to be the same when you have re-sequenced the same ```csv title="samplesheet.csv" sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz -CONTROL_REP1,AEG588A1_S1_L004_R1_001.fastq.gz,AEG588A1_S1_L004_R2_001.fastq.gz +SAMPLE_1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz +SAMPLE_1,AEG588A1_S1_L003_R1_001.fastq.gz,AEG588A1_S1_L003_R2_001.fastq.gz +SAMPLE_2,AEG588A2_S4_L003_R1_001.fastq.gz, ``` -### Full samplesheet +| Column | Description | +| --------- | -------------------------------------------------------------------------------------------------------------------------- | +| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. | +| `fastq_1` | Full path to FastQ file for Illumina short reads 1. 
File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -The pipeline will auto-detect whether a sample is single- or paired-end using the information provided in the samplesheet. The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 3 columns to match those defined in the table below. +> **NB:** Dashes (`-`) and spaces in sample names are automatically converted to underscores (`_`) to avoid downstream issues in the pipeline. -A final samplesheet file consisting of both single- and paired-end data may look something like the one below. This is for 6 samples, where `TREATMENT_REP3` has been sequenced twice. +### Nanopore -```csv title="samplesheet.csv" -sample,fastq_1,fastq_2 -CONTROL_REP1,AEG588A1_S1_L002_R1_001.fastq.gz,AEG588A1_S1_L002_R2_001.fastq.gz -CONTROL_REP2,AEG588A2_S2_L002_R1_001.fastq.gz,AEG588A2_S2_L002_R2_001.fastq.gz -CONTROL_REP3,AEG588A3_S3_L002_R1_001.fastq.gz,AEG588A3_S3_L002_R2_001.fastq.gz -TREATMENT_REP1,AEG588A4_S4_L003_R1_001.fastq.gz, -TREATMENT_REP2,AEG588A5_S5_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L003_R1_001.fastq.gz, -TREATMENT_REP3,AEG588A6_S6_L004_R1_001.fastq.gz, +You have the option to provide a samplesheet to the pipeline that maps sample ids to barcode ids. This allows you to associate barcode ids to clinical/public database identifiers that can be used to QC or pre-process the data with more appropriate sample names. + +```console +--input '[path to samplesheet file]' +``` + +It has to be a comma-separated file with 2 columns. 
A final samplesheet file may look something like the one below: + +```console +sample,barcode +21X983255,1 +70H209408,2 +49Y807476,3 +70N209581,4 +``` + +| Column | Description | +| --------- | ------------------------------------------------------------------------------------- | +| `sample` | Custom sample name, one per barcode. | +| `barcode` | Barcode identifier attributed to that sample during multiplexing. Must be an integer. | + +> **NB:** Dashes (`-`) and spaces in sample names are automatically converted to underscores (`_`) to avoid downstream issues in the pipeline. + +## Nanopore input format + +For Nanopore data the pipeline only supports amplicon-based analysis obtained from primer sets created and maintained by the [ARTIC Network](https://artic.network/). The [artic minion](https://artic.readthedocs.io/en/latest/commands/) tool from the [ARTIC field bioinformatics pipeline](https://github.com/artic-network/fieldbioinformatics) is used to align reads, call variants and to generate the consensus sequence. + +### Nanopolish + +The default variant caller used by artic minion is [Nanopolish](https://github.com/jts/nanopolish) and this requires that you provide `*.fastq`, `*.fast5` and `sequencing_summary.txt` files as input to the pipeline. These files can typically be obtained after demultiplexing and basecalling the sequencing data using [Guppy](https://nanoporetech.com/nanopore-sequencing-data-analysis) (see [ARTIC SOP docs](https://artic.network/ncov-2019/ncov2019-bioinformatics-sop.html)). This pipeline requires that the files are organised in the format outlined below and gzip compressed files are also accepted: + +```console +. 
+└── fastq_pass + └── barcode01 + ├── FAP51364_pass_barcode01_97ca62ca_0.fastq + ├── FAP51364_pass_barcode01_97ca62ca_1.fastq + ├── FAP51364_pass_barcode01_97ca62ca_2.fastq + ├── FAP51364_pass_barcode01_97ca62ca_3.fastq + ├── FAP51364_pass_barcode01_97ca62ca_4.fastq + ├── FAP51364_pass_barcode01_97ca62ca_5.fastq + ``` -| Column | Description | -| --------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `sample` | Custom sample name. This entry will be identical for multiple sequencing libraries/runs from the same sample. Spaces in sample names are automatically converted to underscores (`_`). | -| `fastq_1` | Full path to FastQ file for Illumina short reads 1. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | -| `fastq_2` | Full path to FastQ file for Illumina short reads 2. File has to be gzipped and have the extension ".fastq.gz" or ".fq.gz". | +```console +. +└── fast5_pass + ├── barcode01 + ├── FAP51364_pass_barcode01_97ca62ca_0.fast5 + ├── FAP51364_pass_barcode01_97ca62ca_1.fast5 + ├── FAP51364_pass_barcode01_97ca62ca_2.fast5 + ├── FAP51364_pass_barcode01_97ca62ca_3.fast5 + ├── FAP51364_pass_barcode01_97ca62ca_4.fast5 + ├── FAP51364_pass_barcode01_97ca62ca_5.fast5 + +``` -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
+The command to run the pipeline would then be: + +```console +nextflow run nf-core/viralrecon \ + --input samplesheet.csv \ + --outdir \ + --platform nanopore \ + --genome 'MN908947.3' \ + --primer_set 'artic' \ + --primer_set_version 3 \ + --fastq_dir fastq_pass/ \ + --fast5_dir fast5_pass/ \ + --sequencing_summary sequencing_summary.txt \ + -profile +``` + +### Medaka + +You also have the option of using [Medaka](https://github.com/nanoporetech/medaka) as an alternative variant caller to Nanopolish via the `--artic_minion_caller medaka` parameter. Medaka is faster than Nanopolish, performs mostly the same and can be run directly from `fastq` input files as opposed to requiring the `fastq`, `fast5` and `sequencing_summary.txt` files required to run Nanopolish. You must provide the appropriate [Medaka model](https://github.com/nanoporetech/medaka#models) via the `--artic_minion_medaka_model` parameter if using `--artic_minion_caller medaka`. The `fastq` files have to be organised in the same way as for Nanopolish as outlined in the section above. + +The command to run the pipeline would then be: + +```console +nextflow run nf-core/viralrecon \ + --input samplesheet.csv \ + --outdir \ + --platform nanopore \ + --genome 'MN908947.3' \ + --primer_set 'artic' \ + --primer_set_version 3 \ + --fastq_dir fastq_pass/ \ + --artic_minion_caller medaka \ + --artic_minion_medaka_model r941_min_high_g360 \ + -profile +``` + +## Illumina primer sets + +The Illumina processing mode of the pipeline has been tested on numerous different primer sets. Where possible we are trying to collate links and settings for standard primer sets to make it easier to run the pipeline with standard parameter keys. If you are able to get permissions from the vendor/supplier to share the primer information then we would be more than happy to support it within the pipeline. 
+ +For SARS-CoV-2 data we recommend using the "MN908947.3" genome because it is supported out-of-the-box by the most commonly used primer sets available from the [ARTIC Network](https://artic.network/). For ease of use, we are also maintaining a version of the "MN908947.3" genome along with the appropriate links to the ARTIC primer sets in the [genomes config file](https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config) used by the pipeline. The genomes config file can be updated independently from the main pipeline code to make it possible to dynamically extend this file for other viral genomes/primer sets on request. + +For further information or help, don't hesitate to get in touch on the [Slack `#viralrecon` channel](https://nfcore.slack.com/channels/viralrecon) (you can join with [this invite](https://nf-co.re/join/slack)). + +### ARTIC primer sets + +An example command using v3 ARTIC primers with "MN908947.3": + +```console +nextflow run nf-core/viralrecon \ + --input samplesheet.csv \ + --outdir \ + --platform illumina \ + --protocol amplicon \ + --genome 'MN908947.3' \ + --primer_set artic \ + --primer_set_version 3 \ + --skip_assembly \ + -profile +``` + +### SWIFT primer sets + +The [SWIFT amplicon panel](https://swiftbiosci.com/swift-amplicon-sars-cov-2-panel/) is another commonly used method used to prep and sequence SARS-CoV-2 samples. We haven't been able to obtain explicit permission to host standard SWIFT primer sets but you can obtain a masterfile which is freely available from their website that contains the primer sequences as well as genomic co-ordinates. You just need to convert this file to [BED6](https://genome.ucsc.edu/FAQ/FAQformat.html#format1) format and provide it to the pipeline with `--primer_bed swift_primers.bed`. 
Be sure to check the values provided to `--primer_left_suffix` and `--primer_right_suffix` match the primer names defined in the BED file as highlighted in [this issue](https://github.com/nf-core/viralrecon/issues/169). For an explanation behind the usage of the `--ivar_trim_offset 5` for SWIFT primer sets see [this issue](https://github.com/nf-core/viralrecon/issues/170). + +An example command using SWIFT primers with "MN908947.3": + +```console +nextflow run nf-core/viralrecon \ + --input samplesheet.csv \ + --outdir \ + --platform illumina \ + --protocol amplicon \ + --genome 'MN908947.3' \ + --primer_bed swift_primers.bed \ + --primer_left_suffix '_F' \ + --primer_right_suffix '_R' \ + --ivar_trim_offset 5 \ + --skip_assembly \ + -profile +``` ## Running the pipeline The typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/viralrecon --input ./samplesheet.csv --outdir ./results --genome GRCh37 -profile docker +nextflow run nf-core/viralrecon --input samplesheet.csv --outdir --genome 'MN908947.3' -profile docker ``` This will launch the pipeline with the `docker` configuration profile. See below for more information about profiles. @@ -191,6 +317,10 @@ A pipeline might not always support every possible argument or option of a parti To learn how to provide additional arguments to a particular tool of the pipeline, please see the [customising tool arguments](https://nf-co.re/docs/usage/configuration#customising-tool-arguments) section of the nf-core website. +#### Freyja + +[Freyja](https://github.com/andersen-lab/Freyja) relies on a dataset of barcodes that use lineage defining mutations (see [UShER](https://usher-wiki.readthedocs.io/en/latest/#)). By default the most recent barcodes will be downloaded and used. However, if analyses need to be compared across multiple datasets, it might be of interest to re-use the same barcodes, or to rerun all Freyja analyses with the most recent dataset. 
To do this, specify the barcodes and lineages using the `--freyja_barcodes`, `--freyja_lineages` parameters, respectively. The bootstrapping of Freyja can be skipped by specifying `--skip_freyja_boot`. + + ### nf-core/configs In most cases, you will only need to create a custom config as a one-off but if you and others within your organisation are likely to be running nf-core pipelines regularly and need to use the same settings regularly it may be a good idea to request that your custom config file is uploaded to the `nf-core/configs` git repository. Before you do this please can you test that the config file works with your pipeline of choice using the `-c` parameter. You can then create a pull request to the `nf-core/configs` repository with the addition of your config file, associated documentation file (see examples in [`nf-core/configs/docs`](https://github.com/nf-core/configs/tree/master/docs)), and amending [`nfcore_custom.config`](https://github.com/nf-core/configs/blob/master/nfcore_custom.config) to include your custom profile. 
diff --git a/lib/WorkflowCommons.groovy b/lib/WorkflowCommons.groovy new file mode 100755 index 00000000..dd805236 --- /dev/null +++ b/lib/WorkflowCommons.groovy @@ -0,0 +1,166 @@ +// +// This file holds several functions common to the multiple workflows in the nf-core/viralrecon pipeline +// +import nextflow.Nextflow + +class WorkflowCommons { + + // + // Exit pipeline if incorrect --genome key provided + // + private static void genomeExistsError(params, log) { + if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) { + Nextflow.error("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' not found in any config files provided to the pipeline.\n" + + " Currently, the available genome keys are:\n" + + " ${params.genomes.keySet().join(", ")}\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + } + } + + // + // Get workflow summary for MultiQC + // + public static String paramsSummaryMultiqc(workflow, summary) { + String summary_section = '' + for (group in summary.keySet()) { + def group_params = summary.get(group) // This gets the parameters of that particular group + if (group_params) { + summary_section += "

$group

\n" + summary_section += "
\n" + for (param in group_params.keySet()) { + summary_section += "
$param
${group_params.get(param) ?: 'N/A'}
\n" + } + summary_section += "
\n" + } + } + + String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" + yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" + yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" + yaml_file_text += "plot_type: 'html'\n" + yaml_file_text += "data: |\n" + yaml_file_text += "${summary_section}" + return yaml_file_text + } + + // + // Create MultiQC tsv custom content from a list of values + // + public static String multiqcTsvFromList(tsv_data, header) { + def tsv_string = "" + if (tsv_data.size() > 0) { + tsv_string += "${header.join('\t')}\n" + tsv_string += tsv_data.join('\n') + } + return tsv_string + } + + // + // Function to check whether primer BED file has the correct suffixes as provided to the pipeline + // + public static void checkPrimerSuffixes(primer_bed_file, primer_left_suffix, primer_right_suffix, log) { + def total = 0 + def left = 0 + def right = 0 + primer_bed_file.eachLine { line -> + total += 1 + def name = line.split('\t')[3] + if (name.contains(primer_left_suffix)) { + left += 1 + } else if (name.contains(primer_right_suffix)) { + right += 1 + } + } + if (total != (left + right)) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Please check the name field (column 4) in the file supplied via --primer_bed.\n\n" + + " All of the values in that column do not end with those supplied by:\n" + + " --primer_left_suffix : $primer_left_suffix\n" + + " --primer_right_suffix: $primer_right_suffix\n\n" + + " This information is required to collapse the primer intervals into amplicons\n" + + " for the coverage plots generated by the pipeline.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + } + } + + // + // Function to get column entries from a file + // + public static ArrayList 
getColFromFile(input_file, col=0, uniqify=false, sep='\t') { + def vals = [] + input_file.eachLine { line -> + def val = line.split(sep)[col] + if (uniqify) { + if (!vals.contains(val)) { + vals << val + } + } else { + vals << val + } + } + return vals + } + + // + // Function that returns the number of lines in a file + // + public static Integer getNumLinesInFile(input_file) { + def num_lines = 0 + input_file.eachLine { line -> + num_lines ++ + } + return num_lines + } + + // + // Function to generate an error if contigs in BED file do not match those in reference genome + // + public static void checkContigsInBED(fai_contigs, bed_contigs, log) { + def intersect = bed_contigs.intersect(fai_contigs) + if (intersect.size() != bed_contigs.size()) { + def diff = bed_contigs.minus(intersect).sort() + Nextflow.error("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Contigs in primer BED file do not match those in the reference genome:\n\n" + + " ${diff.join('\n ')}\n\n" + + " Please check:\n" + + " - Primer BED file supplied with --primer_bed\n" + + " - Genome FASTA file supplied with --fasta\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") + } + } + + // + // Function to read in all fields into a Groovy Map from Nextclade CSV output file + // + // See: https://stackoverflow.com/a/67766919 + public static Map getNextcladeFieldMapFromCsv(nextclade_report) { + def headers = [] + def field_map = [:] + nextclade_report.readLines().eachWithIndex { row, row_index -> + def vals = row.split(';') + if (row_index == 0) { + headers = vals + } else { + def cells = headers.eachWithIndex { header, header_index -> + def val = (header_index <= vals.size()-1) ? 
vals[header_index] : '' + field_map[header] = val ?: 'NA' + } + } + } + return field_map + } + + // + // Function to get number of variants reported in BCFTools stats file + // + public static Integer getNumVariantsFromBCFToolsStats(bcftools_stats) { + def num_vars = 0 + bcftools_stats.eachLine { line -> + def matcher = line =~ /SN\s*0\s*number\sof\srecords:\s*([\d]+)/ + if (matcher) num_vars = matcher[0][1].toInteger() + } + return num_vars + } +} diff --git a/lib/WorkflowIllumina.groovy b/lib/WorkflowIllumina.groovy new file mode 100755 index 00000000..68a0ee58 --- /dev/null +++ b/lib/WorkflowIllumina.groovy @@ -0,0 +1,141 @@ +// +// This file holds several functions specific to the workflow/illumina.nf in the nf-core/viralrecon pipeline +// +import nextflow.Nextflow +import groovy.json.JsonSlurper + +class WorkflowIllumina { + + // + // Check and validate parameters + // + public static void initialise(params, log, valid_params) { + WorkflowCommons.genomeExistsError(params, log) + + // Generic parameter validation + if (!valid_params['protocols'].contains(params.protocol)) { + Nextflow.error("Invalid option: '${params.protocol}'. Valid options for '--protocol': ${valid_params['protocols'].join(', ')}.") + } + + if (!params.fasta) { + Nextflow.error("Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file.") + } + + if (!params.skip_kraken2 && !params.kraken2_db) { + if (!params.kraken2_db_name) { + Nextflow.error("Please specify a valid name to build Kraken2 database for host e.g. '--kraken2_db_name human'.") + } + } + + // Variant calling parameter validation + if (params.variant_caller) { + if (!valid_params['variant_callers'].contains(params.variant_caller)) { + Nextflow.error("Invalid option: ${params.variant_caller}. 
Valid options for '--variant_caller': ${valid_params['variant_callers'].join(', ')}.") + } + } + + // Consensus calling parameter validation + if (params.consensus_caller) { + if (!valid_params['consensus_callers'].contains(params.consensus_caller)) { + Nextflow.error("Invalid option: ${params.consensus_caller}. Valid options for '--consensus_caller': ${valid_params['consensus_callers'].join(', ')}.") + } + } + + if (params.protocol == 'amplicon' && !params.skip_variants && !params.primer_bed) { + Nextflow.error("To perform variant calling in amplicon mode please provide a valid primer BED file e.g. '--primer_bed primers.bed'.") + } + + // Assembly parameter validation + def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : [] + if ((valid_params['assemblers'] + assemblers).unique().size() != valid_params['assemblers'].size()) { + Nextflow.error("Invalid option: ${params.assemblers}. Valid options for '--assemblers': ${valid_params['assemblers'].join(', ')}.") + } + + if (!valid_params['spades_modes'].contains(params.spades_mode)) { + Nextflow.error("Invalid option: ${params.spades_mode}. 
Valid options for '--spades_modes': ${valid_params['spades_modes'].join(', ')}.") + } + } + + // + // Print warning if genome fasta has more than one sequence + // + public static void isMultiFasta(fasta_file, log) { + def count = 0 + def line = null + fasta_file.withReader { reader -> + while (line = reader.readLine()) { + if (line.contains('>')) { + count++ + if (count > 1) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " This pipeline does not officially support multi-fasta genome files!\n\n" + + " The parameters and processes are tailored for viral genome analysis.\n" + + " Please amend the '--fasta' parameter.\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + break + } + } + } + } + } + + // + // Function that parses and returns the number of mapped reasds from flagstat files + // + public static ArrayList getFlagstatMappedReads(flagstat_file, params) { + def mapped_reads = 0 + flagstat_file.eachLine { line -> + if (line.contains(' mapped (')) { + mapped_reads = line.tokenize().first().toInteger() + } + } + + def pass = false + def logname = flagstat_file.getBaseName() - 'flagstat' + if (mapped_reads > params.min_mapped_reads.toInteger()) { + pass = true + } + return [ mapped_reads, pass ] + } + + // + // Check if the primer BED file supplied to the pipeline is from the SWIFT/SNAP protocol + // + public static void checkIfSwiftProtocol(primer_bed_file, name_prefix, log) { + def count = 0 + def line = null + primer_bed_file.withReader { reader -> + while (line = reader.readLine()) { + def name = line.split('\t')[3] + if (name.contains(name_prefix)) { + count++ + if (count > 1) { + log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Found '${name_prefix}' in the name field of the primer BED file!\n" + + " This suggests that you have used the SWIFT/SNAP protocol to prep your samples.\n" + + " If so, please set '--ivar_trim_offset 5' as 
suggested in the issue below:\n" + + " https://github.com/nf-core/viralrecon/issues/170\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + break + } + } + } + } + } + + // + // Function that parses fastp json output file to get total number of reads after trimming + // + public static Integer getFastpReadsAfterFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['after_filtering']['total_reads'].toInteger() + } + + // + // Function that parses fastp json output file to get total number of reads before trimming + // + public static Integer getFastpReadsBeforeFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['before_filtering']['total_reads'].toInteger() + } +} diff --git a/lib/WorkflowNanopore.groovy b/lib/WorkflowNanopore.groovy new file mode 100755 index 00000000..e9983ee3 --- /dev/null +++ b/lib/WorkflowNanopore.groovy @@ -0,0 +1,54 @@ +// +// This file holds several functions specific to the workflow/nanopore.nf in the nf-core/viralrecon pipeline +// +import nextflow.Nextflow + +class WorkflowNanopore { + + // + // Check and validate parameters + // + public static void initialise(params, log, valid_params) { + WorkflowCommons.genomeExistsError(params, log) + + // Generic parameter validation + if (!params.fasta) { + Nextflow.error("Genome fasta file not specified with e.g. '--fasta genome.fa' or via a detectable config file.") + } + + if (!params.primer_bed) { + Nextflow.error("Primer BED file not specified with e.g. '--primer_bed primers.bed' or via a detectable config file.") + } + + if (!params.artic_scheme) { + Nextflow.error("ARTIC scheme not specified with e.g. --artic_scheme 'nCoV-2019' or via a detectable config file.") + } + + if (!valid_params['artic_minion_caller'].contains(params.artic_minion_caller)) { + Nextflow.error("Invalid option: ${params.artic_minion_caller}. 
Valid options for '--artic_minion_caller': ${valid_params['artic_minion_caller'].join(', ')}.") + } + + if (!valid_params['artic_minion_aligner'].contains(params.artic_minion_aligner)) { + Nextflow.error("Invalid option: ${params.artic_minion_aligner}. Valid options for '--artic_minion_aligner': ${valid_params['artic_minion_aligner'].join(', ')}.") + } + + if (!params.fastq_dir) { + Nextflow.error("Please specify a valid folder containing ONT basecalled fastq files generated by guppy_barcoder or guppy_basecaller e.g. '--fastq_dir ./20191023_1522_MC-110615_0_FAO93606_12bf9b4f/fastq_pass/") + } + + if (params.artic_minion_caller == 'nanopolish') { + if (!params.fast5_dir) { + Nextflow.error("Please specify a valid folder containing ONT fast5 files e.g. '--fast5_dir ./20191023_1522_MC-110615_0_FAO93606_12bf9b4f/fast5_pass/") + } + if (!params.sequencing_summary) { + Nextflow.error("Please specify a valid ONT sequencing summary file e.g. '--sequencing_summary ./20191023_1522_MC-110615_0_FAO93606_12bf9b4f/sequencing_summary.txt") + } + } + + if (params.artic_minion_caller == 'medaka') { + if (!params.artic_minion_medaka_model) { + Nextflow.error("Please specify the '--artic_minion_medaka_model' parameter too if using the '--artic_minion_caller medaka' workflow.\nSee https://github.com/nanoporetech/medaka") + } + } + } +} diff --git a/main.nf b/main.nf index 5cf74e5c..96ae5c1f 100644 --- a/main.nf +++ b/main.nf @@ -11,25 +11,48 @@ /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS + GENOME PARAMETER VALUES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -include { VIRALRECON } from './workflows/viralrecon' -include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_viralrecon_pipeline' -include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_viralrecon_pipeline' -include { getGenomeAttribute } from 
'./subworkflows/local/utils_nfcore_viralrecon_pipeline' +def primer_set = '' +def primer_set_version = 0 +if (params.platform == 'illumina' && params.protocol == 'amplicon') { + primer_set = params.primer_set + primer_set_version = params.primer_set_version +} else if (params.platform == 'nanopore') { + primer_set = params.primer_set + primer_set_version = params.primer_set_version + params.artic_scheme = getGenomeAttribute('scheme', primer_set, primer_set_version) +} + +params.fasta = getGenomeAttribute('fasta') +params.gff = getGenomeAttribute('gff') +params.bowtie2_index = getGenomeAttribute('bowtie2') +params.primer_bed = getGenomeAttribute('primer_bed', primer_set, primer_set_version) + +params.nextclade_dataset = getGenomeAttribute('nextclade_dataset') +params.nextclade_dataset_name = getGenomeAttribute('nextclade_dataset_name') +params.nextclade_dataset_reference = getGenomeAttribute('nextclade_dataset_reference') +params.nextclade_dataset_tag = getGenomeAttribute('nextclade_dataset_tag') + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - GENOME PARAMETER VALUES + IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// TODO nf-core: Remove this line if you don't need a FASTA file -// This is an example of how to use getGenomeAttribute() to fetch parameters -// from igenomes.config using `--genome` -params.fasta = getGenomeAttribute('fasta') +if (params.platform == 'illumina') { + include { ILLUMINA } from './workflows/illumina' +} else if (params.platform == 'nanopore') { + include { NANOPORE } from './workflows/nanopore' +} + +include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_viralrecon_pipeline' +include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_viralrecon_pipeline' + + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -37,8 
+60,10 @@ params.fasta = getGenomeAttribute('fasta') ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ + + // -// WORKFLOW: Run main analysis pipeline depending on type of input +// WORKFLOW: Run main nf-core/viralrecon analysis pipeline depending on type of input // workflow NFCORE_VIRALRECON { @@ -50,18 +75,56 @@ workflow NFCORE_VIRALRECON { // // WORKFLOW: Run pipeline // - VIRALRECON ( - samplesheet - ) + multiqc_report = Channel.empty() + + if (params.platform == 'illumina') { + ILLUMINA ( + samplesheet, + params.fasta, + params.gff, + params.primer_bed, + params.bowtie2_index, + params.nextclade_dataset, + params.nextclade_dataset_name, + params.nextclade_dataset_reference, + params.nextclade_dataset_tag + ) + + multiqc_report = ILLUMINA.out.multiqc_report + + } else if (params.platform == 'nanopore') { + NANOPORE ( + samplesheet, + params.fasta, + params.gff, + params.primer_bed, + params.artic_scheme, + params.bowtie2_index, + params.nextclade_dataset, + params.nextclade_dataset_name, + params.nextclade_dataset_reference, + params.nextclade_dataset_tag + ) + + multiqc_report = NANOPORE.out.multiqc_report + } + emit: - multiqc_report = VIRALRECON.out.multiqc_report // channel: /path/to/multiqc_report.html + multiqc_report // channel: /path/to/multiqc_report.html + } + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RUN MAIN WORKFLOW ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +// +// WORKFLOW: Execute a single named workflow for the pipeline +// See: https://github.com/nf-core/rnaseq/issues/619 +// + workflow { main: @@ -97,6 +160,66 @@ workflow { ) } +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + FUNCTIONS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def getGenomeAttribute(attribute, primer_set='', primer_set_version=0) { + 
def val = '' + def support_link = " The default genome config used by the pipeline can be found here:\n" + + " - https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config\n\n" + + " If you would still like to blame us please come and find us on nf-core Slack:\n" + + " - https://nf-co.re/viralrecon#contributions-and-support\n" + + "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~" + + if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { + def genome_map = params.genomes[ params.genome ] + if (primer_set) { + if (genome_map.containsKey('primer_sets')) { + genome_map = genome_map[ 'primer_sets' ] + if (genome_map.containsKey(primer_set)) { + genome_map = genome_map[ primer_set ] + primer_set_version = primer_set_version.toString() + if (genome_map.containsKey(primer_set_version)) { + genome_map = genome_map[ primer_set_version ] + } else { + Nextflow.error("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " --primer_set_version '${primer_set_version}' not found!\n\n" + + " Currently, the available primer set version keys are: ${genome_map.keySet().join(", ")}\n\n" + + " Please check:\n" + + " - The value provided to --primer_set_version (currently '${primer_set_version}')\n" + + " - The value provided to --primer_set (currently '${primer_set}')\n" + + " - The value provided to --genome (currently '${params.genome}')\n" + + " - Any custom config files provided to the pipeline.\n\n" + support_link) + } + } else { + Nextflow.error("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " --primer_set '${primer_set}' not found!\n\n" + + " Currently, the available primer set keys are: ${genome_map.keySet().join(", ")}\n\n" + + " Please check:\n" + + " - The value provided to --primer_set (currently '${primer_set}')\n" + + " - The value provided to --genome (currently '${params.genome}')\n" + + " - Any custom config files provided to 
the pipeline.\n\n" + support_link) + } + } else { + Nextflow.error("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + + " Genome '${params.genome}' does not contain any primer sets!\n\n" + + " Please check:\n" + + " - The value provided to --genome (currently '${params.genome}')\n" + + " - Any custom config files provided to the pipeline.\n\n" + support_link) + } + } + if (genome_map.containsKey(attribute)) { + val = genome_map[ attribute ] + } else if (params.genomes[ params.genome ].containsKey(attribute)) { + val = params.genomes[ params.genome ][ attribute ] + } + } + return val +} + /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END diff --git a/modules.json b/modules.json index 9982e22e..bffff062 100644 --- a/modules.json +++ b/modules.json @@ -5,20 +5,305 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "abacas": { + "branch": "master", + "git_sha": "3304b04f6022222e217bd17bc117f0babc8cbd6a", + "installed_by": ["modules"] + }, + "artic/guppyplex": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "artic/minion": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "bandage/image": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "bcftools/consensus": { + "branch": "master", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", + "installed_by": ["modules"] + }, + "bcftools/filter": { + "branch": "master", + "git_sha": "a3893076a76e91b3ff152faddf872f00778fb224", + "installed_by": ["modules"] + }, + "bcftools/mpileup": { + "branch": "master", + "git_sha": "e7df38a545d7d72083eededabd8849f731a01502", + "installed_by": ["modules"] + }, + "bcftools/norm": { + "branch": "master", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", + "installed_by": ["modules"] 
+ }, + "bcftools/query": { + "branch": "master", + "git_sha": "44096c08ffdbc694f5f92ae174ea0f7ba0f37e09", + "installed_by": ["modules"] + }, + "bcftools/sort": { + "branch": "master", + "git_sha": "b42fec6f7c6e5d0716685cabb825ef6bf6e386b5", + "installed_by": ["modules"] + }, + "bcftools/stats": { + "branch": "master", + "git_sha": "618364f55cb88f6c283f6c6c45c24d5f9f08f998", + "installed_by": ["modules"] + }, + "bedtools/getfasta": { + "branch": "master", + "git_sha": "cdcdd5e3d806f0ff3983c40c69e0b07bb44ec299", + "installed_by": ["modules"] + }, + "bedtools/maskfasta": { + "branch": "master", + "git_sha": "3b248b84694d1939ac4bb33df84bf6233a34d668", + "installed_by": ["modules"] + }, + "bedtools/merge": { + "branch": "master", + "git_sha": "a5377837fe9013bde89de8689829e83e84086536", + "installed_by": ["modules"] + }, + "blast/blastn": { + "branch": "master", + "git_sha": "209e5a3e2753c5e628736a662c877c20f341ee15", + "installed_by": ["modules"] + }, + "blast/makeblastdb": { + "branch": "master", + "git_sha": "a01c66c96e0bc610ad126e7adc4a94cd4acd1b48", + "installed_by": ["modules"] + }, + "bowtie2/align": { + "branch": "master", + "git_sha": "0fe30831abbc2ed115e46e92330edf38f56edc3d", + "installed_by": ["fastq_align_bowtie2"] + }, + "bowtie2/build": { + "branch": "master", + "git_sha": "1fea64f5132a813ec97c1c6d3a74e0aee7142b6d", + "installed_by": ["modules"] + }, + "cat/fastq": { + "branch": "master", + "git_sha": "4fc983ad0b30e6e32696fa7d980c76c7bfe1c03e", + "installed_by": ["modules"] + }, + "custom/getchromsizes": { + "branch": "master", + "git_sha": "1b0ffa4e5aed5b7e3cd4311af31bd3b2c8345051", + "installed_by": ["modules"] + }, + "fastp": { + "branch": "master", + "git_sha": "1ceaa8ba4d0fd886dbca0e545815d905b7407de7", + "installed_by": ["modules"] + }, "fastqc": { "branch": "master", "git_sha": "666652151335353eef2fcd58880bcef5bc2928e1", "installed_by": ["modules"] }, - "multiqc": { + "freyja/boot": { + "branch": "master", + "git_sha": 
"5a30449ceb86c4f968fc5425a020b9e5809c9c5c", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, + "freyja/demix": { + "branch": "master", + "git_sha": "5a30449ceb86c4f968fc5425a020b9e5809c9c5c", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, + "freyja/update": { + "branch": "master", + "git_sha": "5a30449ceb86c4f968fc5425a020b9e5809c9c5c", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, + "freyja/variants": { + "branch": "master", + "git_sha": "5a30449ceb86c4f968fc5425a020b9e5809c9c5c", + "installed_by": ["bam_variant_demix_boot_freyja"] + }, + "gunzip": { + "branch": "master", + "git_sha": "3a5fef109d113b4997c9822198664ca5f2716208", + "installed_by": ["modules"] + }, + "ivar/consensus": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "ivar/trim": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "ivar/variants": { "branch": "master", - "git_sha": "cf17ca47590cc578dfb47db1c2a44ef86f89976d", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "kraken2/kraken2": { + "branch": "master", + "git_sha": "ca87ad032a62f025f0c373facacef2df0c5411b2", + "installed_by": ["modules"] + }, + "minia": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "mosdepth": { + "branch": "master", + "git_sha": "e0616fba0919adb190bfe070d17fb12d76ba3a26", + "installed_by": ["modules"] + }, + "nanoplot": { + "branch": "master", + "git_sha": "3135090b46f308a260fc9d5991d7d2f9c0785309", + "installed_by": ["modules"] + }, + "nextclade/datasetget": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "nextclade/run": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "pangolin": { + "branch": "master", + 
"git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "picard/collectmultiplemetrics": { + "branch": "master", + "git_sha": "20b0918591d4ba20047d7e13e5094bcceba81447", + "installed_by": ["modules"] + }, + "picard/markduplicates": { + "branch": "master", + "git_sha": "1943aa60f7490c3d6740e8872e6e69122ccc8087", + "installed_by": ["bam_markduplicates_picard"] + }, + "plasmidid": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "pycoqc": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "quast": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "samtools/flagstat": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_stats_samtools"] + }, + "samtools/idxstats": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_stats_samtools"] + }, + "samtools/index": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools"] + }, + "samtools/sort": { + "branch": "master", + "git_sha": "4352dbdb09ec40db71e9b172b97a01dcf5622c26", + "installed_by": ["bam_sort_stats_samtools"] + }, + "samtools/stats": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_stats_samtools"] + }, + "samtools/view": { + "branch": "master", + "git_sha": "0bd7d2333a88483aa0476acea172e9f5f6dd83bb", + "installed_by": ["modules"] + }, + "spades": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "tabix/bgzip": { + "branch": "master", + "git_sha": "09d3c8c29b31a2dfd610305b10550f0e1dbcd4a9", + "installed_by": ["modules"] + }, + "tabix/tabix": { + "branch": 
"master", + "git_sha": "9502adb23c0b97ed8e616bbbdfa73b4585aec9a1", + "installed_by": ["modules"] + }, + "unicycler": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", + "installed_by": ["modules"] + }, + "untar": { + "branch": "master", + "git_sha": "5caf7640a9ef1d18d765d55339be751bb0969dfa", + "installed_by": ["modules"] + }, + "vcflib/vcfuniq": { + "branch": "master", + "git_sha": "3f5420aa22e00bd030a2556dfdffc9e164ec0ec5", "installed_by": ["modules"] } } }, "subworkflows": { "nf-core": { + "bam_markduplicates_picard": { + "branch": "master", + "git_sha": "1943aa60f7490c3d6740e8872e6e69122ccc8087", + "installed_by": ["subworkflows"] + }, + "bam_sort_stats_samtools": { + "branch": "master", + "git_sha": "4352dbdb09ec40db71e9b172b97a01dcf5622c26", + "installed_by": ["fastq_align_bowtie2"] + }, + "bam_stats_samtools": { + "branch": "master", + "git_sha": "f4596fe0bdc096cf53ec4497e83defdb3a94ff62", + "installed_by": ["bam_markduplicates_picard", "bam_sort_stats_samtools"] + }, + "bam_variant_demix_boot_freyja": { + "branch": "master", + "git_sha": "5a30449ceb86c4f968fc5425a020b9e5809c9c5c", + "installed_by": ["subworkflows"] + }, + "fastq_align_bowtie2": { + "branch": "master", + "git_sha": "55e7bb6d5279ec21f254bbab0943117cb3d2fc35", + "installed_by": ["subworkflows"] + }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "3aa0aec1d52d492fe241919f0c6100ebf0074082", diff --git a/modules/local/asciigenome.nf b/modules/local/asciigenome.nf new file mode 100644 index 00000000..adf8cf14 --- /dev/null +++ b/modules/local/asciigenome.nf @@ -0,0 +1,61 @@ +process ASCIIGENOME { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::asciigenome=1.16.0 bioconda::bedtools=2.30.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-093691b47d719890dc19ac0c13c4528e9776897f:27211b8c38006480d69eb1be3ef09a7bf0a49d76-0' : + 'quay.io/biocontainers/mulled-v2-093691b47d719890dc19ac0c13c4528e9776897f:27211b8c38006480d69eb1be3ef09a7bf0a49d76-0' }" + + input: + tuple val(meta), path(bam), path(vcf) + path fasta + path sizes + path gff + path bed + val window + val track_height + + output: + tuple val(meta), path("*pdf"), emit: pdf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def gff_track = gff ? "$gff" : '' + def bed_track = bed ? "$bed" : '' + def paired_end = meta.single_end ? '' : '&& readsAsPairs -on' + """ + zcat $vcf \\ + | grep -v '#' \\ + | awk -v FS='\t' -v OFS='\t' '{print \$1, (\$2-1), (\$2)}' \\ + > variants.bed + + bedtools \\ + slop \\ + -i variants.bed \\ + -g $sizes \\ + -b $window \\ + > variants.slop.bed + + ASCIIGenome \\ + -ni \\ + -x "trackHeight 0 bam#1 && trackHeight $track_height bam@2 $paired_end && filterVariantReads && save ${prefix}.%r.pdf" \\ + --batchFile variants.slop.bed \\ + --fasta $fasta \\ + $bam \\ + $vcf \\ + $bed_track \\ + $gff_track \\ + > /dev/null + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + asciigenome: \$(echo \$(ASCIIGenome -ni --version 2>&1) | sed -e "s/ASCIIGenome //g") + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/local/collapse_primers.nf b/modules/local/collapse_primers.nf new file mode 100644 index 00000000..4219b6ae --- /dev/null +++ b/modules/local/collapse_primers.nf @@ -0,0 +1,35 @@ +process COLLAPSE_PRIMERS { + tag "$bed" + label 'process_medium' + + conda "conda-forge::python=3.9.5" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/python:3.9--1' : + 'quay.io/biocontainers/python:3.9--1' }" + + input: + path bed + val left_suffix + val right_suffix + + output: + path '*.bed' , emit: bed + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/viralrecon/bin/ + """ + collapse_primer_bed.py \\ + --left_primer_suffix $left_suffix \\ + --right_primer_suffix $right_suffix \\ + $bed \\ + ${bed.baseName}.collapsed.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/cutadapt.nf b/modules/local/cutadapt.nf new file mode 100644 index 00000000..11c8f6a5 --- /dev/null +++ b/modules/local/cutadapt.nf @@ -0,0 +1,43 @@ +process CUTADAPT { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::cutadapt=4.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/cutadapt:4.2--py39hbf8eff0_0' : + 'quay.io/biocontainers/cutadapt:4.2--py39hbf8eff0_0' }" + + input: + tuple val(meta), path(reads) + path adapters + + output: + tuple val(meta), path('*.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "-a file:adapters.sub.fa" : "-a file:adapters.sub.fa -A file:adapters.sub.fa" + def trimmed = meta.single_end ? 
"-o ${prefix}.fastq.gz" : "-o ${prefix}_1.fastq.gz -p ${prefix}_2.fastq.gz" + """ + sed -r '/^[ACTGactg]+\$/ s/\$/X/g' $adapters > adapters.sub.fa + + cutadapt \\ + --cores $task.cpus \\ + $args \\ + $paired \\ + $trimmed \\ + $reads \\ + > ${prefix}.cutadapt.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ +} diff --git a/modules/local/filter_blastn.nf b/modules/local/filter_blastn.nf new file mode 100644 index 00000000..e87b559b --- /dev/null +++ b/modules/local/filter_blastn.nf @@ -0,0 +1,38 @@ +process FILTER_BLASTN { + tag "$meta.id" + label 'process_low' + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(hits) + path header + path filtered_header + + output: + tuple val(meta), path('*filter.blastn.txt') , emit: txt + tuple val(meta), path('*.results.blastn.txt'), emit: blast + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def min_contig_length = params.min_contig_length + def min_perc_contig_aligned = params.min_perc_contig_aligned + + """ + cat $header $hits > ${prefix}.results.blastn.txt + awk 'BEGIN{OFS=\"\\t\";FS=\"\\t\"}{print \$0,\$6/\$16,\$6/\$15}' $hits | awk 'BEGIN{OFS=\"\\t\";FS=\"\\t\"} \$16 > ${min_contig_length} && \$18 > ${min_perc_contig_aligned} && \$1 !~ /phage/ {print \$0}' > tmp.out + cat $filtered_header tmp.out > ${prefix}.filter.blastn.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf new file mode 100644 index 00000000..e6c88328 --- /dev/null +++ 
b/modules/local/ivar_variants_to_vcf.nf @@ -0,0 +1,41 @@ +process IVAR_VARIANTS_TO_VCF { + tag "$meta.id" + + conda "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3 conda-forge::biopython=1.79" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ff46c3f421ca930fcc54e67ab61c8e1bcbddfe22:1ad3da14f705eb0cdff6b5a44fea4909307524b4-0' : + 'quay.io/biocontainers/mulled-v2-ff46c3f421ca930fcc54e67ab61c8e1bcbddfe22:1ad3da14f705eb0cdff6b5a44fea4909307524b4-0' }" + + input: + tuple val(meta), path(tsv) + path fasta + path header + + output: + tuple val(meta), path("*.vcf"), emit: vcf + tuple val(meta), path("*.log"), emit: log + tuple val(meta), path("*.tsv"), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/viralrecon/bin/ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + ivar_variants_to_vcf.py \\ + $tsv \\ + ${prefix}.vcf \\ + --fasta $fasta \\ + $args \\ + > ${prefix}.variant_counts.log + + cat $header ${prefix}.variant_counts.log > ${prefix}.variant_counts_mqc.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/kraken2_build.nf b/modules/local/kraken2_build.nf new file mode 100644 index 00000000..f1aa6865 --- /dev/null +++ b/modules/local/kraken2_build.nf @@ -0,0 +1,35 @@ +process KRAKEN2_BUILD { + tag "$library" + label 'process_high' + + conda "bioconda::kraken2=2.1.2 conda-forge::pigz=2.6" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' : + 'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }" + + input: + val library + + output: + path 'kraken2_db' , emit: db + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + """ + kraken2-build --db kraken2_db --threads $task.cpus $args --download-taxonomy + kraken2-build --db kraken2_db --threads $task.cpus $args2 --download-library $library + kraken2-build --db kraken2_db --threads $task.cpus $args3 --build + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ +} diff --git a/modules/local/make_bed_mask.nf b/modules/local/make_bed_mask.nf new file mode 100644 index 00000000..c8c75bff --- /dev/null +++ b/modules/local/make_bed_mask.nf @@ -0,0 +1,47 @@ +process MAKE_BED_MASK { + tag "$meta.id" + + conda "conda-forge::python=3.9.5 bioconda::samtools=1.14" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-1a35167f7a491c7086c13835aaa74b39f1f43979:6b5cffa1187cfccf2dc983ed3b5359d49b999eb0-0' : + 'quay.io/biocontainers/mulled-v2-1a35167f7a491c7086c13835aaa74b39f1f43979:6b5cffa1187cfccf2dc983ed3b5359d49b999eb0-0' }" + + input: + tuple val(meta), path(bam), path(vcf) + path fasta + val save_mpileup + + output: + tuple val(meta), path("*.bed") , emit: bed + tuple val(meta), path("*.mpileup"), optional:true, emit: mpileup + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/viralrecon/bin/ + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: 10 + def prefix = task.ext.prefix ?: "${meta.id}" + def mpileup = save_mpileup ? "| tee ${prefix}.mpileup" : "" + """ + samtools \\ + mpileup \\ + $args \\ + --reference $fasta \\ + $bam \\ + $mpileup \\ + | awk -v OFS='\\t' '{print \$1, \$2-1, \$2, \$4}' | awk '\$4 < $args2' > lowcov_positions.txt + + make_bed_mask.py \\ + $vcf \\ + lowcov_positions.txt \\ + ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/make_variants_long_table.nf b/modules/local/make_variants_long_table.nf new file mode 100644 index 00000000..1d8b40fc --- /dev/null +++ b/modules/local/make_variants_long_table.nf @@ -0,0 +1,34 @@ +process MAKE_VARIANTS_LONG_TABLE { + + conda "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-77320db00eefbbf8c599692102c3d387a37ef02a:08144a66f00dc7684fad061f1466033c0176e7ad-0' : + 'quay.io/biocontainers/mulled-v2-77320db00eefbbf8c599692102c3d387a37ef02a:08144a66f00dc7684fad061f1466033c0176e7ad-0' }" + + input: + path ('bcftools_query/*') + path ('snpsift/*') + path ('pangolin/*') + + output: + path "*.csv" , emit: csv + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/viralrecon/bin/ + def args = task.ext.args ?: '' + """ + make_variants_long_table.py \\ + --bcftools_query_dir ./bcftools_query \\ + --snpsift_dir ./snpsift \\ + --pangolin_dir ./pangolin \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + python: \$(python --version | sed 's/Python //g') + END_VERSIONS + """ +} diff --git a/modules/local/multiqc_illumina.nf b/modules/local/multiqc_illumina.nf new file mode 100644 index 00000000..8130bf92 --- /dev/null +++ b/modules/local/multiqc_illumina.nf @@ -0,0 +1,81 @@ +process MULTIQC { + label 'process_medium' + + conda "bioconda::multiqc=1.19" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.19--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + path workflow_summary + path fail_reads_summary + path fail_mapping_summary + path 'amplicon_heatmap_mqc.tsv' + path ('fastqc/*') + path ('fastp/*') + path ('kraken2/*') + path ('bowtie2/*') + path ('bowtie2/*') + path ('ivar_trim/*') + path ('picard_markduplicates/*') + path ('mosdepth/*') + path ('variants/*') + path ('variants/*') + path ('variants/*') + path ('variants/*') + path ('variants/*') + path ('variants/*') + path ('cutadapt/*') + path ('assembly_spades/*') + path ('assembly_unicycler/*') + path ('assembly_minia/*') + path ('freyja_demix/*') + + output: + path "*multiqc_report.html" , emit: report + path "*_data" , emit: data + path "*variants_metrics_mqc.csv", optional:true, emit: csv_variants + path "*assembly_metrics_mqc.csv", optional:true, emit: csv_assembly + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + + """ + ## Run MultiQC once to parse tool logs + multiqc -f $args $config $extra_config $logo . 
+ + ## Parse YAML files dumped by MultiQC to obtain metrics + multiqc_to_custom_csv.py --platform illumina + + ## Manually remove files that we don't want in the report + if grep -q ">skip_assembly<" workflow_summary_mqc.yaml; then + rm -f *assembly_metrics_mqc.csv + fi + + if grep -q ">skip_variants<" workflow_summary_mqc.yaml; then + rm -f *variants_metrics_mqc.csv + fi + + rm -f variants/report.tsv + + ## Run MultiQC a second time + multiqc -f $args -e general_stats --ignore nextclade_clade_mqc.tsv $config $extra_config $logo . + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/local/multiqc_nanopore.nf b/modules/local/multiqc_nanopore.nf new file mode 100644 index 00000000..112dd216 --- /dev/null +++ b/modules/local/multiqc_nanopore.nf @@ -0,0 +1,65 @@ +process MULTIQC { + label 'process_medium' + + conda "bioconda::multiqc=1.19" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' : + 'quay.io/biocontainers/multiqc:1.19--pyhdfd78af_0' }" + + input: + path multiqc_files, stageAs: "?/*" + path(multiqc_config) + path(extra_multiqc_config) + path(multiqc_logo) + path workflow_summary + path fail_barcodes_no_sample + path fail_no_barcode_samples + path fail_barcode_count_samples + path fail_guppyplex_count_samples + path 'amplicon_heatmap_mqc.tsv' + path ('pycoqc/*') + path ('artic_minion/*') + path ('samtools_stats/*') + path ('bcftools_stats/*') + path ('mosdepth/*') + path ('quast/*') + path ('snpeff/*') + path pangolin_lineage + path nextclade_clade + path ('freyja_demix/*') + + output: + path "*multiqc_report.html", emit: report + path "*_data" , emit: data + path "*.csv" , optional:true, emit: csv + path "*_plots" , optional:true, emit: plots + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def config = multiqc_config ? "--config $multiqc_config" : '' + def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' + def logo = multiqc_logo ? /--cl-config 'custom_logo: "${multiqc_logo}"'/ : '' + + """ + ## Run MultiQC once to parse tool logs + multiqc -f $args $config $extra_config $logo . + + ## Parse YAML files dumped by MultiQC to obtain metrics + multiqc_to_custom_csv.py --platform nanopore + + ## Manually remove files that we don't want in the report + rm -rf quast + + ## Run MultiQC a second time + multiqc -f $args -e general_stats --ignore *nextclade_clade_mqc.tsv $config $extra_config $logo . 
+ + cat <<-END_VERSIONS > versions.yml + "${task.process}": + multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) + END_VERSIONS + """ +} diff --git a/modules/local/plot_base_density.nf b/modules/local/plot_base_density.nf new file mode 100644 index 00000000..1932350a --- /dev/null +++ b/modules/local/plot_base_density.nf @@ -0,0 +1,34 @@ +process PLOT_BASE_DENSITY { + tag "$fasta" + label 'process_medium' + + conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' : + 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.pdf'), emit: pdf + tuple val(meta), path('*.tsv'), emit: tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/viralrecon/bin/ + def prefix = task.ext.prefix ?: "${meta.id}" + """ + plot_base_density.r \\ + --fasta_files $fasta \\ + --prefixes $prefix \\ + --output_dir ./ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/plot_mosdepth_regions.nf b/modules/local/plot_mosdepth_regions.nf new file mode 100644 index 00000000..0195549b --- /dev/null +++ b/modules/local/plot_mosdepth_regions.nf @@ -0,0 +1,37 @@ +process PLOT_MOSDEPTH_REGIONS { + label 'process_medium' + + 
conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' : + 'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }" + + input: + path beds + + output: + path '*coverage.pdf', emit: coverage_pdf + path '*coverage.tsv', emit: coverage_tsv + path '*heatmap.pdf' , optional:true, emit: heatmap_pdf + path '*heatmap.tsv' , optional:true, emit: heatmap_tsv + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: // This script is bundled with the pipeline, in nf-core/viralrecon/bin/ + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "mosdepth" + """ + plot_mosdepth_regions.r \\ + --input_files ${beds.join(',')} \\ + --output_dir ./ \\ + --output_suffix $prefix \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + r-base: \$(echo \$(R --version 2>&1) | sed 's/^.*R version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/rename_fasta_header.nf b/modules/local/rename_fasta_header.nf new file mode 100644 index 00000000..1fc86a30 --- /dev/null +++ b/modules/local/rename_fasta_header.nf @@ -0,0 +1,29 @@ +process RENAME_FASTA_HEADER { + tag "$meta.id" + + conda "conda-forge::sed=4.7" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("*.fa"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + sed "s/>/>${meta.id} /g" $fasta > ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + sed: \$(echo \$(sed --version 2>&1) | sed 's/^.*GNU sed) //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/local/snpeff_ann.nf b/modules/local/snpeff_ann.nf new file mode 100644 index 00000000..c834c1c5 --- /dev/null +++ b/modules/local/snpeff_ann.nf @@ -0,0 +1,53 @@ +process SNPEFF_ANN { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::snpeff=5.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : + 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" + + input: + tuple val(meta), path(vcf) + path db + path config + path fasta + + output: + tuple val(meta), path("*.vcf") , emit: vcf + tuple val(meta), path("*.csv") , emit: csv + tuple val(meta), path("*.genes.txt"), emit: txt + tuple val(meta), path("*.html") , emit: html + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 4 + if (!task.memory) { + log.info '[snpEff] Available memory not known - defaulting to 4GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + snpEff \\ + -Xmx${avail_mem}g \\ + ${fasta.baseName} \\ + -config $config \\ + -dataDir $db \\ + $args \\ + $vcf \\ + -csvStats ${prefix}.snpeff.csv \\ + > ${prefix}.snpeff.vcf + mv snpEff_summary.html ${prefix}.snpeff.summary.html + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ +} diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf new file mode 100644 index 00000000..b9c875ba --- /dev/null +++ b/modules/local/snpeff_build.nf @@ -0,0 +1,66 @@ +process SNPEFF_BUILD { + tag "$fasta" + label 'process_low' + + conda "bioconda::snpeff=5.0" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' : + 'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }" + + input: + path fasta + path gff + + output: + path 'snpeff_db' , emit: db + path '*.config' , emit: config + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def basename = fasta.baseName + def extension = gff.getExtension() + if (extension == "gtf") { + format = "gtf22" + } else { + format = "gff3" + } + + def avail_mem = 4 + if (!task.memory) { + log.info '[snpEff] Available memory not known - defaulting to 4GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = task.memory.giga + } + """ + mkdir -p snpeff_db/genomes/ + cd snpeff_db/genomes/ + ln -s ../../$fasta ${basename}.fa + + cd ../../ + mkdir -p snpeff_db/${basename}/ + cd snpeff_db/${basename}/ + ln -s ../../$gff genes.$extension + + cd ../../ + echo "${basename}.genome : ${basename}" > snpeff.config + + snpEff \\ + -Xmx${avail_mem}g \\ + build \\ + -config snpeff.config \\ + -dataDir ./snpeff_db \\ + -${format} \\ + $args \\ + -v \\ + ${basename} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpeff: \$(echo \$(snpEff -version 2>&1) | cut -f 2 -d ' ') + END_VERSIONS + """ +} diff --git a/modules/local/snpsift_extractfields.nf b/modules/local/snpsift_extractfields.nf new file mode 100644 index 00000000..5654e97e --- /dev/null +++ b/modules/local/snpsift_extractfields.nf @@ -0,0 +1,54 @@ +process SNPSIFT_EXTRACTFIELDS { + tag "$meta.id" + label 'process_medium' + + conda "bioconda::snpsift=4.3.1t" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--hdfd78af_3' : + 'quay.io/biocontainers/snpsift:4.3.1t--hdfd78af_3' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.snpsift.txt"), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + def avail_mem = 4 + if (!task.memory) { + log.info '[SnpSift] Available memory not known - defaulting to 4GB. Specify process memory requirements to change this.' + } else { + avail_mem = task.memory.giga + } + """ + SnpSift \\ + -Xmx${avail_mem}g \\ + extractFields \\ + -s "," \\ + -e "." 
\\ + $args \\ + $vcf \\ + CHROM POS REF ALT \\ + "ANN[*].GENE" "ANN[*].GENEID" \\ + "ANN[*].IMPACT" "ANN[*].EFFECT" \\ + "ANN[*].FEATURE" "ANN[*].FEATUREID" \\ + "ANN[*].BIOTYPE" "ANN[*].RANK" "ANN[*].HGVS_C" \\ + "ANN[*].HGVS_P" "ANN[*].CDNA_POS" "ANN[*].CDNA_LEN" \\ + "ANN[*].CDS_POS" "ANN[*].CDS_LEN" "ANN[*].AA_POS" \\ + "ANN[*].AA_LEN" "ANN[*].DISTANCE" "EFF[*].EFFECT" \\ + "EFF[*].FUNCLASS" "EFF[*].CODON" "EFF[*].AA" "EFF[*].AA_LEN" \\ + > ${prefix}.snpsift.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + snpsift: \$( echo \$(SnpSift split -h 2>&1) | sed 's/^.*version //' | sed 's/(.*//' | sed 's/t//g' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/abacas/environment.yml b/modules/nf-core/abacas/environment.yml new file mode 100644 index 00000000..c1379190 --- /dev/null +++ b/modules/nf-core/abacas/environment.yml @@ -0,0 +1,7 @@ +name: abacas +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::abacas=1.3.1 diff --git a/modules/nf-core/abacas/main.nf b/modules/nf-core/abacas/main.nf new file mode 100644 index 00000000..d0b04415 --- /dev/null +++ b/modules/nf-core/abacas/main.nf @@ -0,0 +1,40 @@ +process ABACAS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/abacas:1.3.1--pl526_0' : + 'biocontainers/abacas:1.3.1--pl526_0' }" + + input: + tuple val(meta), path(scaffold) + path fasta + + output: + tuple val(meta), path('*.abacas*'), emit: results + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + abacas.pl \\ + -r $fasta \\ + -q $scaffold \\ + $args \\ + -o ${prefix}.abacas + + mv nucmer.delta ${prefix}.abacas.nucmer.delta + mv nucmer.filtered.delta ${prefix}.abacas.nucmer.filtered.delta + mv nucmer.tiling ${prefix}.abacas.nucmer.tiling + mv unused_contigs.out ${prefix}.abacas.unused.contigs.out + cat <<-END_VERSIONS > versions.yml + "${task.process}": + abacas: \$(echo \$(abacas.pl -v 2>&1) | sed 's/^.*ABACAS.//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/abacas/meta.yml b/modules/nf-core/abacas/meta.yml new file mode 100644 index 00000000..bef9868c --- /dev/null +++ b/modules/nf-core/abacas/meta.yml @@ -0,0 +1,56 @@ +name: abacas +description: contiguate draft genome assembly +keywords: + - genome + - assembly + - contiguate +tools: + - abacas: + description: | + ABACAS is intended to rapidly contiguate (align, order, orientate), + visualize and design primers to close gaps on shotgun assembled + contigs based on a reference sequence. + homepage: http://abacas.sourceforge.net/documentation.html + documentation: http://abacas.sourceforge.net/documentation.html + doi: "10.1093/bioinformatics/btp347" + licence: ["GPL v2-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - scaffold: + type: file + description: Fasta file containing scaffold + pattern: "*.{fasta,fa}" + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - results: + type: file + description: | + List containing abacas output files + [ 'test.abacas.bin', 'test.abacas.fasta', 'test.abacas.gaps', + 'test.abacas.gaps.tab', 'test.abacas.nucmer.delta', + 'test.abacas.nucmer.filtered.delta', 'test.abacas.nucmer.tiling', + 'test.abacas.tab', 'test.abacas.unused.contigs.out', + 'test.abacas.MULTIFASTA.fa' ] + pattern: "*.{abacas}*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/abacas/tests/main.nf.test b/modules/nf-core/abacas/tests/main.nf.test new file mode 100644 index 00000000..86f056ed --- /dev/null +++ b/modules/nf-core/abacas/tests/main.nf.test @@ -0,0 +1,46 @@ +nextflow_process { + + name "Test Process ABACAS" + script "../main.nf" + process "ABACAS" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "abacas" + + test("sarscov2 [scaffolds_fasta] [genome_fasta]") { + + when { + process { + """ + input[0] = [ [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['scaffolds_fasta'], checkIfExists: true) + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.results[0][1].findAll { + file(it).name != "test.abacas.bin" && + file(it).name != "test.abacas.nucmer.delta" && + file(it).name != "test.abacas.unused.contigs.out" && + file(it).name != "test.abacas.nucmer.filtered.delta" }).match()}, + { 
assert file(process.out.results[0][1].find { + file(it).name == "test.abacas.bin" }).exists() }, + { assert file(process.out.results[0][1].find { + file(it).name == "test.abacas.nucmer.delta" }).exists() }, + { assert file(process.out.results[0][1].find { + file(it).name == "test.abacas.unused.contigs.out" }).exists() }, + { assert file(process.out.results[0][1].find { + file(it).name == "test.abacas.nucmer.filtered.delta" }).exists() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/abacas/tests/main.nf.test.snap b/modules/nf-core/abacas/tests/main.nf.test.snap new file mode 100644 index 00000000..34d7ef68 --- /dev/null +++ b/modules/nf-core/abacas/tests/main.nf.test.snap @@ -0,0 +1,24 @@ +{ + "sarscov2 [scaffolds_fasta] [genome_fasta]": { + "content": [ + [ + "test.abacas.MULTIFASTA.fa:md5,46c899ad70dcef8d14b5829fd8fbab82", + "test.abacas.crunch:md5,9a95358a9bd8ee97d1f2253d95623a17", + "test.abacas.fasta:md5,5e6c403d3850d52f6bde956fa2403b13", + "test.abacas.gaps:md5,5361af445b8d18a85c3af6527a97c89a", + "test.abacas.gaps.tab:md5,193024ec9e5a553573519b218eb06e29", + "test.abacas.nucmer.tiling:md5,0adaa0ce800d92c149a523d447148d95", + "test.abacas.tab:md5,a5b9b452516f519a4201ff809655ef69" + ] + ], + "timestamp": "2023-11-24T23:10:40.830744514" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,dac92f544d3e3ff9e5faae9b332dfab4" + ] + ], + "timestamp": "2023-11-24T23:35:57.275724471" + } +} \ No newline at end of file diff --git a/modules/nf-core/abacas/tests/nextflow.config b/modules/nf-core/abacas/tests/nextflow.config new file mode 100644 index 00000000..17296503 --- /dev/null +++ b/modules/nf-core/abacas/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: ABACAS { + ext.args = '-m -p nucmer' + } + +} diff --git a/modules/nf-core/abacas/tests/tags.yml 
b/modules/nf-core/abacas/tests/tags.yml new file mode 100644 index 00000000..5751a360 --- /dev/null +++ b/modules/nf-core/abacas/tests/tags.yml @@ -0,0 +1,2 @@ +abacas: + - modules/nf-core/abacas/** diff --git a/modules/nf-core/artic/guppyplex/environment.yml b/modules/nf-core/artic/guppyplex/environment.yml new file mode 100644 index 00000000..19176850 --- /dev/null +++ b/modules/nf-core/artic/guppyplex/environment.yml @@ -0,0 +1,7 @@ +name: artic_guppyplex +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::artic=1.2.3 diff --git a/modules/nf-core/artic/guppyplex/main.nf b/modules/nf-core/artic/guppyplex/main.nf new file mode 100644 index 00000000..7259ef44 --- /dev/null +++ b/modules/nf-core/artic/guppyplex/main.nf @@ -0,0 +1,37 @@ +process ARTIC_GUPPYPLEX { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/artic:1.2.3--pyhdfd78af_0' : + 'biocontainers/artic:1.2.3--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fastq_dir) + + output: + tuple val(meta), path("*.fastq.gz"), emit: fastq + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.2.3' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. 
+ """ + artic \\ + guppyplex \\ + $args \\ + --directory $fastq_dir \\ + --output ${prefix}.fastq + + pigz -p $task.cpus *.fastq + cat <<-END_VERSIONS > versions.yml + "${task.process}": + artic: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/artic/guppyplex/meta.yml b/modules/nf-core/artic/guppyplex/meta.yml new file mode 100644 index 00000000..80a1d999 --- /dev/null +++ b/modules/nf-core/artic/guppyplex/meta.yml @@ -0,0 +1,43 @@ +name: artic_guppyplex +description: Aggregates fastq files with demultiplexed reads +keywords: + - artic + - aggregate + - demultiplexed reads +tools: + - artic: + description: ARTIC pipeline - a bioinformatics pipeline for working with virus sequencing data sequenced with nanopore + homepage: https://artic.readthedocs.io/en/latest/ + documentation: https://artic.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/artic-network/fieldbioinformatics + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq_dir: + type: directory + description: Directory containing the fastq files with demultiplexed reads + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fastq: + type: file + description: Aggregated FastQ files + pattern: "*.{fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/artic/minion/environment.yml b/modules/nf-core/artic/minion/environment.yml new file mode 100644 index 00000000..5123f82c --- /dev/null +++ b/modules/nf-core/artic/minion/environment.yml @@ -0,0 +1,7 @@ +name: artic_minion +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::artic=1.2.3 diff --git a/modules/nf-core/artic/minion/main.nf b/modules/nf-core/artic/minion/main.nf new file mode 100644 index 00000000..8559bd75 --- /dev/null +++ b/modules/nf-core/artic/minion/main.nf @@ -0,0 +1,73 @@ +process ARTIC_MINION { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/artic:1.2.3--pyhdfd78af_0' : + 'biocontainers/artic:1.2.3--pyhdfd78af_0' }" + + input: + tuple val(meta), path(fastq) + path fast5_dir + path sequencing_summary + path ("primer-schemes/${scheme}/V${scheme_version}/${scheme}.reference.fasta") + path ("primer-schemes/${scheme}/V${scheme_version}/${scheme}.scheme.bed") + path medaka_model_file + val medaka_model_string + val scheme + val scheme_version + + output: + tuple val(meta), path("${prefix}.*") , emit: results + tuple val(meta), path("${prefix}.sorted.bam") , emit: bam + tuple val(meta), path("${prefix}.sorted.bam.bai") , emit: bai + tuple val(meta), path("${prefix}.trimmed.rg.sorted.bam") , emit: bam_trimmed + tuple val(meta), path("${prefix}.trimmed.rg.sorted.bam.bai") , emit: bai_trimmed + tuple val(meta), path("${prefix}.primertrimmed.rg.sorted.bam") , emit: bam_primertrimmed + tuple val(meta), path("${prefix}.primertrimmed.rg.sorted.bam.bai"), emit: bai_primertrimmed + tuple val(meta), path("${prefix}.consensus.fasta") , emit: fasta + tuple val(meta), path("${prefix}.pass.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.pass.vcf.gz.tbi") , emit: tbi + tuple val(meta), path("*.json"), optional:true , emit: json + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def version = scheme_version.toString().toLowerCase().replaceAll('v','') + def fast5 = fast5_dir ? "--fast5-directory $fast5_dir" : "" + def summary = sequencing_summary ? "--sequencing-summary $sequencing_summary" : "" + def model = "" + if (args.tokenize().contains('--medaka')) { + fast5 = "" + summary = "" + model = medaka_model_file ? "--medaka-model ./$medaka_model_file" : "--medaka-model $medaka_model_string" + } + def hd5_plugin_path = task.ext.hd5_plugin_path ? 
"export HDF5_PLUGIN_PATH=" + task.ext.hd5_plugin_path : "export HDF5_PLUGIN_PATH=/usr/local/lib/python3.6/site-packages/ont_fast5_api/vbz_plugin" + def VERSION = '1.2.3' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions. + """ + $hd5_plugin_path + + artic \\ + minion \\ + $args \\ + --threads $task.cpus \\ + --read-file $fastq \\ + --scheme-directory ./primer-schemes \\ + --scheme-version $version \\ + $model \\ + $fast5 \\ + $summary \\ + $scheme \\ + $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + artic: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/artic/minion/meta.yml b/modules/nf-core/artic/minion/meta.yml new file mode 100644 index 00000000..9092edd3 --- /dev/null +++ b/modules/nf-core/artic/minion/meta.yml @@ -0,0 +1,119 @@ +name: artic_minion +description: | + Run the alignment/variant-call/consensus logic of the artic pipeline +keywords: + - artic + - aggregate + - demultiplexed reads +tools: + - artic: + description: ARTIC pipeline - a bioinformatics pipeline for working with virus sequencing data sequenced with nanopore + homepage: https://artic.readthedocs.io/en/latest/ + documentation: https://artic.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/artic-network/fieldbioinformatics + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fastq: + type: file + description: FastQ file containing reads + pattern: "*.{fastq.gz}" + - fast5_dir: + type: directory + description: Directory containing MinION FAST5 files + pattern: "*" + - sequencing_summary: + type: file + description: Path to Guppy sequencing summary + pattern: "*.{txt}" + - primer_scheme_fasta: + type: file + description: | + Sequence of the reference genome used in the scheme in FASTA format + pattern: "*.{reference.fasta}" + - primer_scheme_bed: + type: file + description: | + bed files containing coordinates of each primer in the scheme, + relative to the reference genome + pattern: "*.{scheme.bed}" + - medaka_model_file: + type: file + description: | + Medaka model file to use (if option --medaka is set) + pattern: "*.hdf5" + - medaka_model_string: + type: value + description: | + Medaka model string to use (if option --medaka is set) + pattern: "*" + - scheme: + type: value + description: Name of the primer scheme + - scheme_version: + type: value + description: Version of the primer scheme +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - results: + type: file + description: Aggregated FastQ files + pattern: "*.fastq.gz" + - bam: + type: file + description: BAM file + pattern: "*.{sorted.bam}" + - bai: + type: file + description: BAM index file + pattern: "*.{sorted.bai}" + - bam_trimmed: + type: file + description: BAM file with the primers left on + pattern: "*.{trimmed.rg.sorted.bam}" + - bai_trimmed: + type: file + description: BAM index file of bam_trimmed + pattern: "*.{sorted.bai}" + - bam_primertrimmed: + type: file + description: BAM containing reads after primer-binding site trimming + pattern: "*.{trimmed.rg.sorted.bam}" + - bai_primertrimmed: + type: file + description: BAM index file of bam_primertrimmed + pattern: "*.{primertrimmed.rg.sorted.bam.bai}" + - fasta: + type: file + description: FAST file with consensus sequence + pattern: "*.{consensus.fasta}" + - vcf: + type: file + description: VCF file containing detected variants passing quality filter + pattern: "*.{pass.vcf.gz}" + - tbi: + type: file + description: VCF index + pattern: "*.{pass.vcf.gz.tbi}" + - json: + type: file + description: JSON file for MultiQC + pattern: "*.json" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bandage/image/environment.yml b/modules/nf-core/bandage/image/environment.yml new file mode 100644 index 00000000..61558105 --- /dev/null +++ b/modules/nf-core/bandage/image/environment.yml @@ -0,0 +1,7 @@ +name: bandage_image +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bandage=0.8.1 diff --git a/modules/nf-core/bandage/image/main.nf b/modules/nf-core/bandage/image/main.nf new file mode 100644 index 00000000..f6801d0a --- /dev/null +++ b/modules/nf-core/bandage/image/main.nf @@ -0,0 +1,33 @@ +process BANDAGE_IMAGE { + tag "${meta.id}" + label 'process_low' + 
+ conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bandage:0.8.1--hc9558a2_2' : + 'biocontainers/bandage:0.8.1--hc9558a2_2' }" + + input: + tuple val(meta), path(gfa) + + output: + tuple val(meta), path('*.png'), emit: png + tuple val(meta), path('*.svg'), emit: svg + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + Bandage image $gfa ${prefix}.png $args + Bandage image $gfa ${prefix}.svg $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bandage: \$(echo \$(Bandage --version 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bandage/image/meta.yml b/modules/nf-core/bandage/image/meta.yml new file mode 100644 index 00000000..94ce8a80 --- /dev/null +++ b/modules/nf-core/bandage/image/meta.yml @@ -0,0 +1,46 @@ +name: bandage_image +description: Render an assembly graph in GFA 1.0 format to PNG and SVG image formats +keywords: + - gfa + - graph + - assembly + - visualisation +tools: + - bandage: + description: | + Bandage - a Bioinformatics Application for Navigating De novo Assembly Graphs Easily + homepage: https://github.com/rrwick/Bandage + documentation: https://github.com/rrwick/Bandage + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gfa: + type: file + description: Assembly graph in GFA 1.0 format + pattern: "*.gfa" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - png: + type: file + description: Bandage image in PNG format + pattern: "*.png" + - svg: + type: file + description: Bandage image in SVG format + pattern: "*.svg" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@heuermh" +maintainers: + - "@heuermh" diff --git a/modules/nf-core/bcftools/consensus/environment.yml b/modules/nf-core/bcftools/consensus/environment.yml new file mode 100644 index 00000000..4217d8c1 --- /dev/null +++ b/modules/nf-core/bcftools/consensus/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_consensus +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/consensus/main.nf b/modules/nf-core/bcftools/consensus/main.nf new file mode 100644 index 00000000..9e167b9f --- /dev/null +++ b/modules/nf-core/bcftools/consensus/main.nf @@ -0,0 +1,36 @@ +process BCFTOOLS_CONSENSUS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi), path(fasta) + + output: + tuple val(meta), path('*.fa'), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + cat $fasta \\ + | bcftools \\ + consensus \\ + $vcf \\ + $args \\ + > ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/consensus/meta.yml b/modules/nf-core/bcftools/consensus/meta.yml new file mode 100644 index 00000000..3b43c808 --- /dev/null +++ b/modules/nf-core/bcftools/consensus/meta.yml @@ -0,0 +1,52 @@ +name: bcftools_consensus +description: Compresses VCF files +keywords: + - variant calling + - consensus + - VCF +tools: + - consensus: + description: | + Create consensus sequence by applying VCF variants to a reference fasta file. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF file + pattern: "*.{vcf}" + - tbi: + type: file + description: tabix index file + pattern: "*.{tbi}" + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA reference consensus file + pattern: "*.{fasta,fa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/filter/environment.yml b/modules/nf-core/bcftools/filter/environment.yml new file mode 100644 index 00000000..b2698757 --- /dev/null +++ b/modules/nf-core/bcftools/filter/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_filter +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/filter/main.nf b/modules/nf-core/bcftools/filter/main.nf new file mode 100644 index 00000000..cc9a2361 --- /dev/null +++ b/modules/nf-core/bcftools/filter/main.nf @@ -0,0 +1,67 @@ +process BCFTOOLS_FILTER { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.${extension}"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" 
+ + """ + bcftools filter \\ + --output ${prefix}.${extension} \\ + --threads ${task.cpus} \\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + if ("$vcf" == "${prefix}.${extension}") error "Input and output names are the same, set prefix in module configuration to disambiguate!" + + def create_file = extension.endsWith(".gz") ? "echo '' | gzip > ${prefix}.${extension}" : "touch ${prefix}.${extension}" + + """ + ${create_file} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/filter/meta.yml b/modules/nf-core/bcftools/filter/meta.yml new file mode 100644 index 00000000..deb93b13 --- /dev/null +++ b/modules/nf-core/bcftools/filter/meta.yml @@ -0,0 +1,44 @@ +name: bcftools_filter +description: Filters VCF files +keywords: + - variant calling + - filtering + - VCF +tools: + - filter: + description: | + Apply fixed-threshold filters to VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: VCF input file + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF filtered output file + pattern: "*.{vcf,bcf,vcf.gz,bcf.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/filter/tests/main.nf.test b/modules/nf-core/bcftools/filter/tests/main.nf.test new file mode 100644 index 00000000..eaf100e8 --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_process { + + name "Test Process BCFTOOLS_FILTER" + script "../main.nf" + process "BCFTOOLS_FILTER" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/filter" + + config "./nextflow.config" + + test("sarscov2 - vcf") { + + when { + process { + """ + input[0] = [ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("vcf") } + ) + } + + } + + test("sarscov2 - vcf - bcf output") { + + when { + process { + """ + input[0] = [ + [id:"bcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("bcf output") } + ) + } + + } + + test("sarscov2 - vcf - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [id:"vcf_test"], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot(process.out).match("vcf - stub") } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/filter/tests/main.nf.test.snap b/modules/nf-core/bcftools/filter/tests/main.nf.test.snap new file mode 100644 index 00000000..f8e17aa0 --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/main.nf.test.snap @@ -0,0 +1,101 @@ +{ + "vcf": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + "versions.yml:md5,7dc77043f9afb848d942d47a7bc19f67" + ], + "vcf": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,7dc77043f9afb848d942d47a7bc19f67" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-03-27T16:57:32.940161987" + }, + "bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "bcf_test" + }, + "bcf_test.bcf.gz:md5,c8a304c8d2892039201154153c8cd536" + ] + ], + "1": [ + "versions.yml:md5,7dc77043f9afb848d942d47a7bc19f67" + ], + "vcf": [ + [ + { + "id": "bcf_test" + }, + "bcf_test.bcf.gz:md5,c8a304c8d2892039201154153c8cd536" + ] + ], + "versions": [ + "versions.yml:md5,7dc77043f9afb848d942d47a7bc19f67" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-03-27T16:45:14.586866398" + }, + "vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,7dc77043f9afb848d942d47a7bc19f67" + ], + "vcf": [ + [ + { + "id": "vcf_test" + }, + "vcf_test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,7dc77043f9afb848d942d47a7bc19f67" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.02.0" + }, + "timestamp": "2024-03-27T17:05:52.80837892" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/bcftools/filter/tests/nextflow.config b/modules/nf-core/bcftools/filter/tests/nextflow.config new file mode 100644 index 00000000..4e960c8d --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = {"--no-version${meta.id == 'bcf_test' ? ' --output-type b' : ' --output-type z'}"} +} diff --git a/modules/nf-core/bcftools/filter/tests/tags.yml b/modules/nf-core/bcftools/filter/tests/tags.yml new file mode 100644 index 00000000..d5e01080 --- /dev/null +++ b/modules/nf-core/bcftools/filter/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/filter: + - "modules/nf-core/bcftools/filter/**" diff --git a/modules/nf-core/bcftools/mpileup/environment.yml b/modules/nf-core/bcftools/mpileup/environment.yml new file mode 100644 index 00000000..114390be --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_mpileup +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/mpileup/main.nf b/modules/nf-core/bcftools/mpileup/main.nf new file mode 100644 index 00000000..6bf8bc2d --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/main.nf @@ -0,0 +1,72 @@ +process BCFTOOLS_MPILEUP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(bam), path(intervals) + tuple val(meta2), path(fasta) + val save_mpileup + + output: + tuple val(meta), path("*vcf.gz") , emit: vcf + tuple val(meta), path("*vcf.gz.tbi") , emit: tbi + tuple val(meta), path("*stats.txt") , emit: stats + tuple val(meta), path("*.mpileup.gz"), emit: mpileup, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def args3 = task.ext.args3 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def mpileup = save_mpileup ? "| tee ${prefix}.mpileup" : "" + def bgzip_mpileup = save_mpileup ? "bgzip ${prefix}.mpileup" : "" + def intervals = intervals ? "-T ${intervals}" : "" + """ + echo "${meta.id}" > sample_name.list + + bcftools \\ + mpileup \\ + --fasta-ref $fasta \\ + $args \\ + $bam \\ + $intervals \\ + $mpileup \\ + | bcftools call --output-type v $args2 \\ + | bcftools reheader --samples sample_name.list \\ + | bcftools view --output-file ${prefix}.vcf.gz --output-type z $args3 + + $bgzip_mpileup + + tabix -p vcf -f ${prefix}.vcf.gz + + bcftools stats ${prefix}.vcf.gz > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bcftools_stats.txt + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.mpileup.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/mpileup/meta.yml b/modules/nf-core/bcftools/mpileup/meta.yml new file mode 
100644 index 00000000..65410ddd --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/meta.yml @@ -0,0 +1,70 @@ +name: bcftools_mpileup +description: Compresses VCF files +keywords: + - variant calling + - mpileup + - VCF +tools: + - mpileup: + description: | + Generates genotype likelihoods at each genomic position with coverage. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM file + pattern: "*.{bam}" + - intervals: + type: file + description: Input intervals file. A file (commonly '.bed') containing regions to subset + - meta: + type: map + description: | + Groovy Map containing information about the genome fasta, e.g. [ id: 'sarscov2' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + - save_mpileup: + type: boolean + description: Save mpileup file generated by bcftools mpileup +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: VCF gzipped output file + pattern: "*.{vcf.gz}" + - tbi: + type: file + description: tabix index file + pattern: "*.{vcf.gz.tbi}" + - stats: + type: file + description: Text output file containing stats + pattern: "*{stats.txt}" + - mpileup: + type: file + description: mpileup gzipped output for all positions + pattern: "{*.mpileup.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/mpileup/tests/main.nf.test b/modules/nf-core/bcftools/mpileup/tests/main.nf.test new file mode 100644 index 00000000..dc35c542 --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/main.nf.test @@ -0,0 +1,208 @@ +nextflow_process { + + name "Test Process BCFTOOLS_MPILEUP" + script "../main.nf" + process "BCFTOOLS_MPILEUP" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/mpileup" + + config "./nextflow.config" + + test("sarscov2 - [bam, []], fasta, false") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_fasta_false.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_fasta_false.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_fasta_false.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_fasta_false_versions") } + ) + } + + } + + test("sarscov2 - [bam, []], fasta, false stub") { + + options "-stub" + + 
when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_fasta_false_stub.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_fasta_false_stub.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_fasta_false_stub.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_fasta_false_stub_versions") } + ) + } + + } + + test("sarscov2 - [bam, []], fasta, true") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_true_stub.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_true_stub.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_true_stub.bcftools_stats.txt") }, + { assert snapshot(file(process.out.mpileup[0][1]).name).match("bam_bed_fasta_true_stub.mpileup.gz") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_true_stub_versions") } + ) + } + + } + + test("sarscov2 - [bam, []], fasta, true stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], 
checkIfExists: true), + [] + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_true.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_true.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_true.bcftools_stats.txt") }, + { assert snapshot(file(process.out.mpileup[0][1]).name).match("bam_bed_fasta_true.mpileup.gz") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_true_versions") } + ) + } + + } + + test("sarscov2 - [bam, bed], fasta, false") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_false.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_false.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_false.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_false_versions") } + ) + } + + } + + test("sarscov2 - [bam, bed], fasta, false stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] 
+ input[1] = [ + [ id:'sarscov2' ], // meta map + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.vcf[0][1]).name).match("bam_bed_fasta_false_stub.vcf.gz") }, + { assert snapshot(file(process.out.tbi[0][1]).name).match("bam_bed_fasta_false_stub.vcf.gz.tbi") }, + { assert snapshot(file(process.out.stats[0][1]).name).match("bam_bed_fasta_false_stub.bcftools_stats.txt") }, + { assert snapshot(process.out.versions).match("bam_bed_fasta_false_stub_versions") } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap b/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap new file mode 100644 index 00000000..8b8d5eab --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/main.nf.test.snap @@ -0,0 +1,274 @@ +{ + "bam_bed_fasta_true.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.172966" + }, + "bam_bed_fasta_false_stub.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:57.815085" + }, + "bam_fasta_false_stub.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:15.731277" + }, + "bam_bed_fasta_false_stub.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:57.844573" + }, + "bam_bed_fasta_true_stub.mpileup.gz": { + "content": [ + "test.mpileup.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.462382" + }, + "bam_bed_fasta_true.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + 
"nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.146525" + }, + "bam_bed_fasta_true_stub.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.42754" + }, + "bam_fasta_false_versions": { + "content": [ + [ + "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:10.137483" + }, + "bam_fasta_false_stub.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:15.746204" + }, + "bam_bed_fasta_false_versions": { + "content": [ + [ + "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:51.772936" + }, + "bam_bed_fasta_false.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:51.72798" + }, + "bam_bed_fasta_true_versions": { + "content": [ + [ + "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.20135" + }, + "bam_fasta_false.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:10.051991" + }, + "bam_fasta_false.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:10.123726" + }, + "bam_bed_fasta_false.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:51.761517" + }, + "bam_bed_fasta_false_stub.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + 
], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:57.832271" + }, + "bam_bed_fasta_false.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:51.748389" + }, + "bam_fasta_false_stub.vcf.gz": { + "content": [ + "test.vcf.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:15.709802" + }, + "bam_bed_fasta_true_stub.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.453121" + }, + "bam_fasta_false.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:10.108027" + }, + "bam_fasta_false_stub_versions": { + "content": [ + [ + "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:39:15.770612" + }, + "bam_bed_fasta_true.bcftools_stats.txt": { + "content": [ + "test.bcftools_stats.txt" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.18304" + }, + "bam_bed_fasta_true_stub.vcf.gz.tbi": { + "content": [ + "test.vcf.gz.tbi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.442077" + }, + "bam_bed_fasta_false_stub_versions": { + "content": [ + [ + "versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:57.868309" + }, + "bam_bed_fasta_true.mpileup.gz": { + "content": [ + "test.mpileup.gz" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:45.192888" + }, + "bam_bed_fasta_true_stub_versions": { + "content": [ + [ + 
"versions.yml:md5,e09c59d941374bb293aadc36e2f29dbf" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-22T18:37:39.470988" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/mpileup/tests/nextflow.config b/modules/nf-core/bcftools/mpileup/tests/nextflow.config new file mode 100644 index 00000000..a7ba19fe --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args2 = '--no-version --ploidy 1 --multiallelic-caller' + ext.args3 = '--no-version' +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/mpileup/tests/tags.yml b/modules/nf-core/bcftools/mpileup/tests/tags.yml new file mode 100644 index 00000000..07b91f98 --- /dev/null +++ b/modules/nf-core/bcftools/mpileup/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/mpileup: + - "modules/nf-core/bcftools/mpileup/**" diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml new file mode 100644 index 00000000..fe80e4e7 --- /dev/null +++ b/modules/nf-core/bcftools/norm/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_norm +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf new file mode 100644 index 00000000..47d3dab1 --- /dev/null +++ b/modules/nf-core/bcftools/norm/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_NORM { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf.gz" + + """ + bcftools norm \\ + --fasta-ref ${fasta} \\ + --output ${prefix}.${extension}\\ + $args \\ + --threads $task.cpus \\ + ${vcf} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? 
"vcf" : + "vcf.gz" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml new file mode 100644 index 00000000..1f3e1b62 --- /dev/null +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -0,0 +1,61 @@ +name: bcftools_norm +description: Normalize VCF file +keywords: + - normalize + - norm + - variant calling + - VCF +tools: + - norm: + description: | + Normalize VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be normalized + e.g. 'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/query/environment.yml b/modules/nf-core/bcftools/query/environment.yml new file mode 100644 index 00000000..4f9661ca --- /dev/null +++ b/modules/nf-core/bcftools/query/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_query +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf new file mode 100644 index 00000000..e9e73a6a --- /dev/null +++ b/modules/nf-core/bcftools/query/main.nf @@ -0,0 +1,56 @@ +process BCFTOOLS_QUERY { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + path regions + path targets + path samples + + output: + tuple val(meta), path("*.${suffix}"), emit: output + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? 
"--samples-file ${samples}" : "" + """ + bcftools query \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $args \\ + $vcf \\ + > ${prefix}.${suffix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ + touch ${prefix}.${suffix} \\ + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/query/meta.yml b/modules/nf-core/bcftools/query/meta.yml new file mode 100644 index 00000000..303ef610 --- /dev/null +++ b/modules/nf-core/bcftools/query/meta.yml @@ -0,0 +1,63 @@ +name: bcftools_query +description: Extracts fields from VCF or BCF files and outputs them in user-defined format. +keywords: + - query + - variant calling + - bcftools + - VCF +tools: + - query: + description: | + Extracts fields from VCF or BCF files and outputs them in user-defined format. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be qeuried. + pattern: "*.{vcf.gz, vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. + pattern: "*.tbi" + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. 
+ - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - output: + type: file + description: BCFTools query output file + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@drpatelh" +maintainers: + - "@abhi18av" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test b/modules/nf-core/bcftools/query/tests/main.nf.test new file mode 100644 index 00000000..e9ea5a9d --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test @@ -0,0 +1,101 @@ +nextflow_process { + + name "Test Process BCFTOOLS_QUERY" + script "../main.nf" + process "BCFTOOLS_QUERY" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/query" + + config "./nextflow.config" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + ] + input[1] = 
file(params.test_data['sarscov2']['illumina']['test3_vcf_gz'], checkIfExists: true) + input[2] = file(params.test_data['sarscov2']['illumina']['test2_vcf_targets_tsv_gz'], checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_vcf_gz_tbi'], checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.output[0][1]).name, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test.snap b/modules/nf-core/bcftools/query/tests/main.nf.test.snap new file mode 100644 index 00000000..a19f2053 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test.snap @@ -0,0 +1,43 @@ +{ + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,75a6bd0084e2e1838cf7baba11b99d19" + ] + ], + [ + "versions.yml:md5,b40206d5437ce4b044d15c47ddd93d8e" + ] + ], + "timestamp": "2023-11-29T14:21:05.191946862" + }, + "sarscov2 - [vcf, tbi], [], [], [] - stub": { + "content": [ + "out.txt", + [ + "versions.yml:md5,b40206d5437ce4b044d15c47ddd93d8e" + ] + ], + "timestamp": "2023-11-29T14:21:11.169603542" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,87a2ab194e1ee3219b44e58429ec3307" + ] + ], + [ + "versions.yml:md5,b40206d5437ce4b044d15c47ddd93d8e" + ] + ], + "timestamp": "2023-11-29T14:20:59.335041418" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/nextflow.config 
b/modules/nf-core/bcftools/query/tests/nextflow.config new file mode 100644 index 00000000..da81c2a0 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "-f '%CHROM %POS %REF %ALT[%SAMPLE=%GT]'" +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/tags.yml b/modules/nf-core/bcftools/query/tests/tags.yml new file mode 100644 index 00000000..fb9455cb --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/query: + - "modules/nf-core/bcftools/query/**" diff --git a/modules/nf-core/bcftools/sort/environment.yml b/modules/nf-core/bcftools/sort/environment.yml new file mode 100644 index 00000000..89cf911d --- /dev/null +++ b/modules/nf-core/bcftools/sort/environment.yml @@ -0,0 +1,7 @@ +name: bcftools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf new file mode 100644 index 00000000..246148d6 --- /dev/null +++ b/modules/nf-core/bcftools/sort/main.nf @@ -0,0 +1,61 @@ +process BCFTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? 
"bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + + """ + bcftools \\ + sort \\ + --output ${prefix}.${extension} \\ + --temp-dir . \\ + $args \\ + $vcf + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + + def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" : + args.contains("--output-type u") || args.contains("-Ou") ? "bcf" : + args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" : + args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : + "vcf" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + """ + ${create_cmd} ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/sort/meta.yml b/modules/nf-core/bcftools/sort/meta.yml new file mode 100644 index 00000000..84747c6d --- /dev/null +++ b/modules/nf-core/bcftools/sort/meta.yml @@ -0,0 +1,42 @@ +name: bcftools_sort +description: Sorts VCF files +keywords: + - sorting + - VCF + - variant calling +tools: + - sort: + description: Sort VCF files by coordinates. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + tool_dev_url: https://github.com/samtools/bcftools + doi: "10.1093/bioinformatics/btp352" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - vcf: + type: file + description: The VCF/BCF file to be sorted + pattern: "*.{vcf.gz,vcf,bcf}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Sorted VCF file + pattern: "*.{vcf.gz}" +authors: + - "@Gwennid" +maintainers: + - "@Gwennid" diff --git a/modules/nf-core/bcftools/sort/tests/main.nf.test b/modules/nf-core/bcftools/sort/tests/main.nf.test new file mode 100644 index 00000000..8a496dda --- /dev/null +++ b/modules/nf-core/bcftools/sort/tests/main.nf.test @@ -0,0 +1,54 @@ +nextflow_process { + + name "Test Process BCFTOOLS_SORT" + script "../main.nf" + process "BCFTOOLS_SORT" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/sort" + + test("sarscov2 - vcf") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("vcf") } + ) + } + + } + + test("sarscov2 - vcf - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/bcftools/sort/tests/main.nf.test.snap b/modules/nf-core/bcftools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..6200cc42 --- /dev/null +++ b/modules/nf-core/bcftools/sort/tests/main.nf.test.snap @@ -0,0 +1,68 @@ +{ + "vcf": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + 
"versions.yml:md5,622bd32d4ff0fac3360cd534ae0f0168" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,622bd32d4ff0fac3360cd534ae0f0168" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-02T16:55:21.237927554" + }, + "sarscov2 - vcf - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + "versions.yml:md5,622bd32d4ff0fac3360cd534ae0f0168" + ], + "vcf": [ + [ + { + "id": "test" + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,622bd32d4ff0fac3360cd534ae0f0168" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.03.0" + }, + "timestamp": "2024-05-03T12:32:50.506309198" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/sort/tests/tags.yml b/modules/nf-core/bcftools/sort/tests/tags.yml new file mode 100644 index 00000000..6e9520dd --- /dev/null +++ b/modules/nf-core/bcftools/sort/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/sort: + - "modules/nf-core/bcftools/sort/**" diff --git a/modules/nf-core/bcftools/stats/environment.yml b/modules/nf-core/bcftools/stats/environment.yml new file mode 100644 index 00000000..7bb40dc0 --- /dev/null +++ b/modules/nf-core/bcftools/stats/environment.yml @@ -0,0 +1,8 @@ +name: bcftools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bcftools=1.18 + - bioconda::htslib=1.18 diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf new file mode 100644 index 00000000..ffa1df64 --- /dev/null +++ b/modules/nf-core/bcftools/stats/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.18--h8b25389_0': + 'biocontainers/bcftools:1.18--h8b25389_0' }" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(regions) + tuple val(meta3), path(targets) + tuple val(meta4), path(samples) + tuple val(meta5), path(exons) + tuple val(meta6), path(fasta) + + output: + tuple val(meta), path("*stats.txt"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + def reference_fasta = fasta ? "--fasta-ref ${fasta}" : "" + def exons_file = exons ? "--exons ${exons}" : "" + """ + bcftools stats \\ + $args \\ + $regions_file \\ + $targets_file \\ + $samples_file \\ + $reference_fasta \\ + $exons_file \\ + $vcf > ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.bcftools_stats.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml new file mode 100644 index 00000000..7ea2103e --- /dev/null +++ b/modules/nf-core/bcftools/stats/meta.yml @@ -0,0 +1,77 @@ +name: bcftools_stats +description: Generates stats from VCF files +keywords: + - variant calling + - stats + - VCF +tools: + - stats: + description: | + Parses VCF or BCF and produces text file stats which is suitable for + machine processing and can be plotted using 
plot-vcfstats. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: VCF input file + pattern: "*.{vcf}" + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. Optional: only required when parameter regions is chosen. + pattern: "*.tbi" + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. (VCF, BED or tab-delimited) + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files) + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + - exons: + type: file + description: | + Tab-delimited file with exons for indel frameshifts (chr,beg,end; 1-based, inclusive, optionally bgzip compressed). + e.g. 'exons.tsv.gz' + - fasta: + type: file + description: | + Faidx indexed reference sequence file to determine INDEL context. + e.g. 'reference.fa' +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - stats: + type: file + description: Text output file containing stats + pattern: "*_{stats.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@SusiJo" + - "@TCLamnidis" diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test b/modules/nf-core/bcftools/stats/tests/main.nf.test new file mode 100644 index 00000000..f027f6b1 --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test @@ -0,0 +1,182 @@ +nextflow_process { + + name "Test Process BCFTOOLS_STATS" + script "../main.nf" + process "BCFTOOLS_STATS" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/stats" + + test("sarscov2 - vcf_gz") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("version") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - regions") { + + when { + process { + """ + input[0] = [ [ id:'regions_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)] + input[1] = [ [id:'regions_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + 
""" + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("regions_version") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - targets") { + + when { + process { + """ + input[0] = [ [ id:'targets_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [id:'targets_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("targets_version") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - exons") { + + when { + process { + """ + input[0] = [ [ id:'exon_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [id: "exon_test"], + file(params.modules_testdata_base_path + 'delete_me/bcftools/stats/exons.tsv.gz', checkIfExists: true) ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("exon_version") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + test("sarscov2 - vcf_gz - reference") { + + when { + process { + """ + input[0] = [ [ id:'ref_test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] ] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = 
[ [], [] ] + input[5] = [ [id: 'ref_test'], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.version).match("ref_version") }, + { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }, + ) + } + + } + + + test("sarscov2 - vcf_gz - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + []] + input[1] = [ [], [] ] + input[2] = [ [], [] ] + input[3] = [ [], [] ] + input[4] = [ [], [] ] + input[5] = [ [], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/stats/tests/main.nf.test.snap b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..30691c32 --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/main.nf.test.snap @@ -0,0 +1,160 @@ +{ + "sarscov2 - vcf_gz - reference": { + "content": [ + [ + "# This file was produced by bcftools stats (1.18+htslib-1.18) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --fasta-ref genome.fasta test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-20T11:46:24.34147" + }, + "sarscov2 - vcf_gz - exons": { + "content": [ + [ + "# This file was produced by bcftools stats (1.18+htslib-1.18) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --exons exons.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], 
+ "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-20T11:46:18.378716" + }, + "exon_version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T10:02:02.530551189" + }, + "ref_version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T10:02:06.885381764" + }, + "sarscov2 - vcf_gz - targets": { + "content": [ + [ + "# This file was produced by bcftools stats (1.18+htslib-1.18) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --targets-file test2.targets.tsv.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-20T11:46:12.48194" + }, + "targets_version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T10:01:58.412147664" + }, + "sarscov2 - vcf_gz - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,5909d472a49b0aa2bfbbb1094c129e48" + ], + "stats": [ + [ + { + "id": "test" + }, + "test.bcftools_stats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,5909d472a49b0aa2bfbbb1094c129e48" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-19T16:26:21.450513562" + }, + "version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T09:57:04.317347424" + }, + "regions_version": { + "content": null, + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T10:01:54.349855366" + }, + "sarscov2 - vcf_gz": { + "content": [ + [ + "# This file was produced by bcftools stats 
(1.18+htslib-1.18) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-20T11:46:01.862297" + }, + "sarscov2 - vcf_gz - regions": { + "content": [ + [ + "# This file was produced by bcftools stats (1.18+htslib-1.18) and can be plotted using plot-vcfstats.", + "# The command line was:\tbcftools stats --regions-file test3.vcf.gz test.vcf.gz", + "#", + "# Definition of sets:", + "# ID\t[2]id\t[3]tab-separated file names", + "ID\t0\ttest.vcf.gz" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-20T11:46:07.296109" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/stats/tests/tags.yml b/modules/nf-core/bcftools/stats/tests/tags.yml new file mode 100644 index 00000000..53c12d92 --- /dev/null +++ b/modules/nf-core/bcftools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +bcftools/stats: + - "modules/nf-core/bcftools/stats/**" diff --git a/modules/nf-core/bedtools/getfasta/environment.yml b/modules/nf-core/bedtools/getfasta/environment.yml new file mode 100644 index 00000000..a89401f2 --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_getfasta +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/getfasta/main.nf b/modules/nf-core/bedtools/getfasta/main.nf new file mode 100644 index 00000000..b316117d --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/main.nf @@ -0,0 +1,50 @@ +process BEDTOOLS_GETFASTA { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bed) + path fasta + + output: + tuple val(meta), path("*.fa"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$fasta" == "${prefix}.fa") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + bedtools \\ + getfasta \\ + $args \\ + -fi $fasta \\ + -bed $bed \\ + -fo ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$fasta" == "${prefix}.fa") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.fa + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/getfasta/meta.yml b/modules/nf-core/bedtools/getfasta/meta.yml new file mode 100644 index 00000000..41917fe3 --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/meta.yml @@ -0,0 +1,46 @@ +name: bedtools_getfasta +description: extract sequences in a FASTA file based on intervals defined in a feature file. +keywords: + - bed + - fasta + - getfasta +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/getfasta.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bed: + type: file + description: Bed feature file + pattern: "*.{bed}" + - fasta: + type: file + description: Input fasta file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Output fasta file with extracted sequences + pattern: "*.{fa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/getfasta/tests/main.nf.test b/modules/nf-core/bedtools/getfasta/tests/main.nf.test new file mode 100644 index 00000000..4da7552c --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/tests/main.nf.test @@ -0,0 +1,62 @@ +nextflow_process { + + name "Test Process BEDTOOLS_GETFASTA" + script "../main.nf" + process "BEDTOOLS_GETFASTA" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/getfasta" + + test("sarscov2 - bed - fasta") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false], + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), + ] + + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("sarscov2 - bed - fasta - stub") { + + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false], + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true), + ] + + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git 
a/modules/nf-core/bedtools/getfasta/tests/main.nf.test.snap b/modules/nf-core/bedtools/getfasta/tests/main.nf.test.snap new file mode 100644 index 00000000..69bf33f7 --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "sarscov2 - bed - fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,41c3a45a57a16c04f828d8f8bb52df70" + ] + ], + "1": [ + "versions.yml:md5,427b4f64b2f05f28f0beef96c9f0d310" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,41c3a45a57a16c04f828d8f8bb52df70" + ] + ], + "versions": [ + "versions.yml:md5,427b4f64b2f05f28f0beef96c9f0d310" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T14:16:19.383758985" + }, + "sarscov2 - bed - fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,427b4f64b2f05f28f0beef96c9f0d310" + ], + "fasta": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fa:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,427b4f64b2f05f28f0beef96c9f0d310" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-03T14:16:47.47010536" + } +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/getfasta/tests/tags.yml b/modules/nf-core/bedtools/getfasta/tests/tags.yml new file mode 100644 index 00000000..42ec3026 --- /dev/null +++ b/modules/nf-core/bedtools/getfasta/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/getfasta: + - "modules/nf-core/bedtools/getfasta/**" diff --git a/modules/nf-core/bedtools/maskfasta/environment.yml b/modules/nf-core/bedtools/maskfasta/environment.yml new file mode 100644 index 00000000..71d18917 --- /dev/null +++ b/modules/nf-core/bedtools/maskfasta/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_maskfasta 
+channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/maskfasta/main.nf b/modules/nf-core/bedtools/maskfasta/main.nf new file mode 100644 index 00000000..9511a8d8 --- /dev/null +++ b/modules/nf-core/bedtools/maskfasta/main.nf @@ -0,0 +1,36 @@ +process BEDTOOLS_MASKFASTA { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bed) + path fasta + + output: + tuple val(meta), path("*.fa"), emit: fasta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + bedtools \\ + maskfasta \\ + $args \\ + -fi $fasta \\ + -bed $bed \\ + -fo ${prefix}.fa + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/maskfasta/meta.yml b/modules/nf-core/bedtools/maskfasta/meta.yml new file mode 100644 index 00000000..94f81aba --- /dev/null +++ b/modules/nf-core/bedtools/maskfasta/meta.yml @@ -0,0 +1,46 @@ +name: bedtools_maskfasta +description: masks sequences in a FASTA file based on intervals defined in a feature file. +keywords: + - bed + - fasta + - maskfasta +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/intersect.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bed: + type: file + description: Bed feature file + pattern: "*.{bed}" + - fasta: + type: file + description: Input fasta file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Output masked fasta file + pattern: "*.{fa}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/merge/environment.yml b/modules/nf-core/bedtools/merge/environment.yml new file mode 100644 index 00000000..99707878 --- /dev/null +++ b/modules/nf-core/bedtools/merge/environment.yml @@ -0,0 +1,7 @@ +name: bedtools_merge +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bedtools=2.31.1 diff --git a/modules/nf-core/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf new file mode 100644 index 00000000..5310647d --- /dev/null +++ b/modules/nf-core/bedtools/merge/main.nf @@ -0,0 +1,47 @@ +process BEDTOOLS_MERGE { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/bedtools:2.31.1--hf5e1c6e_0' : + 'biocontainers/bedtools:2.31.1--hf5e1c6e_0' }" + + input: + tuple val(meta), path(bed) + + output: + tuple val(meta), path('*.bed'), emit: bed + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + bedtools \\ + merge \\ + -i $bed \\ + $args \\ + > ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bed + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bedtools: \$(bedtools --version | sed -e "s/bedtools v//g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml new file mode 100644 index 00000000..d7463e3d --- /dev/null +++ b/modules/nf-core/bedtools/merge/meta.yml @@ -0,0 +1,45 @@ +name: bedtools_merge +description: combines overlapping or “book-ended” features in an interval file into a single feature which spans all of the combined features. +keywords: + - bed + - merge + - bedtools + - overlapped bed +tools: + - bedtools: + description: | + A set of tools for genomic analysis tasks, specifically enabling genome arithmetic (merge, count, complement) on various file types. + documentation: https://bedtools.readthedocs.io/en/latest/content/tools/merge.html + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bed: + type: file + description: Input BED file + pattern: "*.{bed}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bed: + type: file + description: Overlapped bed file with combined features + pattern: "*.{bed}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" +maintainers: + - "@edmundmiller" + - "@sruthipsuresh" + - "@drpatelh" diff --git a/modules/nf-core/bedtools/merge/tests/main.nf.test b/modules/nf-core/bedtools/merge/tests/main.nf.test new file mode 100644 index 00000000..95dba8e5 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process BEDTOOLS_MERGE" + script "../main.nf" + config "./nextflow.config" + process "BEDTOOLS_MERGE" + + tag "modules" + tag "modules_nfcore" + tag "bedtools" + tag "bedtools/merge" + + test("test_bedtools_merge") { + + when { + process { + """ + input[0] = [ [ id:'test'], + file(params.test_data['sarscov2']['genome']['test_bed'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/bedtools/merge/tests/main.nf.test.snap b/modules/nf-core/bedtools/merge/tests/main.nf.test.snap new file mode 100644 index 00000000..ee6c4e63 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/main.nf.test.snap @@ -0,0 +1,35 @@ +{ + "test_bedtools_merge": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e" + ] + ], + "1": [ + "versions.yml:md5,2d134badb4cd1e4e903696c7967f28d6" + ], + "bed": [ + [ + { + "id": "test" + }, + "test_out.bed:md5,0cf6ed2b6f470cd44a247da74ca4fe4e" + ] + ], + "versions": [ + "versions.yml:md5,2d134badb4cd1e4e903696c7967f28d6" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T17:07:09.721153" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/bedtools/merge/tests/nextflow.config b/modules/nf-core/bedtools/merge/tests/nextflow.config new file mode 100644 index 00000000..16444e98 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: BEDTOOLS_MERGE { + ext.prefix = { "${meta.id}_out" } + } + +} diff --git a/modules/nf-core/bedtools/merge/tests/tags.yml b/modules/nf-core/bedtools/merge/tests/tags.yml new file mode 100644 index 00000000..60c8cad1 --- /dev/null +++ b/modules/nf-core/bedtools/merge/tests/tags.yml @@ -0,0 +1,2 @@ +bedtools/merge: + - "modules/nf-core/bedtools/merge/**" diff --git a/modules/nf-core/blast/blastn/environment.yml b/modules/nf-core/blast/blastn/environment.yml new file mode 100644 index 00000000..cb9b15dd --- /dev/null +++ b/modules/nf-core/blast/blastn/environment.yml @@ -0,0 +1,7 @@ +name: blast_blastn +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::blast=2.14.1 diff --git a/modules/nf-core/blast/blastn/main.nf b/modules/nf-core/blast/blastn/main.nf new file mode 100644 index 00000000..2613e547 --- /dev/null +++ b/modules/nf-core/blast/blastn/main.nf @@ -0,0 +1,62 @@ +process BLAST_BLASTN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/blast:2.14.1--pl5321h6f7f691_0': + 'biocontainers/blast:2.14.1--pl5321h6f7f691_0' }" + + input: + tuple val(meta) , path(fasta) + tuple val(meta2), path(db) + + output: + tuple val(meta), path('*.txt'), emit: txt + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? 
fasta.getBaseName() : fasta + + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + DB=`find -L ./ -name "*.nal" | sed 's/\\.nal\$//'` + if [ -z "\$DB" ]; then + DB=`find -L ./ -name "*.nin" | sed 's/\\.nin\$//'` + fi + echo Using \$DB + + blastn \\ + -num_threads ${task.cpus} \\ + -db \$DB \\ + -query ${fasta_name} \\ + ${args} \\ + -out ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/blast/blastn/meta.yml b/modules/nf-core/blast/blastn/meta.yml new file mode 100644 index 00000000..a0d64dd6 --- /dev/null +++ b/modules/nf-core/blast/blastn/meta.yml @@ -0,0 +1,55 @@ +name: blast_blastn +description: Queries a BLAST DNA database +keywords: + - fasta + - blast + - blastn + - DNA sequence +tools: + - blast: + description: | + BLAST finds regions of similarity between biological sequences. + homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi + documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs + doi: 10.1016/S0022-2836(05)80360-2 + licence: ["US-Government-Work"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file containing queries sequences + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" + - meta2: + type: map + description: | + Groovy Map containing db information + e.g. 
[ id:'test2', single_end:false ] + - db: + type: directory + description: Directory containing the blast database + pattern: "*" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - txt: + type: file + description: File containing blastn hits + pattern: "*.txt" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@vagkaratzas" diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test b/modules/nf-core/blast/blastn/tests/main.nf.test new file mode 100644 index 00000000..02ecfab5 --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/main.nf.test @@ -0,0 +1,72 @@ +nextflow_process { + + name "Test Process BLAST_BLASTN" + script "../main.nf" + process "BLAST_BLASTN" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "blast" + tag "blast/blastn" + tag "blast/makeblastdb" + + setup { + run("BLAST_MAKEBLASTDB") { + script "../../makeblastdb/main.nf" + process { + """ + input[0] = [ [id:'test2'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + } + } + + test("Should search for nucleotide hits against a blast db") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.txt[0][1]).getText().contains("Query= MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + + test("Should search for zipped nucleotide hits against a blast db") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], 
file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) ] + input[1] = BLAST_MAKEBLASTDB.out.db + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.txt[0][1]).getText().contains("Query= MT192765.1 Severe acute respiratory syndrome coronavirus 2 isolate") }, + { assert snapshot(process.out.versions).match("versions_zipped") } + ) + } + + } + +} diff --git a/modules/nf-core/blast/blastn/tests/main.nf.test.snap b/modules/nf-core/blast/blastn/tests/main.nf.test.snap new file mode 100644 index 00000000..d1b5f3f2 --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/main.nf.test.snap @@ -0,0 +1,18 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,2d5ffadc7035672f6a9e00b01d1751ea" + ] + ], + "timestamp": "2023-12-11T07:20:03.54997013" + }, + "versions_zipped": { + "content": [ + [ + "versions.yml:md5,2d5ffadc7035672f6a9e00b01d1751ea" + ] + ], + "timestamp": "2023-12-11T07:20:12.925782708" + } +} \ No newline at end of file diff --git a/modules/nf-core/blast/blastn/tests/nextflow.config b/modules/nf-core/blast/blastn/tests/nextflow.config new file mode 100644 index 00000000..0899289b --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype nucl' + } +} diff --git a/modules/nf-core/blast/blastn/tests/tags.yml b/modules/nf-core/blast/blastn/tests/tags.yml new file mode 100644 index 00000000..b4588ab8 --- /dev/null +++ b/modules/nf-core/blast/blastn/tests/tags.yml @@ -0,0 +1,2 @@ +blast/blastn: + - modules/nf-core/blast/blastn/** diff --git a/modules/nf-core/blast/makeblastdb/environment.yml b/modules/nf-core/blast/makeblastdb/environment.yml new file mode 100644 index 00000000..a20783b0 --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/environment.yml @@ -0,0 +1,7 @@ +name: blast_makeblastdb +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::blast=2.15.0 diff 
--git a/modules/nf-core/blast/makeblastdb/main.nf b/modules/nf-core/blast/makeblastdb/main.nf new file mode 100644 index 00000000..a2c73189 --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/main.nf @@ -0,0 +1,65 @@ +process BLAST_MAKEBLASTDB { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/blast:2.15.0--pl5321h6f7f691_1': + 'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path("${meta.id}"), emit: db + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? fasta.getBaseName() : fasta + """ + if [ "${is_compressed}" == "true" ]; then + gzip -c -d ${fasta} > ${fasta_name} + fi + + makeblastdb \\ + -in ${fasta_name} \\ + ${args} + mkdir ${prefix} + mv ${fasta_name}* ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def is_compressed = fasta.getExtension() == "gz" ? true : false + def fasta_name = is_compressed ? 
fasta.getBaseName() : fasta + """ + touch ${fasta_name}.fasta + touch ${fasta_name}.fasta.ndb + touch ${fasta_name}.fasta.nhr + touch ${fasta_name}.fasta.nin + touch ${fasta_name}.fasta.njs + touch ${fasta_name}.fasta.not + touch ${fasta_name}.fasta.nsq + touch ${fasta_name}.fasta.ntf + touch ${fasta_name}.fasta.nto + mkdir ${prefix} + mv ${fasta_name}* ${prefix} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/blast/makeblastdb/meta.yml b/modules/nf-core/blast/makeblastdb/meta.yml new file mode 100644 index 00000000..9ed63901 --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/meta.yml @@ -0,0 +1,45 @@ +name: blast_makeblastdb +description: Builds a BLAST database +keywords: + - fasta + - blast + - database +tools: + - blast: + description: | + BLAST finds regions of similarity between biological sequences. + homepage: https://blast.ncbi.nlm.nih.gov/Blast.cgi + documentation: https://blast.ncbi.nlm.nih.gov/Blast.cgi?CMD=Web&PAGE_TYPE=Blastdocs + doi: 10.1016/S0022-2836(05)80360-2 + licence: ["US-Government-Work"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input fasta file + pattern: "*.{fa,fasta,fa.gz,fasta.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - db: + type: directory + description: Output directory containing blast database files + pattern: "*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@vagkaratzas" diff --git a/modules/nf-core/blast/makeblastdb/tests/main.nf.test b/modules/nf-core/blast/makeblastdb/tests/main.nf.test new file mode 100644 index 00000000..983b165f --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process BLAST_MAKEBLASTDB" + script "../main.nf" + process "BLAST_MAKEBLASTDB" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "blast" + tag "blast/makeblastdb" + + test("Should build a blast db folder from a fasta file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.db + with(process.out.db) { + assert size() == 1 + with(get(0).get(1)) { + File folder = new File(get(0).get(1)) + File[] listOfFiles = folder.listFiles() + listOfFiles = listOfFiles.sort { it.name } + assert listOfFiles.length == 9 + assert snapshot("${get(0).get(1)}/${listOfFiles[0].name}").match("genome.fasta") + assert snapshot("${get(0).get(1)}/${listOfFiles[1].name}").match("genome.fasta.ndb") + assert snapshot("${get(0).get(1)}/${listOfFiles[2].name}").match("genome.fasta.nhr") + assert snapshot("${get(0).get(1)}/${listOfFiles[5].name}").match("genome.fasta.not") + assert snapshot("${get(0).get(1)}/${listOfFiles[6].name}").match("genome.fasta.nsq") + assert snapshot("${get(0).get(1)}/${listOfFiles[7].name}").match("genome.fasta.ntf") + assert 
snapshot("${get(0).get(1)}/${listOfFiles[8].name}").match("genome.fasta.nto") + } + } + }, + { assert process.out.versions } + ) + } + + } + + test("Should build a blast db folder from a zipped fasta file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta_gz'], checkIfExists: true) ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.db + with(process.out.db) { + assert size() == 1 + with(get(0).get(1)) { + File folder = new File(get(0).get(1)) + File[] listOfFiles = folder.listFiles() + listOfFiles = listOfFiles.sort { it.name } + assert listOfFiles.length == 10 + assert snapshot("${get(0).get(1)}/${listOfFiles[0].name}").match("gz_genome.fasta") + assert snapshot("${get(0).get(1)}/${listOfFiles[2].name}").match("gz_genome.fasta.ndb") + assert snapshot("${get(0).get(1)}/${listOfFiles[3].name}").match("gz_genome.fasta.nhr") + assert snapshot("${get(0).get(1)}/${listOfFiles[6].name}").match("gz_genome.fasta.not") + assert snapshot("${get(0).get(1)}/${listOfFiles[7].name}").match("gz_genome.fasta.nsq") + assert snapshot("${get(0).get(1)}/${listOfFiles[8].name}").match("gz_genome.fasta.ntf") + assert snapshot("${get(0).get(1)}/${listOfFiles[9].name}").match("gz_genome.fasta.nto") + } + } + }, + { assert process.out.versions } + ) + } + + } + +} diff --git a/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap b/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap new file mode 100644 index 00000000..b6f040ed --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/tests/main.nf.test.snap @@ -0,0 +1,86 @@ +{ + "genome.fasta": { + "content": [ + "genome.fasta:md5,6e9fe4042a72f2345f644f239272b7e6" + ], + "timestamp": "2023-11-07T12:52:38.457245596" + }, + "gz_genome.fasta.ntf": { + "content": [ + "genome.fasta.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb" + ], + "timestamp": "2023-11-07T12:58:02.121840034" + }, + 
"genome.fasta.not": { + "content": [ + "genome.fasta.not:md5,1e53e9d08f1d23af0299cfa87478a7bb" + ], + "timestamp": "2023-11-07T12:55:33.862012946" + }, + "genome.fasta.nhr": { + "content": [ + "genome.fasta.nhr:md5,f4b4ddb034fd3dd7b25c89e9d50c004e" + ], + "timestamp": "2023-11-07T12:55:33.857994517" + }, + "gz_genome.fasta.nhr": { + "content": [ + "genome.fasta.nhr:md5,f4b4ddb034fd3dd7b25c89e9d50c004e" + ], + "timestamp": "2023-11-07T12:58:02.102407993" + }, + "genome.fasta.ntf": { + "content": [ + "genome.fasta.ntf:md5,de1250813f0c7affc6d12dac9d0fb6bb" + ], + "timestamp": "2023-11-07T12:55:33.877288786" + }, + "gz_genome.fasta.not": { + "content": [ + "genome.fasta.not:md5,1e53e9d08f1d23af0299cfa87478a7bb" + ], + "timestamp": "2023-11-07T12:58:02.108135313" + }, + "gz_genome.fasta.ndb": { + "content": [ + "genome.fasta.ndb:md5,0d553c830656469211de113c5022f06d" + ], + "timestamp": "2023-11-07T12:58:02.094305556" + }, + "gz_genome.fasta.nsq": { + "content": [ + "genome.fasta.nsq:md5,982cbc7d9e38743b9b1037588862b9da" + ], + "timestamp": "2023-11-07T12:58:02.115010863" + }, + "genome.fasta.nto": { + "content": [ + "genome.fasta.nto:md5,33cdeccccebe80329f1fdbee7f5874cb" + ], + "timestamp": "2023-11-07T12:55:33.890761822" + }, + "gz_genome.fasta.nto": { + "content": [ + "genome.fasta.nto:md5,33cdeccccebe80329f1fdbee7f5874cb" + ], + "timestamp": "2023-11-07T12:58:02.12931429" + }, + "genome.fasta.ndb": { + "content": [ + "genome.fasta.ndb:md5,0d553c830656469211de113c5022f06d" + ], + "timestamp": "2023-11-07T12:55:33.853303997" + }, + "genome.fasta.nsq": { + "content": [ + "genome.fasta.nsq:md5,982cbc7d9e38743b9b1037588862b9da" + ], + "timestamp": "2023-11-07T12:55:33.866667927" + }, + "gz_genome.fasta": { + "content": [ + "genome.fasta:md5,6e9fe4042a72f2345f644f239272b7e6" + ], + "timestamp": "2023-11-07T12:58:02.081764854" + } +} \ No newline at end of file diff --git a/modules/nf-core/blast/makeblastdb/tests/nextflow.config 
b/modules/nf-core/blast/makeblastdb/tests/nextflow.config new file mode 100644 index 00000000..0899289b --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/tests/nextflow.config @@ -0,0 +1,5 @@ +process { + withName: BLAST_MAKEBLASTDB { + ext.args = '-dbtype nucl' + } +} diff --git a/modules/nf-core/blast/makeblastdb/tests/tags.yml b/modules/nf-core/blast/makeblastdb/tests/tags.yml new file mode 100644 index 00000000..ab74c02a --- /dev/null +++ b/modules/nf-core/blast/makeblastdb/tests/tags.yml @@ -0,0 +1,2 @@ +blast/makeblastdb: + - modules/nf-core/blast/makeblastdb/** diff --git a/modules/nf-core/bowtie2/align/environment.yml b/modules/nf-core/bowtie2/align/environment.yml new file mode 100644 index 00000000..d2796359 --- /dev/null +++ b/modules/nf-core/bowtie2/align/environment.yml @@ -0,0 +1,9 @@ +name: bowtie2_align +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::bowtie2=2.5.2 + - bioconda::samtools=1.18 + - conda-forge::pigz=2.6 diff --git a/modules/nf-core/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf new file mode 100644 index 00000000..96a7027d --- /dev/null +++ b/modules/nf-core/bowtie2/align/main.nf @@ -0,0 +1,117 @@ +process BOWTIE2_ALIGN { + tag "$meta.id" + label "process_high" + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' : + 'biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:f70b31a2db15c023d641c32f433fb02cd04df5a6-0' }" + + input: + tuple val(meta) , path(reads) + tuple val(meta2), path(index) + tuple val(meta3), path(fasta) + val save_unaligned + val sort_bam + + output: + tuple val(meta), path("*.sam") , emit: sam , optional:true + tuple val(meta), path("*.bam") , emit: bam , optional:true + tuple val(meta), path("*.cram") , emit: cram , optional:true + tuple val(meta), path("*.csi") , emit: csi , optional:true + tuple val(meta), path("*.crai") , emit: crai , optional:true + tuple val(meta), path("*.log") , emit: log + tuple val(meta), path("*fastq.gz") , emit: fastq , optional:true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: "" + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + + def unaligned = "" + def reads_args = "" + if (meta.single_end) { + unaligned = save_unaligned ? "--un-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-U ${reads}" + } else { + unaligned = save_unaligned ? "--un-conc-gz ${prefix}.unmapped.fastq.gz" : "" + reads_args = "-1 ${reads[0]} -2 ${reads[1]}" + } + + def samtools_command = sort_bam ? 'sort' : 'view' + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension_matcher = (args2 =~ extension_pattern) + def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam" + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + """ + INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"` + [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"` + [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1 + + bowtie2 \\ + -x \$INDEX \\ + $reads_args \\ + --threads $task.cpus \\ + $unaligned \\ + $args \\ + 2> >(tee ${prefix}.bowtie2.log >&2) \\ + | samtools $samtools_command $args2 --threads $task.cpus ${reference} -o ${prefix}.${extension} - + + if [ -f ${prefix}.unmapped.fastq.1.gz ]; then + mv ${prefix}.unmapped.fastq.1.gz ${prefix}.unmapped_1.fastq.gz + fi + + if [ -f ${prefix}.unmapped.fastq.2.gz ]; then + mv ${prefix}.unmapped.fastq.2.gz ${prefix}.unmapped_2.fastq.gz + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args2 = task.ext.args2 ?: "" + def prefix = task.ext.prefix ?: "${meta.id}" + def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/ + def extension = (args2 ==~ extension_pattern) ? (args2 =~ extension_pattern)[0][2].toLowerCase() : "bam" + def create_unmapped = "" + if (meta.single_end) { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped.fastq.gz" : "" + } else { + create_unmapped = save_unaligned ? "touch ${prefix}.unmapped_1.fastq.gz && touch ${prefix}.unmapped_2.fastq.gz" : "" + } + def reference = fasta && extension=="cram" ? 
"--reference ${fasta}" : "" + if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" + + def create_index = "" + if (extension == "cram") { + create_index = "touch ${prefix}.crai" + } else if (extension == "bam") { + create_index = "touch ${prefix}.csi" + } + + """ + touch ${prefix}.${extension} + ${create_index} + touch ${prefix}.bowtie2.log + ${create_unmapped} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml new file mode 100644 index 00000000..e66811d0 --- /dev/null +++ b/modules/nf-core/bowtie2/align/meta.yml @@ -0,0 +1,70 @@ +name: bowtie2_align +description: Align reads to a reference genome using bowtie2 +keywords: + - align + - map + - fasta + - fastq + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.ebwt" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: use samtools sort (true) or samtools view (false) + pattern: "true or false" +output: + - aligned: + type: file + description: Output BAM/SAM file containing read alignments + pattern: "*.{bam,sam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Aligment log + pattern: "*.log" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/align/tests/cram_crai.config b/modules/nf-core/bowtie2/align/tests/cram_crai.config new file mode 100644 index 00000000..03f1d5e5 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/cram_crai.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_ALIGN { + ext.args2 = '--output-fmt cram --write-index' + } +} diff --git a/modules/nf-core/bowtie2/align/tests/large_index.config b/modules/nf-core/bowtie2/align/tests/large_index.config new file mode 100644 index 00000000..fdc1c59d --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/large_index.config @@ -0,0 +1,5 @@ +process { + withName: BOWTIE2_BUILD { + ext.args = '--large-index' + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test b/modules/nf-core/bowtie2/align/tests/main.nf.test new file mode 100644 index 00000000..03aeaf9e --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test @@ -0,0 +1,623 @@ +nextflow_process { + + name "Test Process BOWTIE2_ALIGN" + script "../main.nf" + process "BOWTIE2_ALIGN" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag 
"bowtie2/build" + tag "bowtie2/align" + + test("sarscov2 - fastq, index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam") { + + config "./sam.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, false - sam2") { + + config "./sam2.config" + setup { + run("BOWTIE2_BUILD") { 
+ script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.sam[0][1]).readLines()[0..4], + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + 
} + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, true - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + 
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam") { + + config "./large_index.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + 
file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - bam") { + + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + process.out.log, + process.out.fastq, + process.out.versions + ).match() } + + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, true, true - cram") { + + config "./cram_crai.config" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], 
checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = true //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.cram[0][1]).name, + file(process.out.crai[0][1]).name + ).match() } + ) + } + + } + + test("sarscov2 - [fastq1, fastq2], index, fasta, false, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), + file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) + ] + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + + test("sarscov2 - fastq, index, fasta, true, false - stub") { + + options "-stub" + setup { + run("BOWTIE2_BUILD") { + script "../../build/main.nf" + process { + """ + input[0] = [ + [ 
id:'test'], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + } + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) + ] + input[1] = BOWTIE2_BUILD.out.index + input[2] = [[ id:'test'], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)] + input[3] = false //save_unaligned + input[4] = false //sort + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot( + file(process.out.bam[0][1]).name, + file(process.out.csi[0][1]).name, + file(process.out.log[0][1]).name, + process.out.fastq, + process.out.versions + ).match() } + ) + } + + } + +} diff --git a/modules/nf-core/bowtie2/align/tests/main.nf.test.snap b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap new file mode 100644 index 00000000..028e7da6 --- /dev/null +++ b/modules/nf-core/bowtie2/align/tests/main.nf.test.snap @@ -0,0 +1,311 @@ +{ + "sarscov2 - [fastq1, fastq2], large_index, fasta, false, false - bam": { + "content": [ + "test.bam", + [ + [ + { + "id": "test", + "single_end": false + }, + "test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + [ + + ], + [ + "versions.yml:md5,01d18ab035146ea790e9a0f70adb758f" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T13:19:25.337323" + }, + "sarscov2 - fastq, index, fasta, false, false - sam2": { + "content": [ + [ + "ERR5069949.2151832\t16\tMT192765.1\t17453\t42\t150M\t*\t0\t0\tACGCACATTGCTAACTAAGGGCACACTAGAACCAGAATATTTCAATTCAGTGTGTAGACTTATGAAAACTATAGGTCCAGACATGTTCCTCGGAACTTGTCGGCGTTGTCCTGCTGAAATTGTTGACACTGTGAGTGCTTTGGTTTATGA\tAAAA versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + mkdir bowtie2 + touch bowtie2/${fasta.baseName}.{1..4}.bt2 + 
touch bowtie2/${fasta.baseName}.rev.{1,2}.bt2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + bowtie2: \$(echo \$(bowtie2 --version 2>&1) | sed 's/^.*bowtie2-align-s version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml new file mode 100644 index 00000000..2d687991 --- /dev/null +++ b/modules/nf-core/bowtie2/build/meta.yml @@ -0,0 +1,46 @@ +name: bowtie2_build +description: Builds bowtie index for reference genome +keywords: + - build + - index + - fasta + - genome + - reference +tools: + - bowtie2: + description: | + Bowtie 2 is an ultrafast and memory-efficient tool for aligning + sequencing reads to long reference sequences. + homepage: http://bowtie-bio.sourceforge.net/bowtie2/index.shtml + documentation: http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml + doi: 10.1038/nmeth.1923 + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: Input genome fasta file +output: + - meta: + type: map + description: | + Groovy Map containing reference information + e.g. 
[ id:'test', single_end:false ] + - index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test b/modules/nf-core/bowtie2/build/tests/main.nf.test new file mode 100644 index 00000000..16376025 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test @@ -0,0 +1,31 @@ +nextflow_process { + + name "Test Process BOWTIE2_BUILD" + script "modules/nf-core/bowtie2/build/main.nf" + process "BOWTIE2_BUILD" + tag "modules" + tag "modules_nfcore" + tag "bowtie2" + tag "bowtie2/build" + + test("Should run without failures") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], + file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assert process.success + assert snapshot(process.out).match() + } + + } + +} diff --git a/modules/nf-core/bowtie2/build/tests/main.nf.test.snap b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap new file mode 100644 index 00000000..6875e021 --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/main.nf.test.snap @@ -0,0 +1,45 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + "genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "1": [ + "versions.yml:md5,1df11e9b82891527271c889c880d3974" + ], + "index": [ + [ + { + "id": "test" + }, + [ + "genome.1.bt2:md5,cbe3d0bbea55bc57c99b4bfa25b5fbdf", + "genome.2.bt2:md5,47b153cd1319abc88dda532462651fcf", + 
"genome.3.bt2:md5,4ed93abba181d8dfab2e303e33114777", + "genome.4.bt2:md5,c25be5f8b0378abf7a58c8a880b87626", + "genome.rev.1.bt2:md5,52be6950579598a990570fbcf5372184", + "genome.rev.2.bt2:md5,e3b4ef343dea4dd571642010a7d09597" + ] + ] + ], + "versions": [ + "versions.yml:md5,1df11e9b82891527271c889c880d3974" + ] + } + ], + "timestamp": "2023-11-23T11:51:01.107681997" + } +} \ No newline at end of file diff --git a/modules/nf-core/bowtie2/build/tests/tags.yml b/modules/nf-core/bowtie2/build/tests/tags.yml new file mode 100644 index 00000000..81aa61da --- /dev/null +++ b/modules/nf-core/bowtie2/build/tests/tags.yml @@ -0,0 +1,2 @@ +bowtie2/build: + - modules/nf-core/bowtie2/build/** diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml new file mode 100644 index 00000000..8c69b121 --- /dev/null +++ b/modules/nf-core/cat/fastq/environment.yml @@ -0,0 +1,7 @@ +name: cat_fastq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::coreutils=8.30 diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf new file mode 100644 index 00000000..f132b2ad --- /dev/null +++ b/modules/nf-core/cat/fastq/main.nf @@ -0,0 +1,79 @@ +process CAT_FASTQ { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(reads, stageAs: "input*/*") + + output: + tuple val(meta), path("*.merged.fastq.gz"), emit: reads + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? 
reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size >= 1) { + """ + cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size >= 2) { + def read1 = [] + def read2 = [] + readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v } + """ + cat ${read1.join(' ')} > ${prefix}_1.merged.fastq.gz + cat ${read2.join(' ')} > ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()] + if (meta.single_end) { + if (readList.size > 1) { + """ + touch ${prefix}.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } else { + if (readList.size > 2) { + """ + touch ${prefix}_1.merged.fastq.gz + touch ${prefix}_2.merged.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//') + END_VERSIONS + """ + } + } +} diff --git a/modules/nf-core/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml new file mode 100644 index 00000000..db4ac3c7 --- /dev/null +++ b/modules/nf-core/cat/fastq/meta.yml @@ -0,0 +1,42 @@ +name: cat_fastq +description: Concatenates fastq files +keywords: + - cat + - fastq + - concatenate +tools: + - cat: + description: | + The cat utility reads files sequentially, writing them to the standard output. 
+ documentation: https://www.gnu.org/software/coreutils/manual/html_node/cat-invocation.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files to be concatenated. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Merged fastq file + pattern: "*.{merged.fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test new file mode 100644 index 00000000..a71dcb8d --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test @@ -0,0 +1,140 @@ +// NOTE The version snaps may not be consistant +// https://github.com/nf-core/modules/pull/4087#issuecomment-1767948035 +nextflow_process { + + name "Test Process CAT_FASTQ" + script "../main.nf" + process "CAT_FASTQ" + tag "modules" + tag "modules_nfcore" + tag "cat" + tag "cat/fastq" + + test("test_cat_fastq_single_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // 
meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_single_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_cat_fastq_paired_end_same_name") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + 
test("test_cat_fastq_single_end_single_file") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)] + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap new file mode 100644 index 00000000..43dfe28f --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap @@ -0,0 +1,169 @@ +{ + "test_cat_fastq_single_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:30:39.816981" + }, + "test_cat_fastq_single_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:32:35.229332" + }, + "test_cat_fastq_single_end_single_file": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + 
"reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:34:00.058829" + }, + "test_cat_fastq_paired_end_same_name": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:33:33.031555" + }, + "test_cat_fastq_paired_end": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "1": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22", + "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5" + ] + ] + ], + "versions": [ + "versions.yml:md5,d42d6e24d67004608495883e00bd501b" + ] + } + ], + "timestamp": "2024-01-17T17:32:02.270935" + } +} \ No newline at end of file diff --git a/modules/nf-core/cat/fastq/tests/tags.yml b/modules/nf-core/cat/fastq/tests/tags.yml new file mode 100644 index 00000000..6ac43614 --- /dev/null +++ b/modules/nf-core/cat/fastq/tests/tags.yml @@ -0,0 +1,2 @@ +cat/fastq: + - modules/nf-core/cat/fastq/** diff --git a/modules/nf-core/custom/getchromsizes/environment.yml 
b/modules/nf-core/custom/getchromsizes/environment.yml new file mode 100644 index 00000000..2a01695f --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/environment.yml @@ -0,0 +1,7 @@ +name: custom_getchromsizes +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.16.1 diff --git a/modules/nf-core/custom/getchromsizes/main.nf b/modules/nf-core/custom/getchromsizes/main.nf new file mode 100644 index 00000000..e8084ea2 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/main.nf @@ -0,0 +1,44 @@ +process CUSTOM_GETCHROMSIZES { + tag "$fasta" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.16.1--h6899075_1' : + 'biocontainers/samtools:1.16.1--h6899075_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path ("*.sizes"), emit: sizes + tuple val(meta), path ("*.fai") , emit: fai + tuple val(meta), path ("*.gzi") , emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools faidx $fasta + cut -f 1,2 ${fasta}.fai > ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${fasta}.fai + touch ${fasta}.sizes + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + getchromsizes: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/custom/getchromsizes/meta.yml b/modules/nf-core/custom/getchromsizes/meta.yml new file mode 100644 index 00000000..529be07e --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/meta.yml @@ -0,0 +1,54 @@ +name: custom_getchromsizes 
+description: Generates a FASTA file of chromosome sizes and a fasta index file +keywords: + - fasta + - chromosome + - indexing +tools: + - samtools: + description: Tools for dealing with SAM, BAM and CRAM files + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + tool_dev_url: https://github.com/samtools/samtools + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: FASTA file + pattern: "*.{fa,fasta,fna,fas}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - sizes: + type: file + description: File containing chromosome lengths + pattern: "*.{sizes}" + - fai: + type: file + description: FASTA index file + pattern: "*.{fai}" + - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@tamara-hodgetts" + - "@chris-cheshire" + - "@muffato" +maintainers: + - "@tamara-hodgetts" + - "@chris-cheshire" + - "@muffato" diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test b/modules/nf-core/custom/getchromsizes/tests/main.nf.test new file mode 100644 index 00000000..9f6b5640 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test @@ -0,0 +1,62 @@ +nextflow_process { + + name "Test Process CUSTOM_GETCHROMSIZES" + script "../main.nf" + process "CUSTOM_GETCHROMSIZES" + + tag "modules" + tag "modules_nfcore" + tag "custom" + tag "custom/getchromsizes" + + test("test_custom_getchromsizes") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', 
checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("test_custom_getchromsizes_bgzip") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap new file mode 100644 index 00000000..2e560bd3 --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/tests/main.nf.test.snap @@ -0,0 +1,114 @@ +{ + "test_custom_getchromsizes": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + + ], + "3": [ + "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions": [ + "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + ] + } + ], + "timestamp": "2024-01-17T17:48:35.562918" + }, + "test_custom_getchromsizes_bgzip": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "2": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "3": [ + 
"versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + ], + "fai": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5" + ] + ], + "gzi": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474" + ] + ], + "sizes": [ + [ + { + "id": "test" + }, + "genome.fasta.gz.sizes:md5,a57c401f27ae5133823fb09fb21c8a3c" + ] + ], + "versions": [ + "versions.yml:md5,45a83c5f3dddbc5dcab30035169f7ce8" + ] + } + ], + "timestamp": "2024-01-17T17:49:02.562311" + } +} \ No newline at end of file diff --git a/modules/nf-core/custom/getchromsizes/tests/tags.yml b/modules/nf-core/custom/getchromsizes/tests/tags.yml new file mode 100644 index 00000000..d89a805f --- /dev/null +++ b/modules/nf-core/custom/getchromsizes/tests/tags.yml @@ -0,0 +1,2 @@ +custom/getchromsizes: + - modules/nf-core/custom/getchromsizes/** diff --git a/modules/nf-core/fastp/environment.yml b/modules/nf-core/fastp/environment.yml new file mode 100644 index 00000000..70389e66 --- /dev/null +++ b/modules/nf-core/fastp/environment.yml @@ -0,0 +1,7 @@ +name: fastp +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::fastp=0.23.4 diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf new file mode 100644 index 00000000..e1b9f565 --- /dev/null +++ b/modules/nf-core/fastp/main.nf @@ -0,0 +1,125 @@ +process FASTP { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/fastp:0.23.4--h5f740d0_0' : + 'biocontainers/fastp:0.23.4--h5f740d0_0' }" + + input: + tuple val(meta), path(reads) + path adapter_fasta + val discard_trimmed_pass + val save_trimmed_fail + val save_merged + + output: + tuple val(meta), path('*.fastp.fastq.gz') , optional:true, emit: reads + tuple val(meta), path('*.json') , emit: json + tuple val(meta), path('*.html') , emit: html + tuple val(meta), path('*.log') , emit: log + tuple val(meta), path('*.fail.fastq.gz') , optional:true, emit: reads_fail + tuple val(meta), path('*.merged.fastq.gz'), optional:true, emit: reads_merged + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : "" + def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : '' + def out_fq1 = discard_trimmed_pass ?: ( meta.single_end ? "--out1 ${prefix}.fastp.fastq.gz" : "--out1 ${prefix}_1.fastp.fastq.gz" ) + def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_2.fastp.fastq.gz" + // Added soft-links to original fastqs for consistent naming in MultiQC + // Use single ended for interleaved. Add --interleaved_in in config. + if ( task.ext.args?.contains('--interleaved_in') ) { + """ + [ ! 
-f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --stdout \\ + --in1 ${prefix}.fastq.gz \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) \\ + | gzip -c > ${prefix}.fastp.fastq.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else if (meta.single_end) { + """ + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz + + fastp \\ + --in1 ${prefix}.fastq.gz \\ + $out_fq1 \\ + --thread $task.cpus \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } else { + def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' + """ + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz + [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz + fastp \\ + --in1 ${prefix}_1.fastq.gz \\ + --in2 ${prefix}_2.fastq.gz \\ + $out_fq1 \\ + $out_fq2 \\ + --json ${prefix}.fastp.json \\ + --html ${prefix}.fastp.html \\ + $adapter_list \\ + $fail_fastq \\ + $merge_fastq \\ + --thread $task.cpus \\ + --detect_adapter_for_pe \\ + $args \\ + 2> >(tee ${prefix}.fastp.log >&2) + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ + } + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def is_single_output = task.ext.args?.contains('--interleaved_in') || meta.single_end + def touch_reads = (discard_trimmed_pass) ? "" : (is_single_output) ? 
"echo '' | gzip > ${prefix}.fastp.fastq.gz" : "echo '' | gzip > ${prefix}_1.fastp.fastq.gz ; echo '' | gzip > ${prefix}_2.fastp.fastq.gz" + def touch_merged = (!is_single_output && save_merged) ? "echo '' | gzip > ${prefix}.merged.fastq.gz" : "" + def touch_fail_fastq = (!save_trimmed_fail) ? "" : meta.single_end ? "echo '' | gzip > ${prefix}.fail.fastq.gz" : "echo '' | gzip > ${prefix}.paired.fail.fastq.gz ; echo '' | gzip > ${prefix}_1.fail.fastq.gz ; echo '' | gzip > ${prefix}_2.fail.fastq.gz" + """ + $touch_reads + $touch_fail_fastq + $touch_merged + touch "${prefix}.fastp.json" + touch "${prefix}.fastp.html" + touch "${prefix}.fastp.log" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + fastp: \$(fastp --version 2>&1 | sed -e "s/fastp //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/fastp/meta.yml b/modules/nf-core/fastp/meta.yml new file mode 100644 index 00000000..8dfecc18 --- /dev/null +++ b/modules/nf-core/fastp/meta.yml @@ -0,0 +1,79 @@ +name: fastp +description: Perform adapter/quality trimming on sequencing reads +keywords: + - trimming + - quality control + - fastq +tools: + - fastp: + description: | + A tool designed to provide fast all-in-one preprocessing for FastQ files. This tool is developed in C++ with multithreading supported to afford high performance. + documentation: https://github.com/OpenGene/fastp + doi: 10.1093/bioinformatics/bty560 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information. Use 'single_end: true' to specify single ended or interleaved FASTQs. Use 'single_end: false' for paired-end reads. + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. If you wish to run interleaved paired-end data, supply as single-end data + but with `--interleaved_in` in your `modules.conf`'s `ext.args` for the module. 
+ - adapter_fasta: + type: file + description: File in FASTA format containing possible adapters to remove. + pattern: "*.{fasta,fna,fas,fa}" + - discard_trimmed_pass: + type: boolean + description: Specify true to not write any reads that pass trimming thresholds. | + This can be used to use fastp for the output report only. + - save_trimmed_fail: + type: boolean + description: Specify true to save files that failed to pass trimming thresholds ending in `*.fail.fastq.gz` + - save_merged: + type: boolean + description: Specify true to save all merged reads to a file ending in `*.merged.fastq.gz` +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified/unmerged fastq reads + pattern: "*fastp.fastq.gz" + - json: + type: file + description: Results in JSON format + pattern: "*.json" + - html: + type: file + description: Results in HTML format + pattern: "*.html" + - log: + type: file + description: fastq log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - reads_fail: + type: file + description: Reads the failed the preprocessing + pattern: "*fail.fastq.gz" + - reads_merged: + type: file + description: Reads that were successfully merged + pattern: "*.{merged.fastq.gz}" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test new file mode 100644 index 00000000..30dbb8aa --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test @@ -0,0 +1,576 @@ +nextflow_process { + + name "Test Process FASTP" + script "../main.nf" + process "FASTP" + tag "modules" + tag "modules_nfcore" + tag "fastp" + + test("test_fastp_single_end") { + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end") { + + when { + + process { + """ + adapter_fasta = [] + save_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("fastp test_fastp_interleaved") { + + config './nextflow.interleaved.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + 
input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("paired end (151 cycles + 151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert process.out.reads_fail == [] }, + { assert process.out.reads_merged == [] }, + { assert snapshot( + process.out.reads, + process.out.json, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail") { + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail") { + + config './nextflow.save_failed.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably 
it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 162") }, + { assert snapshot( + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.json, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_merged") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total reads: 75") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() }, + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("
") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("total bases: 13683") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads_fail, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end_qc_only") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("single end (151 cycles)") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("reads passed filter: 99") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads, + process.out.reads_fail, + process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.html.get(0).get(1)).getText().contains("The input has little adapter percentage (~0.000000%), probably it's trimmed before.") }, + { assert path(process.out.log.get(0).get(1)).getText().contains("Q30 bases: 12281(88.3716%)") }, + { assert snapshot( + process.out.json, + process.out.reads, + process.out.reads, + process.out.reads_fail, + 
process.out.reads_fail, + process.out.reads_merged, + process.out.reads_merged, + process.out.versions).match() } + ) + } + } + + test("test_fastp_single_end - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end - stub") { + + options "-stub" + + when { + + process { + """ + adapter_fasta = [] + save_trimmed_pass = true + save_trimmed_fail = false + save_merged = false + + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("fastp - stub test_fastp_interleaved") { + + options "-stub" + + config './nextflow.interleaved.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_trim_fail - stub") { + + options "-stub" + + when { + + process { + """ + input[0] = Channel.of([ + [ id:'test', 
single_end:true ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_trim_fail - stub") { + + options "-stub" + + config './nextflow.save_failed.config' + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)] + ]) + input[1] = [] + input[2] = false + input[3] = true + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_merged - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_merged_adapterlist - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ]) + input[2] = false + input[3] = false + input[4] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_single_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("test_fastp_paired_end_qc_only - stub") { + + options "-stub" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ] + ]) + input[1] = [] + input[2] = true + input[3] = false + input[4] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap new file mode 100644 index 00000000..54be7e45 --- /dev/null +++ b/modules/nf-core/fastp/tests/main.nf.test.snap @@ -0,0 +1,1331 @@ +{ + "test_fastp_single_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": 
true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:31:10.841098" + }, + "test_fastp_paired_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7", + "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39" + ] + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:28.665779" + }, + "test_fastp_paired_end_merged_adapterlist": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,5914ca3f21ce162123a824e33e8564f6" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:18.210375" + }, + "test_fastp_single_end_qc_only": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,5cc5f01e449309e0e689ed6f51a2294a" + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:27.380974" + }, + "test_fastp_paired_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7", + "test_2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a" + ] + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366", + "test_1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6", + "test_2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,4c3268ddb50ea5b33125984776aa3519" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:58.749589" + }, + "fastp - stub test_fastp_interleaved": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:50:00.270029" + }, + "test_fastp_single_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": 
"test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:49:42.502789" + }, + "test_fastp_paired_end_merged_adapterlist - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + 
"versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:54:53.458252" + }, + "test_fastp_paired_end_merged - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:50:27.689379" + }, + 
"test_fastp_paired_end_merged": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,b712fd68ed0322f4bec49ff2a5237fcc" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672", + "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba" + ] + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:08.68476" + }, + "test_fastp_paired_end - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" 
+ ] + ] + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:49:51.679221" + }, + "test_fastp_single_end": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:18.834322" + }, + "test_fastp_single_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:05:36.898142" + }, + "test_fastp_paired_end_trim_fail - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test_1.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + 
"test_2.fastp.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_fail": [ + [ + { + "id": "test", + "single_end": false + }, + [ + "test.paired.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_1.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "test_2.fail.fastq.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ] + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:05:49.212847" + }, + "fastp test_fastp_interleaved": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,217d62dc13a23e92513a1bd8e1bcea39" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22" + ] + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:38.910832" + }, + "test_fastp_single_end_trim_fail": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.json:md5,9a7ee180f000e8d00c7fb67f06293eb5" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.fail.fastq.gz:md5,3e4aaadb66a5b8fc9b881bf39c227abd" + ] + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:43:48.22378" + }, + "test_fastp_paired_end_qc_only": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,623064a45912dac6f2b64e3f2e9901df" + ] + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + + ], + [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + ], + "meta": { + "nf-test": "0.8.4", 
+ "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T13:44:36.334938" + }, + "test_fastp_paired_end_qc_only - stub": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + + ], + "5": [ + + ], + "6": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ], + "html": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "json": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.json:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "log": [ + [ + { + "id": "test", + "single_end": false + }, + "test.fastp.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "reads": [ + + ], + "reads_fail": [ + + ], + "reads_merged": [ + + ], + "versions": [ + "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-07-05T14:31:27.096468" + } +} \ No newline at end of file diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config new file mode 100644 index 00000000..4be8dbd2 --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.interleaved.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "--interleaved_in -e 30" + } +} diff --git a/modules/nf-core/fastp/tests/nextflow.save_failed.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config new file mode 100644 index 00000000..53b61b0c --- /dev/null +++ b/modules/nf-core/fastp/tests/nextflow.save_failed.config @@ -0,0 +1,5 @@ +process { + withName: FASTP { + ext.args = "-e 30" + } +} diff --git 
a/modules/nf-core/fastp/tests/tags.yml b/modules/nf-core/fastp/tests/tags.yml new file mode 100644 index 00000000..c1afcce7 --- /dev/null +++ b/modules/nf-core/fastp/tests/tags.yml @@ -0,0 +1,2 @@ +fastp: + - modules/nf-core/fastp/** diff --git a/modules/nf-core/freyja/boot/environment.yml b/modules/nf-core/freyja/boot/environment.yml new file mode 100644 index 00000000..dfab14ec --- /dev/null +++ b/modules/nf-core/freyja/boot/environment.yml @@ -0,0 +1,7 @@ +name: freyja_boot +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::freyja=1.5.0 diff --git a/modules/nf-core/freyja/boot/main.nf b/modules/nf-core/freyja/boot/main.nf new file mode 100644 index 00000000..ca1f1a35 --- /dev/null +++ b/modules/nf-core/freyja/boot/main.nf @@ -0,0 +1,57 @@ +process FREYJA_BOOT { + tag "$meta.id" + label 'process_high' + + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/freyja:1.5.0--pyhdfd78af_0': + 'biocontainers/freyja:1.5.0--pyhdfd78af_0' }" + + input: + tuple val(meta), path(variants), path(depths) + val repeats + path barcodes + path lineages_meta + + output: + tuple val(meta), path("*lineages.csv") , emit: lineages + tuple val(meta), path("*summarized.csv"), emit: summarized + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + freyja \\ + boot \\ + $args \\ + --nt $task.cpus \\ + --nb $repeats \\ + --output_base $prefix \\ + --barcodes $barcodes \\ + --meta $lineages_meta \\ + $variants \\ + $depths + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}_lineage.csv + touch 
${prefix}_summarized.csv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freyja/boot/meta.yml b/modules/nf-core/freyja/boot/meta.yml new file mode 100644 index 00000000..f3d91b52 --- /dev/null +++ b/modules/nf-core/freyja/boot/meta.yml @@ -0,0 +1,63 @@ +name: "freyja_boot" +description: Bootstrap sample demixing by resampling each site based on a multinomial distribution of read depth across all sites, where the event probabilities were determined by the fraction of the total sample reads found at each site, followed by a secondary resampling at each site according to a multinomial distribution (that is, binomial when there was only one SNV at a site), where event probabilities were determined by the frequencies of each base at the site, and the number of trials is given by the sequencing depth. +keywords: + - variants + - fasta + - deconvolution + - wastewater + - bootstrapping +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "10.1038/s41586-022-05049-6" + licence: ["BSD-2-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - variants: + type: file + description: File containing identified variants in a gff-like format + pattern: "*.variants.tsv" + - depths: + type: file + description: File containing depth of the variants + pattern: "*.depth.tsv" + - repeats: + type: integer + description: Number of bootstrap repeats to perform + - barcodes: + type: file + description: File containing lineage defining barcodes + pattern: "*barcodes.csv" + - lineages_meta: + type: file + description: File containing lineage metadata that correspond to barcodes + pattern: "*lineages.json" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - lineages: + type: file + description: a csv file that includes the lineages present and their corresponding abundances + pattern: "*lineages.csv" + - summarized: + type: file + description: a csv file that includes the lineages present but summarized by constellation and their corresponding abundances + pattern: "*summarized.csv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" diff --git a/modules/nf-core/freyja/boot/tests/main.nf.test b/modules/nf-core/freyja/boot/tests/main.nf.test new file mode 100644 index 00000000..f9efb2df --- /dev/null +++ b/modules/nf-core/freyja/boot/tests/main.nf.test @@ -0,0 +1,61 @@ +nextflow_process { + + name "Test Process FREYJA_BOOT" + script "../main.nf" + process "FREYJA_BOOT" + + tag "modules" + tag "modules_nfcore" + tag "freyja" + tag "freyja/boot" + tag "freyja/variants" + tag "freyja/update" + + test("sarscov2 - illumina - test_paired_end_sorted - bam") { + + setup { + run("FREYJA_VARIANTS") { + script "../../variants/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: 
true) + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + """ + } + } + run("FREYJA_UPDATE") { + script "../../update/main.nf" + process { + """ + input[0] = "freyja_db" + """ + } + } + } + + when { + process { + """ + input[0] = FREYJA_VARIANTS.out.variants + input[1] = 3 + input[2] = FREYJA_UPDATE.out.barcodes + input[3] = FREYJA_UPDATE.out.lineages_meta + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.summarized.get(0).get(1)).getText().contains("0.025,") }, + { assert path(process.out.lineages.get(0).get(1)).getText().contains("0.025,") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/freyja/boot/tests/main.nf.test.snap b/modules/nf-core/freyja/boot/tests/main.nf.test.snap new file mode 100644 index 00000000..334c743f --- /dev/null +++ b/modules/nf-core/freyja/boot/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,a2af061943f186aa529b80a419a8fcc1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-02T16:10:15.064948172" + } +} \ No newline at end of file diff --git a/modules/nf-core/freyja/boot/tests/nextflow.config b/modules/nf-core/freyja/boot/tests/nextflow.config new file mode 100644 index 00000000..3045d571 --- /dev/null +++ b/modules/nf-core/freyja/boot/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + + withName: FREYJA_BOOT { + ext.args="--depthcutoff 1" + } + +} diff --git a/modules/nf-core/freyja/boot/tests/tags.yml b/modules/nf-core/freyja/boot/tests/tags.yml new file mode 100644 index 00000000..8ccc35eb --- /dev/null +++ b/modules/nf-core/freyja/boot/tests/tags.yml @@ -0,0 +1,2 @@ +freyja/boot: + - "modules/nf-core/freyja/boot/**" diff --git a/modules/nf-core/freyja/demix/environment.yml b/modules/nf-core/freyja/demix/environment.yml new file mode 100644 index 00000000..0f29b8ce --- /dev/null +++ 
b/modules/nf-core/freyja/demix/environment.yml @@ -0,0 +1,7 @@ +name: freyja_demix +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::freyja=1.5.0 diff --git a/modules/nf-core/freyja/demix/main.nf b/modules/nf-core/freyja/demix/main.nf new file mode 100644 index 00000000..0ae3246b --- /dev/null +++ b/modules/nf-core/freyja/demix/main.nf @@ -0,0 +1,53 @@ +process FREYJA_DEMIX { + tag "$meta.id" + label 'process_low' + + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/freyja:1.5.0--pyhdfd78af_0': + 'biocontainers/freyja:1.5.0--pyhdfd78af_0' }" + + input: + tuple val(meta), path(variants), path(depths) + path barcodes + path lineages_meta + + output: + tuple val(meta), path("*.tsv"), emit: demix + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + freyja \\ + demix \\ + $args \\ + --output ${prefix}.tsv \\ + --barcodes $barcodes \\ + --meta $lineages_meta \\ + $variants \\ + $depths + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/freyja/demix/meta.yml b/modules/nf-core/freyja/demix/meta.yml new file mode 100644 index 00000000..141a155f --- /dev/null +++ b/modules/nf-core/freyja/demix/meta.yml @@ -0,0 +1,55 @@ +name: "freyja_demix" +description: specify the relative abundance of each known haplotype +keywords: + - variants + - fasta + - deconvolution + - wastewater +tools: + - "freyja": + 
description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "10.1038/s41586-022-05049-6" + licence: ["BSD-2-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variants: + type: file + description: File containing identified variants in a gff-like format + pattern: "*.variants.tsv" + - depths: + type: file + description: File containing depth of the variants + pattern: "*.depth.tsv" + - barcodes: + type: file + description: File containing lineage defining barcodes + pattern: "*barcodes.csv" + - lineages_meta: + type: file + description: File containing lineage metadata that correspond to barcodes + pattern: "*lineages.json" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - demix: + type: file + description: a tsv file that includes the lineages present, their corresponding abundances, and summarization by constellation + pattern: "*.demix.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" diff --git a/modules/nf-core/freyja/demix/tests/main.nf.test b/modules/nf-core/freyja/demix/tests/main.nf.test new file mode 100644 index 00000000..df25d82b --- /dev/null +++ b/modules/nf-core/freyja/demix/tests/main.nf.test @@ -0,0 +1,60 @@ + +nextflow_process { + + name "Test Process FREYJA_DEMIX" + script "../main.nf" + process "FREYJA_DEMIX" + + tag "modules" + tag "modules_nfcore" + tag "freyja" + tag "freyja/demix" + tag "freyja/variants" + tag "freyja/update" + + test("sarscov2 - illumina - test_paired_end_sorted - bam") { + + setup { + run("FREYJA_VARIANTS") { + script "../../variants/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + """ + } + } + run("FREYJA_UPDATE") { + script "../../update/main.nf" + process { + """ + input[0] = "freyja_db" + """ + } + } + } + + when { + process { + """ + input[0] = FREYJA_VARIANTS.out.variants + input[1] = FREYJA_UPDATE.out.barcodes + input[2] = FREYJA_UPDATE.out.lineages_meta + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.demix.get(0).get(1)).getText().contains("summarized") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/freyja/demix/tests/main.nf.test.snap b/modules/nf-core/freyja/demix/tests/main.nf.test.snap new file mode 100644 index 00000000..f36e9cc6 --- /dev/null +++ 
b/modules/nf-core/freyja/demix/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,a9c5d4ae8f93de8cbc08c7d87debd89c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-02T16:10:46.778891705" + } +} \ No newline at end of file diff --git a/modules/nf-core/freyja/demix/tests/nextflow.config b/modules/nf-core/freyja/demix/tests/nextflow.config new file mode 100644 index 00000000..ebd5a70d --- /dev/null +++ b/modules/nf-core/freyja/demix/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + + withName: FREYJA_DEMIX{ + ext.args = "--depthcutoff 1" + } + +} diff --git a/modules/nf-core/freyja/demix/tests/tags.yml b/modules/nf-core/freyja/demix/tests/tags.yml new file mode 100644 index 00000000..9e2c4bd1 --- /dev/null +++ b/modules/nf-core/freyja/demix/tests/tags.yml @@ -0,0 +1,2 @@ +freyja/demix: + - "modules/nf-core/freyja/demix/**" diff --git a/modules/nf-core/freyja/update/environment.yml b/modules/nf-core/freyja/update/environment.yml new file mode 100644 index 00000000..9dbffbb6 --- /dev/null +++ b/modules/nf-core/freyja/update/environment.yml @@ -0,0 +1,7 @@ +name: freyja_update +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::freyja=1.5.0 diff --git a/modules/nf-core/freyja/update/main.nf b/modules/nf-core/freyja/update/main.nf new file mode 100644 index 00000000..4d4b6857 --- /dev/null +++ b/modules/nf-core/freyja/update/main.nf @@ -0,0 +1,50 @@ +process FREYJA_UPDATE { + tag "$db_name" + label 'process_single' + + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/freyja:1.5.0--pyhdfd78af_0': + 'biocontainers/freyja:1.5.0--pyhdfd78af_0' }" + + input: + val db_name + + output: + path "${db_name}/usher_barcodes.csv" , emit: barcodes + path "${db_name}/lineages.yml" , emit: lineages_topology + path "${db_name}/curated_lineages.json", emit: lineages_meta + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + mkdir -p $db_name + freyja \\ + update \\ + --outdir $db_name + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: + """ + mkdir $db_name + + touch "${db_name}/usher_barcodes.csv" + touch "${db_name}/lineages.yml" + touch "${db_name}/curated_lineages.json" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freyja/update/meta.yml b/modules/nf-core/freyja/update/meta.yml new file mode 100644 index 00000000..1f5fc3c6 --- /dev/null +++ b/modules/nf-core/freyja/update/meta.yml @@ -0,0 +1,39 @@ +name: "freyja_update" +description: downloads new versions of the curated SARS-CoV-2 lineage file and barcodes +keywords: + - database + - variants + - UShER +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." 
+ homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "10.1038/s41586-022-05049-6" + licence: ["BSD-2-Clause"] +input: + - db_name: + type: string + description: "The name of the database directory" +output: + - barcodes: + type: file + description: File containing lineage defining barcodes + pattern: "*barcodes.csv" + - lineages_topology: + type: file + description: File containing the lineage topology + pattern: "*lineages.yml" + - lineages_meta: + type: file + description: File containing lineage metadata that correspond to barcodes + pattern: "*lineages.json" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" diff --git a/modules/nf-core/freyja/update/tests/main.nf.test b/modules/nf-core/freyja/update/tests/main.nf.test new file mode 100644 index 00000000..e1c293cc --- /dev/null +++ b/modules/nf-core/freyja/update/tests/main.nf.test @@ -0,0 +1,34 @@ +nextflow_process { + + name "Test Process FREYJA_UPDATE" + script "../main.nf" + process "FREYJA_UPDATE" + + tag "modules" + tag "modules_nfcore" + tag "freyja" + tag "freyja/update" + + test("sarscov2 - value db name ") { + + when { + process { + """ + input[0] = "test_db" + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.barcodes.get(0)).exists() }, + { assert path(process.out.lineages_meta.get(0)).exists() }, + { assert path(process.out.lineages_topology.get(0)).exists() }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + + } + +} diff --git a/modules/nf-core/freyja/update/tests/main.nf.test.snap b/modules/nf-core/freyja/update/tests/main.nf.test.snap new file mode 100644 index 00000000..0f510c62 --- /dev/null +++ b/modules/nf-core/freyja/update/tests/main.nf.test.snap @@ -0,0 +1,14 @@ +{ + "versions": { + 
"content": [ + [ + "versions.yml:md5,c730cb8a7d262beaace190a3e832a54d" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-02T16:11:08.501478371" + } +} \ No newline at end of file diff --git a/modules/nf-core/freyja/update/tests/tags.yml b/modules/nf-core/freyja/update/tests/tags.yml new file mode 100644 index 00000000..90826aab --- /dev/null +++ b/modules/nf-core/freyja/update/tests/tags.yml @@ -0,0 +1,2 @@ +freyja/update: + - "modules/nf-core/freyja/update/**" diff --git a/modules/nf-core/freyja/variants/environment.yml b/modules/nf-core/freyja/variants/environment.yml new file mode 100644 index 00000000..871a0745 --- /dev/null +++ b/modules/nf-core/freyja/variants/environment.yml @@ -0,0 +1,7 @@ +name: freyja_variants +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::freyja=1.5.0 diff --git a/modules/nf-core/freyja/variants/main.nf b/modules/nf-core/freyja/variants/main.nf new file mode 100644 index 00000000..fc876971 --- /dev/null +++ b/modules/nf-core/freyja/variants/main.nf @@ -0,0 +1,51 @@ +process FREYJA_VARIANTS { + tag "$meta.id" + label 'process_medium' + + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/freyja:1.5.0--pyhdfd78af_0': + 'biocontainers/freyja:1.5.0--pyhdfd78af_0' }" + + input: + tuple val(meta), path(bam) + path fasta + + output: + tuple val(meta), path("*.variants.tsv"), path("*.depth.tsv"), emit: variants + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + freyja \\ + variants \\ + $args \\ + --ref $fasta \\ + --variants ${prefix}.variants.tsv \\ + --depths ${prefix}.depth.tsv \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.variants.tsv + touch ${prefix}.depth.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + freyja: \$(echo \$(freyja --version 2>&1) | sed 's/^.*version //' ) + END_VERSIONS + """ +} diff --git a/modules/nf-core/freyja/variants/meta.yml b/modules/nf-core/freyja/variants/meta.yml new file mode 100644 index 00000000..24f8d693 --- /dev/null +++ b/modules/nf-core/freyja/variants/meta.yml @@ -0,0 +1,50 @@ +name: "freyja_variants" +description: call variant and sequencing depth information of the variant +keywords: + - variants + - fasta + - wastewater +tools: + - "freyja": + description: "Freyja recovers relative lineage abundances from mixed SARS-CoV-2 samples and provides functionality to analyze lineage dynamics." + homepage: "https://github.com/andersen-lab/Freyja" + documentation: "https://github.com/andersen-lab/Freyja/wiki" + tool_dev_url: "https://github.com/andersen-lab/Freyja" + doi: "10.1038/s41586-022-05049-6" + licence: ["BSD-2-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.bam" + - fasta: + type: file + description: The reference sequence used for mapping and generating the BAM file + pattern: "*.fa" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variants: + type: file + description: File containing identified variants in a gff-like format + pattern: "*.variants.tsv" + - depths: + type: file + description: File containing depth of the variants + pattern: "*.depth.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" diff --git a/modules/nf-core/freyja/variants/tests/main.nf.test b/modules/nf-core/freyja/variants/tests/main.nf.test new file mode 100644 index 00000000..e0bb4026 --- /dev/null +++ b/modules/nf-core/freyja/variants/tests/main.nf.test @@ -0,0 +1,35 @@ +nextflow_process { + + name "Test Process FREYJA_VARIANTS" + script "../main.nf" + process "FREYJA_VARIANTS" + + tag "modules" + tag "modules_nfcore" + tag "freyja" + tag "freyja/variants" + + test("sarscov2 - illumina - test_paired_end_sorted - bam") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true) + ] + input[1] = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/freyja/variants/tests/main.nf.test.snap b/modules/nf-core/freyja/variants/tests/main.nf.test.snap new file mode 100644 index 00000000..c85e8032 --- /dev/null +++ b/modules/nf-core/freyja/variants/tests/main.nf.test.snap @@ -0,0 +1,39 @@ +{ + "sarscov2 - illumina - test_paired_end_sorted - 
bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.variants.tsv:md5,966450bae4d9abae278572927b821983", + "test.depth.tsv:md5,27f79b28a365a8af915895b484d1153e" + ] + ], + "1": [ + "versions.yml:md5,10000e2412ba93678d3a4345fd98a2e3" + ], + "variants": [ + [ + { + "id": "test", + "single_end": false + }, + "test.variants.tsv:md5,966450bae4d9abae278572927b821983", + "test.depth.tsv:md5,27f79b28a365a8af915895b484d1153e" + ] + ], + "versions": [ + "versions.yml:md5,10000e2412ba93678d3a4345fd98a2e3" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-02T16:11:32.315489497" + } +} \ No newline at end of file diff --git a/modules/nf-core/freyja/variants/tests/tags.yml b/modules/nf-core/freyja/variants/tests/tags.yml new file mode 100644 index 00000000..b53250c9 --- /dev/null +++ b/modules/nf-core/freyja/variants/tests/tags.yml @@ -0,0 +1,2 @@ +freyja/variants: + - "modules/nf-core/freyja/variants/**" diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml new file mode 100644 index 00000000..25910b34 --- /dev/null +++ b/modules/nf-core/gunzip/environment.yml @@ -0,0 +1,7 @@ +name: gunzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - conda-forge::sed=4.7 diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf new file mode 100644 index 00000000..468a6f28 --- /dev/null +++ b/modules/nf-core/gunzip/main.nf @@ -0,0 +1,48 @@ +process GUNZIP { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$gunzip"), emit: gunzip + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + gunzip = archive.toString() - '.gz' + """ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ + $args \\ + $archive \\ + > $gunzip + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + gunzip = archive.toString() - '.gz' + """ + touch $gunzip + cat <<-END_VERSIONS > versions.yml + "${task.process}": + gunzip: \$(echo \$(gunzip --version 2>&1) | sed 's/^.*(gzip) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/gunzip/meta.yml b/modules/nf-core/gunzip/meta.yml new file mode 100644 index 00000000..231034f2 --- /dev/null +++ b/modules/nf-core/gunzip/meta.yml @@ -0,0 +1,39 @@ +name: gunzip +description: Compresses and decompresses files. +keywords: + - gunzip + - compression + - decompression +tools: + - gunzip: + description: | + gzip is a file format and a software application used for file compression and decompression. + documentation: https://www.gnu.org/software/gzip/manual/gzip.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Optional groovy Map containing meta information + e.g. 
[ id:'test', single_end:false ] + - archive: + type: file + description: File to be compressed/uncompressed + pattern: "*.*" +output: + - gunzip: + type: file + description: Compressed/uncompressed file + pattern: "*.*" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@jfy133" diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test new file mode 100644 index 00000000..6406008e --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process GUNZIP" + script "../main.nf" + process "GUNZIP" + tag "gunzip" + tag "modules_nfcore" + tag "modules" + + test("Should run without failures") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/gunzip/tests/main.nf.test.snap b/modules/nf-core/gunzip/tests/main.nf.test.snap new file mode 100644 index 00000000..720fd9ff --- /dev/null +++ b/modules/nf-core/gunzip/tests/main.nf.test.snap @@ -0,0 +1,31 @@ +{ + "Should run without failures": { + "content": [ + { + "0": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ], + "gunzip": [ + [ + [ + + ], + "test_1.fastq:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,54376d32aca20e937a4ec26dac228e84" + ] + } + ], + "timestamp": "2023-10-17T15:35:37.690477896" + } +} \ No newline at end of file diff --git a/modules/nf-core/gunzip/tests/tags.yml 
b/modules/nf-core/gunzip/tests/tags.yml new file mode 100644 index 00000000..fd3f6915 --- /dev/null +++ b/modules/nf-core/gunzip/tests/tags.yml @@ -0,0 +1,2 @@ +gunzip: + - modules/nf-core/gunzip/** diff --git a/modules/nf-core/ivar/consensus/environment.yml b/modules/nf-core/ivar/consensus/environment.yml new file mode 100644 index 00000000..c7b87d02 --- /dev/null +++ b/modules/nf-core/ivar/consensus/environment.yml @@ -0,0 +1,7 @@ +name: ivar_consensus +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ivar=1.4 diff --git a/modules/nf-core/ivar/consensus/main.nf b/modules/nf-core/ivar/consensus/main.nf new file mode 100644 index 00000000..9786200e --- /dev/null +++ b/modules/nf-core/ivar/consensus/main.nf @@ -0,0 +1,46 @@ +process IVAR_CONSENSUS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ivar:1.4--h6b7c446_1' : + 'biocontainers/ivar:1.4--h6b7c446_1' }" + + input: + tuple val(meta), path(bam) + path fasta + val save_mpileup + + output: + tuple val(meta), path("*.fa") , emit: fasta + tuple val(meta), path("*.qual.txt"), emit: qual + tuple val(meta), path("*.mpileup") , optional:true, emit: mpileup + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def mpileup = save_mpileup ? 
"| tee ${prefix}.mpileup" : "" + """ + samtools \\ + mpileup \\ + --reference $fasta \\ + $args2 \\ + $bam \\ + $mpileup \\ + | ivar \\ + consensus \\ + $args \\ + -p $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ivar: \$(echo \$(ivar version 2>&1) | sed 's/^.*iVar version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ivar/consensus/meta.yml b/modules/nf-core/ivar/consensus/meta.yml new file mode 100644 index 00000000..99e11924 --- /dev/null +++ b/modules/nf-core/ivar/consensus/meta.yml @@ -0,0 +1,59 @@ +name: ivar_consensus +description: Generate a consensus sequence from a BAM file using iVar +keywords: + - amplicon sequencing + - consensus + - fasta +tools: + - ivar: + description: | + iVar - a computational package that contains functions broadly useful for viral amplicon-based sequencing. + homepage: https://github.com/andersen-lab/ivar + documentation: https://andersen-lab.github.io/ivar/html/manualpage.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: A sorted (with samtools sort) and trimmed (with iVar trim) bam file + pattern: "*.bam" + - fasta: + type: file + description: The reference sequence used for mapping and generating the BAM file + pattern: "*.fa" + - save_mpileup: + type: boolean + description: Save mpileup file generated by ivar consensus + patter: "*.mpileup" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - fasta: + type: file + description: iVar generated consensus sequence + pattern: "*.fa" + - qual: + type: file + description: iVar generated quality file + pattern: "*.qual.txt" + - mpileup: + type: file + description: mpileup output from samtools mpileup [OPTIONAL] + pattern: "*.mpileup" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@andersgs" + - "@drpatelh" +maintainers: + - "@andersgs" + - "@drpatelh" diff --git a/modules/nf-core/ivar/trim/environment.yml b/modules/nf-core/ivar/trim/environment.yml new file mode 100644 index 00000000..7fb8b3a2 --- /dev/null +++ b/modules/nf-core/ivar/trim/environment.yml @@ -0,0 +1,7 @@ +name: ivar_trim +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ivar=1.4 diff --git a/modules/nf-core/ivar/trim/main.nf b/modules/nf-core/ivar/trim/main.nf new file mode 100644 index 00000000..7a687d02 --- /dev/null +++ b/modules/nf-core/ivar/trim/main.nf @@ -0,0 +1,38 @@ +process IVAR_TRIM { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/ivar:1.4--h6b7c446_1' : + 'biocontainers/ivar:1.4--h6b7c446_1' }" + + input: + tuple val(meta), path(bam), path(bai) + path bed + + output: + tuple val(meta), path("*.bam"), emit: bam + tuple val(meta), path('*.log'), emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + ivar trim \\ + $args \\ + -i $bam \\ + -b $bed \\ + -p $prefix \\ + > ${prefix}.ivar.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ivar: \$(echo \$(ivar version 2>&1) | sed 's/^.*iVar version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ivar/trim/meta.yml b/modules/nf-core/ivar/trim/meta.yml new file mode 100644 index 00000000..3a99f6f3 --- /dev/null +++ b/modules/nf-core/ivar/trim/meta.yml @@ -0,0 +1,55 @@ +name: ivar_trim +description: Trim primer sequences rom a BAM file with iVar +keywords: + - amplicon sequencing + - trimming + - fasta +tools: + - ivar: + description: | + iVar - a computational package that contains functions broadly useful for viral amplicon-based sequencing. + homepage: https://github.com/andersen-lab/ivar + documentation: https://andersen-lab.github.io/ivar/html/manualpage.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Co-ordinate sorted BAM file + pattern: "*.bam" + - bai: + type: file + description: Index file for co-ordinate sorted BAM file + pattern: "*.bai" + - bed: + type: file + description: BED file with primer labels and positions + pattern: "*.bed" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: iVar generated trimmed bam file (unsorted) + pattern: "*.bam" + - log: + type: file + description: Log file generated by iVar for use with MultiQC + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@andersgs" + - "@drpatelh" +maintainers: + - "@andersgs" + - "@drpatelh" diff --git a/modules/nf-core/ivar/variants/environment.yml b/modules/nf-core/ivar/variants/environment.yml new file mode 100644 index 00000000..7431cac4 --- /dev/null +++ b/modules/nf-core/ivar/variants/environment.yml @@ -0,0 +1,7 @@ +name: ivar_variants +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::ivar=1.4 diff --git a/modules/nf-core/ivar/variants/main.nf b/modules/nf-core/ivar/variants/main.nf new file mode 100644 index 00000000..189696aa --- /dev/null +++ b/modules/nf-core/ivar/variants/main.nf @@ -0,0 +1,50 @@ +process IVAR_VARIANTS { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ivar:1.4--h6b7c446_1' : + 'biocontainers/ivar:1.4--h6b7c446_1' }" + + input: + tuple val(meta), path(bam) + path fasta + path fai + path gff + val save_mpileup + + output: + tuple val(meta), path("*.tsv") , emit: tsv + tuple val(meta), path("*.mpileup"), optional:true, emit: mpileup + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def features = gff ? "-g $gff" : "" + def mpileup = save_mpileup ? 
"| tee ${prefix}.mpileup" : "" + """ + samtools \\ + mpileup \\ + $args2 \\ + --reference $fasta \\ + $bam \\ + $mpileup \\ + | ivar \\ + variants \\ + $args \\ + $features \\ + -r $fasta \\ + -p $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + ivar: \$(echo \$(ivar version 2>&1) | sed 's/^.*iVar version //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/ivar/variants/meta.yml b/modules/nf-core/ivar/variants/meta.yml new file mode 100644 index 00000000..9dc9b05c --- /dev/null +++ b/modules/nf-core/ivar/variants/meta.yml @@ -0,0 +1,63 @@ +name: ivar_variants +description: Call variants from a BAM file using iVar +keywords: + - amplicon sequencing + - variants + - fasta +tools: + - ivar: + description: | + iVar - a computational package that contains functions broadly useful for viral amplicon-based sequencing. + homepage: https://github.com/andersen-lab/ivar + documentation: https://andersen-lab.github.io/ivar/html/manualpage.html + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: A sorted (with samtools sort) and trimmed (with iVar trim) bam file + pattern: "*.bam" + - fasta: + type: file + description: The reference sequence used for mapping and generating the BAM file + pattern: "*.fa" + - fai: + type: file + description: The index for the reference sequence used for mapping and generating the BAM file + pattern: "*.fai" + - gff: + type: file + description: A GFF file in the GFF3 format can be supplied to specify coordinates of open reading frames (ORFs). In absence of GFF file, amino acid translation will not be done. + patter: "*.gff" + - save_mpileup: + type: boolean + description: Save mpileup file generated by ivar variants + patter: "*.mpileup" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tsv: + type: file + description: iVar generated TSV file with the variants + pattern: "*.tsv" + - mpileup: + type: file + description: mpileup output from samtools mpileup [OPTIONAL] + pattern: "*.mpileup" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@andersgs" + - "@drpatelh" +maintainers: + - "@andersgs" + - "@drpatelh" diff --git a/modules/nf-core/kraken2/kraken2/environment.yml b/modules/nf-core/kraken2/kraken2/environment.yml new file mode 100644 index 00000000..0c067fee --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/environment.yml @@ -0,0 +1,9 @@ +name: kraken2_kraken2 +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::kraken2=2.1.3" + - "coreutils=9.4" + - "pigz=2.8" diff --git a/modules/nf-core/kraken2/kraken2/main.nf b/modules/nf-core/kraken2/kraken2/main.nf new file mode 100644 index 00000000..364a6fe2 --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/main.nf @@ -0,0 +1,85 @@ +process KRAKEN2_KRAKEN2 { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-8706a1dd73c6cc426e12dd4dd33a5e917b3989ae:c8cbdc8ff4101e6745f8ede6eb5261ef98bdaff4-0' : + 'biocontainers/mulled-v2-8706a1dd73c6cc426e12dd4dd33a5e917b3989ae:c8cbdc8ff4101e6745f8ede6eb5261ef98bdaff4-0' }" + + input: + tuple val(meta), path(reads) + path db + val save_output_fastqs + val save_reads_assignment + + output: + tuple val(meta), path('*.classified{.,_}*') , optional:true, emit: classified_reads_fastq + tuple val(meta), path('*.unclassified{.,_}*') , optional:true, emit: unclassified_reads_fastq + tuple val(meta), path('*classifiedreads.txt') , optional:true, emit: classified_reads_assignment + tuple val(meta), path('*report.txt') , emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "" : "--paired" + def classified = meta.single_end ? "${prefix}.classified.fastq" : "${prefix}.classified#.fastq" + def unclassified = meta.single_end ? "${prefix}.unclassified.fastq" : "${prefix}.unclassified#.fastq" + def classified_option = save_output_fastqs ? "--classified-out ${classified}" : "" + def unclassified_option = save_output_fastqs ? "--unclassified-out ${unclassified}" : "" + def readclassification_option = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "--output /dev/null" + def compress_reads_command = save_output_fastqs ? 
"pigz -p $task.cpus *.fastq" : "" + + """ + kraken2 \\ + --db $db \\ + --threads $task.cpus \\ + --report ${prefix}.kraken2.report.txt \\ + --gzip-compressed \\ + $unclassified_option \\ + $classified_option \\ + $readclassification_option \\ + $paired \\ + $args \\ + $reads + + $compress_reads_command + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def paired = meta.single_end ? "" : "--paired" + def classified = meta.single_end ? "${prefix}.classified.fastq.gz" : "${prefix}.classified_1.fastq.gz ${prefix}.classified_2.fastq.gz" + def unclassified = meta.single_end ? "${prefix}.unclassified.fastq.gz" : "${prefix}.unclassified_1.fastq.gz ${prefix}.unclassified_2.fastq.gz" + def readclassification_option = save_reads_assignment ? "--output ${prefix}.kraken2.classifiedreads.txt" : "--output /dev/null" + def compress_reads_command = save_output_fastqs ? 
"pigz -p $task.cpus *.fastq" : "" + + """ + touch ${prefix}.kraken2.report.txt + if [ "$save_output_fastqs" == "true" ]; then + touch $classified + touch $unclassified + fi + if [ "$save_reads_assignment" == "true" ]; then + touch ${prefix}.kraken2.classifiedreads.txt + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//') + pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) + END_VERSIONS + """ + +} diff --git a/modules/nf-core/kraken2/kraken2/meta.yml b/modules/nf-core/kraken2/kraken2/meta.yml new file mode 100644 index 00000000..7909ffe7 --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/meta.yml @@ -0,0 +1,78 @@ +name: kraken2_kraken2 +description: Classifies metagenomic sequence data +keywords: + - classify + - metagenomics + - fastq + - db +tools: + - kraken2: + description: | + Kraken2 is a taxonomic sequence classifier that assigns taxonomic labels to sequence reads + homepage: https://ccb.jhu.edu/software/kraken2/ + documentation: https://github.com/DerrickWood/kraken2/wiki/Manual + doi: 10.1186/s13059-019-1891-0 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - db: + type: directory + description: Kraken2 database + - save_output_fastqs: + type: string + description: | + If true, optional commands are added to save classified and unclassified reads + as fastq files + - save_reads_assignment: + type: string + description: | + If true, an optional command is added to save a file reporting the taxonomic + classification of each input read +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - classified_reads_fastq: + type: file + description: | + Reads classified as belonging to any of the taxa + on the Kraken2 database. + pattern: "*{fastq.gz}" + - unclassified_reads_fastq: + type: file + description: | + Reads not classified to any of the taxa + on the Kraken2 database. + pattern: "*{fastq.gz}" + - classified_reads_assignment: + type: file + description: | + Kraken2 output file indicating the taxonomic assignment of + each input read + - report: + type: file + description: | + Kraken2 report containing stats about classified + and not classifed reads. + pattern: "*.{report.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/kraken2/kraken2/tests/main.nf.test b/modules/nf-core/kraken2/kraken2/tests/main.nf.test new file mode 100644 index 00000000..4c513021 --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/tests/main.nf.test @@ -0,0 +1,143 @@ +nextflow_process { + name "Test Process KRAKEN2_KRAKEN2" + script "../main.nf" + process "KRAKEN2_KRAKEN2" + tag "modules" + tag "modules_nfcore" + tag "untar" + tag "kraken2" + tag "kraken2/kraken2" + + setup { + run("UNTAR") { + script "modules/nf-core/untar/main.nf" + process { + """ + input[0] = Channel.of([ + [], + file( + params.test_data['sarscov2']['genome']['kraken2_tar_gz'], + checkIfExists: true + ) + ]) + """ + } + } + } + + test("sarscov2 illumina single end [fastq]") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file( + params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], + checkIfExists: true + )] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.report, + process.out.versions, + ).match() + }, 
+ { assert process.out.classified_reads_fastq.get(0).get(1) ==~ ".*/test.classified.fastq.gz" }, + { assert process.out.unclassified_reads_fastq.get(0).get(1) ==~ ".*/test.unclassified.fastq.gz" }, + ) + } + } + + test("sarscov2 illumina paired end [fastq]") { + when { + params { + outdir = "$outputDir" + } + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + [ + file( + params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], + checkIfExists: true + ), + file( + params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], + checkIfExists: true + ) + + ] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = true + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.report, + process.out.versions, + ).match() + }, + { assert process.out.classified_reads_fastq.get(0).get(1).get(0) + ==~ ".*/test.classified_1.fastq.gz" }, + { assert process.out.classified_reads_fastq.get(0).get(1).get(1) + ==~ ".*/test.classified_2.fastq.gz" }, + { assert process.out.unclassified_reads_fastq.get(0).get(1).get(0) + ==~ ".*/test.unclassified_1.fastq.gz" }, + { assert process.out.unclassified_reads_fastq.get(0).get(1).get(1) + ==~ ".*/test.unclassified_2.fastq.gz" }, + ) + } + } + + test("sarscov2 illumina single end [fastq] + save_reads_assignment") { + when { + params { + outdir = "$outputDir" + } + + process { + """ + input[0] = [ + [ id:'test', single_end:true ], // meta map + [ file( + params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], + checkIfExists: true + )] + ] + input[1] = UNTAR.out.untar.map{ it[1] } + input[2] = false + input[3] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.report, + process.out.classified_reads_assignment, + process.out.versions, + ).match() + }, + ) + } + } +} diff --git a/modules/nf-core/kraken2/kraken2/tests/main.nf.test.snap 
b/modules/nf-core/kraken2/kraken2/tests/main.nf.test.snap new file mode 100644 index 00000000..b432f878 --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "sarscov2 illumina single end [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T18:47:03.745692" + }, + "sarscov2 illumina paired end [fastq]": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T18:47:13.75649" + }, + "sarscov2 illumina single end [fastq] + save_reads_assignment": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.report.txt:md5,4227755fe40478b8d7dc8634b489761e" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.kraken2.classifiedreads.txt:md5,e7a90531f0d8d777316515c36fe4cae0" + ] + ], + [ + "versions.yml:md5,79adf2ca1cfc625cb77e391b27142c43" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-04T18:47:22.459465" + } +} \ No newline at end of file diff --git a/modules/nf-core/kraken2/kraken2/tests/tags.yml b/modules/nf-core/kraken2/kraken2/tests/tags.yml new file mode 100644 index 00000000..9ebfd7ab --- /dev/null +++ b/modules/nf-core/kraken2/kraken2/tests/tags.yml @@ -0,0 +1,3 @@ +kraken2/kraken2: + - modules/nf-core/kraken2/kraken2/** + - modules/nf-core/untar/** diff --git a/modules/nf-core/minia/environment.yml b/modules/nf-core/minia/environment.yml new file mode 100644 index 00000000..10e45529 --- /dev/null +++ 
b/modules/nf-core/minia/environment.yml @@ -0,0 +1,7 @@ +name: minia +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::minia=3.2.6 diff --git a/modules/nf-core/minia/main.nf b/modules/nf-core/minia/main.nf new file mode 100644 index 00000000..40d3213f --- /dev/null +++ b/modules/nf-core/minia/main.nf @@ -0,0 +1,39 @@ +process MINIA { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/minia:3.2.6--h9a82719_0' : + 'biocontainers/minia:3.2.6--h9a82719_0' }" + + input: + tuple val(meta), path(reads) + + output: + tuple val(meta), path('*.contigs.fa'), emit: contigs + tuple val(meta), path('*.unitigs.fa'), emit: unitigs + tuple val(meta), path('*.h5') , emit: h5 + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def read_list = reads.join(",") + """ + echo "${read_list}" | sed 's/,/\\n/g' > input_files.txt + minia \\ + $args \\ + -nb-cores $task.cpus \\ + -in input_files.txt \\ + -out $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + minia: \$(echo \$(minia --version 2>&1 | grep Minia) | sed 's/^.*Minia version //;') + END_VERSIONS + """ +} diff --git a/modules/nf-core/minia/meta.yml b/modules/nf-core/minia/meta.yml new file mode 100644 index 00000000..03241027 --- /dev/null +++ b/modules/nf-core/minia/meta.yml @@ -0,0 +1,50 @@ +name: minia +description: Minia is a short-read assembler based on a de Bruijn graph +keywords: + - assembly +tools: + - minia: + description: | + Minia is a short-read assembler based on a de Bruijn graph, capable of assembling + a human genome on a desktop computer in a day. The output of Minia is a set of contigs. 
+ homepage: https://github.com/GATB/minia + documentation: https://github.com/GATB/minia + licence: ["AGPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Input reads in FastQ format + pattern: "*.{fastq.gz, fastq}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - contigs: + type: file + description: The assembled contigs + pattern: "*.contigs.fa" + - unitigs: + type: file + description: The assembled unitigs + pattern: "*.unitigs.fa" + - h5: + type: file + description: Minia output h5 file + pattern: "*{.h5}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/mosdepth/environment.yml b/modules/nf-core/mosdepth/environment.yml new file mode 100644 index 00000000..bcb9d64a --- /dev/null +++ b/modules/nf-core/mosdepth/environment.yml @@ -0,0 +1,8 @@ +name: mosdepth +channels: + - conda-forge + - bioconda + - defaults +dependencies: + # renovate: datasource=conda depName=bioconda/mosdepth + - mosdepth=0.3.8 diff --git a/modules/nf-core/mosdepth/main.nf b/modules/nf-core/mosdepth/main.nf new file mode 100644 index 00000000..6f4a8383 --- /dev/null +++ b/modules/nf-core/mosdepth/main.nf @@ -0,0 +1,80 @@ +process MOSDEPTH { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/mosdepth:0.3.8--hd299d5a_0' : + 'biocontainers/mosdepth:0.3.8--hd299d5a_0'}" + + input: + tuple val(meta), path(bam), path(bai), path(bed) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path('*.global.dist.txt') , emit: global_txt + tuple val(meta), path('*.summary.txt') , emit: summary_txt + tuple val(meta), path('*.region.dist.txt') , optional:true, emit: regions_txt + tuple val(meta), path('*.per-base.d4') , optional:true, emit: per_base_d4 + tuple val(meta), path('*.per-base.bed.gz') , optional:true, emit: per_base_bed + tuple val(meta), path('*.per-base.bed.gz.csi') , optional:true, emit: per_base_csi + tuple val(meta), path('*.regions.bed.gz') , optional:true, emit: regions_bed + tuple val(meta), path('*.regions.bed.gz.csi') , optional:true, emit: regions_csi + tuple val(meta), path('*.quantized.bed.gz') , optional:true, emit: quantized_bed + tuple val(meta), path('*.quantized.bed.gz.csi') , optional:true, emit: quantized_csi + tuple val(meta), path('*.thresholds.bed.gz') , optional:true, emit: thresholds_bed + tuple val(meta), path('*.thresholds.bed.gz.csi'), optional:true, emit: thresholds_csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--fasta ${fasta}" : "" + def interval = bed ? "--by ${bed}" : "" + if (bed && args.contains("--by")) { + error "'--by' can only be specified once when running mosdepth! 
Either remove input BED file definition or remove '--by' from 'ext.args' definition" + } + if (!bed && args.contains("--thresholds")) { + error "'--thresholds' can only be specified in conjunction with '--by'" + } + + """ + mosdepth \\ + --threads $task.cpus \\ + $interval \\ + $reference \\ + $args \\ + $prefix \\ + $bam + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.global.dist.txt + touch ${prefix}.region.dist.txt + touch ${prefix}.summary.txt + touch ${prefix}.per-base.d4 + echo "" | gzip > ${prefix}.per-base.bed.gz + touch ${prefix}.per-base.bed.gz.csi + echo "" | gzip > ${prefix}.regions.bed.gz + touch ${prefix}.regions.bed.gz.csi + echo "" | gzip > ${prefix}.quantized.bed.gz + touch ${prefix}.quantized.bed.gz.csi + echo "" | gzip > ${prefix}.thresholds.bed.gz + touch ${prefix}.thresholds.bed.gz.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + mosdepth: \$(mosdepth --version 2>&1 | sed 's/^.*mosdepth //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/mosdepth/meta.yml b/modules/nf-core/mosdepth/meta.yml new file mode 100644 index 00000000..9caaf2cd --- /dev/null +++ b/modules/nf-core/mosdepth/meta.yml @@ -0,0 +1,109 @@ +name: mosdepth +description: Calculates genome-wide sequencing coverage. +keywords: + - mosdepth + - bam + - cram + - coverage +tools: + - mosdepth: + description: | + Fast BAM/CRAM depth calculation for WGS, exome, or targeted sequencing. + documentation: https://github.com/brentp/mosdepth + doi: 10.1093/bioinformatics/btx699 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: Input BAM/CRAM file + pattern: "*.{bam,cram}" + - bai: + type: file + description: Index for BAM/CRAM file + pattern: "*.{bai,crai}" + - bed: + type: file + description: BED file with intersected intervals + pattern: "*.{bed}" + - meta2: + type: map + description: | + Groovy Map containing bed information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - global_txt: + type: file + description: Text file with global cumulative coverage distribution + pattern: "*.{global.dist.txt}" + - regions_txt: + type: file + description: Text file with region cumulative coverage distribution + pattern: "*.{region.dist.txt}" + - summary_txt: + type: file + description: Text file with summary mean depths per chromosome and regions + pattern: "*.{summary.txt}" + - per_base_bed: + type: file + description: BED file with per-base coverage + pattern: "*.{per-base.bed.gz}" + - per_base_csi: + type: file + description: Index file for BED file with per-base coverage + pattern: "*.{per-base.bed.gz.csi}" + - per_base_d4: + type: file + description: D4 file with per-base coverage + pattern: "*.{per-base.d4}" + - regions_bed: + type: file + description: BED file with per-region coverage + pattern: "*.{regions.bed.gz}" + - regions_csi: + type: file + description: Index file for BED file with per-region coverage + pattern: "*.{regions.bed.gz.csi}" + - quantized_bed: + type: file + description: BED file with binned coverage + pattern: "*.{quantized.bed.gz}" + - quantized_csi: + type: file + description: Index file for BED file with binned coverage + pattern: "*.{quantized.bed.gz.csi}" + - thresholds_bed: + type: file + description: BED file with the number of bases in each region that are covered at or above each threshold + 
pattern: "*.{thresholds.bed.gz}" + - thresholds_csi: + type: file + description: Index file for BED file with threshold coverage + pattern: "*.{thresholds.bed.gz.csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@ramprasadn" + - "@matthdsm" diff --git a/modules/nf-core/mosdepth/tests/main.nf.test b/modules/nf-core/mosdepth/tests/main.nf.test new file mode 100644 index 00000000..21eebc1f --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test @@ -0,0 +1,246 @@ +nextflow_process { + + name "Test Process MOSDEPTH" + script "../main.nf" + process "MOSDEPTH" + + tag "modules" + tag "modules_nfcore" + tag "mosdepth" + + test("homo_sapiens - bam, bai, []") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, bed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - cram, crai, []") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true 
], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + [] + ] + input[1] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - cram, crai, bed") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_cram_crai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [ + [ id:'test' ], + file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, [] - window") { + + config "./window.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, [] - quantized") { + + config "./quantized.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + 
file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + [] + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, bed - thresholds") { + + config "./threshold.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } + + test("homo_sapiens - bam, bai, bed - fail") { + + config "./window.config" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.failed } + ) + } + + } + + test("homo_sapiens - bam, bai, [] - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test', single_end:true ], + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true), + file(params.test_data['homo_sapiens']['illumina']['test_paired_end_sorted_bam_bai'], checkIfExists: true), + file(params.test_data['homo_sapiens']['genome']['genome_bed'], checkIfExists: true) + ] + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert 
snapshot(process.out).match() } + ) + } + + } + +} diff --git a/modules/nf-core/mosdepth/tests/main.nf.test.snap b/modules/nf-core/mosdepth/tests/main.nf.test.snap new file mode 100644 index 00000000..c604540b --- /dev/null +++ b/modules/nf-core/mosdepth/tests/main.nf.test.snap @@ -0,0 +1,1386 @@ +{ + "homo_sapiens - bam, bai, [] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.global.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.d4:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] 
+ ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.global.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "per_base_d4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.d4:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.region.dist.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:33:16.953408231" + }, + "homo_sapiens - cram, crai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + 
"single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + 
], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:32:50.160217828" + }, + "homo_sapiens - bam, bai, [] - quantized": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz:md5,f037c215449d361112efc10108fcc17c" + ] + ], + "9": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,4f69e6ace50206a2768be66ded3a56f0" + ] + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.quantized.bed.gz:md5,f037c215449d361112efc10108fcc17c" + ] + ], + "quantized_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.quantized.bed.gz.csi:md5,4f69e6ace50206a2768be66ded3a56f0" + ] + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:33:01.164885111" + }, + "homo_sapiens - bam, bai, bed": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + 
"per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:32:39.071657456" + }, + "homo_sapiens - bam, bai, [] - window": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,0b6ea9f0da1228252d9aef2d3b6f7f76" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,34f48d16fcdd61e44d812e29e02c77b8" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,2a30bcb7f5c7632136b3efce24723970" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,34f48d16fcdd61e44d812e29e02c77b8" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,2a30bcb7f5c7632136b3efce24723970" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,0b6ea9f0da1228252d9aef2d3b6f7f76" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:32:55.631776118" + }, + "homo_sapiens - bam, bai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + 
"test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:32:33.642125299" + }, + "homo_sapiens - cram, crai, []": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "10": [ + + ], + "11": [ + + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ], + "2": [ + + ], + "3": [ + + ], + 
"4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + + ], + "7": [ + + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + + ], + "regions_csi": [ + + ], + "regions_txt": [ + + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,4f0d231060cbde4efdd673863bd2fb59" + ] + ], + "thresholds_bed": [ + + ], + "thresholds_csi": [ + + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:32:44.704920941" + }, + "homo_sapiens - bam, bai, bed - thresholds": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "10": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,fe70ae728cd10726c42a2bcd44adfc9d" + ] + ], + "11": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,219414a0751185adb98d2235d83ea055" + ] + ], + "12": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + 
], + "2": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "3": [ + + ], + "4": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "5": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "6": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "7": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "8": [ + + ], + "9": [ + + ], + "global_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.global.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "per_base_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz:md5,da6db0fb375a3053a89db8c935eebbaa" + ] + ], + "per_base_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.per-base.bed.gz.csi:md5,6f322dc9250522a701bd68bd18fa8294" + ] + ], + "per_base_d4": [ + + ], + "quantized_bed": [ + + ], + "quantized_csi": [ + + ], + "regions_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz:md5,9ded0397623fda26a6a3514d6a0e2a2c" + ] + ], + "regions_csi": [ + [ + { + "id": "test", + "single_end": true + }, + "test.regions.bed.gz.csi:md5,e7df086f0a36e88ca231e143d43bd3f9" + ] + ], + "regions_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.region.dist.txt:md5,e82e90c7d508a135b5a8a7cd6933452e" + ] + ], + "summary_txt": [ + [ + { + "id": "test", + "single_end": true + }, + "test.mosdepth.summary.txt:md5,96c037f769974b904beb53edc4f56d82" + ] + ], + "thresholds_bed": [ + [ + { + "id": "test", + "single_end": true + }, + "test.thresholds.bed.gz:md5,fe70ae728cd10726c42a2bcd44adfc9d" + ] + ], + "thresholds_csi": [ + [ + { + "id": "test", 
+ "single_end": true + }, + "test.thresholds.bed.gz.csi:md5,219414a0751185adb98d2235d83ea055" + ] + ], + "versions": [ + "versions.yml:md5,87634e525fb18990cd98fe1080ad72ce" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-04-29T13:33:06.737266831" + } +} \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/quantized.config b/modules/nf-core/mosdepth/tests/quantized.config new file mode 100644 index 00000000..63c55350 --- /dev/null +++ b/modules/nf-core/mosdepth/tests/quantized.config @@ -0,0 +1,3 @@ +process { + ext.args = "--quantize 0:1:4:100:200" +} \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/tags.yml b/modules/nf-core/mosdepth/tests/tags.yml new file mode 100644 index 00000000..5cd2e08e --- /dev/null +++ b/modules/nf-core/mosdepth/tests/tags.yml @@ -0,0 +1,2 @@ +mosdepth: + - "modules/nf-core/mosdepth/**" diff --git a/modules/nf-core/mosdepth/tests/threshold.config b/modules/nf-core/mosdepth/tests/threshold.config new file mode 100644 index 00000000..9b014ddf --- /dev/null +++ b/modules/nf-core/mosdepth/tests/threshold.config @@ -0,0 +1,3 @@ +process { + ext.args = "--thresholds 1,10,20,30" +} \ No newline at end of file diff --git a/modules/nf-core/mosdepth/tests/window.config b/modules/nf-core/mosdepth/tests/window.config new file mode 100644 index 00000000..7a0f755c --- /dev/null +++ b/modules/nf-core/mosdepth/tests/window.config @@ -0,0 +1,3 @@ +process { + ext.args = "--by 100" +} \ No newline at end of file diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf deleted file mode 100644 index cc0643e1..00000000 --- a/modules/nf-core/multiqc/main.nf +++ /dev/null @@ -1,63 +0,0 @@ -process MULTIQC { - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/multiqc:1.25.1--pyhdfd78af_0' : - 'biocontainers/multiqc:1.25.1--pyhdfd78af_0' }" - - input: - path multiqc_files, stageAs: "?/*" - path(multiqc_config) - path(extra_multiqc_config) - path(multiqc_logo) - path(replace_names) - path(sample_names) - - output: - path "*multiqc_report.html", emit: report - path "*_data" , emit: data - path "*_plots" , optional:true, emit: plots - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ? "--filename ${task.ext.prefix}.html" : '' - def config = multiqc_config ? "--config $multiqc_config" : '' - def extra_config = extra_multiqc_config ? "--config $extra_multiqc_config" : '' - def logo = multiqc_logo ? "--cl-config 'custom_logo: \"${multiqc_logo}\"'" : '' - def replace = replace_names ? "--replace-names ${replace_names}" : '' - def samples = sample_names ? "--sample-names ${sample_names}" : '' - """ - multiqc \\ - --force \\ - $args \\ - $config \\ - $prefix \\ - $extra_config \\ - $logo \\ - $replace \\ - $samples \\ - . 
- - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ - - stub: - """ - mkdir multiqc_data - mkdir multiqc_plots - touch multiqc_report.html - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - multiqc: \$( multiqc --version | sed -e "s/multiqc, version //g" ) - END_VERSIONS - """ -} diff --git a/modules/nf-core/multiqc/meta.yml b/modules/nf-core/multiqc/meta.yml deleted file mode 100644 index b16c1879..00000000 --- a/modules/nf-core/multiqc/meta.yml +++ /dev/null @@ -1,78 +0,0 @@ -name: multiqc -description: Aggregate results from bioinformatics analyses across many samples into - a single report -keywords: - - QC - - bioinformatics tools - - Beautiful stand-alone HTML report -tools: - - multiqc: - description: | - MultiQC searches a given directory for analysis logs and compiles a HTML report. - It's a general use tool, perfect for summarising the output from numerous bioinformatics tools. - homepage: https://multiqc.info/ - documentation: https://multiqc.info/docs/ - licence: ["GPL-3.0-or-later"] - identifier: biotools:multiqc -input: - - - multiqc_files: - type: file - description: | - List of reports / files recognised by MultiQC, for example the html and zip output of FastQC - - - multiqc_config: - type: file - description: Optional config yml for MultiQC - pattern: "*.{yml,yaml}" - - - extra_multiqc_config: - type: file - description: Second optional config yml for MultiQC. Will override common sections - in multiqc_config. - pattern: "*.{yml,yaml}" - - - multiqc_logo: - type: file - description: Optional logo file for MultiQC - pattern: "*.{png}" - - - replace_names: - type: file - description: | - Optional two-column sample renaming file. First column a set of - patterns, second column a set of corresponding replacements. Passed via - MultiQC's `--replace-names` option. 
- pattern: "*.{tsv}" - - - sample_names: - type: file - description: | - Optional TSV file with headers, passed to the MultiQC --sample_names - argument. - pattern: "*.{tsv}" -output: - - report: - - "*multiqc_report.html": - type: file - description: MultiQC report file - pattern: "multiqc_report.html" - - data: - - "*_data": - type: directory - description: MultiQC data dir - pattern: "multiqc_data" - - plots: - - "*_plots": - type: file - description: Plots created by MultiQC - pattern: "*_data" - - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@abhi18av" - - "@bunop" - - "@drpatelh" - - "@jfy133" -maintainers: - - "@abhi18av" - - "@bunop" - - "@drpatelh" - - "@jfy133" diff --git a/modules/nf-core/nanoplot/environment.yml b/modules/nf-core/nanoplot/environment.yml new file mode 100644 index 00000000..219cd2e3 --- /dev/null +++ b/modules/nf-core/nanoplot/environment.yml @@ -0,0 +1,7 @@ +name: nanoplot +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::nanoplot=1.41.6 diff --git a/modules/nf-core/nanoplot/main.nf b/modules/nf-core/nanoplot/main.nf new file mode 100644 index 00000000..c1816caf --- /dev/null +++ b/modules/nf-core/nanoplot/main.nf @@ -0,0 +1,58 @@ +process NANOPLOT { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/nanoplot:1.41.6--pyhdfd78af_0' : + 'biocontainers/nanoplot:1.41.6--pyhdfd78af_0' }" + + input: + tuple val(meta), path(ontfile) + + output: + tuple val(meta), path("*.html") , emit: html + tuple val(meta), path("*.png") , optional: true, emit: png + tuple val(meta), path("*.txt") , emit: txt + tuple val(meta), path("*.log") , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def input_file = ("$ontfile".endsWith(".fastq.gz") || "$ontfile".endsWith(".fq.gz")) ? "--fastq ${ontfile}" : + ("$ontfile".endsWith(".txt")) ? "--summary ${ontfile}" : '' + """ + NanoPlot \\ + $args \\ + -t $task.cpus \\ + $input_file + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanoplot: \$(echo \$(NanoPlot --version 2>&1) | sed 's/^.*NanoPlot //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch LengthvsQualityScatterPlot_dot.html + touch LengthvsQualityScatterPlot_kde.html + touch NanoPlot-report.html + touch NanoPlot_20240301_1130.log + touch NanoStats.txt + touch Non_weightedHistogramReadlength.html + touch Non_weightedLogTransformed_HistogramReadlength.html + touch WeightedHistogramReadlength.html + touch WeightedLogTransformed_HistogramReadlength.html + touch Yield_By_Length.html + + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nanoplot: \$(echo \$(NanoPlot --version 2>&1) | sed 's/^.*NanoPlot //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/nanoplot/meta.yml b/modules/nf-core/nanoplot/meta.yml new file mode 100644 index 00000000..46fbd562 --- /dev/null +++ b/modules/nf-core/nanoplot/meta.yml @@ -0,0 +1,62 @@ +name: nanoplot +description: Run NanoPlot on nanopore-sequenced reads +keywords: + - quality control + - qc + - fastq + - sequencing summary + - nanopore +tools: + - nanoplot: + description: | + NanoPlot is a tool for plotting long-read sequencing data and + alignment. 
+ homepage: http://nanoplot.bioinf.be + documentation: https://github.com/wdecoster/NanoPlot + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fastq: + type: file + description: | + List of input basecalled-FastQ files. + - summary_txt: + type: file + description: | + List of sequencing_summary.txt files from running basecalling. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - html: + type: file + description: NanoPlot report + pattern: "*{.html}" + - png: + type: file + description: Plots generated by NanoPlot + pattern: "*{.png}" + - txt: + type: file + description: Stats from NanoPlot + pattern: "*{.txt}" + - log: + type: file + description: log file of NanoPlot run + pattern: "*{.log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@yuukiiwa" +maintainers: + - "@drpatelh" + - "@yuukiiwa" diff --git a/modules/nf-core/nanoplot/tests/main.nf.test b/modules/nf-core/nanoplot/tests/main.nf.test new file mode 100644 index 00000000..29b57c10 --- /dev/null +++ b/modules/nf-core/nanoplot/tests/main.nf.test @@ -0,0 +1,94 @@ +nextflow_process { + + name "Test Process NANOPLOT" + tag "modules_nfcore" + tag "modules" + tag "nanoplot" + script "../main.nf" + process "NANOPLOT" + + test("NanoPlot summary") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['nanopore']['test_sequencing_summary'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.txt, + process.out.versions + ).match() + }, + { + with(process.out.html.get(0)) { + assert get(1).collect { p -> file(p).getName() }.contains("NanoPlot-report.html") + } + } + ) + } + + } + + test("NanoPlot FASTQ") 
{ + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['nanopore']['test_fastq_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.txt, + process.out.versions + ).match() + }, + { + with(process.out.html.get(0)) { + assert get(1).collect { p -> file(p).getName() }.contains("NanoPlot-report.html") + } + } + ) + } + + } + + test("NanoPlot - stub") { + + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ file(params.test_data['sarscov2']['nanopore']['test_sequencing_summary'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + + } +} diff --git a/modules/nf-core/nanoplot/tests/main.nf.test.snap b/modules/nf-core/nanoplot/tests/main.nf.test.snap new file mode 100644 index 00000000..f7f8028a --- /dev/null +++ b/modules/nf-core/nanoplot/tests/main.nf.test.snap @@ -0,0 +1,131 @@ +{ + "NanoPlot - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "LengthvsQualityScatterPlot_dot.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "LengthvsQualityScatterPlot_kde.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "NanoPlot-report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Yield_By_Length.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "NanoStats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + { + "id": "test" + }, + 
"NanoPlot_20240301_1130.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + "versions.yml:md5,961cee64736aeb9e56b65d05ee3cd1a5" + ], + "html": [ + [ + { + "id": "test" + }, + [ + "LengthvsQualityScatterPlot_dot.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "LengthvsQualityScatterPlot_kde.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "NanoPlot-report.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Non_weightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedHistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "WeightedLogTransformed_HistogramReadlength.html:md5,d41d8cd98f00b204e9800998ecf8427e", + "Yield_By_Length.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "log": [ + [ + { + "id": "test" + }, + "NanoPlot_20240301_1130.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "png": [ + + ], + "txt": [ + [ + { + "id": "test" + }, + "NanoStats.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,961cee64736aeb9e56b65d05ee3cd1a5" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2024-03-01T14:54:18.083198" + }, + "NanoPlot FASTQ": { + "content": [ + [ + [ + { + "id": "test" + }, + "NanoStats.txt:md5,50373c7543e71e3baf040926f0c69ac1" + ] + ], + [ + "versions.yml:md5,961cee64736aeb9e56b65d05ee3cd1a5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-10-17T16:18:44.848688965" + }, + "NanoPlot summary": { + "content": [ + [ + [ + { + "id": "test" + }, + "NanoStats.txt:md5,90464bf7049ca66106de56e7eac23dd4" + ] + ], + [ + "versions.yml:md5,961cee64736aeb9e56b65d05ee3cd1a5" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.0" + }, + "timestamp": "2023-10-17T16:18:31.104601192" + } +} \ No newline at end of file diff --git a/modules/nf-core/nanoplot/tests/tags.yaml 
b/modules/nf-core/nanoplot/tests/tags.yaml new file mode 100644 index 00000000..7c6ce3fa --- /dev/null +++ b/modules/nf-core/nanoplot/tests/tags.yaml @@ -0,0 +1,2 @@ +nanoplot: + - modules/nf-core/nanoplot/** diff --git a/modules/nf-core/nextclade/datasetget/environment.yml b/modules/nf-core/nextclade/datasetget/environment.yml new file mode 100644 index 00000000..cdd9f646 --- /dev/null +++ b/modules/nf-core/nextclade/datasetget/environment.yml @@ -0,0 +1,7 @@ +name: nextclade_datasetget +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::nextclade=2.12.0 diff --git a/modules/nf-core/nextclade/datasetget/main.nf b/modules/nf-core/nextclade/datasetget/main.nf new file mode 100644 index 00000000..70c900a5 --- /dev/null +++ b/modules/nf-core/nextclade/datasetget/main.nf @@ -0,0 +1,42 @@ +process NEXTCLADE_DATASETGET { + tag "$dataset" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/nextclade:2.12.0--h9ee0642_0' : + 'biocontainers/nextclade:2.12.0--h9ee0642_0' }" + + input: + val dataset + val reference + val tag + + output: + path "$prefix" , emit: dataset + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${dataset}" + def fasta = reference ? "--reference ${reference}" : '' + def version = tag ? 
"--tag ${tag}" : '' + """ + nextclade \\ + dataset \\ + get \\ + $args \\ + --name $dataset \\ + $fasta \\ + $version \\ + --output-dir $prefix + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/nextclade/datasetget/meta.yml b/modules/nf-core/nextclade/datasetget/meta.yml new file mode 100644 index 00000000..f3fb403e --- /dev/null +++ b/modules/nf-core/nextclade/datasetget/meta.yml @@ -0,0 +1,41 @@ +name: nextclade_datasetget +description: Get dataset for SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation) +keywords: + - nextclade + - variant + - consensus +tools: + - nextclade: + description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks + homepage: https://github.com/nextstrain/nextclade + documentation: https://github.com/nextstrain/nextclade + tool_dev_url: https://github.com/nextstrain/nextclade + licence: ["MIT"] +input: + - dataset: + type: string + description: Name of dataset to retrieve. A list of available datasets can be obtained using the nextclade dataset list command. + pattern: ".+" + - reference: + type: string + description: Accession id to download dataset based on a particular reference sequence. A list of available datasets can be obtained using the nextclade dataset list command. + pattern: ".+" + - tag: + type: string + description: Version tag of the dataset to download. A list of available datasets can be obtained using the nextclade dataset list command. 
+ pattern: ".+" +output: + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - prefix: + type: path + description: A directory containing the dataset files needed for nextclade run + pattern: "prefix" +authors: + - "@antunderwood" + - "@drpatelh" +maintainers: + - "@antunderwood" + - "@drpatelh" diff --git a/modules/nf-core/nextclade/run/environment.yml b/modules/nf-core/nextclade/run/environment.yml new file mode 100644 index 00000000..1e50e8d4 --- /dev/null +++ b/modules/nf-core/nextclade/run/environment.yml @@ -0,0 +1,7 @@ +name: nextclade_run +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::nextclade=2.12.0 diff --git a/modules/nf-core/nextclade/run/main.nf b/modules/nf-core/nextclade/run/main.nf new file mode 100644 index 00000000..33fb34c6 --- /dev/null +++ b/modules/nf-core/nextclade/run/main.nf @@ -0,0 +1,47 @@ +process NEXTCLADE_RUN { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/nextclade:2.12.0--h9ee0642_0' : + 'biocontainers/nextclade:2.12.0--h9ee0642_0' }" + + input: + tuple val(meta), path(fasta) + path dataset + + output: + tuple val(meta), path("${prefix}.csv") , optional:true, emit: csv + tuple val(meta), path("${prefix}.errors.csv") , optional:true, emit: csv_errors + tuple val(meta), path("${prefix}.insertions.csv"), optional:true, emit: csv_insertions + tuple val(meta), path("${prefix}.tsv") , optional:true, emit: tsv + tuple val(meta), path("${prefix}.json") , optional:true, emit: json + tuple val(meta), path("${prefix}.auspice.json") , optional:true, emit: json_auspice + tuple val(meta), path("${prefix}.ndjson") , optional:true, emit: ndjson + tuple val(meta), path("${prefix}.aligned.fasta") , optional:true, emit: fasta_aligned + tuple val(meta), path("*.translation.fasta") , optional:true, emit: fasta_translation + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + nextclade \\ + run \\ + $args \\ + --jobs $task.cpus \\ + --input-dataset $dataset \\ + --output-all ./ \\ + --output-basename ${prefix} \\ + $fasta + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + nextclade: \$(echo \$(nextclade --version 2>&1) | sed 's/^.*nextclade //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/nextclade/run/meta.yml b/modules/nf-core/nextclade/run/meta.yml new file mode 100644 index 00000000..ceebfe20 --- /dev/null +++ b/modules/nf-core/nextclade/run/meta.yml @@ -0,0 +1,59 @@ +name: nextclade_run +description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks (C++ implementation) +keywords: + - nextclade + - variant + - consensus +tools: + - nextclade: + description: SARS-CoV-2 genome clade assignment, mutation calling, and sequence quality checks + homepage: https://github.com/nextstrain/nextclade + 
documentation: https://github.com/nextstrain/nextclade + tool_dev_url: https://github.com/nextstrain/nextclade + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - dataset: + type: path + description: Path containing the dataset files obtained by running nextclade dataset get + pattern: "*" + - fasta: + type: file + description: FASTA file containing one or more consensus sequences + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - csv: + type: file + description: CSV file containing nextclade results + pattern: "*.{csv}" + - json: + type: file + description: JSON file containing nextclade results + pattern: "*.{json}" + - json_tree: + type: file + description: Auspice JSON V2 containing nextclade results + pattern: "*.{tree.json}" + - tsv: + type: file + description: TSV file containing nextclade results + pattern: "*.{tsv}" +authors: + - "@antunderwood" + - "@drpatelh" +maintainers: + - "@antunderwood" + - "@drpatelh" diff --git a/modules/nf-core/pangolin/environment.yml b/modules/nf-core/pangolin/environment.yml new file mode 100644 index 00000000..3c4d98c8 --- /dev/null +++ b/modules/nf-core/pangolin/environment.yml @@ -0,0 +1,7 @@ +name: pangolin +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pangolin=4.2 diff --git a/modules/nf-core/pangolin/main.nf b/modules/nf-core/pangolin/main.nf new file mode 100644 index 00000000..00cf4290 --- /dev/null +++ b/modules/nf-core/pangolin/main.nf @@ -0,0 +1,35 @@ +process PANGOLIN { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/pangolin:4.2--pyhdfd78af_1' : + 'biocontainers/pangolin:4.2--pyhdfd78af_1' }" + + input: + tuple val(meta), path(fasta) + + output: + tuple val(meta), path('*.csv'), emit: report + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pangolin \\ + $fasta\\ + --outfile ${prefix}.pangolin.csv \\ + --threads $task.cpus \\ + $args + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pangolin: \$(pangolin --version | sed "s/pangolin //g") + END_VERSIONS + """ +} diff --git a/modules/nf-core/pangolin/meta.yml b/modules/nf-core/pangolin/meta.yml new file mode 100644 index 00000000..6493f2c8 --- /dev/null +++ b/modules/nf-core/pangolin/meta.yml @@ -0,0 +1,37 @@ +name: pangolin +description: Phylogenetic Assignment of Named Global Outbreak LINeages +keywords: + - covid + - pangolin + - lineage +tools: + - pangolin: + description: | + Phylogenetic Assignment of Named Global Outbreak LINeages + homepage: https://github.com/cov-lineages/pangolin#pangolearn-description + manual: https://github.com/cov-lineages/pangolin#pangolearn-description + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + - fasta: + type: file + description: | + The genome assembly to be evaluated +output: + - report: + type: file + description: Pangolin lineage report + pattern: "*.{csv}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kevinmenden" + - "@drpatelh" +maintainers: + - "@kevinmenden" + - "@drpatelh" diff --git a/modules/nf-core/picard/collectmultiplemetrics/environment.yml b/modules/nf-core/picard/collectmultiplemetrics/environment.yml new file mode 100644 index 00000000..79b33280 --- /dev/null +++ b/modules/nf-core/picard/collectmultiplemetrics/environment.yml @@ -0,0 
+1,7 @@ +name: picard_collectmultiplemetrics +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/picard/collectmultiplemetrics/main.nf b/modules/nf-core/picard/collectmultiplemetrics/main.nf new file mode 100644 index 00000000..5640ce94 --- /dev/null +++ b/modules/nf-core/picard/collectmultiplemetrics/main.nf @@ -0,0 +1,67 @@ +process PICARD_COLLECTMULTIPLEMETRICS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" + + input: + tuple val(meta) , path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*_metrics"), emit: metrics + tuple val(meta), path("*.pdf") , emit: pdf, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard CollectMultipleMetrics] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' 
+ } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + """ + picard \\ + -Xmx${avail_mem}M \\ + CollectMultipleMetrics \\ + $args \\ + --INPUT $bam \\ + --OUTPUT ${prefix}.CollectMultipleMetrics \\ + $reference + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(picard CollectMultipleMetrics --version 2>&1 | grep -o 'Version.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.CollectMultipleMetrics.alignment_summary_metrics + touch ${prefix}.CollectMultipleMetrics.insert_size_metrics + touch ${prefix}.CollectMultipleMetrics.quality_distribution.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle_metrics + touch ${prefix}.CollectMultipleMetrics.read_length_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.base_distribution_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.quality_by_cycle.pdf + touch ${prefix}.CollectMultipleMetrics.insert_size_histogram.pdf + touch ${prefix}.CollectMultipleMetrics.quality_distribution_metrics + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard CollectMultipleMetrics --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/picard/collectmultiplemetrics/meta.yml b/modules/nf-core/picard/collectmultiplemetrics/meta.yml new file mode 100644 index 00000000..67bba57b --- /dev/null +++ b/modules/nf-core/picard/collectmultiplemetrics/meta.yml @@ -0,0 +1,70 @@ +name: picard_collectmultiplemetrics +description: Collect multiple metrics from a BAM file +keywords: + - alignment + - metrics + - statistics + - insert + - quality + - bam +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. 
+ homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: SAM/BAM/CRAM file + pattern: "*.{sam,bam,cram}" + - bai: + type: file + description: Optional SAM/BAM/CRAM file index + pattern: "*.{sai,bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - fasta: + type: file + description: Genome fasta file + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome'] + - fai: + type: file + description: Index of FASTA file. Only needed when fasta is supplied. + pattern: "*.fai" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - metrics: + type: file + description: Alignment metrics files generated by picard + pattern: "*_{metrics}" + - pdf: + type: file + description: PDF plots of metrics + pattern: "*.{pdf}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/picard/markduplicates/environment.yml b/modules/nf-core/picard/markduplicates/environment.yml new file mode 100644 index 00000000..58b795f5 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/environment.yml @@ -0,0 +1,7 @@ +name: picard_markduplicates +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::picard=3.1.1 diff --git a/modules/nf-core/picard/markduplicates/main.nf b/modules/nf-core/picard/markduplicates/main.nf new file mode 100644 index 00000000..ad0b2963 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/main.nf @@ -0,0 +1,68 @@ +process PICARD_MARKDUPLICATES { + tag "$meta.id" + label 
'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/picard:3.1.1--hdfd78af_0' : + 'biocontainers/picard:3.1.1--hdfd78af_0' }" + + input: + tuple val(meta), path(reads) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.bam") , emit: bam, optional: true + tuple val(meta), path("*.bai") , emit: bai, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.metrics.txt"), emit: metrics + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : "" + def avail_mem = 3072 + if (!task.memory) { + log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.' + } else { + avail_mem = (task.memory.mega*0.8).intValue() + } + + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + """ + picard \\ + -Xmx${avail_mem}M \\ + MarkDuplicates \\ + $args \\ + --INPUT $reads \\ + --OUTPUT ${prefix}.${suffix} \\ + $reference \\ + --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def suffix = task.ext.suffix ?: "${reads.getExtension()}" + if ("$reads" == "${prefix}.${suffix}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ """ + touch ${prefix}.${suffix} + touch ${prefix}.MarkDuplicates.metrics.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:) + END_VERSIONS + """ +} diff --git a/modules/nf-core/picard/markduplicates/meta.yml b/modules/nf-core/picard/markduplicates/meta.yml new file mode 100644 index 00000000..1f0ffe16 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/meta.yml @@ -0,0 +1,79 @@ +name: picard_markduplicates +description: Locate and tag duplicate reads in a BAM file +keywords: + - markduplicates + - pcr + - duplicates + - bam + - sam + - cram +tools: + - picard: + description: | + A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS) + data and formats such as SAM/BAM/CRAM and VCF. + homepage: https://broadinstitute.github.io/picard/ + documentation: https://broadinstitute.github.io/picard/ + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: Sequence reads file, can be SAM/BAM/CRAM format + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome fasta file, required for CRAM input + pattern: "*.{fasta,fa}" + - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Reference genome fasta index + pattern: "*.{fai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM file with duplicate reads marked/removed + pattern: "*.{bam}" + - bai: + type: file + description: An optional BAM index file. 
If desired, --CREATE_INDEX must be passed as a flag + pattern: "*.{bai}" + - cram: + type: file + description: Output CRAM file + pattern: "*.{cram}" + - metrics: + type: file + description: Duplicate metrics file generated by picard + pattern: "*.{metrics.txt}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test b/modules/nf-core/picard/markduplicates/tests/main.nf.test new file mode 100644 index 00000000..e3e97f6c --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test @@ -0,0 +1,92 @@ +nextflow_process { + + name "Test Process PICARD_MARKDUPLICATES" + script "../main.nf" + process "PICARD_MARKDUPLICATES" + config "./nextflow.config" + tag "modules" + tag "modules_nfcore" + tag "picard" + tag "picard/markduplicates" + + test("sarscov2 [unsorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("unsorted_bam_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("unsorted_bam_metrics") }, + { assert snapshot(process.out.versions).match("unsorted_bam_versions") } + ) + } + } + + test("sarscov2 [sorted bam]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = [ [:], [] ] + input[2] = [ [:], [] ] + """ + } 
+ } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("sorted_bam_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("sorted_bam_metrics") }, + { assert snapshot(process.out.versions).match("sorted_bam_versions") } + ) + } + } + + test("homo_sapiens [cram]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_name") }, + { assert snapshot(path(process.out.metrics.get(0).get(1)).readLines()[0..2]).match("cram_metrics") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + } +} diff --git a/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap new file mode 100644 index 00000000..eb17111e --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/main.nf.test.snap @@ -0,0 +1,110 @@ +{ + "sorted_bam_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:31:50.928021" + }, + "unsorted_bam_name": { + "content": [ + "test.marked.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-19T10:26:28.100755" + }, + "cram_metrics": { + "content": [ + [ + "## 
htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.cram --OUTPUT test.marked.cram --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --REFERENCE_SEQUENCE genome.fasta --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:25:47.518152" + }, + "sorted_bam_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.sorted.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false --TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false 
--FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-21T11:39:10.318331" + }, + "cram_name": { + "content": [ + "test.marked.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:25:47.459663" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-19T10:27:03.26989" + }, + "unsorted_bam_versions": { + "content": [ + [ + "versions.yml:md5,b699af51b1956f3810f8a7c066e0ab17" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T15:31:24.040403" + }, + "unsorted_bam_metrics": { + "content": [ + [ + "## htsjdk.samtools.metrics.StringHeader", + "# MarkDuplicates --INPUT test.paired_end.bam --OUTPUT test.marked.bam --METRICS_FILE test.marked.MarkDuplicates.metrics.txt --ASSUME_SORT_ORDER queryname --MAX_SEQUENCES_FOR_DISK_READ_ENDS_MAP 50000 --MAX_FILE_HANDLES_FOR_READ_ENDS_MAP 8000 --SORTING_COLLECTION_SIZE_RATIO 0.25 --TAG_DUPLICATE_SET_MEMBERS false --REMOVE_SEQUENCING_DUPLICATES false 
--TAGGING_POLICY DontTag --CLEAR_DT true --DUPLEX_UMI false --FLOW_MODE false --FLOW_QUALITY_SUM_STRATEGY false --USE_END_IN_UNPAIRED_READS false --USE_UNPAIRED_CLIPPED_END false --UNPAIRED_END_UNCERTAINTY 0 --FLOW_SKIP_FIRST_N_FLOWS 0 --FLOW_Q_IS_KNOWN_END false --FLOW_EFFECTIVE_QUALITY_THRESHOLD 15 --ADD_PG_TAG_TO_READS true --REMOVE_DUPLICATES false --ASSUME_SORTED false --DUPLICATE_SCORING_STRATEGY SUM_OF_BASE_QUALITIES --PROGRAM_RECORD_ID MarkDuplicates --PROGRAM_GROUP_NAME MarkDuplicates --READ_NAME_REGEX --OPTICAL_DUPLICATE_PIXEL_DISTANCE 100 --MAX_OPTICAL_DUPLICATE_SET_SIZE 300000 --VERBOSITY INFO --QUIET false --VALIDATION_STRINGENCY STRICT --COMPRESSION_LEVEL 5 --MAX_RECORDS_IN_RAM 500000 --CREATE_INDEX false --CREATE_MD5_FILE false --help false --version false --showHidden false --USE_JDK_DEFLATER false --USE_JDK_INFLATER false", + "## htsjdk.samtools.metrics.StringHeader" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-21T10:51:12.831787" + }, + "sorted_bam_name": { + "content": [ + "test.marked.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-01-19T10:26:45.080116" + } +} \ No newline at end of file diff --git a/modules/nf-core/picard/markduplicates/tests/nextflow.config b/modules/nf-core/picard/markduplicates/tests/nextflow.config new file mode 100644 index 00000000..02818dd6 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: PICARD_MARKDUPLICATES { + ext.prefix = { "${meta.id}.marked" } + ext.args = '--ASSUME_SORT_ORDER queryname' + } +} diff --git a/modules/nf-core/picard/markduplicates/tests/tags.yml b/modules/nf-core/picard/markduplicates/tests/tags.yml new file mode 100644 index 00000000..4f213d62 --- /dev/null +++ b/modules/nf-core/picard/markduplicates/tests/tags.yml @@ -0,0 +1,2 @@ +picard/markduplicates: + - modules/nf-core/picard/markduplicates/** diff --git 
a/modules/nf-core/plasmidid/environment.yml b/modules/nf-core/plasmidid/environment.yml new file mode 100644 index 00000000..1eda21d5 --- /dev/null +++ b/modules/nf-core/plasmidid/environment.yml @@ -0,0 +1,7 @@ +name: plasmidid +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::plasmidid=1.6.5 diff --git a/modules/nf-core/plasmidid/main.nf b/modules/nf-core/plasmidid/main.nf new file mode 100644 index 00000000..32fe7716 --- /dev/null +++ b/modules/nf-core/plasmidid/main.nf @@ -0,0 +1,45 @@ +process PLASMIDID { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/plasmidid:1.6.5--hdfd78af_0' : + 'biocontainers/plasmidid:1.6.5--hdfd78af_0' }" + + input: + tuple val(meta), path(scaffold) + path fasta + + output: + tuple val(meta), path("${prefix}/*final_results.html"), emit: html + tuple val(meta), path("${prefix}/*final_results.tab") , emit: tab + tuple val(meta), path("${prefix}/images/") , emit: images + tuple val(meta), path("${prefix}/logs/") , emit: logs + tuple val(meta), path("${prefix}/data/") , emit: data + tuple val(meta), path("${prefix}/database/") , emit: database + tuple val(meta), path("${prefix}/fasta_files/") , emit: fasta_files + tuple val(meta), path("${prefix}/kmer/") , emit: kmer + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ + plasmidID \\ + -d $fasta \\ + -s $prefix \\ + -c $scaffold \\ + $args \\ + -o . 
+ + mv NO_GROUP/$prefix ./$prefix + cat <<-END_VERSIONS > versions.yml + "${task.process}": + plasmidid: \$(echo \$(plasmidID --version 2>&1)) + END_VERSIONS + """ +} diff --git a/modules/nf-core/plasmidid/meta.yml b/modules/nf-core/plasmidid/meta.yml new file mode 100644 index 00000000..66479359 --- /dev/null +++ b/modules/nf-core/plasmidid/meta.yml @@ -0,0 +1,75 @@ +name: plasmidid +description: assembles bacterial plasmids +keywords: + - assembly + - plasmid + - bacterial +tools: + - plasmidid: + description: Pipeline for plasmid identification and reconstruction + homepage: https://github.com/BU-ISCIII/plasmidID/wiki + documentation: https://github.com/BU-ISCIII/plasmidID#readme + tool_dev_url: https://github.com/BU-ISCIII/plasmidID + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - scaffold: + type: file + description: | + Fasta file containing scaffold + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - html: + type: file + description: html file with results rendered + pattern: "*.{html}" + - tab: + type: file + description: Results in a tabular file + pattern: "*.{tab}" + - images: + type: directory + description: Directory containing the images produced by plasmidid + pattern: "images" + - logs: + type: directory + description: Directory containing the logs produced by plasmidid + pattern: "logs" + - data: + type: directory + description: Directory containing the data produced by plasmidid + pattern: "data" + - database: + type: directory + description: Directory containing the database produced by plasmidid + pattern: "database" + - fasta_files: + type: directory + description: Directory containing the fasta files produced by plasmidid + pattern: "fasta_files" + - kmer: + type: directory + description: Directory containing the kmer files produced by plasmidid + pattern: "kmer" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/pycoqc/environment.yml b/modules/nf-core/pycoqc/environment.yml new file mode 100644 index 00000000..c2a3a4d1 --- /dev/null +++ b/modules/nf-core/pycoqc/environment.yml @@ -0,0 +1,7 @@ +name: pycoqc +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::pycoqc=2.5.2 diff --git a/modules/nf-core/pycoqc/main.nf b/modules/nf-core/pycoqc/main.nf new file mode 100644 index 00000000..98169db2 --- /dev/null +++ b/modules/nf-core/pycoqc/main.nf @@ -0,0 +1,36 @@ +process PYCOQC { + tag "$summary" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/pycoqc:2.5.2--py_0' : + 'biocontainers/pycoqc:2.5.2--py_0' }" + + input: + tuple val(meta), path(summary) + + output: + tuple val(meta), path("*.html"), emit: html + tuple val(meta), path("*.json"), emit: json + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + pycoQC \\ + $args \\ + -f $summary \\ + -o ${prefix}.html \\ + -j ${prefix}.json + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + pycoqc: \$(pycoQC --version 2>&1 | sed 's/^.*pycoQC v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/pycoqc/meta.yml b/modules/nf-core/pycoqc/meta.yml new file mode 100644 index 00000000..cfbd8516 --- /dev/null +++ b/modules/nf-core/pycoqc/meta.yml @@ -0,0 +1,48 @@ +name: pycoqc +description: Computes quality-control metrics and interactive plots for Oxford Nanopore sequencing data +keywords: + - qc + - quality control + - sequencing + - nanopore +tools: + - pycoqc: + description: PycoQC computes metrics and generates interactive QC plots for Oxford Nanopore technologies sequencing data + homepage: https://github.com/tleonardi/pycoQC + documentation: https://tleonardi.github.io/pycoQC/ + tool_dev_url: https://github.com/tleonardi/pycoQC + doi: "10.21105/joss.01236" + licence: ["GNU General Public v3 (GPL v3)"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - summary: + type: file + description: sequencing summary file + pattern: "*.{txt}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - html: + type: file + description: Results in HTML format + - json: + type: file + description: Results in JSON format + pattern: "*.{json}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/pycoqc/tests/main.nf.test b/modules/nf-core/pycoqc/tests/main.nf.test new file mode 100644 index 00000000..afc15a98 --- /dev/null +++ b/modules/nf-core/pycoqc/tests/main.nf.test @@ -0,0 +1,43 @@ +nextflow_process { + + name "Test Process PYCOQC" + script "../main.nf" + config "./nextflow.config" + process "PYCOQC" + tag "modules" + tag "modules_nfcore" + tag "pycoqc" + + test("sarscov2 nanopore [sequencing_summary.txt]") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.test_data['sarscov2']['nanopore']['test_sequencing_summary'], checkIfExists: true) + ] + + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.versions).match("versions") }, + { assert process.out.html.get(0).get(1) ==~ ".*/test.html" }, + { + with (process.out.json.get(0)) { + assert snapshot(path(get(1)).json['All Reads']).match("all_reads") + } + } + ) + } + + } + +} diff --git a/modules/nf-core/pycoqc/tests/main.nf.test.snap b/modules/nf-core/pycoqc/tests/main.nf.test.snap new file mode 100644 index 00000000..6f76ec99 --- /dev/null +++ b/modules/nf-core/pycoqc/tests/main.nf.test.snap @@ -0,0 +1,642 @@ +{ + "versions": { + "content": [ + [ + "versions.yml:md5,b8cbbd20cd14b232853b22dad9097029" + ] + ], + "timestamp": "2023-10-26T12:27:18.329232497" + }, + "all_reads": { + "content": [ + { + "run": { + "run_duration": 0.05634208255343967, + "active_channels": 4, + "runid_number": 85, + "barcodes_number": 2 + }, + "basecall": { + "reads_number": 100, + "bases_number": 1160, + "N50": 
12, + "len_percentiles": [ + 7.0, + 7.99, + 8.0, + 8.0, + 8.96, + 9.0, + 9.0, + 9.0, + 9.0, + 9.0, + 9.0, + 9.0, + 9.0, + 9.0, + 9.860000000000001, + 10.0, + 10.0, + 10.0, + 10.0, + 10.0, + 10.0, + 10.0, + 10.0, + 10.0, + 10.759999999999998, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.0, + 11.490000000000002, + 12.0, + 12.0, + 12.0, + 12.0, + 12.0, + 12.0, + 12.0, + 12.0, + 12.0, + 12.0, + 12.0, + 12.0, + 12.0, + 12.350000000000009, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.0, + 13.129999999999995, + 14.0, + 14.0, + 14.0, + 14.0, + 14.0, + 14.0, + 14.0, + 14.0, + 14.0, + 14.030000000000001, + 15.019999999999996, + 16.010000000000005, + 17.0 + ], + "qual_score_percentiles": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "len_hist": { + "x": [ + 7.079467203414813, + 7.1598365548894245, + 7.241118296021455, + 7.323322784675837, + 7.406460496304719, + 7.490542025282382, + 7.575578086255286, + 7.661579515507467, + 7.748557272341409, + 7.836522440474623, + 7.925486229452044, + 8.0154599760745, + 8.106455145843357, + 8.198483334421615, + 
8.291556269111537, + 8.38568581034909, + 8.480883953215336, + 8.577162828964994, + 8.674534706572327, + 8.773011994294619, + 8.872607241253366, + 8.973333139033429, + 9.075202523300351, + 9.178228375436028, + 9.282423824192943, + 9.38780214736718, + 9.494376773490446, + 9.60216128354128, + 9.711169412675714, + 9.821415051977546, + 9.93291225022852, + 10.045675215698578, + 10.159718317956445, + 10.275056089700772, + 10.39170322861204, + 10.509674599225537, + 10.628985234825558, + 10.74965033936112, + 10.87168528938343, + 10.995105636005325, + 11.11992710688301, + 11.24616560822024, + 11.373837226795256, + 11.502958232010789, + 11.633545077967261, + 11.765614405559578, + 11.899183044597708, + 12.0342680159513, + 12.170886533718726, + 12.309056007420674, + 12.448794044218685, + 12.590118451158833, + 12.733047237440953, + 12.877598616713547, + 13.023791009394788, + 13.171643045019879, + 13.321173564615009, + 13.472401623098337, + 13.625346491708175, + 13.780027660458732, + 13.936464840623792, + 14.094677967248542, + 14.254687201689917, + 14.416512934185814, + 14.580175786453418, + 14.74569661431711, + 14.913096510366126, + 15.082396806642413, + 15.253619077359039, + 15.426785141649408, + 15.601917066347703, + 15.779037168800926, + 15.958168019712783, + 16.139332446019967, + 16.322553533800992, + 16.507854631218105, + 16.69525935149256, + 16.88479157591372, + 17.076475456882275, + 17.270335420988005, + 17.466396172122547, + 17.66468269462737, + 17.86522025647767, + 18.068034412502243, + 18.27315100763998, + 18.48059618023337, + 18.69039636535932, + 18.902578298197817, + 19.117169017438876, + 19.33419586872804, + 19.553686508151195, + 19.77566890575874, + 20.00017134912987, + 20.22722244697736, + 20.456851132793165, + 20.689086668535484, + 20.923958648357658, + 21.16149700237941, + 21.401732000500846 + ], + "y": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 1.0, + 
1.0, + 1.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 0.0, + 0.0, + 0.0, + 1.0, + 3.0, + 4.0, + 5.0, + 4.0, + 3.0, + 2.0, + 1.0, + 1.0, + 2.0, + 2.0, + 2.0, + 1.0, + 1.0, + 1.0, + 2.0, + 3.0, + 4.0, + 3.0, + 2.0, + 1.0, + 1.0, + 1.0, + 1.0, + 2.0, + 1.0, + 1.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ] + }, + "qual_score_hist": { + "x": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "y": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 1.0, + 3.0, + 9.0, + 18.0, + 29.0, + 37.0 + ] + } + } + } + ], 
+ "timestamp": "2023-10-26T12:19:31.009207323" + } +} \ No newline at end of file diff --git a/modules/nf-core/pycoqc/tests/nextflow.config b/modules/nf-core/pycoqc/tests/nextflow.config new file mode 100644 index 00000000..d532f8f7 --- /dev/null +++ b/modules/nf-core/pycoqc/tests/nextflow.config @@ -0,0 +1,9 @@ +process { + + publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" } + + withName: PYCOQC { + ext.args = '--min_pass_qual 0' + } + +} diff --git a/modules/nf-core/pycoqc/tests/tags.yml b/modules/nf-core/pycoqc/tests/tags.yml new file mode 100644 index 00000000..ab301426 --- /dev/null +++ b/modules/nf-core/pycoqc/tests/tags.yml @@ -0,0 +1,2 @@ +pycoqc: + - modules/nf-core/pycoqc/** diff --git a/modules/nf-core/quast/environment.yml b/modules/nf-core/quast/environment.yml new file mode 100644 index 00000000..0f9e3079 --- /dev/null +++ b/modules/nf-core/quast/environment.yml @@ -0,0 +1,7 @@ +name: quast +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::quast=5.2.0 diff --git a/modules/nf-core/quast/main.nf b/modules/nf-core/quast/main.nf new file mode 100644 index 00000000..d8f36284 --- /dev/null +++ b/modules/nf-core/quast/main.nf @@ -0,0 +1,136 @@ +process QUAST { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/quast:5.2.0--py39pl5321h2add14b_1' : + 'biocontainers/quast:5.2.0--py39pl5321h2add14b_1' }" + + input: + tuple val(meta) , path(consensus) + tuple val(meta2), path(fasta) + tuple val(meta3), path(gff) + + output: + tuple val(meta), path("${prefix}") , emit: results + tuple val(meta), path("${prefix}.tsv") , emit: tsv + tuple val(meta), path("${prefix}_transcriptome.tsv") , optional: true , emit: transcriptome + tuple val(meta), path("${prefix}_misassemblies.tsv") , optional: true , emit: misassemblies + tuple val(meta), path("${prefix}_unaligned.tsv") , optional: true , emit: unaligned + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def features = gff ? "--features $gff" : '' + def reference = fasta ? "-r $fasta" : '' + """ + quast.py \\ + --output-dir $prefix \\ + $reference \\ + $features \\ + --threads $task.cpus \\ + $args \\ + ${consensus.join(' ')} + + ln -s ${prefix}/report.tsv ${prefix}.tsv + [ -f ${prefix}/contigs_reports/all_alignments_transcriptome.tsv ] && ln -s ${prefix}/contigs_reports/all_alignments_transcriptome.tsv ${prefix}_transcriptome.tsv + [ -f ${prefix}/contigs_reports/misassemblies_report.tsv ] && ln -s ${prefix}/contigs_reports/misassemblies_report.tsv ${prefix}_misassemblies.tsv + [ -f ${prefix}/contigs_reports/unaligned_report.tsv ] && ln -s ${prefix}/contigs_reports/unaligned_report.tsv ${prefix}_unaligned.tsv + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def features = gff ? "--features $gff" : '' + def reference = fasta ? 
"-r $fasta" : '' + + """ + mkdir -p $prefix + touch $prefix/report.tsv + touch $prefix/report.html + touch $prefix/report.pdf + touch $prefix/quast.log + touch $prefix/transposed_report.txt + touch $prefix/transposed_report.tex + touch $prefix/icarus.html + touch $prefix/report.tex + touch $prefix/report.txt + + mkdir -p $prefix/basic_stats + touch $prefix/basic_stats/cumulative_plot.pdf + touch $prefix/basic_stats/Nx_plot.pdf + touch $prefix/basic_stats/genome_GC_content_plot.pdf + touch $prefix/basic_stats/GC_content_plot.pdf + + mkdir -p $prefix/icarus_viewers + touch $prefix/icarus_viewers/contig_size_viewer.html + + ln -s $prefix/report.tsv ${prefix}.tsv + + if [ $fasta ]; then + touch $prefix/basic_stats/NGx_plot.pdf + touch $prefix/basic_stats/gc.icarus.txt + + mkdir -p $prefix/aligned_stats + touch $prefix/aligned_stats/NAx_plot.pdf + touch $prefix/aligned_stats/NGAx_plot.pdf + touch $prefix/aligned_stats/cumulative_plot.pdf + + mkdir -p $prefix/contigs_reports + touch $prefix/contigs_reports/all_alignments_transcriptome.tsv + touch $prefix/contigs_reports/contigs_report_transcriptome.mis_contigs.info + touch $prefix/contigs_reports/contigs_report_transcriptome.stderr + touch $prefix/contigs_reports/contigs_report_transcriptome.stdout + touch $prefix/contigs_reports/contigs_report_transcriptome.unaligned.info + mkdir -p $prefix/contigs_reports/minimap_output + touch $prefix/contigs_reports/minimap_output/transcriptome.coords + touch $prefix/contigs_reports/minimap_output/transcriptome.coords.filtered + touch $prefix/contigs_reports/minimap_output/transcriptome.coords_tmp + touch $prefix/contigs_reports/minimap_output/transcriptome.sf + touch $prefix/contigs_reports/minimap_output/transcriptome.unaligned + touch $prefix/contigs_reports/minimap_output/transcriptome.used_snps + touch $prefix/contigs_reports/misassemblies_frcurve_plot.pdf + touch $prefix/contigs_reports/misassemblies_plot.pdf + touch $prefix/contigs_reports/misassemblies_report.tex + touch 
$prefix/contigs_reports/misassemblies_report.tsv + touch $prefix/contigs_reports/misassemblies_report.txt + touch $prefix/contigs_reports/transcriptome.mis_contigs.fa + touch $prefix/contigs_reports/transposed_report_misassemblies.tex + touch $prefix/contigs_reports/transposed_report_misassemblies.tsv + touch $prefix/contigs_reports/transposed_report_misassemblies.txt + touch $prefix/contigs_reports/unaligned_report.tex + touch $prefix/contigs_reports/unaligned_report.tsv + touch $prefix/contigs_reports/unaligned_report.txt + + mkdir -p $prefix/genome_stats + touch $prefix/genome_stats/genome_info.txt + touch $prefix/genome_stats/transcriptome_gaps.txt + touch $prefix/icarus_viewers/alignment_viewer.html + + ln -sf ${prefix}/contigs_reports/misassemblies_report.tsv ${prefix}_misassemblies.tsv + ln -sf ${prefix}/contigs_reports/unaligned_report.tsv ${prefix}_unaligned.tsv + ln -sf ${prefix}/contigs_reports/all_alignments_transcriptome.tsv ${prefix}_transcriptome.tsv + + fi + + if ([ $fasta ] && [ $gff ]); then + touch $prefix/genome_stats/features_cumulative_plot.pdf + touch $prefix/genome_stats/features_frcurve_plot.pdf + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + quast: \$(quast.py --version 2>&1 | sed 's/^.*QUAST v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/quast/meta.yml b/modules/nf-core/quast/meta.yml new file mode 100644 index 00000000..5850ff98 --- /dev/null +++ b/modules/nf-core/quast/meta.yml @@ -0,0 +1,61 @@ +name: quast +description: Quality Assessment Tool for Genome Assemblies +keywords: + - quast + - assembly + - quality + - contig + - scaffold +tools: + - quast: + description: | + QUAST calculates quality metrics for genome assemblies + homepage: http://bioinf.spbau.ru/quast + doi: 10.1093/bioinformatics/btt086 + licence: ["GPL-2.0-only"] +input: + - consensus: + type: file + description: | + Fasta file containing the assembly of interest + - fasta: + type: file + description: | + The genome assembly to 
be evaluated. Has to contain at least a non-empty string dummy value. + - gff: + type: file + description: The genome GFF file. Has to contain at least a non-empty string dummy value. +output: + - quast: + type: directory + description: Directory containing complete quast report + pattern: "{prefix}/" + - report: + type: file + description: tab-separated version of the summary, suitable for spreadsheets and mqc + pattern: "${prefix}.tsv" + - misassemblies: + type: file + description: | + Report containing misassemblies, only when a reference fasta is provided + pattern: "${prefix}_misassemblies.tsv" + - transcriptome: + type: file + description: | + Report containing all the alignments of transcriptome to the assembly, only when a reference fasta is provided + pattern: "${prefix}_transcriptome.tsv" + - unaligned: + type: file + description: | + Report containing unaligned contigs, only when a reference fasta is provided + pattern: "${prefix}_unaligned.tsv" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@kevinmenden" +maintainers: + - "@drpatelh" + - "@kevinmenden" diff --git a/modules/nf-core/samtools/flagstat/environment.yml b/modules/nf-core/samtools/flagstat/environment.yml new file mode 100644 index 00000000..bd57cb54 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/environment.yml @@ -0,0 +1,8 @@ +name: samtools_flagstat +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/flagstat/main.nf b/modules/nf-core/samtools/flagstat/main.nf new file mode 100644 index 00000000..eb5f5252 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/main.nf @@ -0,0 +1,46 @@ +process SAMTOOLS_FLAGSTAT { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.flagstat"), emit: flagstat + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + """ + samtools \\ + flagstat \\ + --threads ${task.cpus} \\ + $bam \\ + > ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.flagstat + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/flagstat/meta.yml b/modules/nf-core/samtools/flagstat/meta.yml new file mode 100644 index 00000000..97991358 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/meta.yml @@ -0,0 +1,51 @@ +name: samtools_flagstat +description: Counts the number of alignments in a BAM/CRAM/SAM file for each FLAG type +keywords: + - stats + - mapping + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test b/modules/nf-core/samtools/flagstat/tests/main.nf.test new file mode 100644 index 00000000..24c3c04b --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_FLAGSTAT" + script "../main.nf" + process "SAMTOOLS_FLAGSTAT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/flagstat" + + test("BAM") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.flagstat).match("flagstat") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap new file mode 100644 index 00000000..a76fc27e --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "flagstat": { + "content": [ + [ 
+ [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:31:37.783927" + }, + "versions": { + "content": [ + [ + "versions.yml:md5,fd0030ce49ab3a92091ad80260226452" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:44.299617452" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/flagstat/tests/tags.yml b/modules/nf-core/samtools/flagstat/tests/tags.yml new file mode 100644 index 00000000..2d2b7255 --- /dev/null +++ b/modules/nf-core/samtools/flagstat/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/flagstat: + - modules/nf-core/samtools/flagstat/** diff --git a/modules/nf-core/samtools/idxstats/environment.yml b/modules/nf-core/samtools/idxstats/environment.yml new file mode 100644 index 00000000..174973b8 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_idxstats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/idxstats/main.nf b/modules/nf-core/samtools/idxstats/main.nf new file mode 100644 index 00000000..a544026f --- /dev/null +++ b/modules/nf-core/samtools/idxstats/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_IDXSTATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(bam), path(bai) + + output: + tuple val(meta), path("*.idxstats"), emit: idxstats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + samtools \\ + idxstats \\ + --threads ${task.cpus-1} \\ + $bam \\ + > ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + touch ${prefix}.idxstats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/idxstats/meta.yml b/modules/nf-core/samtools/idxstats/meta.yml new file mode 100644 index 00000000..344e92a3 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/meta.yml @@ -0,0 +1,52 @@ +name: samtools_idxstats +description: Reports alignment summary statistics for a BAM/CRAM/SAM file +keywords: + - stats + - mapping + - counts + - chromosome + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: Index for BAM/CRAM/SAM file + pattern: "*.{bai,crai,sai}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test b/modules/nf-core/samtools/idxstats/tests/main.nf.test new file mode 100644 index 00000000..a2dcb27c --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test @@ -0,0 +1,36 @@ +nextflow_process { + + name "Test Process SAMTOOLS_IDXSTATS" + script "../main.nf" + process "SAMTOOLS_IDXSTATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/idxstats" + + test("bam") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.idxstats).match("idxstats") }, + { assert snapshot(process.out.versions).match("versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap new file mode 100644 index 00000000..a7050bdc --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/main.nf.test.snap @@ -0,0 +1,32 @@ +{ + "versions": { + "content": [ + [ 
+ "versions.yml:md5,613dde56f108418039ffcdeeddba397a" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:16:50.147462763" + }, + "idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:36:41.561026" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/idxstats/tests/tags.yml b/modules/nf-core/samtools/idxstats/tests/tags.yml new file mode 100644 index 00000000..d3057c61 --- /dev/null +++ b/modules/nf-core/samtools/idxstats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/idxstats: + - modules/nf-core/samtools/idxstats/** diff --git a/modules/nf-core/samtools/index/environment.yml b/modules/nf-core/samtools/index/environment.yml new file mode 100644 index 00000000..a5e50649 --- /dev/null +++ b/modules/nf-core/samtools/index/environment.yml @@ -0,0 +1,8 @@ +name: samtools_index +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/index/main.nf b/modules/nf-core/samtools/index/main.nf new file mode 100644 index 00000000..dc14f98d --- /dev/null +++ b/modules/nf-core/samtools/index/main.nf @@ -0,0 +1,48 @@ +process SAMTOOLS_INDEX { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("*.bai") , optional:true, emit: bai + tuple val(meta), path("*.csi") , optional:true, emit: csi + tuple val(meta), path("*.crai"), optional:true, emit: crai + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + samtools \\ + index \\ + -@ ${task.cpus-1} \\ + $args \\ + $input + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${input}.bai + touch ${input}.crai + touch ${input}.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/index/meta.yml b/modules/nf-core/samtools/index/meta.yml new file mode 100644 index 00000000..01a4ee03 --- /dev/null +++ b/modules/nf-core/samtools/index/meta.yml @@ -0,0 +1,57 @@ +name: samtools_index +description: Index SAM/BAM/CRAM file +keywords: + - index + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - csi: + type: file + description: CSI index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@maxulysse" +maintainers: + - "@drpatelh" + - "@ewels" + - "@maxulysse" diff --git a/modules/nf-core/samtools/index/tests/csi.nextflow.config b/modules/nf-core/samtools/index/tests/csi.nextflow.config new file mode 100644 index 00000000..0ed260ef --- /dev/null +++ b/modules/nf-core/samtools/index/tests/csi.nextflow.config @@ -0,0 +1,7 @@ +process { + + withName: SAMTOOLS_INDEX { + ext.args = '-c' + } + +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test b/modules/nf-core/samtools/index/tests/main.nf.test new file mode 100644 index 00000000..bb7756d1 --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test @@ -0,0 +1,87 @@ +nextflow_process { + + name "Test Process SAMTOOLS_INDEX" + script "../main.nf" + process "SAMTOOLS_INDEX" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/index" + + test("bai") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.bai).match("bai") }, + { assert snapshot(process.out.versions).match("bai_versions") } + ) + } + } + + test("crai") 
{ + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.crai).match("crai") }, + { assert snapshot(process.out.versions).match("crai_versions") } + ) + } + } + + test("csi") { + + config "./csi.nextflow.config" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert path(process.out.csi.get(0).get(1)).exists() }, + { assert snapshot(process.out.versions).match("csi_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/index/tests/main.nf.test.snap b/modules/nf-core/samtools/index/tests/main.nf.test.snap new file mode 100644 index 00000000..3dc8e7de --- /dev/null +++ b/modules/nf-core/samtools/index/tests/main.nf.test.snap @@ -0,0 +1,74 @@ +{ + "crai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:00.324667957" + }, + "csi_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:07.885103162" + }, + "crai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.recalibrated.sorted.cram.crai:md5,14bc3bd5c89cacc8f4541f9062429029" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": 
"2024-02-12T18:41:38.446424" + }, + "bai": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.paired_end.sorted.bam.bai:md5,704c10dd1326482448ca3073fdebc2f4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T18:40:46.579747" + }, + "bai_versions": { + "content": [ + [ + "versions.yml:md5,cc4370091670b64bba7c7206403ffb3e" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:11:51.641425452" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/index/tests/tags.yml b/modules/nf-core/samtools/index/tests/tags.yml new file mode 100644 index 00000000..e0f58a7a --- /dev/null +++ b/modules/nf-core/samtools/index/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/index: + - modules/nf-core/samtools/index/** diff --git a/modules/nf-core/samtools/sort/environment.yml b/modules/nf-core/samtools/sort/environment.yml new file mode 100644 index 00000000..4d898e48 --- /dev/null +++ b/modules/nf-core/samtools/sort/environment.yml @@ -0,0 +1,8 @@ +name: samtools_sort +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/sort/main.nf b/modules/nf-core/samtools/sort/main.nf new file mode 100644 index 00000000..fc374f98 --- /dev/null +++ b/modules/nf-core/samtools/sort/main.nf @@ -0,0 +1,63 @@ +process SAMTOOLS_SORT { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta) , path(bam) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt cram") ? "cram" : + "bam" + def reference = fasta ? "--reference ${fasta}" : "" + if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + + """ + samtools cat \\ + --threads $task.cpus \\ + ${bam} \\ + | \\ + samtools sort \\ + $args \\ + -T ${prefix} \\ + --threads $task.cpus \\ + ${reference} \\ + -o ${prefix}.${extension} \\ + - + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.bam + touch ${prefix}.bam.csi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/sort/meta.yml b/modules/nf-core/samtools/sort/meta.yml new file mode 100644 index 00000000..341a7d0e --- /dev/null +++ b/modules/nf-core/samtools/sort/meta.yml @@ -0,0 +1,71 @@ +name: samtools_sort +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and 
post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file(s) + pattern: "*.{bam,cram,sam}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference genome FASTA file + pattern: "*.{fa,fasta,fna}" + optional: true +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM file + pattern: "*.{bam}" + - cram: + type: file + description: Sorted CRAM file + pattern: "*.{cram}" + - crai: + type: file + description: CRAM index file (optional) + pattern: "*.crai" + - csi: + type: file + description: BAM index file (optional) + pattern: "*.csi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" + - "@matthdsm" +maintainers: + - "@drpatelh" + - "@ewels" + - "@matthdsm" diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test b/modules/nf-core/samtools/sort/tests/main.nf.test new file mode 100644 index 00000000..8360e2b1 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test @@ -0,0 +1,96 @@ +nextflow_process { + + name "Test Process SAMTOOLS_SORT" + script "../main.nf" + process "SAMTOOLS_SORT" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/sort" + + test("bam") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], 
// meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("cram") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("bam_stub") { + + config "./nextflow.config" + options "-stub" + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'fasta' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/sort/tests/main.nf.test.snap b/modules/nf-core/samtools/sort/tests/main.nf.test.snap new file mode 100644 index 00000000..38477656 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/main.nf.test.snap @@ -0,0 +1,154 @@ +{ + "cram": 
{ + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,bc0b7c25da26384a006ed84cc9e4da23" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,8d4e836c2fed6c0bf874d5e8cdba5831" + ] + ], + "4": [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,bc0b7c25da26384a006ed84cc9e4da23" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,8d4e836c2fed6c0bf874d5e8cdba5831" + ] + ], + "versions": [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-04T15:08:00.830294" + }, + "bam_stub_bam": { + "content": [ + "test.sorted.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:21:04.364044" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:00.20800281" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,bc0b7c25da26384a006ed84cc9e4da23" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,8d4e836c2fed6c0bf874d5e8cdba5831" + ] + ], + "4": [ + "versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ], + "bam": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam:md5,bc0b7c25da26384a006ed84cc9e4da23" + ] + ], + "crai": [ + + ], + "cram": [ + + ], + "csi": [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.csi:md5,8d4e836c2fed6c0bf874d5e8cdba5831" + ] + ], + "versions": [ + 
"versions.yml:md5,e6d43fefc9a8bff91c2ce6e3a1716eca" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-04T15:07:48.773803" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/sort/tests/nextflow.config b/modules/nf-core/samtools/sort/tests/nextflow.config new file mode 100644 index 00000000..f642771f --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + + withName: SAMTOOLS_SORT { + ext.prefix = { "${meta.id}.sorted" } + ext.args = "--write-index" + } + +} diff --git a/modules/nf-core/samtools/sort/tests/tags.yml b/modules/nf-core/samtools/sort/tests/tags.yml new file mode 100644 index 00000000..cd63ea20 --- /dev/null +++ b/modules/nf-core/samtools/sort/tests/tags.yml @@ -0,0 +1,3 @@ +samtools/sort: + - modules/nf-core/samtools/sort/** + - tests/modules/nf-core/samtools/sort/** diff --git a/modules/nf-core/samtools/stats/environment.yml b/modules/nf-core/samtools/stats/environment.yml new file mode 100644 index 00000000..67bb0ca4 --- /dev/null +++ b/modules/nf-core/samtools/stats/environment.yml @@ -0,0 +1,8 @@ +name: samtools_stats +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/stats/main.nf b/modules/nf-core/samtools/stats/main.nf new file mode 100644 index 00000000..52b00f4b --- /dev/null +++ b/modules/nf-core/samtools/stats/main.nf @@ -0,0 +1,49 @@ +process SAMTOOLS_STATS { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(input_index) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.stats"), emit: stats + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + """ + samtools \\ + stats \\ + --threads ${task.cpus} \\ + ${reference} \\ + ${input} \\ + > ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + """ + touch ${prefix}.stats + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/stats/meta.yml b/modules/nf-core/samtools/stats/meta.yml new file mode 100644 index 00000000..735ff812 --- /dev/null +++ b/modules/nf-core/samtools/stats/meta.yml @@ -0,0 +1,63 @@ +name: samtools_stats +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file from alignment + pattern: "*.{bam,cram}" + - input_index: + type: file + description: BAI/CRAI file from alignment + pattern: "*.{bai,crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" +maintainers: + - "@drpatelh" + - "@FriederikeHanssen" + - "@ramprasadn" diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test b/modules/nf-core/samtools/stats/tests/main.nf.test new file mode 100644 index 00000000..e3d5cb14 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test @@ -0,0 +1,65 @@ +nextflow_process { + + name "Test Process SAMTOOLS_STATS" + script "../main.nf" + process "SAMTOOLS_STATS" + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag "samtools/stats" + + test("bam") { + + when { + params { + outdir = "$outputDir" + } + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = [[],[]] + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } + + test("cram") { + + when { + params { + outdir = "$outputDir" + } + process 
{ + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.recalibrated.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out).match()} + ) + } + } +} diff --git a/modules/nf-core/samtools/stats/tests/main.nf.test.snap b/modules/nf-core/samtools/stats/tests/main.nf.test.snap new file mode 100644 index 00000000..1b7c9ba4 --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/main.nf.test.snap @@ -0,0 +1,72 @@ +{ + "cram": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,01812900aa4027532906c5d431114233" + ] + ], + "1": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,01812900aa4027532906c5d431114233" + ] + ], + "versions": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:15:25.562429714" + }, + "bam": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,5d8681bf541199898c042bf400391d59" + ] + ], + "1": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ], + "stats": [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,5d8681bf541199898c042bf400391d59" + ] + ], + "versions": [ + "versions.yml:md5,0514ceb1769b2a88843e08c1f82624a9" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": 
"2024-02-13T16:15:07.857611509" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/stats/tests/tags.yml b/modules/nf-core/samtools/stats/tests/tags.yml new file mode 100644 index 00000000..7c28e30f --- /dev/null +++ b/modules/nf-core/samtools/stats/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/stats: + - modules/nf-core/samtools/stats/** diff --git a/modules/nf-core/samtools/view/environment.yml b/modules/nf-core/samtools/view/environment.yml new file mode 100644 index 00000000..b0676f33 --- /dev/null +++ b/modules/nf-core/samtools/view/environment.yml @@ -0,0 +1,8 @@ +name: samtools_view +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::samtools=1.19.2 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/samtools/view/main.nf b/modules/nf-core/samtools/view/main.nf new file mode 100644 index 00000000..5a8989d6 --- /dev/null +++ b/modules/nf-core/samtools/view/main.nf @@ -0,0 +1,75 @@ +process SAMTOOLS_VIEW { + tag "$meta.id" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' : + 'biocontainers/samtools:1.19.2--h50ea8bc_0' }" + + input: + tuple val(meta), path(input), path(index) + tuple val(meta2), path(fasta) + path qname + + output: + tuple val(meta), path("*.bam"), emit: bam, optional: true + tuple val(meta), path("*.cram"), emit: cram, optional: true + tuple val(meta), path("*.sam"), emit: sam, optional: true + tuple val(meta), path("*.bai"), emit: bai, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val(meta), path("*.crai"), emit: crai, optional: true + path "versions.yml", emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def reference = fasta ? "--reference ${fasta}" : "" + def readnames = qname ? "--qname-file ${qname}": "" + def file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + samtools \\ + view \\ + --threads ${task.cpus-1} \\ + ${reference} \\ + ${readnames} \\ + $args \\ + -o ${prefix}.${file_type} \\ + $input \\ + $args2 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def file_type = args.contains("--output-fmt sam") ? "sam" : + args.contains("--output-fmt bam") ? "bam" : + args.contains("--output-fmt cram") ? "cram" : + input.getExtension() + if ("$input" == "${prefix}.${file_type}") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" 
+ + def index = args.contains("--write-index") ? "touch ${prefix}.csi" : "" + + """ + touch ${prefix}.${file_type} + ${index} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/samtools/view/meta.yml b/modules/nf-core/samtools/view/meta.yml new file mode 100644 index 00000000..3dadafae --- /dev/null +++ b/modules/nf-core/samtools/view/meta.yml @@ -0,0 +1,89 @@ +name: samtools_view +description: filter/convert SAM/BAM/CRAM file +keywords: + - view + - bam + - sam + - cram +tools: + - samtools: + description: | + SAMtools is a set of utilities for interacting with and post-processing + short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li. + These files are generated as output by short read aligners like BWA. + homepage: http://www.htslib.org/ + documentation: http://www.htslib.org/doc/samtools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - index: + type: file + description: BAM.BAI/BAM.CSI/CRAM.CRAI file (optional) + pattern: "*.{.bai,.csi,.crai}" + - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'test' ] + - fasta: + type: file + description: Reference file the CRAM was created with (optional) + pattern: "*.{fasta,fa}" + - qname: + type: file + description: Optional file with read names to output only select alignments + pattern: "*.{txt,list}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: optional filtered/converted BAM file + pattern: "*.{bam}" + - cram: + type: file + description: optional filtered/converted CRAM file + pattern: "*.{cram}" + - sam: + type: file + description: optional filtered/converted SAM file + pattern: "*.{sam}" + # bai, csi, and crai are created with `--write-index` + - bai: + type: file + description: optional BAM file index + pattern: "*.{bai}" + - csi: + type: file + description: optional tabix BAM file index + pattern: "*.{csi}" + - crai: + type: file + description: optional CRAM file index + pattern: "*.{crai}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" +maintainers: + - "@drpatelh" + - "@joseespinosa" + - "@FriederikeHanssen" + - "@priyanka-surana" diff --git a/modules/nf-core/samtools/view/tests/bam.config b/modules/nf-core/samtools/view/tests/bam.config new file mode 100644 index 00000000..c10d1081 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/bam_index.config b/modules/nf-core/samtools/view/tests/bam_index.config new file mode 100644 index 00000000..771ae033 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/bam_index.config @@ -0,0 +1,3 @@ +process { + ext.args = "--output-fmt bam --write-index" +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/main.nf.test b/modules/nf-core/samtools/view/tests/main.nf.test new file mode 100644 index 00000000..45a0defb --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test @@ -0,0 +1,212 @@ +nextflow_process { + + name "Test Process SAMTOOLS_VIEW" + script "../main.nf" + process "SAMTOOLS_VIEW" + + tag "modules" + tag "modules_nfcore" + tag "samtools" + tag 
"samtools/view" + + test("bam") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_bam") }, + { assert snapshot(process.out.bai).match("bam_bai") }, + { assert snapshot(process.out.crai).match("bam_crai") }, + { assert snapshot(process.out.cram).match("bam_cram") }, + { assert snapshot(process.out.csi).match("bam_csi") }, + { assert snapshot(process.out.sam).match("bam_sam") }, + { assert snapshot(process.out.versions).match("bam_versions") } + ) + } + } + + test("cram") { + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.cram[0][1]).name).match("cram_cram") }, + { assert snapshot(process.out.bai).match("cram_bai") }, + { assert snapshot(process.out.bam).match("cram_bam") }, + { assert snapshot(process.out.crai).match("cram_crai") }, + { assert snapshot(process.out.csi).match("cram_csi") }, + { assert snapshot(process.out.sam).match("cram_sam") }, + { assert snapshot(process.out.versions).match("cram_versions") } + ) + } + } + + test("cram_to_bam") { + + config "./bam.config" + + when { + process { + """ + input[0] 
= Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_bam") }, + { assert snapshot(process.out.bai).match("cram_to_bam_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_cram") }, + { assert snapshot(process.out.csi).match("cram_to_bam_csi") }, + { assert snapshot(process.out.sam).match("cram_to_bam_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_versions") } + ) + } + } + + test("cram_to_bam_index") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_sam") }, + { assert 
snapshot(process.out.versions).match("cram_to_bam_index_versions") } + ) + } + } + + test("cram_to_bam_index_qname") { + + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + [] + ]) + input[1] = Channel.of([ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of("testN:2817", "testN:2814").collectFile(name: "readnames.list", newLine: true) + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("cram_to_bam_index_qname_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("cram_to_bam_index_qname_csi") }, + { assert snapshot(process.out.bai).match("cram_to_bam_index_qname_bai") }, + { assert snapshot(process.out.crai).match("cram_to_bam_index_qname_crai") }, + { assert snapshot(process.out.cram).match("cram_to_bam_index_qname_cram") }, + { assert snapshot(process.out.sam).match("cram_to_bam_index_qname_sam") }, + { assert snapshot(process.out.versions).match("cram_to_bam_index_qname_versions") } + ) + } + } + + test("bam_stub") { + + options "-stub" + config "./bam_index.config" + + when { + process { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true), + [] + ]) + input[1] = [[],[]] + input[2] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(file(process.out.bam[0][1]).name).match("bam_stub_bam") }, + { assert snapshot(file(process.out.csi[0][1]).name).match("bam_stub_csi") }, + { assert snapshot(process.out.bai).match("bam_stub_bai") }, + { assert 
snapshot(process.out.crai).match("bam_stub_crai") }, + { assert snapshot(process.out.cram).match("bam_stub_cram") }, + { assert snapshot(process.out.sam).match("bam_stub_sam") }, + { assert snapshot(process.out.versions).match("bam_stub_versions") } + ) + } + } +} diff --git a/modules/nf-core/samtools/view/tests/main.nf.test.snap b/modules/nf-core/samtools/view/tests/main.nf.test.snap new file mode 100644 index 00000000..f55943a7 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/main.nf.test.snap @@ -0,0 +1,488 @@ +{ + "bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.256068" + }, + "cram_to_bam_index_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.958617" + }, + "bam_stub_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.065301" + }, + "bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.258578" + }, + "bam_stub_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.071284" + }, + "bam_stub_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:13:09.713353823" + }, + "cram_to_bam_index_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.972288" + }, + "cram_to_bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.999247" + }, + "cram_to_bam_index_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": 
"0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.976457" + }, + "cram_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.497581" + }, + "cram_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.50038" + }, + "cram_to_bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.992239" + }, + "cram_to_bam_index_qname_csi": { + "content": [ + "test.bam.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.325496" + }, + "bam_stub_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.079529" + }, + "cram_cram": { + "content": [ + "test.cram" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.490286" + }, + "bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.262882" + }, + "cram_to_bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.989247" + }, + "cram_to_bam_index_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.967681" + }, + "cram_to_bam_index_qname_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:13:03.935041046" + }, + "cram_to_bam_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.982361" + }, + 
"cram_to_bam_index_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.95456" + }, + "cram_to_bam_index_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:55.910685496" + }, + "cram_to_bam_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.98601" + }, + "cram_to_bam_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:47.715221169" + }, + "cram_bam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.495512" + }, + "bam_stub_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.076908" + }, + "cram_to_bam_index_qname_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.328458" + }, + "cram_to_bam_index_qname_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.330789" + }, + "cram_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.493129" + }, + "bam_stub_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.074313" + }, + "cram_to_bam_index_qname_bam": { + "content": [ + "test.bam" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.322874" + }, + "bam_versions": { + 
"content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:31.692607421" + }, + "cram_to_bam_index_qname_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.333248" + }, + "bam_crai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.259774" + }, + "bam_cram": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.261287" + }, + "cram_to_bam_csi": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:04.995454" + }, + "cram_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:56.502625" + }, + "cram_versions": { + "content": [ + [ + "versions.yml:md5,4ea32c57d546102a1b32d9693ada7cf1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:12:39.913411036" + }, + "bam_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:37:51.264651" + }, + "cram_to_bam_index_bai": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:12.962863" + }, + "cram_to_bam_index_qname_sam": { + "content": [ + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:23.337634" + }, + "bam_stub_csi": { + "content": [ + "test.csi" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.04.3" + }, + "timestamp": "2024-02-12T19:38:32.068596" + } +} \ No newline at end of file diff --git a/modules/nf-core/samtools/view/tests/tags.yml 
b/modules/nf-core/samtools/view/tests/tags.yml new file mode 100644 index 00000000..4fdf1dd1 --- /dev/null +++ b/modules/nf-core/samtools/view/tests/tags.yml @@ -0,0 +1,2 @@ +samtools/view: + - "modules/nf-core/samtools/view/**" diff --git a/modules/nf-core/spades/environment.yml b/modules/nf-core/spades/environment.yml new file mode 100644 index 00000000..12315814 --- /dev/null +++ b/modules/nf-core/spades/environment.yml @@ -0,0 +1,7 @@ +name: spades +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::spades=3.15.5 diff --git a/modules/nf-core/spades/main.nf b/modules/nf-core/spades/main.nf new file mode 100644 index 00000000..010525e9 --- /dev/null +++ b/modules/nf-core/spades/main.nf @@ -0,0 +1,73 @@ +process SPADES { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/spades:3.15.5--h95f258a_1' : + 'biocontainers/spades:3.15.5--h95f258a_1' }" + + input: + tuple val(meta), path(illumina), path(pacbio), path(nanopore) + path yml + path hmm + + output: + tuple val(meta), path('*.scaffolds.fa.gz') , optional:true, emit: scaffolds + tuple val(meta), path('*.contigs.fa.gz') , optional:true, emit: contigs + tuple val(meta), path('*.transcripts.fa.gz') , optional:true, emit: transcripts + tuple val(meta), path('*.gene_clusters.fa.gz'), optional:true, emit: gene_clusters + tuple val(meta), path('*.assembly.gfa.gz') , optional:true, emit: gfa + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def maxmem = task.memory.toGiga() + def illumina_reads = illumina ? ( meta.single_end ? "-s $illumina" : "-1 ${illumina[0]} -2 ${illumina[1]}" ) : "" + def pacbio_reads = pacbio ? 
"--pacbio $pacbio" : "" + def nanopore_reads = nanopore ? "--nanopore $nanopore" : "" + def custom_hmms = hmm ? "--custom-hmms $hmm" : "" + def reads = yml ? "--dataset $yml" : "$illumina_reads $pacbio_reads $nanopore_reads" + """ + spades.py \\ + $args \\ + --threads $task.cpus \\ + --memory $maxmem \\ + $custom_hmms \\ + $reads \\ + -o ./ + mv spades.log ${prefix}.spades.log + + if [ -f scaffolds.fasta ]; then + mv scaffolds.fasta ${prefix}.scaffolds.fa + gzip -n ${prefix}.scaffolds.fa + fi + if [ -f contigs.fasta ]; then + mv contigs.fasta ${prefix}.contigs.fa + gzip -n ${prefix}.contigs.fa + fi + if [ -f transcripts.fasta ]; then + mv transcripts.fasta ${prefix}.transcripts.fa + gzip -n ${prefix}.transcripts.fa + fi + if [ -f assembly_graph_with_scaffolds.gfa ]; then + mv assembly_graph_with_scaffolds.gfa ${prefix}.assembly.gfa + gzip -n ${prefix}.assembly.gfa + fi + + if [ -f gene_clusters.fasta ]; then + mv gene_clusters.fasta ${prefix}.gene_clusters.fa + gzip -n ${prefix}.gene_clusters.fa + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + spades: \$(spades.py --version 2>&1 | sed 's/^.*SPAdes genome assembler v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/spades/meta.yml b/modules/nf-core/spades/meta.yml new file mode 100644 index 00000000..99c482cd --- /dev/null +++ b/modules/nf-core/spades/meta.yml @@ -0,0 +1,94 @@ +name: spades +description: Assembles a small genome (bacterial, fungal, viral) +keywords: + - genome + - assembly + - genome assembler + - small genome + - de novo assembler +tools: + - spades: + description: SPAdes (St. Petersburg genome assembler) is intended for both standard isolates and single-cell MDA bacteria assemblies. 
+ homepage: http://cab.spbu.ru/files/release3.15.0/manual.html + documentation: http://cab.spbu.ru/files/release3.15.0/manual.html + tool_dev_url: https://github.com/ablab/spades + doi: 10.1089/cmb.2012.0021 + licence: ["GPL v2"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - illumina: + type: file + description: | + List of input FastQ (Illumina or PacBio CCS reads) files + of size 1 and 2 for single-end and paired-end data, + respectively. This input data type is required. + - pacbio: + type: file + description: | + List of input PacBio CLR FastQ files of size 1. + - nanopore: + type: file + description: | + List of input FastQ files of size 1, originating from Oxford Nanopore technology. + - yml: + type: file + description: | + Path to yml file containing read information. + The raw FASTQ files listed in this YAML file MUST be supplied to the respective illumina/pacbio/nanopore input channel(s) _in addition_ to this YML. + File entries in this yml must contain only the file name and no paths. + pattern: "*.{yml,yaml}" + - hmm: + type: file + description: File or directory with amino acid HMMs for Spades HMM-guided mode. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - scaffolds: + type: file + description: | + Fasta file containing scaffolds + pattern: "*.fa.gz" + - contigs: + type: file + description: | + Fasta file containing contigs + pattern: "*.fa.gz" + - transcripts: + type: file + description: | + Fasta file containing transcripts + pattern: "*.fa.gz" + - gene_clusters: + type: file + description: | + Fasta file containing gene_clusters + pattern: "*.fa.gz" + - gfa: + type: file + description: | + gfa file containing assembly + pattern: "*.gfa.gz" + - log: + type: file + description: | + Spades log file + pattern: "*.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@JoseEspinosa" + - "@drpatelh" + - "@d4straub" +maintainers: + - "@JoseEspinosa" + - "@drpatelh" + - "@d4straub" diff --git a/modules/nf-core/tabix/bgzip/environment.yml b/modules/nf-core/tabix/bgzip/environment.yml new file mode 100644 index 00000000..361c078b --- /dev/null +++ b/modules/nf-core/tabix/bgzip/environment.yml @@ -0,0 +1,8 @@ +name: tabix_bgzip +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/tabix/bgzip/main.nf b/modules/nf-core/tabix/bgzip/main.nf new file mode 100644 index 00000000..3065dab0 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/main.nf @@ -0,0 +1,55 @@ +process TABIX_BGZIP { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' : + 'biocontainers/htslib:1.19.1--h81da01d_1' }" + + input: + tuple val(meta), path(input) + + output: + tuple val(meta), path("${output}") , emit: output + tuple val(meta), path("${output}.gzi"), emit: gzi, optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + extension = in_bgzip ? input.getBaseName().tokenize(".")[-1] : input.getExtension() + output = in_bgzip ? "${prefix}.${extension}" : "${prefix}.${extension}.gz" + command = in_bgzip ? '-d' : '' + // Name the index according to $prefix, unless a name has been requested + if ((args.matches("(^| )-i\\b") || args.matches("(^| )--index(\$| )")) && !args.matches("(^| )-I\\b") && !args.matches("(^| )--index-name\\b")) { + args = args + " -I ${output}.gzi" + } + """ + bgzip $command -c $args -@${task.cpus} $input > ${output} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + in_bgzip = ["gz", "bgz", "bgzf"].contains(input.getExtension()) + output = in_bgzip ? 
input.getBaseName() : "${prefix}.${input.getExtension()}.gz" + + """ + echo "" | gzip > ${output} + touch ${output}.gzi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/bgzip/meta.yml b/modules/nf-core/tabix/bgzip/meta.yml new file mode 100644 index 00000000..621d49ea --- /dev/null +++ b/modules/nf-core/tabix/bgzip/meta.yml @@ -0,0 +1,52 @@ +name: tabix_bgzip +description: Compresses/decompresses files +keywords: + - compress + - decompress + - bgzip + - tabix +tools: + - bgzip: + description: | + Bgzip compresses or decompresses files in a similar manner to, and compatible with, gzip. + homepage: https://www.htslib.org/doc/tabix.html + documentation: http://www.htslib.org/doc/bgzip.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: file to compress or to decompress +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - output: + type: file + description: Output compressed/decompressed file + pattern: "*." 
+ - gzi: + type: file + description: Optional gzip index file for compressed inputs + pattern: "*.gzi" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" + - "@nvnieuwk" diff --git a/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config new file mode 100644 index 00000000..6b6ff55f --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/bgzip_compress.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = ' -i' + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test b/modules/nf-core/tabix/bgzip/tests/main.nf.test new file mode 100644 index 00000000..95fd4c50 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test @@ -0,0 +1,111 @@ +nextflow_process { + + name "Test Process TABIX_BGZIP" + script "modules/nf-core/tabix/bgzip/main.nf" + process "TABIX_BGZIP" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/bgzip" + + test("sarscov2_vcf_bgzip_compress") { + when { + process { + """ + input[0] = [ + [ id:'bgzip_test' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bgzip_test") + } + ) + } + } + + test("homo_genome_bedgz_compress") { + when { + process { + """ + input[0] = [ + [ id:'bedgz_test' ], + [ file(params.test_data['homo_sapiens']['genome']['genome_bed_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("bedgz_test") + } + ) + } + } + + 
test("sarscov2_vcf_bgzip_compress_stub") { + options '-stub' + config "./bgzip_compress.config" + + when { + process { + """ + input[0] = [ + [ id:"test_stub" ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.output[0][1]).name + ).match("test_stub") + } + ) + } + } + + test("sarscov2_vcf_bgzip_compress_gzi") { + config "./bgzip_compress.config" + when { + process { + """ + input[0] = [ + [ id:"gzi_compress_test" ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.gzi[0][1]).name + ).match("gzi_compress_test") + } + ) + } + } +} diff --git a/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap new file mode 100644 index 00000000..53d59932 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/main.nf.test.snap @@ -0,0 +1,186 @@ +{ + "gzi_compress_test": { + "content": [ + "gzi_compress_test.vcf.gz.gzi" + ], + "timestamp": "2024-02-19T14:52:29.328146" + }, + "homo_genome_bedgz_compress": { + "content": [ + { + "0": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bedgz_test" + }, + "bedgz_test.bed:md5,87a15eb9c2ff20ccd5cd8735a28708f7" + ] + ], + "versions": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ] + } + ], + "timestamp": "2024-02-19T14:52:12.422209" + }, + "test_stub": { + "content": [ + "test_stub.vcf.gz" + ], + "timestamp": "2024-02-19T14:52:20.811489" + }, + "sarscov2_vcf_bgzip_compress": { + "content": [ + { + "0": [ + [ + { + 
"id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ], + "gzi": [ + + ], + "output": [ + [ + { + "id": "bgzip_test" + }, + "bgzip_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ] + } + ], + "timestamp": "2024-02-19T14:52:03.706028" + }, + "sarscov2_vcf_bgzip_compress_gzi": { + "content": [ + { + "0": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "1": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "2": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ], + "gzi": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz.gzi:md5,26fd00d4e26141cd11561f6e7d4a2ad0" + ] + ], + "output": [ + [ + { + "id": "gzi_compress_test" + }, + "gzi_compress_test.vcf.gz:md5,8e722884ffb75155212a3fc053918766" + ] + ], + "versions": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ] + } + ], + "timestamp": "2024-02-19T14:52:29.271494" + }, + "bgzip_test": { + "content": [ + "bgzip_test.vcf.gz" + ], + "timestamp": "2024-02-19T14:52:03.768295" + }, + "bedgz_test": { + "content": [ + "bedgz_test.bed" + ], + "timestamp": "2024-02-19T14:52:12.453855" + }, + "sarscov2_vcf_bgzip_compress_stub": { + "content": [ + { + "0": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ], + "gzi": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz.gzi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "output": [ + [ + { + "id": "test_stub" + }, + "test_stub.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" 
+ ] + ], + "versions": [ + "versions.yml:md5,e023292de6ee109a44fc67475d658174" + ] + } + ], + "timestamp": "2024-02-19T14:52:20.769619" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/bgzip/tests/tags.yml b/modules/nf-core/tabix/bgzip/tests/tags.yml new file mode 100644 index 00000000..de0eec86 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/bgzip: + - "modules/nf-core/tabix/bgzip/**" diff --git a/modules/nf-core/tabix/bgzip/tests/vcf_none.config b/modules/nf-core/tabix/bgzip/tests/vcf_none.config new file mode 100644 index 00000000..f3a3c467 --- /dev/null +++ b/modules/nf-core/tabix/bgzip/tests/vcf_none.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_BGZIP { + ext.args = '' + } +} diff --git a/modules/nf-core/tabix/tabix/environment.yml b/modules/nf-core/tabix/tabix/environment.yml new file mode 100644 index 00000000..76b45e16 --- /dev/null +++ b/modules/nf-core/tabix/tabix/environment.yml @@ -0,0 +1,8 @@ +name: tabix_tabix +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::tabix=1.11 + - bioconda::htslib=1.19.1 diff --git a/modules/nf-core/tabix/tabix/main.nf b/modules/nf-core/tabix/tabix/main.nf new file mode 100644 index 00000000..1737141d --- /dev/null +++ b/modules/nf-core/tabix/tabix/main.nf @@ -0,0 +1,42 @@ +process TABIX_TABIX { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://depot.galaxyproject.org/singularity/htslib:1.19.1--h81da01d_1' : + 'biocontainers/htslib:1.19.1--h81da01d_1' }" + + input: + tuple val(meta), path(tab) + + output: + tuple val(meta), path("*.tbi"), optional:true, emit: tbi + tuple val(meta), path("*.csi"), optional:true, emit: csi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + """ + tabix $args $tab + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ + + stub: + """ + touch ${tab}.tbi + touch ${tab}.csi + cat <<-END_VERSIONS > versions.yml + + "${task.process}": + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/tabix/tabix/meta.yml b/modules/nf-core/tabix/tabix/meta.yml new file mode 100644 index 00000000..ae5b4f43 --- /dev/null +++ b/modules/nf-core/tabix/tabix/meta.yml @@ -0,0 +1,49 @@ +name: tabix_tabix +description: create tabix index from a sorted bgzip tab-delimited genome file +keywords: + - index + - tabix + - vcf +tools: + - tabix: + description: Generic indexer for TAB-delimited genome position files. + homepage: https://www.htslib.org/doc/tabix.html + documentation: https://www.htslib.org/doc/tabix.1.html + doi: 10.1093/bioinformatics/btq671 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - tab: + type: file + description: TAB-delimited genome position file compressed with bgzip + pattern: "*.{bed.gz,gff.gz,sam.gz,vcf.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - tbi: + type: file + description: tabix index file + pattern: "*.{tbi}" + - csi: + type: file + description: coordinate sorted index file + pattern: "*.{csi}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@maxulysse" diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test b/modules/nf-core/tabix/tabix/tests/main.nf.test new file mode 100644 index 00000000..3a150c70 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test @@ -0,0 +1,142 @@ +nextflow_process { + + name "Test Process TABIX_TABIX" + script "modules/nf-core/tabix/tabix/main.nf" + process "TABIX_TABIX" + + tag "modules" + tag "modules_nfcore" + tag "tabix" + tag "tabix/tabix" + + test("sarscov2_bedgz_tbi") { + config "./tabix_bed.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_bed' ], + [ file(params.test_data['sarscov2']['genome']['test_bed_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.tbi[0][1]).name + ).match("tbi_bed") + } + ) + } + } + + test("sarscov2_gff_tbi") { + config "./tabix_gff.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_gff' ], + [ file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.tbi[0][1]).name + ).match("tbi_gff") + } + ) + } + + } + + test("sarscov2_vcf_tbi") { + config "./tabix_vcf_tbi.config" + when { + process { + """ + input[0] = [ + [ id:'tbi_vcf' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert 
process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.tbi[0][1]).name + ).match("tbi_vcf") + } + ) + } + + } + + test("sarscov2_vcf_csi") { + config "./tabix_vcf_csi.config" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.csi[0][1]).name + ).match("vcf_csi") + } + ) + } + + } + + test("sarscov2_vcf_csi_stub") { + config "./tabix_vcf_csi.config" + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'vcf_csi_stub' ], + [ file(params.test_data['sarscov2']['illumina']['test_vcf_gz'], checkIfExists: true) ] + ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert snapshot( + file(process.out.csi[0][1]).name + ).match("vcf_csi_stub") + } + ) + } + + } + +} diff --git a/modules/nf-core/tabix/tabix/tests/main.nf.test.snap b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap new file mode 100644 index 00000000..034e38b6 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/main.nf.test.snap @@ -0,0 +1,217 @@ +{ + "vcf_csi_stub": { + "content": [ + "test.vcf.gz.csi" + ], + "timestamp": "2024-03-04T14:51:59.788002" + }, + "tbi_gff": { + "content": [ + "genome.gff3.gz.tbi" + ], + "timestamp": "2024-02-19T14:53:37.420216" + }, + "sarscov2_gff_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,53fc683fd217aae47ef10d23c52a9178" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_gff" + }, + "genome.gff3.gz.tbi:md5,53fc683fd217aae47ef10d23c52a9178" + ] + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": 
"2024-02-19T14:53:37.388157" + }, + "sarscov2_bedgz_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,0f17d85e7f0a042b2aa367b70df224f8" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_bed" + }, + "test.bed.gz.tbi:md5,0f17d85e7f0a042b2aa367b70df224f8" + ] + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": "2024-02-19T14:53:28.879408" + }, + "tbi_vcf": { + "content": [ + "test.vcf.gz.tbi" + ], + "timestamp": "2024-02-19T14:53:46.402522" + }, + "vcf_csi": { + "content": [ + "test.vcf.gz.csi" + ], + "timestamp": "2024-02-19T14:53:54.921189" + }, + "sarscov2_vcf_tbi": { + "content": [ + { + "0": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,897f3f378a811b90e6dee56ce08d2bcf" + ] + ], + "1": [ + + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "tbi_vcf" + }, + "test.vcf.gz.tbi:md5,897f3f378a811b90e6dee56ce08d2bcf" + ] + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": "2024-02-19T14:53:46.370358" + }, + "sarscov2_vcf_csi_stub": { + "content": [ + { + "0": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + "versions.yml:md5,3d45df6d80883bad358631069a2940fd" + ], + "csi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + [ + { + "id": "vcf_csi_stub" + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,3d45df6d80883bad358631069a2940fd" + ] + } + ], + "timestamp": "2024-03-04T14:51:59.766184" + }, + "sarscov2_vcf_csi": { + "content": [ + { + "0": [ + + ], + "1": [ + [ + { + 
"id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,0731ad6f40104d2bbb1a2cc478ef8f03" + ] + ], + "2": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ], + "csi": [ + [ + { + "id": "vcf_csi" + }, + "test.vcf.gz.csi:md5,0731ad6f40104d2bbb1a2cc478ef8f03" + ] + ], + "tbi": [ + + ], + "versions": [ + "versions.yml:md5,f4feeda7fdd4b567102f7f8e5d7037a3" + ] + } + ], + "timestamp": "2024-02-19T14:53:54.886876" + }, + "tbi_bed": { + "content": [ + "test.bed.gz.tbi" + ], + "timestamp": "2024-02-19T14:53:28.947628" + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_bed.config b/modules/nf-core/tabix/tabix/tests/tabix_bed.config new file mode 100644 index 00000000..7ff05905 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_bed.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p bed' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_gff.config b/modules/nf-core/tabix/tabix/tests/tabix_gff.config new file mode 100644 index 00000000..20c0a1e3 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_gff.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p gff' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config new file mode 100644 index 00000000..eb4f2d7e --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_csi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf --csi' + } +} diff --git a/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config new file mode 100644 index 00000000..2774c8a9 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tabix_vcf_tbi.config @@ -0,0 +1,5 @@ +process { + withName: TABIX_TABIX { + ext.args = '-p vcf' + } +} \ No newline at end of file diff --git a/modules/nf-core/tabix/tabix/tests/tags.yml 
b/modules/nf-core/tabix/tabix/tests/tags.yml new file mode 100644 index 00000000..6eda0653 --- /dev/null +++ b/modules/nf-core/tabix/tabix/tests/tags.yml @@ -0,0 +1,2 @@ +tabix/tabix: + - "modules/nf-core/tabix/tabix/**" diff --git a/modules/nf-core/unicycler/environment.yml b/modules/nf-core/unicycler/environment.yml new file mode 100644 index 00000000..bf5c977b --- /dev/null +++ b/modules/nf-core/unicycler/environment.yml @@ -0,0 +1,7 @@ +name: unicycler +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::unicycler=0.4.8 diff --git a/modules/nf-core/unicycler/main.nf b/modules/nf-core/unicycler/main.nf new file mode 100644 index 00000000..58d60cbe --- /dev/null +++ b/modules/nf-core/unicycler/main.nf @@ -0,0 +1,46 @@ +process UNICYCLER { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/unicycler:0.4.8--py38h8162308_3' : + 'biocontainers/unicycler:0.4.8--py38h8162308_3' }" + + input: + tuple val(meta), path(shortreads), path(longreads) + + output: + tuple val(meta), path('*.scaffolds.fa.gz'), emit: scaffolds + tuple val(meta), path('*.assembly.gfa.gz'), emit: gfa + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def short_reads = shortreads ? ( meta.single_end ? "-s $shortreads" : "-1 ${shortreads[0]} -2 ${shortreads[1]}" ) : "" + def long_reads = longreads ? 
"-l $longreads" : "" + """ + unicycler \\ + --threads $task.cpus \\ + $args \\ + $short_reads \\ + $long_reads \\ + --out ./ + + mv assembly.fasta ${prefix}.scaffolds.fa + gzip -n ${prefix}.scaffolds.fa + mv assembly.gfa ${prefix}.assembly.gfa + gzip -n ${prefix}.assembly.gfa + mv unicycler.log ${prefix}.unicycler.log + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + unicycler: \$(echo \$(unicycler --version 2>&1) | sed 's/^.*Unicycler v//; s/ .*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/unicycler/meta.yml b/modules/nf-core/unicycler/meta.yml new file mode 100644 index 00000000..406b1470 --- /dev/null +++ b/modules/nf-core/unicycler/meta.yml @@ -0,0 +1,64 @@ +name: unicycler +description: Assembles bacterial genomes +keywords: + - genome + - assembly + - genome assembler + - small genome +tools: + - unicycler: + description: Hybrid assembly pipeline for bacterial genomes + homepage: https://github.com/rrwick/Unicycler + documentation: https://github.com/rrwick/Unicycler + tool_dev_url: https://github.com/rrwick/Unicycler + doi: 10.1371/journal.pcbi.1005595 + licence: ["GPL v3"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - shortreads: + type: file + description: | + List of input Illumina FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - longreads: + type: file + description: | + List of input FastQ files of size 1, PacBio or Nanopore long reads. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - scaffolds: + type: file + description: Fasta file containing scaffolds + pattern: "*.{scaffolds.fa.gz}" + - gfa: + type: file + description: gfa file containing assembly + pattern: "*.{assembly.gfa.gz}" + - log: + type: file + description: unicycler log file + pattern: "*.{log}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@JoseEspinosa" + - "@drpatelh" + - "@d4straub" +maintainers: + - "@JoseEspinosa" + - "@drpatelh" + - "@d4straub" diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml new file mode 100644 index 00000000..0c9cbb10 --- /dev/null +++ b/modules/nf-core/untar/environment.yml @@ -0,0 +1,11 @@ +name: untar + +channels: + - conda-forge + - bioconda + - defaults + +dependencies: + - conda-forge::grep=3.11 + - conda-forge::sed=4.7 + - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf new file mode 100644 index 00000000..8a75bb95 --- /dev/null +++ b/modules/nf-core/untar/main.nf @@ -0,0 +1,63 @@ +process UNTAR { + tag "$archive" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : + 'nf-core/ubuntu:20.04' }" + + input: + tuple val(meta), path(archive) + + output: + tuple val(meta), path("$prefix"), emit: untar + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: ( meta.id ? 
"${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) + + """ + mkdir $prefix + + ## Ensures --strip-components only applied when top level of tar contents is a directory + ## If just files or multiple directories, place all in prefix + if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then + tar \\ + -C $prefix --strip-components 1 \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + else + tar \\ + -C $prefix \\ + -xavf \\ + $args \\ + $archive \\ + $args2 + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) + """ + mkdir $prefix + touch ${prefix}/file.txt + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') + END_VERSIONS + """ +} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml new file mode 100644 index 00000000..a9a2110f --- /dev/null +++ b/modules/nf-core/untar/meta.yml @@ -0,0 +1,46 @@ +name: untar +description: Extract files. +keywords: + - untar + - uncompress + - extract +tools: + - untar: + description: | + Extract tar.gz files. + documentation: https://www.gnu.org/software/tar/manual/ + licence: ["GPL-3.0-or-later"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - archive: + type: file + description: File to be untar + pattern: "*.{tar}.{gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - untar: + type: directory + description: Directory containing contents of archive + pattern: "*/" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" +maintainers: + - "@joseespinosa" + - "@drpatelh" + - "@matthdsm" + - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test new file mode 100644 index 00000000..2a7c97bf --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test @@ -0,0 +1,47 @@ +nextflow_process { + + name "Test Process UNTAR" + script "../main.nf" + process "UNTAR" + tag "modules" + tag "modules_nfcore" + tag "untar" + test("test_untar") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar") }, + ) + } + + } + + test("test_untar_onlyfiles") { + + when { + process { + """ + input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] + """ + } + } + + then { + assertAll ( + { assert process.success }, + { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, + ) + } + + } + +} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap new file mode 100644 index 00000000..64550292 --- /dev/null +++ b/modules/nf-core/untar/tests/main.nf.test.snap @@ -0,0 +1,42 @@ +{ + "test_untar_onlyfiles": { + "content": [ + [ + [ + [ + + ], + [ + "hello.txt:md5,e59ff97941044f85df5297e1c302d260" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:41.320643" + }, + "test_untar": { + "content": [ + [ + [ + [ + + ], + [ + 
"hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", + "opts.k2d:md5,a033d00cf6759407010b21700938f543", + "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" + ] + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-02-28T11:49:33.795172" + } +} \ No newline at end of file diff --git a/modules/nf-core/untar/tests/tags.yml b/modules/nf-core/untar/tests/tags.yml new file mode 100644 index 00000000..feb6f15c --- /dev/null +++ b/modules/nf-core/untar/tests/tags.yml @@ -0,0 +1,2 @@ +untar: + - modules/nf-core/untar/** diff --git a/modules/nf-core/vcflib/vcfuniq/environment.yml b/modules/nf-core/vcflib/vcfuniq/environment.yml new file mode 100644 index 00000000..dff6ed89 --- /dev/null +++ b/modules/nf-core/vcflib/vcfuniq/environment.yml @@ -0,0 +1,7 @@ +name: vcflib_vcfuniq +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - bioconda::vcflib=1.0.3 diff --git a/modules/nf-core/vcflib/vcfuniq/main.nf b/modules/nf-core/vcflib/vcfuniq/main.nf new file mode 100644 index 00000000..15899b0a --- /dev/null +++ b/modules/nf-core/vcflib/vcfuniq/main.nf @@ -0,0 +1,35 @@ +process VCFLIB_VCFUNIQ { + tag "$meta.id" + label 'process_low' + + // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions. + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/vcflib:1.0.3--hecb563c_1': + 'biocontainers/vcflib:1.0.3--hecb563c_1' }" + + input: + tuple val(meta), path(vcf), path(tbi) + + output: + tuple val(meta), path("*.gz"), emit: vcf + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.0.2' // WARN: Version information not provided by tool on CLI. 
Please update this string when bumping container versions. + """ + vcfuniq \\ + $vcf \\ + | bgzip -c $args > ${prefix}.vcf.gz + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + vcflib: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/vcflib/vcfuniq/meta.yml b/modules/nf-core/vcflib/vcfuniq/meta.yml new file mode 100644 index 00000000..8adf6b62 --- /dev/null +++ b/modules/nf-core/vcflib/vcfuniq/meta.yml @@ -0,0 +1,45 @@ +name: vcflib_vcfuniq +description: List unique genotypes. Like GNU uniq, but for VCF records. Remove records which have the same position, ref, and alt as the previous record. +keywords: + - vcf + - uniq + - deduplicate +tools: + - vcflib: + description: Command-line tools for manipulating VCF files + homepage: https://github.com/vcflib/vcflib + documentation: https://github.com/vcflib/vcflib#USAGE + doi: "10.1101/2021.05.21.445151" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + - tbi: + type: file + description: Index of VCF file + pattern: "*.vcf.gz.tbi" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - vcf: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/nextflow.config b/nextflow.config index e59f91c2..4e881662 100644 --- a/nextflow.config +++ b/nextflow.config @@ -9,14 +9,51 @@ // Global default params, used in configs params { - // TODO nf-core: Specify your pipeline's command line flags // Input options input = null + platform = null + protocol = null // References genome = null - igenomes_base = 's3://ngi-igenomes/igenomes/' - igenomes_ignore = false + primer_set = null + primer_set_version = null + primer_fasta = null + primer_left_suffix = '_LEFT' + primer_right_suffix = '_RIGHT' + save_reference = false + additional_annotation = null + + // Nanopore options + fastq_dir = null + fast5_dir = null + sequencing_summary = null + min_barcode_reads = 100 + min_guppyplex_reads = 10 + artic_minion_caller = 'nanopolish' + artic_minion_aligner = 'minimap2' + artic_minion_medaka_model = null + skip_pycoqc = false + skip_nanoplot = false + + // Nanopore/Illumina options + asciigenome_read_depth = 50 + asciigenome_window_size = 50 + skip_freyja = false + skip_freyja_boot = false + freyja_repeats = 100 + freyja_depthcutoff = null + freyja_db_name = 'freyja_db' + freyja_barcodes = null + freyja_lineages = null + skip_mosdepth = false + skip_pangolin = false + skip_nextclade = false + skip_variants_quast = false + skip_snpeff = false + skip_asciigenome = false + skip_variants_long_table = false + skip_multiqc = false // MultiQC options multiqc_config = null @@ -25,6 +62,47 @@ params { max_multiqc_email_size = '25.MB' multiqc_methods_description = null + // Illumina QC, read trimming and filtering options + kraken2_db = 's3://ngi-igenomes/test-data/viralrecon/kraken2_human.tar.gz' + kraken2_db_name = 'human' + kraken2_variants_host_filter = false + 
kraken2_assembly_host_filter = true + save_trimmed_fail = false + skip_fastqc = false + skip_kraken2 = false + skip_fastp = false + skip_cutadapt = false + + // Illumina variant calling options + variant_caller = null + consensus_caller = 'bcftools' + min_mapped_reads = 1000 + ivar_trim_noprimer = false + ivar_trim_offset = null + filter_duplicates = false + save_unaligned = false + save_mpileup = false + skip_ivar_trim = false + skip_markduplicates = true + skip_picard_metrics = false + skip_consensus_plots = false + skip_consensus = false + skip_variants = false + + // Illumina de novo assembly options + assemblers = 'spades' + spades_mode = 'rnaviral' + spades_hmm = null + blast_db = null + min_contig_length = 200 + min_perc_contig_aligned = 0.7 + skip_bandage = false + skip_blast = false + skip_abacas = false + skip_plasmidid = true + skip_assembly_quast = false + skip_assembly = false + // Boilerplate options outdir = null publish_dir_mode = 'copy' @@ -154,16 +232,20 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_sispa { includeConfig 'conf/test_sispa.config' } + test_nanopore { includeConfig 'conf/test_nanopore.config' } + test_full { includeConfig 'conf/test_full.config' } + test_full_illumina { includeConfig 'conf/test_full.config' } + test_full_nanopore { includeConfig 'conf/test_full_nanopore.config' } + test_full_sispa { includeConfig 'conf/test_full_sispa.config' } } // Load nf-core custom profiles from different Institutions includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/nfcore_custom.config" : "/dev/null" // Load nf-core/viralrecon custom profiles from different institutions. 
-// TODO nf-core: Optionally, you can add a pipeline-specific nf-core config at https://github.com/nf-core/configs -// includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/viralrecon.config" : "/dev/null" +includeConfig !System.getenv('NXF_OFFLINE') && params.custom_config_base ? "${params.custom_config_base}/pipeline/viralrecon.config" : "/dev/null" // Set default registry for Apptainer, Docker, Podman, Charliecloud and Singularity independent of -profile // Will not be used unless Apptainer / Docker / Podman / Charliecloud / Singularity are enabled @@ -174,9 +256,6 @@ podman.registry = 'quay.io' singularity.registry = 'quay.io' charliecloud.registry = 'quay.io' -// Load igenomes.config if required -includeConfig !params.igenomes_ignore ? 'conf/igenomes.config' : 'conf/igenomes_ignored.config' - // Export these variables to prevent local Python/R libraries from conflicting with those in the container // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container. // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable. @@ -227,7 +306,7 @@ manifest { mainScript = 'main.nf' nextflowVersion = '!>=24.04.2' version = '2.7.0dev' - doi = '' + doi = 'https://doi.org/10.5281/zenodo.3901628' } // Nextflow plugins @@ -252,7 +331,7 @@ validation { \033[0;35m ${manifest.name} ${manifest.version}\033[0m -\033[2m----------------------------------------------------\033[0m- """ - afterText = """${manifest.doi ? "* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} + afterText = """${manifest.doi ? 
"\n* The pipeline\n" : ""}${manifest.doi.tokenize(",").collect { " https://doi.org/${it.trim().replace('https://doi.org/','')}"}.join("\n")}${manifest.doi ? "\n" : ""} * The nf-core framework https://doi.org/10.1038/s41587-020-0439-x diff --git a/nextflow_schema.json b/nextflow_schema.json index e3e6cc94..cc792a69 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -10,7 +10,7 @@ "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["outdir"], "properties": { "input": { "type": "string", @@ -19,9 +19,21 @@ "schema": "assets/schema_input.json", "mimetype": "text/csv", "pattern": "^\\S+\\.csv$", - "description": "Path to comma-separated file containing information about the samples in the experiment.", - "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/viralrecon/usage#samplesheet-input).", - "fa_icon": "fas fa-file-csv" + "fa_icon": "fas fa-file-csv", + "help_text": "You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/viralrecon/docs/usage#introduction).", + "description": "Path to comma-separated file containing information about the samples you would like to analyse." 
+ }, + "platform": { + "type": "string", + "fa_icon": "fas fa-hdd", + "description": "NGS platform used to sequence the samples.", + "enum": ["illumina", "nanopore"] + }, + "protocol": { + "type": "string", + "description": "Specifies the type of protocol used for sequencing.", + "fa_icon": "fas fa-vials", + "enum": ["metagenomic", "amplicon"] }, "outdir": { "type": "string", @@ -47,88 +59,534 @@ "title": "Reference genome options", "type": "object", "fa_icon": "fas fa-dna", - "description": "Reference genome related files and options required for the workflow.", + "description": "Options for the reference genome indices used to align reads.", "properties": { "genome": { "type": "string", - "description": "Name of iGenomes reference.", + "description": "Name of viral reference genome.", "fa_icon": "fas fa-book", - "help_text": "If using a reference genome configured in the pipeline using iGenomes, use this parameter to give the ID for the reference. This is then used to build the full paths for all required reference genome files e.g. `--genome GRCh38`. \n\nSee the [nf-core website docs](https://nf-co.re/usage/reference_genomes) for more details." + "help_text": "You can find the keys to specify the genomes in the [Genomes config file](https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config)." }, "fasta": { "type": "string", "format": "file-path", - "exists": true, "mimetype": "text/plain", "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "fa_icon": "fas fa-font", "description": "Path to FASTA genome file.", - "help_text": "This parameter is *mandatory* if `--genome` is not specified. If you don't have a BWA index available this will be generated for you automatically. Combine with `--save_reference` to save BWA index for future runs.", - "fa_icon": "far fa-file-code" + "help_text": "If you have no genome reference available, the pipeline can build one using a FASTA file. 
This requires additional time and resources, so it's better to use a pre-build index if possible." + }, + "gff": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.gff(\\.gz)?$", + "description": "Full path to GFF annotation file.", + "fa_icon": "fas fa-file-invoice" }, - "igenomes_ignore": { + "additional_annotation": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+(\\.gff|\\.gtf)(\\.gz)?$", + "description": "Full path to additional annotation file in GTF or GFF format.", + "fa_icon": "fas fa-file-invoice" + }, + "bowtie2_index": { + "type": "string", + "format": "path", + "description": "Path to directory or tar.gz archive for pre-built Bowtie2 index.", + "fa_icon": "fas fa-bezier-curve" + }, + "primer_bed": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.bed(\\.gz)?$", + "description": "If the '--protocol amplicon' parameter is provided then iVar is used to trim primer sequences after read alignment and before variant calling.", + "help_text": "iVar uses the primer positions relative to the viral genome supplied in this file to soft clip primer sequences from a coordinate sorted BAM file. The file must be in BED format as highlighted below:\n\n```\nMN908947.3 30 54 nCoV-2019_1_LEFT 60 -\nMN908947.3 385 410 nCoV-2019_1_RIGHT 60 +\nMN908947.3 320 342 nCoV-2019_2_LEFT 60 -\nMN908947.3 704 726 nCoV-2019_2_RIGHT 60 +\n```", + "fa_icon": "fas fa-stream" + }, + "primer_fasta": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", + "description": "If the '--protocol amplicon' parameter is provided then Cutadapt is used to trim primer sequences from FastQ files before de novo assembly.", + "help_text": "This file must contain amplicon primer sequences in Fasta format. 
An example is shown below:\n\n```\n>nCoV-2019_1_LEFT\nACCAACCAACTTTCGATCTCTTGT\n>nCoV-2019_1_RIGHT\nCATCTTTAAGATGTTGACGTGCCTC\n>nCoV-2019_2_LEFT\nCTGTTTTACAGGTTCGCGACGT\n>nCoV-2019_2_RIGHT\nTAAGGATCAGTGCCAAGCTCGT\n```", + "fa_icon": "fas fa-stream" + }, + "primer_set": { + "type": "string", + "fa_icon": "fas fa-sliders-h", + "description": "The primer set to be used for the data analysis.", + "help_text": "Where possible we are trying to collate links and settings for standard primer sets to make it easier to run the pipeline with standard keys. See https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config" + }, + "primer_set_version": { + "type": "number", + "fa_icon": "fas fa-code-branch", + "description": "Version of the primer set e.g. '--primer_set artic --primer_set_version 3'.", + "help_text": "Where possible we are trying to collate links and settings for standard primer sets to make it easier to run the pipeline with standard keys. See https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config" + }, + "primer_left_suffix": { + "type": "string", + "default": "_LEFT", + "fa_icon": "fas fa-arrow-left", + "description": "Suffix used in name field of '--primer_bed' to indicate left primer position." + }, + "primer_right_suffix": { + "type": "string", + "default": "_RIGHT", + "fa_icon": "fas fa-arrow-right", + "description": "Suffix used in name field of '--primer_bed' to indicate right primer position." + }, + "save_reference": { "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "hidden": true, - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`." + "fa_icon": "fas fa-save", + "description": "If generated by the pipeline save reference genome related files to the results folder." 
+ } + } + }, + "nanopore_options": { + "title": "Nanopore options", + "type": "object", + "description": "Options exclusive to running the pipeline on Nanopore data using the ARTIC fieldbioinformatics pipeline.", + "default": "", + "fa_icon": "fas fa-truck-loading", + "properties": { + "fastq_dir": { + "type": "string", + "format": "directory-path", + "description": "Path to a folder containing fastq files from the Nanopore run.", + "help_text": "e.g. '--fastq_dir ./20191023_1522_MC-110615_0_FAO93606_12bf9b4f/fastq_pass/'.", + "fa_icon": "fas fa-folder-open" }, - "igenomes_base": { + "fast5_dir": { "type": "string", "format": "directory-path", - "description": "The base path to the igenomes reference files", - "fa_icon": "fas fa-ban", - "hidden": true, - "default": "s3://ngi-igenomes/igenomes/" + "description": "Path to a folder containing fast5 files from the Nanopore run.", + "help_text": "e.g. '--fast5_dir ./20191023_1522_MC-110615_0_FAO93606_12bf9b4f/fast5_pass/'. Not required when running the pipeline with the '--artic_minion_caller medaka' workflow.", + "fa_icon": "fas fa-folder-open" + }, + "sequencing_summary": { + "type": "string", + "format": "file-path", + "mimetype": "text/plain", + "pattern": "^\\S+\\.txt$", + "description": "Sequencing summary file generated after Nanopore run completion.", + "help_text": " e.g. '--sequencing_summary ./20191023_1522_MC-110615_0_FAO93606_12bf9b4f/sequencing_summary.txt'. 
Not required when running the pipeline with the '--artic_minion_caller medaka' workflow.", + "fa_icon": "fas fa-file" + }, + "min_barcode_reads": { + "type": "integer", + "default": 100, + "description": "Minimum number of raw reads required per sample/barcode in order to be considered for the downstream processing steps.", + "fa_icon": "fas fa-hand-paper" + }, + "min_guppyplex_reads": { + "type": "integer", + "default": 10, + "description": "Minimum number of reads required after the artic guppyplex process per sample/barcode in order to be considered for the downstream processing steps.", + "fa_icon": "fas fa-hand-paper" + }, + "artic_minion_caller": { + "type": "string", + "default": "nanopolish", + "description": "Variant caller used when running artic minion (default: 'nanopolish').", + "fa_icon": "fas fa-phone-volume", + "enum": ["nanopolish", "medaka"] + }, + "artic_minion_aligner": { + "type": "string", + "default": "minimap2", + "description": "Aligner used when running artic minion (default: 'minimap2').", + "fa_icon": "fas fa-map-signs", + "enum": ["minimap2", "bwa"] + }, + "artic_scheme": { + "type": "string", + "description": "Primer scheme recognised by the artic minion command.", + "help_text": "e.g. '--artic_scheme ncov-2019'. See https://artic.readthedocs.io/en/latest/primer-schemes/ and https://github.com/artic-network/primer-schemes/blob/master/schemes_manifest.json.", + "fa_icon": "fas fa-stream" + }, + "artic_minion_medaka_model": { + "type": "string", + "description": "Parameter passed to artic minion and required when using the '--artic_minion_caller medaka' workflow.", + "help_text": "See https://github.com/nanoporetech/medaka", + "fa_icon": "fas fa-train" + }, + "skip_pycoqc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip pycoQC." + }, + "skip_nanoplot": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip NanoPlot." 
} } }, - "institutional_config_options": { - "title": "Institutional config options", + "nanopore_illumina_options": { + "title": "Nanopore/Illumina options", "type": "object", - "fa_icon": "fas fa-university", - "description": "Parameters used to describe centralised config profiles. These should not be edited.", - "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. You should not need to change these values when you run a pipeline.", + "description": "Options common to both the Nanopore and Illumina workflows in the pipeline.", + "default": "", "properties": { - "custom_config_version": { + "nextclade_dataset": { "type": "string", - "description": "Git commit id for Institutional configs.", - "default": "master", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "description": "Full path to Nextclade dataset required for 'nextclade run' command.", + "fa_icon": "fas fa-project-diagram" }, - "custom_config_base": { + "nextclade_dataset_name": { "type": "string", - "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", - "hidden": true, - "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", - "fa_icon": "fas fa-users-cog" + "description": "Name of Nextclade dataset to retrieve. 
A list of available datasets can be obtained using the 'nextclade dataset list' command.", + "fa_icon": "fas fa-project-diagram" + }, - "config_profile_name": { + "nextclade_dataset_reference": { "type": "string", - "description": "Institutional config name.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "description": "Accession id to download dataset based on a particular reference sequence. A list of available datasets can be obtained using the 'nextclade dataset list' command.", + "fa_icon": "fas fa-project-diagram" }, - "config_profile_description": { + "nextclade_dataset_tag": { "type": "string", - "description": "Institutional config description.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "description": "Version tag of the dataset to download. A list of available datasets can be obtained using the 'nextclade dataset list' command.", + "fa_icon": "fas fa-project-diagram" }, - "config_profile_contact": { + "asciigenome_read_depth": { + "type": "integer", + "default": 50, + "description": "Maximum read depth used to generate ASCIIGenome screenshots for variant loci.", + "fa_icon": "fas fa-align-center" + }, + "asciigenome_window_size": { + "type": "integer", + "default": 50, + "description": "Maximum window size before and after variant loci used to generate ASCIIGenome screenshots.", + "fa_icon": "fab fa-windows" + }, + "skip_freyja": { + "type": "boolean", + "fa_icon": "fas fa-forward", + "description": "Skip freyja deep SARS-CoV-2 variant analysis using a depth weighted approach." + }, + "skip_freyja_boot": { + "type": "boolean", + "fa_icon": "fas fa-forward", + "description": "Skip the bootstrapping module of Freyja" + }, + "freyja_db_name": { "type": "string", - "description": "Institutional config contact information.", - "hidden": true, - "fa_icon": "fas fa-users-cog" + "default": "freyja_db", + "fa_icon": "fas fa-folder-open", + "description": "Specify the name where to store UShER database (default: 'freyja_db')." 
}, - "config_profile_url": { + "freyja_depthcutoff": { + "type": "number", + "description": "Specify a coverage depth minimum which excludes sites with coverage less than the specified value", + "fa_icon": "fas fa-hand-paper", + "help_text": "Using the `depthcutoff` option may result in some distinct lineages now having identical barcodes, which are grouped into the format `[lineage]-like(num)` (based on their shared phylogeny) in the output." + }, + "freyja_repeats": { + "type": "integer", + "default": 100, + "fa_icon": "fas fa-hand-paper", + "description": "Specify the number of bootstrap repeats to do.", + "minimum": 1 + }, + "freyja_barcodes": { "type": "string", - "description": "Institutional config URL link.", + "format": "path", + "fa_icon": "fas fa-file", + "description": "Lineage defining barcodes, default is most recent from UShER database." + }, + "freyja_lineages": { + "type": "string", + "format": "path", + "fa_icon": "fas fa-file", + "description": "Metadata of lineages that match barcode, default is most recent from UShER database." + }, + "max_multiqc_email_size": { + "type": "string", + "description": "File size limit when attaching MultiQC reports to summary emails.", + "default": "25.MB", + "fa_icon": "fas fa-file-upload", "hidden": true, - "fa_icon": "fas fa-users-cog" + "help_text": "If file generated by pipeline exceeds the threshold, it will not be attached." + }, + "skip_mosdepth": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip genome-wide and amplicon coverage plot generation from mosdepth output." + }, + "skip_pangolin": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip Pangolin lineage analysis for genome consensus sequence." + }, + "skip_nextclade": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip Nextclade clade assignment, mutation calling, and sequence quality checks for genome consensus sequence." 
+ }, + "skip_asciigenome": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip variant screenshot generation with ASCIIGenome." + }, + "skip_variants_quast": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip generation of QUAST aggregated report for consensus sequences." + }, + "skip_variants_long_table": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip long table generation for reporting variants." + }, + "skip_multiqc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip MultiQC." + } - } + }, + "fa_icon": "fas fa-retweet" + }, + "illumina_qc_read_trimming_and_filtering_options": { + "title": "Illumina QC, read trimming and filtering options", + "type": "object", + "description": "Options to adjust QC, read trimming and host read filtering with Kraken2 for the Illumina workflow.", + "default": "", + "properties": { + "kraken2_db": { + "type": "string", + "format": "path", + "default": "s3://ngi-igenomes/test-data/viralrecon/kraken2_human.tar.gz", + "fa_icon": "fab fa-gitkraken", + "description": "Full path to Kraken2 database built from host genome." + }, + "kraken2_db_name": { + "type": "string", + "default": "human", + "fa_icon": "fab fa-gitkraken", + "description": "Name for host genome as recognised by Kraken2 when using the 'kraken2 build' command." + }, + "kraken2_variants_host_filter": { + "type": "boolean", + "fa_icon": "fab fa-gitkraken", + "description": "Remove host reads identified by Kraken2 before running variant calling steps in the pipeline." + }, + "kraken2_assembly_host_filter": { + "type": "boolean", + "default": true, + "fa_icon": "fab fa-gitkraken", + "description": "Remove host reads identified by Kraken2 before running assembly steps in the pipeline." 
+ }, + "save_trimmed_fail": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Save the trimmed FastQ files in the results directory.", + "help_text": "By default, trimmed FastQ files will not be saved to the results directory. Specify this flag (or set to true in your config file) to copy these files to the results directory when complete." + }, + "skip_fastqc": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip FastQC." + }, + "skip_kraken2": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip Kraken2 process for removing host classified reads." + }, + "skip_fastp": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip the initial read trimming step performed by fastp." + }, + "skip_cutadapt": { + "type": "boolean", + "description": "Skip the amplicon trimming step with Cutadapt when using --protocol amplicon.", + "fa_icon": "fas fa-fast-forward" + } + }, + "fa_icon": "fas fa-cut" + }, + "illumina_variant_calling_options": { + "title": "Illumina variant calling options", + "type": "object", + "description": "Various options for the variant calling branch of the Illumina workflow.", + "default": "", + "properties": { + "variant_caller": { + "type": "string", + "fa_icon": "fas fa-phone-volume", + "description": "Specify which variant calling algorithm you would like to use. Available options are 'ivar' (default for '--protocol amplicon') and 'bcftools' (default for '--protocol metagenomic').", + "enum": ["ivar", "bcftools"] + }, + "consensus_caller": { + "type": "string", + "default": "bcftools", + "fa_icon": "fas fa-phone-volume", + "description": "Specify which consensus calling algorithm you would like to use. 
Available options are 'bcftools' and 'ivar' (default: 'bcftools').", + "enum": ["ivar", "bcftools"] + }, + "min_mapped_reads": { + "type": "integer", + "default": 1000, + "description": "Minimum number of mapped reads below which samples are removed from further processing. Some downstream steps in the pipeline will fail if this threshold is too low.", + "fa_icon": "fas fa-hand-paper" + }, + "ivar_trim_noprimer": { + "type": "boolean", + "description": "This option unsets the '-e' parameter in 'ivar trim' to discard reads without primers.", + "fa_icon": "fas fa-cut" + }, + "ivar_trim_offset": { + "type": "integer", + "description": "This option sets the '-x' parameter in 'ivar trim' so that reads that occur at the specified offset positions relative to primer positions will also be trimmed.", + "fa_icon": "fas fa-cut", + "help_text": "This parameter will need to be set for some amplicon-based sequencing protocols (e.g. SWIFT) as described and implemented [here](https://github.com/andersen-lab/ivar/pull/88)" + }, + "filter_duplicates": { + "type": "boolean", + "fa_icon": "fas fa-clone", + "description": "Filter duplicate reads detected by Picard MarkDuplicates from alignments." + }, + "save_unaligned": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Save unaligned reads in FastQ format from Bowtie 2 to the results directory." + }, + "save_mpileup": { + "type": "boolean", + "fa_icon": "fas fa-save", + "description": "Save mpileup files generated when calling variants with iVar variants or iVar consensus." + }, + "skip_ivar_trim": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip iVar primer trimming step. Not recommended for --protocol amplicon." + }, + "skip_markduplicates": { + "type": "boolean", + "default": true, + "fa_icon": "fas fa-fast-forward", + "description": "Skip picard MarkDuplicates step." 
+ }, + "skip_picard_metrics": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip Picard CollectMultipleMetrics steps." + }, + "skip_snpeff": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip SnpEff and SnpSift annotation of variants." + }, + "skip_consensus_plots": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip creation of consensus base density plots." + }, + "skip_consensus": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip genome consensus creation step and any downstream QC." + }, + "skip_variants": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Specify this parameter to skip all of the variant calling and mapping steps in the pipeline." + } + }, + "fa_icon": "fas fa-dna" + }, + "illumina_de_novo_assembly_options": { + "title": "Illumina de novo assembly options", + "type": "object", + "description": "Various options for the de novo assembly branch of the Illumina workflow.", + "default": "", + "properties": { + "assemblers": { + "type": "string", + "default": "spades", + "description": "Specify which assembly algorithms you would like to use. Available options are 'spades', 'unicycler' and 'minia'.", + "fa_icon": "fas fa-random" + }, + "spades_mode": { + "type": "string", + "default": "rnaviral", + "fa_icon": "fab fa-digg", + "description": "Specify the SPAdes mode you would like to run (default: 'rnaviral').", + "enum": [ + "rnaviral", + "corona", + "metaviral", + "meta", + "metaplasmid", + "plasmid", + "isolate", + "rna", + "bio" + ] + }, + "spades_hmm": { + "type": "string", + "format": "file-path", + "fa_icon": "fab fa-digg", + "description": "Path to profile HMMs specific for gene/organism to enhance SPAdes assembly." + }, + "blast_db": { + "type": "string", + "format": "path", + "fa_icon": "fas fa-database", + "description": "Path to directory or tar.gz archive for pre-built BLAST database." 
+ }, + "skip_bandage": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip Bandage image creation for assembly visualisation." + }, + "skip_blast": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip blastn of assemblies relative to reference genome." + }, + "skip_abacas": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip ABACAS process for assembly contiguation." + }, + "skip_plasmidid": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip assembly report generation by PlasmidID.", + "default": true + }, + "skip_assembly_quast": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Skip generation of QUAST aggregated report for assemblies." + }, + "skip_assembly": { + "type": "boolean", + "fa_icon": "fas fa-fast-forward", + "description": "Specify this parameter to skip all of the de novo assembly steps in the pipeline." + }, + "min_contig_length": { + "type": "integer", + "default": 200, + "fa_icon": "fas fa-sliders-h", + "description": "Minimum contig length to filter from BLAST results." + }, + "min_perc_contig_aligned": { + "type": "number", + "default": 0.7, + "fa_icon": "fas fa-sliders-h", + "description": "Minimum percentage of contig aligned to filter from BLAST results." + } + }, + "fa_icon": "fas fa-random" }, "generic_options": { "title": "Generic options", @@ -157,34 +615,28 @@ "description": "Email address for completion summary, only when pipeline fails.", "fa_icon": "fas fa-exclamation-triangle", "pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$", - "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.", - "hidden": true + "hidden": true, + "help_text": "This works exactly as with `--email`, except emails are only sent if the workflow is not successful." 
}, "plaintext_email": { "type": "boolean", "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", - "hidden": true - }, - "max_multiqc_email_size": { - "type": "string", - "description": "File size limit when attaching MultiQC reports to summary emails.", - "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$", - "default": "25.MB", - "fa_icon": "fas fa-file-upload", - "hidden": true + "hidden": true, + "help_text": "Set to receive plain-text e-mails instead of HTML formatted." }, "monochrome_logs": { "type": "boolean", "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", - "hidden": true + "hidden": true, + "help_text": "Set to disable colourful command line output and live life in monochrome." }, "hook_url": { "type": "string", "description": "Incoming hook URL for messaging service", "fa_icon": "fas fa-people-group", - "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", + "help_text": "Incoming hook URL for messaging service. Currently, only MS Teams is supported.", "hidden": true }, "multiqc_config": { @@ -220,6 +672,55 @@ "hidden": true } } + }, + "institutional_config_options": { + "title": "Institutional config options", + "type": "object", + "fa_icon": "fas fa-university", + "description": "Parameters used to describe centralised config profiles. These should not be edited.", + "help_text": "The centralised nf-core configuration profiles use a handful of pipeline parameters to describe themselves. This information is then printed to the Nextflow log when you run a pipeline. 
You should not need to change these values when you run a pipeline.", + "properties": { + "custom_config_version": { + "type": "string", + "description": "Git commit id for Institutional configs.", + "default": "master", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "custom_config_base": { + "type": "string", + "format": "directory-path", + "description": "Base directory for Institutional configs.", + "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "hidden": true, + "help_text": "If you're running offline, nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell nextflow where to find them with the `custom_config_base` option. For example:\n\n```bash\n## Download and unzip the config files\ncd /path/to/my/configs\nwget https://github.com/nf-core/configs/archive/master.zip\nunzip master.zip\n\n## Run the pipeline\ncd /path/to/my/data\nnextflow run /path/to/pipeline/ --custom_config_base /path/to/my/configs/configs-master/\n```\n\n> Note that the nf-core/tools helper package has a `download` command to download all required pipeline files + singularity containers + institutional configs in one go for you, to make this process easier.", + "fa_icon": "fas fa-users-cog" + }, + "config_profile_name": { + "type": "string", + "description": "Institutional config name.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_description": { + "type": "string", + "description": "Institutional config description.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_contact": { + "type": "string", + "description": "Institutional config contact information.", + "hidden": true, + "fa_icon": "fas fa-users-cog" + }, + "config_profile_url": { + "type": "string", + "description": "Institutional config URL link.", + "hidden": true, + "fa_icon": "fas 
fa-users-cog" + } + } } }, "allOf": [ @@ -230,10 +731,25 @@ "$ref": "#/$defs/reference_genome_options" }, { - "$ref": "#/$defs/institutional_config_options" + "$ref": "#/$defs/nanopore_options" + }, + { + "$ref": "#/$defs/nanopore_illumina_options" + }, + { + "$ref": "#/$defs/illumina_qc_read_trimming_and_filtering_options" + }, + { + "$ref": "#/$defs/illumina_variant_calling_options" + }, + { + "$ref": "#/$defs/illumina_de_novo_assembly_options" }, { "$ref": "#/$defs/generic_options" + }, + { + "$ref": "#/$defs/institutional_config_options" } ] } diff --git a/subworkflows/local/additional_annotation.nf b/subworkflows/local/additional_annotation.nf new file mode 100644 index 00000000..81160bb6 --- /dev/null +++ b/subworkflows/local/additional_annotation.nf @@ -0,0 +1,95 @@ +// +// Run snpEff, bgzip, tabix, stats and SnpSift commands +// + +include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/gunzip/main' +include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' +include { SNPEFF_ANN } from '../../modules/local/snpeff_ann' +include { SNPSIFT_EXTRACTFIELDS } from '../../modules/local/snpsift_extractfields' +include { VCF_BGZIP_TABIX_STATS } from './vcf_bgzip_tabix_stats' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query/main' +include { MAKE_VARIANTS_LONG_TABLE as MAKE_VARIANTS_LONG_TABLE_ADDITIONAL } from '../../modules/local/make_variants_long_table' + + +workflow ADDITIONAL_ANNOTATION { + take: + vcf // channel: [ val(meta), [ vcf ] ] + tbi // channel: [ val(meta), [ tbi ] ] + fasta // path : genome.fasta + annot // path : additional_annotation + pangolin // channel: [ val(meta), [ csv ] ] + + main: + + ch_versions = Channel.empty() + + // + // Uncompress additional annotation file + // + ch_annot = Channel.empty() + + if (params.additional_annotation.endsWith('.gz')) { + GUNZIP_GFF ( + [ [:], annot ] + ) + ch_annot = GUNZIP_GFF.out.gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) + } else { + 
ch_annot = Channel.value(file(params.additional_annotation)) + } + + // + // Make snpEff database + // + ch_snpeff_db = Channel.empty() + ch_snpeff_config = Channel.empty() + + SNPEFF_BUILD ( + fasta, + ch_annot + ) + ch_snpeff_db = SNPEFF_BUILD.out.db + ch_snpeff_config = SNPEFF_BUILD.out.config + ch_versions = ch_versions.mix(SNPEFF_BUILD.out.versions) + + SNPEFF_ANN ( + vcf, + ch_snpeff_db, + ch_snpeff_config, + fasta + ) + ch_versions = ch_versions.mix(SNPEFF_ANN.out.versions.first()) + + VCF_BGZIP_TABIX_STATS ( + SNPEFF_ANN.out.vcf, + [ [:], [] ], + [ [:], [] ], + [ [:], [] ] + ) + ch_versions = ch_versions.mix(VCF_BGZIP_TABIX_STATS.out.versions) + + SNPSIFT_EXTRACTFIELDS ( + VCF_BGZIP_TABIX_STATS.out.vcf + ) + ch_versions = ch_versions.mix(SNPSIFT_EXTRACTFIELDS.out.versions.first()) + + BCFTOOLS_QUERY ( + vcf.join(tbi, by: [0]), + [], + [], + [] + ) + ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) + + MAKE_VARIANTS_LONG_TABLE_ADDITIONAL ( + BCFTOOLS_QUERY.out.output.collect{it[1]}, + SNPSIFT_EXTRACTFIELDS.out.txt.collect{it[1]}.ifEmpty([]), + pangolin.collect{it[1]}.ifEmpty([]) + ) + ch_versions = ch_versions.mix(MAKE_VARIANTS_LONG_TABLE_ADDITIONAL.out.versions) + + emit: + long_table = MAKE_VARIANTS_LONG_TABLE_ADDITIONAL.out.csv // channel: [ val(meta), [ csv ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/assembly_minia.nf b/subworkflows/local/assembly_minia.nf new file mode 100644 index 00000000..31f1179d --- /dev/null +++ b/subworkflows/local/assembly_minia.nf @@ -0,0 +1,75 @@ +// +// Assembly and downstream processing for minia scaffolds +// + +include { MINIA } from '../../modules/nf-core/minia/main' + +include { ASSEMBLY_QC } from './assembly_qc' + +workflow ASSEMBLY_MINIA { + take: + reads // channel: [ val(meta), [ reads ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: 
/path/to/blast_header.txt + blast_filtered_header // channel: /path/to/blast_filtered_header.txt + + main: + + ch_versions = Channel.empty() + + // + // Assemble reads with minia + // + MINIA ( + reads + ) + ch_versions = ch_versions.mix(MINIA.out.versions.first()) + + // + // Filter for empty contig files + // + MINIA + .out + .contigs + .filter { meta, contig -> contig.size() > 0 } + .set { ch_contigs } + + // + // Downstream assembly steps + // + ASSEMBLY_QC ( + ch_contigs, + fasta, + gff, + blast_db, + blast_header, + blast_filtered_header + ) + ch_versions = ch_versions.mix(ASSEMBLY_QC.out.versions) + + emit: + contigs = MINIA.out.contigs // channel: [ val(meta), [ contigs ] ] + unitigs = MINIA.out.unitigs // channel: [ val(meta), [ unitigs ] ] + h5 = MINIA.out.h5 // channel: [ val(meta), [ h5 ] ] + + blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] + + quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] + + plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] + plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + + versions = ch_versions // channel: [ 
versions.yml ] +} diff --git a/subworkflows/local/assembly_qc.nf b/subworkflows/local/assembly_qc.nf new file mode 100644 index 00000000..315c8cef --- /dev/null +++ b/subworkflows/local/assembly_qc.nf @@ -0,0 +1,126 @@ +// +// Downstream analysis for assembly scaffolds +// + +include { FILTER_BLASTN } from '../../modules/local/filter_blastn' +include { ABACAS } from '../../modules/nf-core/abacas/main' +include { BLAST_BLASTN } from '../../modules/nf-core/blast/blastn/main' +include { PLASMIDID } from '../../modules/nf-core/plasmidid/main' +include { QUAST } from '../../modules/nf-core/quast/main' + +workflow ASSEMBLY_QC { + take: + scaffolds // channel: [ val(meta), [ scaffolds ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + blast_filtered_header // channel: /path/to/blast_filtered_header.txt + + main: + + ch_versions = Channel.empty() + + // + // Run blastn on assembly scaffolds + // + ch_blast_txt = Channel.empty() + ch_blast_filter_txt = Channel.empty() + if (!params.skip_blast) { + BLAST_BLASTN ( + scaffolds, + blast_db + ) + ch_versions = ch_versions.mix(BLAST_BLASTN.out.versions.first()) + + FILTER_BLASTN ( + BLAST_BLASTN.out.txt, + blast_header, + blast_filtered_header + ) + ch_blast_txt = FILTER_BLASTN.out.blast + ch_blast_filter_txt = FILTER_BLASTN.out.txt + ch_versions = ch_versions.mix(FILTER_BLASTN.out.versions.first()) + } + + // + // Assembly QC across all samples with QUAST + // + ch_quast_results = Channel.empty() + ch_quast_tsv = Channel.empty() + if (!params.skip_assembly_quast) { + scaffolds + .collect{ it[1] } + .map { scaffolds_collect -> tuple([id: "quast"], scaffolds_collect) } + .set { ch_to_quast } + + QUAST ( + ch_to_quast, + fasta.map { [ [:], it ] }, + gff + ) + ch_quast_results = QUAST.out.results + ch_quast_tsv = QUAST.out.tsv + ch_versions = ch_versions.mix(QUAST.out.versions) + } + + // + // Contiguate 
assembly with ABACAS + // + ch_abacas_results = Channel.empty() + if (!params.skip_abacas) { + ABACAS ( + scaffolds, + fasta + ) + ch_abacas_results = ABACAS.out.results + ch_versions = ch_versions.mix(ABACAS.out.versions.first()) + } + + // + // Assembly report with PlasmidID + // + ch_plasmidid_html = Channel.empty() + ch_plasmidid_tab = Channel.empty() + ch_plasmidid_images = Channel.empty() + ch_plasmidid_logs = Channel.empty() + ch_plasmidid_data = Channel.empty() + ch_plasmidid_database = Channel.empty() + ch_plasmidid_fasta = Channel.empty() + ch_plasmidid_kmer = Channel.empty() + if (!params.skip_plasmidid) { + PLASMIDID ( + scaffolds, + fasta + ) + ch_plasmidid_html = PLASMIDID.out.html + ch_plasmidid_tab = PLASMIDID.out.tab + ch_plasmidid_images = PLASMIDID.out.images + ch_plasmidid_logs = PLASMIDID.out.logs + ch_plasmidid_data = PLASMIDID.out.data + ch_plasmidid_database = PLASMIDID.out.database + ch_plasmidid_fasta = PLASMIDID.out.fasta_files + ch_plasmidid_kmer = PLASMIDID.out.kmer + ch_versions = ch_versions.mix(PLASMIDID.out.versions.first()) + } + + emit: + blast_txt = ch_blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ch_blast_filter_txt // channel: [ val(meta), [ txt ] ] + + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + + abacas_results = ch_abacas_results // channel: [ val(meta), [ results ] ] + + plasmidid_html = ch_plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ch_plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ch_plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ch_plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ch_plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ch_plasmidid_database // channel: [ val(meta), [ database/ ] ] + plasmidid_fasta = ch_plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + 
plasmidid_kmer = ch_plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/assembly_spades.nf b/subworkflows/local/assembly_spades.nf new file mode 100644 index 00000000..ee11801d --- /dev/null +++ b/subworkflows/local/assembly_spades.nf @@ -0,0 +1,133 @@ +// +// Assembly and downstream processing for SPAdes scaffolds +// + +include { SPADES } from '../../modules/nf-core/spades/main' +include { BANDAGE_IMAGE } from '../../modules/nf-core/bandage/image/main' +include { GUNZIP as GUNZIP_SCAFFOLDS } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GFA } from '../../modules/nf-core/gunzip/main' + +include { ASSEMBLY_QC } from './assembly_qc' + +workflow ASSEMBLY_SPADES { + take: + reads // channel: [ val(meta), [ reads ] ] + mode // string : spades assembly mode e.g. 'rnaviral' + hmm // channel: /path/to/spades.hmm + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + blast_filtered_header // channel: /path/to/blast_filtered_header.txt + + main: + + ch_versions = Channel.empty() + + // + // Filter for paired-end samples if running metaSPAdes / metaviralSPAdes / metaplasmidSPAdes + // + ch_reads = reads + if (mode.contains('meta') || mode.contains('bio')) { + reads + .filter { meta, illumina, pacbio, nanopore -> !meta.single_end } + .set { ch_reads } + } + + // + // Assemble reads with SPAdes + // + SPADES ( + ch_reads, + [], + hmm + ) + ch_versions = ch_versions.mix(SPADES.out.versions.first()) + + // + // Unzip scaffolds file + // + GUNZIP_SCAFFOLDS ( + SPADES.out.scaffolds + ) + ch_versions = ch_versions.mix(GUNZIP_SCAFFOLDS.out.versions.first()) + + // + // Unzip gfa file + // + GUNZIP_GFA ( + SPADES.out.gfa + ) + + // + // Filter for empty scaffold files + // + GUNZIP_SCAFFOLDS + .out + .gunzip + .filter { meta, scaffold -> 
scaffold.size() > 0 } + .set { ch_scaffolds } + + GUNZIP_GFA + .out + .gunzip + .filter { meta, gfa -> gfa.size() > 0 } + .set { ch_gfa } + + // + // Generate assembly visualisation with Bandage + // + ch_bandage_png = Channel.empty() + ch_bandage_svg = Channel.empty() + if (!params.skip_bandage) { + BANDAGE_IMAGE ( + ch_gfa + ) + ch_bandage_png = BANDAGE_IMAGE.out.png + ch_bandage_svg = BANDAGE_IMAGE.out.svg + ch_versions = ch_versions.mix(BANDAGE_IMAGE.out.versions.first()) + } + + // + // Downstream assembly steps + // + ASSEMBLY_QC ( + ch_scaffolds, + fasta, + gff, + blast_db, + blast_header, + blast_filtered_header + ) + ch_versions = ch_versions.mix(ASSEMBLY_QC.out.versions) + + emit: + scaffolds = SPADES.out.scaffolds // channel: [ val(meta), [ scaffolds ] ] + contigs = SPADES.out.contigs // channel: [ val(meta), [ contigs ] ] + transcripts = SPADES.out.transcripts // channel: [ val(meta), [ transcripts ] ] + gene_clusters = SPADES.out.gene_clusters // channel: [ val(meta), [ gene_clusters ] ] + gfa = SPADES.out.gfa // channel: [ val(meta), [ gfa ] ] + log_out = SPADES.out.log // channel: [ val(meta), [ log ] ] + + bandage_png = ch_bandage_png // channel: [ val(meta), [ png ] ] + bandage_svg = ch_bandage_svg // channel: [ val(meta), [ svg ] ] + + blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] + + quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] + + plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = 
ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] + plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/assembly_unicycler.nf b/subworkflows/local/assembly_unicycler.nf new file mode 100644 index 00000000..d6b0574d --- /dev/null +++ b/subworkflows/local/assembly_unicycler.nf @@ -0,0 +1,116 @@ +// +// Assembly and downstream processing for Unicycler scaffolds +// + +include { UNICYCLER } from '../../modules/nf-core/unicycler/main' +include { BANDAGE_IMAGE } from '../../modules/nf-core/bandage/image/main' +include { GUNZIP as GUNZIP_SCAFFOLDS } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GFA } from '../../modules/nf-core/gunzip/main' + +include { ASSEMBLY_QC } from './assembly_qc' + +workflow ASSEMBLY_UNICYCLER { + take: + reads // channel: [ val(meta), [ reads ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + blast_db // channel: /path/to/blast_db/ + blast_header // channel: /path/to/blast_header.txt + blast_filtered_header // channel: /path/to/blast_filtered_header.txt + + main: + + ch_versions = Channel.empty() + + // + // Assemble reads with Unicycler + // + UNICYCLER ( + reads + ) + ch_versions = ch_versions.mix(UNICYCLER.out.versions.first()) + + // + // Unzip scaffolds file + // + GUNZIP_SCAFFOLDS ( + UNICYCLER.out.scaffolds + ) + ch_versions = ch_versions.mix(GUNZIP_SCAFFOLDS.out.versions.first()) + + // + // Unzip gfa file + // + GUNZIP_GFA ( + UNICYCLER.out.gfa + ) + + // + // Filter for empty scaffold files + // + GUNZIP_SCAFFOLDS + .out + .gunzip + .filter { meta, scaffold -> 
scaffold.size() > 0 } + .set { ch_scaffolds } + + GUNZIP_GFA + .out + .gunzip + .filter { meta, gfa -> gfa.size() > 0 } + .set { ch_gfa } + + // + // Generate assembly visualisation with Bandage + // + ch_bandage_png = Channel.empty() + ch_bandage_svg = Channel.empty() + if (!params.skip_bandage) { + BANDAGE_IMAGE ( + ch_gfa + ) + ch_bandage_png = BANDAGE_IMAGE.out.png + ch_bandage_svg = BANDAGE_IMAGE.out.svg + ch_versions = ch_versions.mix(BANDAGE_IMAGE.out.versions.first()) + } + + // + // Downstream assembly steps + // + ASSEMBLY_QC ( + ch_scaffolds, + fasta, + gff, + blast_db, + blast_header, + blast_filtered_header + ) + ch_versions = ch_versions.mix(ASSEMBLY_QC.out.versions) + + emit: + scaffolds = UNICYCLER.out.scaffolds // channel: [ val(meta), [ scaffolds ] ] + gfa = UNICYCLER.out.gfa // channel: [ val(meta), [ gfa ] ] + log_out = UNICYCLER.out.log // channel: [ val(meta), [ log ] ] + + bandage_png = ch_bandage_png // channel: [ val(meta), [ png ] ] + bandage_svg = ch_bandage_svg // channel: [ val(meta), [ svg ] ] + + blast_txt = ASSEMBLY_QC.out.blast_txt // channel: [ val(meta), [ txt ] ] + blast_filter_txt = ASSEMBLY_QC.out.blast_filter_txt // channel: [ val(meta), [ txt ] ] + + quast_results = ASSEMBLY_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ASSEMBLY_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + + abacas_results = ASSEMBLY_QC.out.abacas_results // channel: [ val(meta), [ results ] ] + + plasmidid_html = ASSEMBLY_QC.out.plasmidid_html // channel: [ val(meta), [ html ] ] + plasmidid_tab = ASSEMBLY_QC.out.plasmidid_tab // channel: [ val(meta), [ tab ] ] + plasmidid_images = ASSEMBLY_QC.out.plasmidid_images // channel: [ val(meta), [ images/ ] ] + plasmidid_logs = ASSEMBLY_QC.out.plasmidid_logs // channel: [ val(meta), [ logs/ ] ] + plasmidid_data = ASSEMBLY_QC.out.plasmidid_data // channel: [ val(meta), [ data/ ] ] + plasmidid_database = ASSEMBLY_QC.out.plasmidid_database // channel: [ val(meta), [ database/ ] ] + 
plasmidid_fasta = ASSEMBLY_QC.out.plasmidid_fasta // channel: [ val(meta), [ fasta_files/ ] ] + plasmidid_kmer = ASSEMBLY_QC.out.plasmidid_kmer // channel: [ val(meta), [ kmer/ ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/bam_trim_primers_ivar.nf b/subworkflows/local/bam_trim_primers_ivar.nf new file mode 100644 index 00000000..7a82ce30 --- /dev/null +++ b/subworkflows/local/bam_trim_primers_ivar.nf @@ -0,0 +1,48 @@ +// +// iVar trim, sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { IVAR_TRIM } from '../../modules/nf-core/ivar/trim/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../nf-core/bam_sort_stats_samtools/main' + +workflow BAM_TRIM_PRIMERS_IVAR { + take: + bam // channel: [ val(meta), [ bam ], [bai] ] + bed // path : bed + fasta // channel: reference.fasta + + main: + + ch_versions = Channel.empty() + + // + // iVar trim primers + // + IVAR_TRIM ( + bam, + bed + ) + ch_versions = ch_versions.mix(IVAR_TRIM.out.versions.first()) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( + IVAR_TRIM.out.bam, + fasta + ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = IVAR_TRIM.out.bam // channel: [ val(meta), bam ] + log_out = IVAR_TRIM.out.log // channel: [ val(meta), log ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/consensus_bcftools.nf 
b/subworkflows/local/consensus_bcftools.nf new file mode 100644 index 00000000..1a3ae3a2 --- /dev/null +++ b/subworkflows/local/consensus_bcftools.nf @@ -0,0 +1,108 @@ +// +// Consensus calling with BCFTools and downstream processing QC +// + +include { BCFTOOLS_FILTER } from '../../modules/nf-core/bcftools/filter/main' +include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' +include { BEDTOOLS_MERGE } from '../../modules/nf-core/bedtools/merge/main' +include { BEDTOOLS_MASKFASTA } from '../../modules/nf-core/bedtools/maskfasta/main' +include { BCFTOOLS_CONSENSUS } from '../../modules/nf-core/bcftools/consensus/main' +include { MAKE_BED_MASK } from '../../modules/local/make_bed_mask' +include { RENAME_FASTA_HEADER } from '../../modules/local/rename_fasta_header' +include { CONSENSUS_QC } from './consensus_qc' + +workflow CONSENSUS_BCFTOOLS { + take: + bam // channel: [ val(meta), [ bam ] ] + vcf // channel: [ val(meta), [ vcf ] ] + tbi // channel: [ val(meta), [ tbi ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + nextclade_db // channel: /path/to/nextclade_db/ + + main: + + ch_versions = Channel.empty() + + // + // Filter variants by allele frequency, zip and index + // + BCFTOOLS_FILTER ( + vcf + ) + ch_versions = ch_versions.mix(BCFTOOLS_FILTER.out.versions.first()) + + TABIX_TABIX ( + BCFTOOLS_FILTER.out.vcf + ) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) + + // + // Create BED file with consensus regions to mask + // + MAKE_BED_MASK ( + bam.join(BCFTOOLS_FILTER.out.vcf, by: [0]), + fasta, + params.save_mpileup + ) + ch_versions = ch_versions.mix(MAKE_BED_MASK.out.versions.first()) + + // + // Merge intervals with BEDTools + // + BEDTOOLS_MERGE ( + MAKE_BED_MASK.out.bed + ) + ch_versions = ch_versions.mix(BEDTOOLS_MERGE.out.versions.first()) + + // + // Mask regions in consensus with BEDTools + // + BEDTOOLS_MASKFASTA ( + BEDTOOLS_MERGE.out.bed, + fasta + ) + ch_versions = 
ch_versions.mix(BEDTOOLS_MASKFASTA.out.versions.first()) + + // + // Call consensus sequence with BCFTools + // + BCFTOOLS_CONSENSUS ( + BCFTOOLS_FILTER.out.vcf.join(TABIX_TABIX.out.tbi, by: [0]).join(BEDTOOLS_MASKFASTA.out.fasta, by: [0]) + ) + ch_versions = ch_versions.mix(BCFTOOLS_CONSENSUS.out.versions.first()) + + // + // Rename consensus header adding sample name + // + RENAME_FASTA_HEADER ( + BCFTOOLS_CONSENSUS.out.fasta + ) + ch_versions = ch_versions.mix(RENAME_FASTA_HEADER.out.versions.first()) + + // + // Consensus sequence QC + // + CONSENSUS_QC ( + RENAME_FASTA_HEADER.out.fasta, + fasta, + gff, + nextclade_db + ) + ch_versions = ch_versions.mix(CONSENSUS_QC.out.versions.first()) + + emit: + consensus = RENAME_FASTA_HEADER.out.fasta // channel: [ val(meta), [ fasta ] ] + + quast_results = CONSENSUS_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = CONSENSUS_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + + pangolin_report = CONSENSUS_QC.out.pangolin_report // channel: [ val(meta), [ csv ] ] + + nextclade_report = CONSENSUS_QC.out.nextclade_report // channel: [ val(meta), [ csv ] ] + + bases_tsv = CONSENSUS_QC.out.bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = CONSENSUS_QC.out.bases_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/consensus_ivar.nf b/subworkflows/local/consensus_ivar.nf new file mode 100644 index 00000000..e96ffd44 --- /dev/null +++ b/subworkflows/local/consensus_ivar.nf @@ -0,0 +1,55 @@ +// +// Consensus calling with iVar and downstream processing QC +// + +include { IVAR_CONSENSUS } from '../../modules/nf-core/ivar/consensus/main' +include { CONSENSUS_QC } from './consensus_qc' + +workflow CONSENSUS_IVAR { + take: + bam // channel: [ val(meta), [ bam ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + nextclade_db // channel: /path/to/nextclade_db/ + + main: + + ch_versions = 
Channel.empty() + + // + // Call consensus sequence with iVar + // + IVAR_CONSENSUS ( + bam, + fasta, + params.save_mpileup + ) + ch_versions = ch_versions.mix(IVAR_CONSENSUS.out.versions.first()) + + // + // Consensus sequence QC + // + CONSENSUS_QC ( + IVAR_CONSENSUS.out.fasta, + fasta, + gff, + nextclade_db + ) + ch_versions = ch_versions.mix(CONSENSUS_QC.out.versions.first()) + + emit: + consensus = IVAR_CONSENSUS.out.fasta // channel: [ val(meta), [ fasta ] ] + consensus_qual = IVAR_CONSENSUS.out.qual // channel: [ val(meta), [ qual.txt ] ] + + quast_results = CONSENSUS_QC.out.quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = CONSENSUS_QC.out.quast_tsv // channel: [ val(meta), [ tsv ] ] + + pangolin_report = CONSENSUS_QC.out.pangolin_report // channel: [ val(meta), [ csv ] ] + + nextclade_report = CONSENSUS_QC.out.nextclade_report // channel: [ val(meta), [ csv ] ] + + bases_tsv = CONSENSUS_QC.out.bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = CONSENSUS_QC.out.bases_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/consensus_qc.nf b/subworkflows/local/consensus_qc.nf new file mode 100644 index 00000000..f498a9be --- /dev/null +++ b/subworkflows/local/consensus_qc.nf @@ -0,0 +1,93 @@ +// +// Consensus calling QC +// + +include { QUAST } from '../../modules/nf-core/quast/main' +include { PANGOLIN } from '../../modules/nf-core/pangolin/main' +include { NEXTCLADE_RUN } from '../../modules/nf-core/nextclade/run/main' +include { PLOT_BASE_DENSITY } from '../../modules/local/plot_base_density' + +workflow CONSENSUS_QC { + take: + consensus // channel: [ val(meta), [ consensus ] ] + fasta // channel: /path/to/genome.fasta + gff // channel: /path/to/genome.gff + nextclade_db // channel: /path/to/nextclade_db/ + + main: + + ch_versions = Channel.empty() + + // + // Consensus QC report across samples with QUAST + // + ch_quast_results = Channel.empty() + 
ch_quast_tsv = Channel.empty() + if (!params.skip_variants_quast) { + consensus + .collect{ it[1] } + .map { consensus_collect -> tuple([id: "quast"], consensus_collect) } + .set { ch_to_quast } + + QUAST ( + ch_to_quast, + fasta.map { [ [:], it ] }, + gff + ) + ch_quast_results = QUAST.out.results + ch_quast_tsv = QUAST.out.tsv + ch_versions = ch_versions.mix(QUAST.out.versions) + } + + // + // Lineage analysis with Pangolin + // + ch_pangolin_report = Channel.empty() + if (!params.skip_pangolin) { + PANGOLIN ( + consensus + ) + ch_pangolin_report = PANGOLIN.out.report + ch_versions = ch_versions.mix(PANGOLIN.out.versions.first()) + } + + // + // Lineage analysis with Nextclade + // + ch_nextclade_report = Channel.empty() + if (!params.skip_nextclade) { + NEXTCLADE_RUN ( + consensus, + nextclade_db + ) + ch_nextclade_report = NEXTCLADE_RUN.out.csv + ch_versions = ch_versions.mix(NEXTCLADE_RUN.out.versions.first()) + } + + // + // Plot consensus base density + // + ch_bases_tsv = Channel.empty() + ch_bases_pdf = Channel.empty() + if (!params.skip_consensus_plots) { + PLOT_BASE_DENSITY ( + consensus + ) + ch_bases_tsv = PLOT_BASE_DENSITY.out.tsv + ch_bases_pdf = PLOT_BASE_DENSITY.out.pdf + ch_versions = ch_versions.mix(PLOT_BASE_DENSITY.out.versions.first()) + } + + emit: + quast_results = ch_quast_results // channel: [ val(meta), [ results ] ] + quast_tsv = ch_quast_tsv // channel: [ val(meta), [ tsv ] ] + + pangolin_report = ch_pangolin_report // channel: [ val(meta), [ csv ] ] + + nextclade_report = ch_nextclade_report // channel: [ val(meta), [ csv ] ] + + bases_tsv = ch_bases_tsv // channel: [ val(meta), [ tsv ] ] + bases_pdf = ch_bases_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/fastq_trim_fastp_fastqc.nf b/subworkflows/local/fastq_trim_fastp_fastqc.nf new file mode 100644 index 00000000..cdfcc871 --- /dev/null +++ b/subworkflows/local/fastq_trim_fastp_fastqc.nf @@ -0,0 
+1,103 @@ +// +// Read QC and trimming +// + +include { FASTQC as FASTQC_RAW } from '../../modules/nf-core/fastqc/main' +include { FASTQC as FASTQC_TRIM } from '../../modules/nf-core/fastqc/main' +include { FASTP } from '../../modules/nf-core/fastp/main' + +// +// Function that parses fastp json output file to get total number of reads after trimming +// +import groovy.json.JsonSlurper + +def getFastpReadsAfterFiltering(json_file) { + def Map json = (Map) new JsonSlurper().parseText(json_file.text).get('summary') + return json['after_filtering']['total_reads'].toInteger() +} + +workflow FASTQ_TRIM_FASTP_FASTQC { + take: + reads // channel: [ val(meta), [ reads ] ] + adapter_fasta // file: adapter.fasta + discard_trimmed_pass // value: boolean + save_trimmed_fail // value: boolean + save_merged // value: boolean + + main: + + ch_versions = Channel.empty() + + fastqc_raw_html = Channel.empty() + fastqc_raw_zip = Channel.empty() + if (!params.skip_fastqc) { + FASTQC_RAW ( + reads + ) + fastqc_raw_html = FASTQC_RAW.out.html + fastqc_raw_zip = FASTQC_RAW.out.zip + ch_versions = ch_versions.mix(FASTQC_RAW.out.versions.first()) + } + + trim_reads = reads + trim_json = Channel.empty() + trim_html = Channel.empty() + trim_log = Channel.empty() + trim_reads_fail = Channel.empty() + trim_reads_merged = Channel.empty() + fastqc_trim_html = Channel.empty() + fastqc_trim_zip = Channel.empty() + if (!params.skip_fastp) { + FASTP ( + reads, + adapter_fasta, + discard_trimmed_pass, + save_trimmed_fail, + save_merged + ) + trim_reads = FASTP.out.reads + trim_json = FASTP.out.json + trim_html = FASTP.out.html + trim_log = FASTP.out.log + trim_reads_fail = FASTP.out.reads_fail + trim_reads_merged = FASTP.out.reads_merged + ch_versions = ch_versions.mix(FASTP.out.versions.first()) + + // + // Filter empty FastQ files after adapter trimming so FastQC doesn't fail + // + trim_reads + .join(trim_json) + .map { + meta, reads, json -> + if (getFastpReadsAfterFiltering(json) > 0) { + [ meta, 
reads ] + } + } + .set { trim_reads } + + if (!params.skip_fastqc) { + FASTQC_TRIM ( + trim_reads + ) + fastqc_trim_html = FASTQC_TRIM.out.html + fastqc_trim_zip = FASTQC_TRIM.out.zip + ch_versions = ch_versions.mix(FASTQC_TRIM.out.versions.first()) + } + } + + emit: + reads = trim_reads // channel: [ val(meta), [ reads ] ] + trim_json // channel: [ val(meta), [ json ] ] + trim_html // channel: [ val(meta), [ html ] ] + trim_log // channel: [ val(meta), [ log ] ] + trim_reads_fail // channel: [ val(meta), [ fastq.gz ] ] + trim_reads_merged // channel: [ val(meta), [ fastq.gz ] ] + + fastqc_raw_html // channel: [ val(meta), [ html ] ] + fastqc_raw_zip // channel: [ val(meta), [ zip ] ] + fastqc_trim_html // channel: [ val(meta), [ html ] ] + fastqc_trim_zip // channel: [ val(meta), [ zip ] ] + + versions = ch_versions.ifEmpty(null) // channel: [ versions.yml ] +} diff --git a/subworkflows/local/filter_bam_samtools.nf b/subworkflows/local/filter_bam_samtools.nf new file mode 100644 index 00000000..2d47a29a --- /dev/null +++ b/subworkflows/local/filter_bam_samtools.nf @@ -0,0 +1,50 @@ +// +// Filter co-ordinate sorted BAM, index and run samtools stats, flagstat and idxstats +// + +include { SAMTOOLS_VIEW } from '../../modules/nf-core/samtools/view/main' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../nf-core/bam_stats_samtools/main' + +workflow FILTER_BAM_SAMTOOLS { + take: + bam_bai // channel: [ val(meta), [ bam ], [ bai ] ] + fasta // path : fasta + + main: + + ch_versions = Channel.empty() + + // + // Filter BAM using Samtools view + // + SAMTOOLS_VIEW ( + bam_bai, + fasta, + [] + ) + ch_versions = ch_versions.mix(SAMTOOLS_VIEW.out.versions.first()) + + // + // Index BAM file and run samtools stats, flagstat and idxstats + // + SAMTOOLS_INDEX ( + SAMTOOLS_VIEW.out.bam + ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + BAM_STATS_SAMTOOLS ( + 
SAMTOOLS_VIEW.out.bam.join(SAMTOOLS_INDEX.out.bai, by: [0]), + fasta + ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_VIEW.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/prepare_genome_illumina.nf b/subworkflows/local/prepare_genome_illumina.nf new file mode 100644 index 00000000..efc79b39 --- /dev/null +++ b/subworkflows/local/prepare_genome_illumina.nf @@ -0,0 +1,260 @@ +// +// Uncompress and prepare reference genome files +// + +include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRIMER_FASTA } from '../../modules/nf-core/gunzip/main' +include { UNTAR as UNTAR_BOWTIE2_INDEX } from '../../modules/nf-core/untar/main' +include { UNTAR as UNTAR_NEXTCLADE_DB } from '../../modules/nf-core/untar/main' +include { UNTAR as UNTAR_KRAKEN2_DB } from '../../modules/nf-core/untar/main' +include { UNTAR as UNTAR_BLAST_DB } from '../../modules/nf-core/untar/main' +include { BOWTIE2_BUILD } from '../../modules/nf-core/bowtie2/build/main' +include { BLAST_MAKEBLASTDB } from '../../modules/nf-core/blast/makeblastdb/main' +include { BEDTOOLS_GETFASTA } from '../../modules/nf-core/bedtools/getfasta/main' +include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main' +include { NEXTCLADE_DATASETGET } from '../../modules/nf-core/nextclade/datasetget/main' +include { COLLAPSE_PRIMERS } from 
'../../modules/local/collapse_primers' +include { KRAKEN2_BUILD } from '../../modules/local/kraken2_build' +include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' + +workflow PREPARE_GENOME { + + take: + fasta + gff + primer_bed + bowtie2_index + nextclade_dataset + nextclade_dataset_name + nextclade_dataset_reference + nextclade_dataset_tag + + + main: + + ch_versions = Channel.empty() + + // + // Uncompress genome fasta file if required + // + if (fasta.endsWith('.gz')) { + GUNZIP_FASTA ( + [ [:], fasta ] + ) + ch_fasta = GUNZIP_FASTA.out.gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) + } else { + ch_fasta = Channel.value(file(fasta)) + } + + // + // Uncompress GFF annotation file + // + ch_gff = Channel.empty() + if (gff) { + if (gff.endsWith('.gz')) { + GUNZIP_GFF ( + [ [:], gff ] + ) + ch_gff = GUNZIP_GFF.out.gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) + } else { + ch_gff = Channel.value(file(gff)) + } + } + + // + // Create chromosome sizes file + // + CUSTOM_GETCHROMSIZES ( + ch_fasta.map { [ [:], it ] } + ) + ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] } + ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + + // + // Prepare reference files required for variant calling + // + ch_kraken2_db = Channel.empty() + if (!params.skip_kraken2) { + if (params.kraken2_db) { + if (params.kraken2_db.endsWith('.tar.gz')) { + UNTAR_KRAKEN2_DB ( + [ [:], params.kraken2_db ] + ) + ch_kraken2_db = UNTAR_KRAKEN2_DB.out.untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_KRAKEN2_DB.out.versions) + } else { + ch_kraken2_db = Channel.value(file(params.kraken2_db)) + } + } else { + KRAKEN2_BUILD ( + params.kraken2_db_name + ) + ch_kraken2_db = KRAKEN2_BUILD.out.db.first() + ch_versions = ch_versions.mix(KRAKEN2_BUILD.out.versions) + } + } + + // + // Prepare files required for amplicon data + // + ch_primer_bed = 
Channel.empty() + ch_primer_fasta = Channel.empty() + ch_primer_collapsed_bed = Channel.empty() + if (params.protocol == 'amplicon') { + if (primer_bed) { + if (primer_bed.endsWith('.gz')) { + GUNZIP_PRIMER_BED ( + [ [:], primer_bed ] + ) + ch_primer_bed = GUNZIP_PRIMER_BED.out.gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_PRIMER_BED.out.versions) + } else { + ch_primer_bed = Channel.value(file(primer_bed)) + } + } + + if (!params.skip_variants && !params.skip_mosdepth) { + COLLAPSE_PRIMERS ( + ch_primer_bed, + params.primer_left_suffix, + params.primer_right_suffix + ) + ch_primer_collapsed_bed = COLLAPSE_PRIMERS.out.bed + ch_versions = ch_versions.mix(COLLAPSE_PRIMERS.out.versions) + } + + if (!params.skip_assembly && !params.skip_cutadapt) { + if (params.primer_fasta) { + if (params.primer_fasta.endsWith('.gz')) { + GUNZIP_PRIMER_FASTA ( + [ [:], params.primer_fasta ] + ) + ch_primer_fasta = GUNZIP_PRIMER_FASTA.out.gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_PRIMER_FASTA.out.versions) + } else { + ch_primer_fasta = Channel.value(file(params.primer_fasta)) + } + } else { + BEDTOOLS_GETFASTA ( + ch_primer_bed.map { [ [:], it ] }, + ch_fasta + ) + ch_primer_fasta = BEDTOOLS_GETFASTA.out.fasta + ch_versions = ch_versions.mix(BEDTOOLS_GETFASTA.out.versions) + } + } + } + + // + // Prepare reference files required for variant calling + // + ch_bowtie2_index = Channel.empty() + if (!params.skip_variants) { + if (bowtie2_index) { + if (bowtie2_index.endsWith('.tar.gz')) { + UNTAR_BOWTIE2_INDEX ( + [ [:], file(bowtie2_index) ] + ) + ch_bowtie2_index = UNTAR_BOWTIE2_INDEX.out.untar + ch_versions = ch_versions.mix(UNTAR_BOWTIE2_INDEX.out.versions) + } else { + ch_bowtie2_index = [ [:], file(bowtie2_index) ] + } + } else { + BOWTIE2_BUILD ( + ch_fasta.map { [ [:], it ] } + ) + ch_bowtie2_index = BOWTIE2_BUILD.out.index + ch_versions = ch_versions.mix(BOWTIE2_BUILD.out.versions) + } + } + + // + // Prepare Nextclade dataset + // + 
ch_nextclade_db = Channel.empty() + if (!params.skip_consensus && !params.skip_nextclade) { + if (nextclade_dataset) { + if (nextclade_dataset.endsWith('.tar.gz')) { + UNTAR_NEXTCLADE_DB ( + [ [:], nextclade_dataset ] + ) + ch_nextclade_db = UNTAR_NEXTCLADE_DB.out.untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_NEXTCLADE_DB.out.versions) + } else { + ch_nextclade_db = Channel.value(file(nextclade_dataset)) + } + } else if (nextclade_dataset_name) { + NEXTCLADE_DATASETGET ( + nextclade_dataset_name, + nextclade_dataset_reference, + nextclade_dataset_tag + ) + ch_nextclade_db = NEXTCLADE_DATASETGET.out.dataset + ch_versions = ch_versions.mix(NEXTCLADE_DATASETGET.out.versions) + } + } + + // + // Prepare reference files required for de novo assembly + // + ch_blast_db = Channel.empty() + if (!params.skip_assembly) { + if (!params.skip_blast) { + if (params.blast_db) { + if (params.blast_db.endsWith('.tar.gz')) { + UNTAR_BLAST_DB ( + [ [:], params.blast_db ] + ) + ch_blast_db = UNTAR_BLAST_DB.out.untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR_BLAST_DB.out.versions) + } else { + ch_blast_db = Channel.value(file(params.blast_db)) + } + } else { + BLAST_MAKEBLASTDB ( + ch_fasta.map { [ [:], it ] } + ) + ch_blast_db = BLAST_MAKEBLASTDB.out.db + ch_versions = ch_versions.mix(BLAST_MAKEBLASTDB.out.versions) + } + } + } + + // + // Make snpEff database + // + ch_snpeff_db = Channel.empty() + ch_snpeff_config = Channel.empty() + if (!params.skip_variants && !params.skip_snpeff) { + SNPEFF_BUILD ( + ch_fasta, + ch_gff + ) + ch_snpeff_db = SNPEFF_BUILD.out.db + ch_snpeff_config = SNPEFF_BUILD.out.config + ch_versions = ch_versions.mix(SNPEFF_BUILD.out.versions) + } + + emit: + fasta = ch_fasta // path: genome.fasta + gff = ch_gff // path: genome.gff + fai = ch_fai // path: genome.fai + chrom_sizes = ch_chrom_sizes // path: genome.sizes + bowtie2_index = ch_bowtie2_index // channel: [ [:], bowtie2/index/ ] + primer_bed = ch_primer_bed // path: primer.bed + 
primer_collapsed_bed = ch_primer_collapsed_bed // path: primer.collapsed.bed + primer_fasta = ch_primer_fasta // path: primer.fasta + nextclade_db = ch_nextclade_db // path: nextclade_db + blast_db = ch_blast_db // path: blast_db/ + kraken2_db = ch_kraken2_db // path: kraken2_db/ + snpeff_db = ch_snpeff_db // path: snpeff_db + snpeff_config = ch_snpeff_config // path: snpeff.config + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/prepare_genome_nanopore.nf b/subworkflows/local/prepare_genome_nanopore.nf new file mode 100644 index 00000000..77a30645 --- /dev/null +++ b/subworkflows/local/prepare_genome_nanopore.nf @@ -0,0 +1,152 @@ +// +// Uncompress and prepare reference genome files +// + +include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/gunzip/main' +include { GUNZIP as GUNZIP_PRIMER_BED } from '../../modules/nf-core/gunzip/main' +include { UNTAR } from '../../modules/nf-core/untar/main' +include { CUSTOM_GETCHROMSIZES } from '../../modules/nf-core/custom/getchromsizes/main' +include { NEXTCLADE_DATASETGET } from '../../modules/nf-core/nextclade/datasetget/main' +include { COLLAPSE_PRIMERS } from '../../modules/local/collapse_primers' +include { SNPEFF_BUILD } from '../../modules/local/snpeff_build' + +workflow PREPARE_GENOME { + + take: + fasta + gff + primer_bed + bowtie2_index + nextclade_dataset + nextclade_dataset_name + nextclade_dataset_reference + nextclade_dataset_tag + + main: + + ch_versions = Channel.empty() + + // + // Uncompress genome fasta file if required + // + if (fasta.endsWith('.gz')) { + GUNZIP_FASTA ( + [ [:], fasta ] + ) + ch_fasta = GUNZIP_FASTA.out.gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions) + } else { + ch_fasta = Channel.value(file(fasta)) + } + + // + // Uncompress GFF annotation file + // + ch_gff = Channel.empty() + if (gff) { + if (gff.endsWith('.gz')) { + GUNZIP_GFF 
( + [ [:], gff ] + ) + ch_gff = GUNZIP_GFF.out.gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions) + } else { + ch_gff = Channel.value(file(gff)) + } + } + + // + // Create chromosome sizes file + // + CUSTOM_GETCHROMSIZES ( + ch_fasta.map { [ [:], it ] } + ) + ch_fai = CUSTOM_GETCHROMSIZES.out.fai.map { it[1] } + ch_chrom_sizes = CUSTOM_GETCHROMSIZES.out.sizes.map { it[1] } + ch_versions = ch_versions.mix(CUSTOM_GETCHROMSIZES.out.versions) + + // + // Uncompress primer BED file + // + ch_primer_bed = Channel.empty() + if (primer_bed) { + if (primer_bed.endsWith('.gz')) { + GUNZIP_PRIMER_BED ( + [ [:], primer_bed ] + ) + ch_primer_bed = GUNZIP_PRIMER_BED.out.gunzip.map { it[1] } + ch_versions = ch_versions.mix(GUNZIP_PRIMER_BED.out.versions) + } else { + ch_primer_bed = Channel.value(file(primer_bed)) + } + } + + // + // Generate collapsed BED file + // + ch_primer_collapsed_bed = Channel.empty() + if (!params.skip_mosdepth) { + COLLAPSE_PRIMERS ( + ch_primer_bed, + params.primer_left_suffix, + params.primer_right_suffix + ) + ch_primer_collapsed_bed = COLLAPSE_PRIMERS.out.bed + ch_versions = ch_versions.mix(COLLAPSE_PRIMERS.out.versions) + } + + // + // Prepare Nextclade dataset + // + ch_nextclade_db = Channel.empty() + if (!params.skip_consensus && !params.skip_nextclade) { + if (nextclade_dataset) { + if (nextclade_dataset.endsWith('.tar.gz')) { + UNTAR ( + [ [:], nextclade_dataset ] + ) + ch_nextclade_db = UNTAR.out.untar.map { it[1] } + ch_versions = ch_versions.mix(UNTAR.out.versions) + } else { + ch_nextclade_db = Channel.value(file(nextclade_dataset)) + } + } else if (nextclade_dataset_name) { + NEXTCLADE_DATASETGET ( + nextclade_dataset_name, + nextclade_dataset_reference, + nextclade_dataset_tag + ) + ch_nextclade_db = NEXTCLADE_DATASETGET.out.dataset + ch_versions = ch_versions.mix(NEXTCLADE_DATASETGET.out.versions) + } + } + + // + // Make snpEff database + // + ch_snpeff_db = Channel.empty() + ch_snpeff_config = 
Channel.empty() + if (!params.skip_snpeff) { + SNPEFF_BUILD ( + ch_fasta, + ch_gff + ) + ch_snpeff_db = SNPEFF_BUILD.out.db + ch_snpeff_config = SNPEFF_BUILD.out.config + ch_versions = ch_versions.mix(SNPEFF_BUILD.out.versions) + } + + emit: + fasta = ch_fasta // path: genome.fasta + gff = ch_gff // path: genome.gff + fai = ch_fai // path: genome.fai + chrom_sizes = ch_chrom_sizes // path: genome.sizes + primer_bed = ch_primer_bed // path: primer.bed + primer_collapsed_bed = ch_primer_collapsed_bed // path: primer.collapsed.bed + nextclade_db = ch_nextclade_db // path: nextclade_db + snpeff_db = ch_snpeff_db // path: snpeff_db + snpeff_config = ch_snpeff_config // path: snpeff.config + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/snpeff_snpsift.nf b/subworkflows/local/snpeff_snpsift.nf new file mode 100644 index 00000000..a3c6c7f7 --- /dev/null +++ b/subworkflows/local/snpeff_snpsift.nf @@ -0,0 +1,55 @@ +// +// Run snpEff, bgzip, tabix, stats and SnpSift commands +// + +include { SNPEFF_ANN } from '../../modules/local/snpeff_ann' +include { SNPSIFT_EXTRACTFIELDS } from '../../modules/local/snpsift_extractfields' + +include { VCF_BGZIP_TABIX_STATS } from './vcf_bgzip_tabix_stats' + +workflow SNPEFF_SNPSIFT { + take: + vcf // channel: [ val(meta), [ vcf ] ] + db // path : snpEff database + config // path : snpEff config + fasta // path : genome.fasta + + main: + + ch_versions = Channel.empty() + + SNPEFF_ANN ( + vcf, + db, + config, + fasta + ) + ch_versions = ch_versions.mix(SNPEFF_ANN.out.versions.first()) + + VCF_BGZIP_TABIX_STATS ( + SNPEFF_ANN.out.vcf, + [ [:], [] ], + [ [:], [] ], + [ [:], [] ] + ) + ch_versions = ch_versions.mix(VCF_BGZIP_TABIX_STATS.out.versions) + + SNPSIFT_EXTRACTFIELDS ( + VCF_BGZIP_TABIX_STATS.out.vcf + ) + ch_versions = ch_versions.mix(SNPSIFT_EXTRACTFIELDS.out.versions.first()) + + emit: + csv = SNPEFF_ANN.out.csv // channel: [ val(meta), [ csv ] ] + txt = SNPEFF_ANN.out.txt // channel: [ 
val(meta), [ txt ] ] + html = SNPEFF_ANN.out.html // channel: [ val(meta), [ html ] ] + + vcf = VCF_BGZIP_TABIX_STATS.out.vcf // channel: [ val(meta), [ vcf.gz ] ] + tbi = VCF_BGZIP_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = VCF_BGZIP_TABIX_STATS.out.csi // channel: [ val(meta), [ csi ] ] + stats = VCF_BGZIP_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + + snpsift_txt = SNPSIFT_EXTRACTFIELDS.out.txt // channel: [ val(meta), [ txt ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/utils_nfcore_viralrecon_pipeline/main.nf b/subworkflows/local/utils_nfcore_viralrecon_pipeline/main.nf index 4bbe95ca..cf6714f6 100644 --- a/subworkflows/local/utils_nfcore_viralrecon_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_viralrecon_pipeline/main.nf @@ -71,26 +71,38 @@ workflow PIPELINE_INITIALISATION { // // Create channel from input file provided through params.input // - - Channel - .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) - .map { - meta, fastq_1, fastq_2 -> - if (!fastq_2) { - return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] - } else { - return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + if (params.platform == 'illumina') { + Channel + .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .map { + meta, fastq_1, fastq_2, barcode -> + if (!fastq_2) { + return [ meta.id, meta + [ single_end:true ], [ fastq_1 ] ] + } else { + return [ meta.id, meta + [ single_end:false ], [ fastq_1, fastq_2 ] ] + } + } + .groupTuple() + .map { + validateInputSamplesheet(it) + } + .map { + meta, fastqs -> + return [ meta, fastqs.flatten() ] + } + .set { ch_samplesheet } + } else { + ch_samplesheet = Channel.empty() + if (input) { + Channel + .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + .map { + meta, fastq_1, fastq_2, barcode -> + tuple( "barcode"+ 
String.format('%02d', barcode).toString(), meta.id) } + .set { ch_samplesheet } } - .groupTuple() - .map { samplesheet -> - validateInputSamplesheet(samplesheet) - } - .map { - meta, fastqs -> - return [ meta, fastqs.flatten() ] - } - .set { ch_samplesheet } + } emit: samplesheet = ch_samplesheet @@ -170,17 +182,6 @@ def validateInputSamplesheet(input) { return [ metas[0], fastqs ] } -// -// Get attribute from genome config file e.g. fasta -// -def getGenomeAttribute(attribute) { - if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) { - if (params.genomes[ params.genome ].containsKey(attribute)) { - return params.genomes[ params.genome ][ attribute ] - } - } - return null -} // // Exit pipeline if incorrect --genome key provided diff --git a/subworkflows/local/variants_bcftools.nf b/subworkflows/local/variants_bcftools.nf new file mode 100644 index 00000000..e4a78c72 --- /dev/null +++ b/subworkflows/local/variants_bcftools.nf @@ -0,0 +1,109 @@ +// +// Variant calling with BCFTools, downstream processing and QC +// + +include { BCFTOOLS_MPILEUP } from '../../modules/nf-core/bcftools/mpileup/main' +include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main' +include { VCF_TABIX_STATS } from './vcf_tabix_stats' +include { VARIANTS_QC } from './variants_qc' + +workflow VARIANTS_BCFTOOLS { + take: + bam // channel: [ val(meta), [ bam ] ] + fasta // channel: /path/to/genome.fasta + sizes // channel: /path/to/genome.sizes + gff // channel: /path/to/genome.gff + bed // channel: /path/to/primers.bed + snpeff_db // channel: /path/to/snpeff_db/ + snpeff_config // channel: /path/to/snpeff.config + + main: + + ch_versions = Channel.empty() + + // + // Call variants + // + BCFTOOLS_MPILEUP ( + bam.map{ meta, bam_file -> [ meta, bam_file, [] ] }, + fasta.map { [ [:], it ] }, + params.save_mpileup + ) + ch_versions = ch_versions.mix(BCFTOOLS_MPILEUP.out.versions.first()) + + // Filter out samples with 0 variants + BCFTOOLS_MPILEUP + .out 
+ .vcf + .join(BCFTOOLS_MPILEUP.out.tbi) + .join(BCFTOOLS_MPILEUP.out.stats) + .filter { meta, vcf, tbi, stats -> WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0 } + .set { ch_vcf_tbi_stats } + + ch_vcf_tbi_stats + .map { meta, vcf, tbi, stats -> [ meta, vcf ] } + .set { ch_vcf } + + ch_vcf_tbi_stats + .map { meta, vcf, tbi, stats -> [ meta, tbi ] } + .set { ch_tbi } + + ch_vcf_tbi_stats + .map { meta, vcf, tbi, stats -> [ meta, stats ] } + .set { ch_stats } + + // + // Split multi-allelic positions + // + BCFTOOLS_NORM ( + ch_vcf.join(ch_tbi, by: [0]), + fasta.map { [ [:], it ] } + ) + ch_versions = ch_versions.mix(BCFTOOLS_NORM.out.versions.first()) + + VCF_TABIX_STATS ( + BCFTOOLS_NORM.out.vcf, + [ [:], [] ], + [ [:], [] ], + [ [:], [] ] + ) + ch_versions = ch_versions.mix(VCF_TABIX_STATS.out.versions) + + // + // Run downstream tools for variants QC + // + VARIANTS_QC ( + bam, + BCFTOOLS_NORM.out.vcf, + VCF_TABIX_STATS.out.stats, + fasta, + sizes, + gff, + bed, + snpeff_db, + snpeff_config + ) + ch_versions = ch_versions.mix(VARIANTS_QC.out.versions) + + emit: + vcf_orig = ch_vcf // channel: [ val(meta), [ vcf ] ] + tbi_orig = ch_tbi // channel: [ val(meta), [ tbi ] ] + stats_orig = ch_stats // channel: [ val(meta), [ txt ] ] + + vcf = BCFTOOLS_NORM.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = VCF_TABIX_STATS.out.csi // channel: [ val(meta), [ csi ] ] + stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + + snpeff_vcf = VARIANTS_QC.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = VARIANTS_QC.out.snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = VARIANTS_QC.out.snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = VARIANTS_QC.out.snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = VARIANTS_QC.out.snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = VARIANTS_QC.out.snpeff_html // channel: [ val(meta), [ 
html ] ] + snpsift_txt = VARIANTS_QC.out.snpsift_txt // channel: [ val(meta), [ txt ] ] + + asciigenome_pdf = VARIANTS_QC.out.asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/variants_ivar.nf b/subworkflows/local/variants_ivar.nf new file mode 100644 index 00000000..eceab2bd --- /dev/null +++ b/subworkflows/local/variants_ivar.nf @@ -0,0 +1,108 @@ +// +// Variant calling with IVar, downstream processing and QC +// + +include { IVAR_VARIANTS } from '../../modules/nf-core/ivar/variants/main' +include { IVAR_VARIANTS_TO_VCF } from '../../modules/local/ivar_variants_to_vcf' +include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main' +include { VCF_TABIX_STATS } from './vcf_tabix_stats' +include { VARIANTS_QC } from './variants_qc' + +workflow VARIANTS_IVAR { + take: + bam // channel: [ val(meta), [ bam ] ] + fasta // channel: /path/to/genome.fasta + fai // channel: /path/to/genome.fai + sizes // channel: /path/to/genome.sizes + gff // channel: /path/to/genome.gff + bed // channel: /path/to/primers.bed + snpeff_db // channel: /path/to/snpeff_db/ + snpeff_config // channel: /path/to/snpeff.config + ivar_multiqc_header // channel: /path/to/multiqc_header for ivar variants + + main: + + ch_versions = Channel.empty() + + // + // Call variants + // + IVAR_VARIANTS ( + bam, + fasta, + fai, + gff, + params.save_mpileup + ) + ch_versions = ch_versions.mix(IVAR_VARIANTS.out.versions.first()) + + // Filter out samples with 0 variants + IVAR_VARIANTS + .out + .tsv + .filter { meta, tsv -> WorkflowCommons.getNumLinesInFile(tsv) > 1 } + .set { ch_ivar_tsv } + + // + // Convert original iVar output to VCF, zip and index + // + IVAR_VARIANTS_TO_VCF ( + ch_ivar_tsv, + fasta, + ivar_multiqc_header + ) + ch_versions = ch_versions.mix(IVAR_VARIANTS_TO_VCF.out.versions.first()) + + BCFTOOLS_SORT ( + IVAR_VARIANTS_TO_VCF.out.vcf + ) + ch_versions = 
ch_versions.mix(BCFTOOLS_SORT.out.versions.first()) + + VCF_TABIX_STATS ( + BCFTOOLS_SORT.out.vcf, + [ [:], [] ], + [ [:], [] ], + [ [:], [] ] + ) + ch_versions = ch_versions.mix(VCF_TABIX_STATS.out.versions) + + // + // Run downstream tools for variants QC + // + VARIANTS_QC ( + bam, + BCFTOOLS_SORT.out.vcf, + VCF_TABIX_STATS.out.stats, + fasta, + sizes, + gff, + bed, + snpeff_db, + snpeff_config + ) + ch_versions = ch_versions.mix(VARIANTS_QC.out.versions) + + emit: + tsv = ch_ivar_tsv // channel: [ val(meta), [ tsv ] ] + + vcf_orig = IVAR_VARIANTS_TO_VCF.out.vcf // channel: [ val(meta), [ vcf ] ] + log_out = IVAR_VARIANTS_TO_VCF.out.log // channel: [ val(meta), [ log ] ] + multiqc_tsv = IVAR_VARIANTS_TO_VCF.out.tsv // channel: [ val(meta), [ tsv ] ] + + vcf = BCFTOOLS_SORT.out.vcf // channel: [ val(meta), [ vcf ] ] + tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = VCF_TABIX_STATS.out.csi // channel: [ val(meta), [ csi ] ] + stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + + snpeff_vcf = VARIANTS_QC.out.snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = VARIANTS_QC.out.snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = VARIANTS_QC.out.snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = VARIANTS_QC.out.snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = VARIANTS_QC.out.snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = VARIANTS_QC.out.snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = VARIANTS_QC.out.snpsift_txt // channel: [ val(meta), [ txt ] ] + + asciigenome_pdf = VARIANTS_QC.out.asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/variants_long_table.nf b/subworkflows/local/variants_long_table.nf new file mode 100644 index 00000000..e0919055 --- /dev/null +++ b/subworkflows/local/variants_long_table.nf @@ -0,0 +1,39 @@ +// +// Create a long table with 
variant information including AA changes and lineage info +// + +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query/main' +include { MAKE_VARIANTS_LONG_TABLE } from '../../modules/local/make_variants_long_table' + +workflow VARIANTS_LONG_TABLE { + take: + vcf // channel: [ val(meta), [ vcf ] ] + tbi // channel: [ val(meta), [ tbi ] ] + snpsift // channel: [ val(meta), [ txt ] ] + pangolin // channel: [ val(meta), [ csv ] ] + + main: + + ch_versions = Channel.empty() + + BCFTOOLS_QUERY ( + vcf.join(tbi, by: [0]), + [], + [], + [] + ) + ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first()) + + MAKE_VARIANTS_LONG_TABLE ( + BCFTOOLS_QUERY.out.output.collect{it[1]}, + snpsift.collect{it[1]}.ifEmpty([]), + pangolin.collect{it[1]}.ifEmpty([]) + ) + ch_versions = ch_versions.mix(MAKE_VARIANTS_LONG_TABLE.out.versions) + + emit: + query_table = BCFTOOLS_QUERY.out.output // channel: [ val(meta), [ txt ] ] + long_table = MAKE_VARIANTS_LONG_TABLE.out.csv // channel: [ val(meta), [ csv ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/variants_qc.nf b/subworkflows/local/variants_qc.nf new file mode 100644 index 00000000..cd5762b3 --- /dev/null +++ b/subworkflows/local/variants_qc.nf @@ -0,0 +1,91 @@ +// +// Variant calling QC +// + +include { ASCIIGENOME } from '../../modules/local/asciigenome' +include { SNPEFF_SNPSIFT } from './snpeff_snpsift' + +workflow VARIANTS_QC { + take: + bam // channel: [ val(meta), [ bam ] ] + vcf // channel: [ val(meta), [ vcf ] ] + stats // channel: [ val(meta), [ bcftools_stats ] ] + fasta // channel: /path/to/genome.fasta + sizes // channel: /path/to/genome.sizes + gff // channel: /path/to/genome.gff + bed // channel: /path/to/primers.bed + snpeff_db // channel: /path/to/snpeff_db/ + snpeff_config // channel: /path/to/snpeff.config + + main: + + ch_versions = Channel.empty() + + // + // Annotate variants + // + ch_snpeff_vcf = Channel.empty() + ch_snpeff_tbi = 
Channel.empty() + ch_snpeff_stats = Channel.empty() + ch_snpeff_csv = Channel.empty() + ch_snpeff_txt = Channel.empty() + ch_snpeff_html = Channel.empty() + ch_snpsift_txt = Channel.empty() + if (gff && !params.skip_snpeff) { + SNPEFF_SNPSIFT ( + vcf, + snpeff_db, + snpeff_config, + fasta + ) + ch_snpeff_vcf = SNPEFF_SNPSIFT.out.vcf + ch_snpeff_tbi = SNPEFF_SNPSIFT.out.tbi + ch_snpeff_stats = SNPEFF_SNPSIFT.out.stats + ch_snpeff_csv = SNPEFF_SNPSIFT.out.csv + ch_snpeff_txt = SNPEFF_SNPSIFT.out.txt + ch_snpeff_html = SNPEFF_SNPSIFT.out.html + ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt + ch_versions = ch_versions.mix(SNPEFF_SNPSIFT.out.versions) + } + + // + // Variant screenshots with ASCIIGenome + // + ch_asciigenome_pdf = Channel.empty() + if (!params.skip_asciigenome) { + bam + .join(vcf, by: [0]) + .join(stats, by: [0]) + .map { meta, bam, vcf, stats -> + if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { + return [ meta, bam, vcf ] + } + } + .set { ch_asciigenome } + + ASCIIGENOME ( + ch_asciigenome, + fasta, + sizes, + gff, + bed, + params.asciigenome_window_size, + params.asciigenome_read_depth + ) + ch_asciigenome_pdf = ASCIIGENOME.out.pdf + ch_versions = ch_versions.mix(ASCIIGENOME.out.versions.first()) + } + + emit: + snpeff_vcf = ch_snpeff_vcf // channel: [ val(meta), [ vcf.gz ] ] + snpeff_tbi = ch_snpeff_tbi // channel: [ val(meta), [ tbi ] ] + snpeff_stats = ch_snpeff_stats // channel: [ val(meta), [ txt ] ] + snpeff_csv = ch_snpeff_csv // channel: [ val(meta), [ csv ] ] + snpeff_txt = ch_snpeff_txt // channel: [ val(meta), [ txt ] ] + snpeff_html = ch_snpeff_html // channel: [ val(meta), [ html ] ] + snpsift_txt = ch_snpsift_txt // channel: [ val(meta), [ txt ] ] + + asciigenome_pdf = ch_asciigenome_pdf // channel: [ val(meta), [ pdf ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/vcf_bgzip_tabix_stats.nf b/subworkflows/local/vcf_bgzip_tabix_stats.nf new file mode 100644 index 
00000000..60f1b274 --- /dev/null +++ b/subworkflows/local/vcf_bgzip_tabix_stats.nf @@ -0,0 +1,40 @@ +// +// Run BCFTools bgzip, tabix and stats commands +// + +include { TABIX_BGZIP } from '../../modules/nf-core/tabix/bgzip/main' +include { VCF_TABIX_STATS } from './vcf_tabix_stats' + +workflow VCF_BGZIP_TABIX_STATS { + take: + vcf // channel: [ val(meta), [ vcf ] ] + regions // file: regions.txt + targets // file: targets.txt + samples // file: samples.txt + + main: + + ch_versions = Channel.empty() + + TABIX_BGZIP ( + vcf + ) + ch_versions = ch_versions.mix(TABIX_BGZIP.out.versions.first()) + + VCF_TABIX_STATS ( + TABIX_BGZIP.out.output, + regions, + targets, + samples + ) + ch_versions = ch_versions.mix(VCF_TABIX_STATS.out.versions) + + emit: + vcf = TABIX_BGZIP.out.output // channel: [ val(meta), [ vcf.gz ] ] + + tbi = VCF_TABIX_STATS.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = VCF_TABIX_STATS.out.csi // channel: [ val(meta), [ csi ] ] + stats = VCF_TABIX_STATS.out.stats // channel: [ val(meta), [ txt ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/local/vcf_tabix_stats.nf b/subworkflows/local/vcf_tabix_stats.nf new file mode 100644 index 00000000..2198fc95 --- /dev/null +++ b/subworkflows/local/vcf_tabix_stats.nf @@ -0,0 +1,42 @@ +// +// Run BCFTools tabix and stats commands +// + +include { TABIX_TABIX } from '../../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_STATS } from '../../modules/nf-core/bcftools/stats/main' + +workflow VCF_TABIX_STATS { + take: + vcf // channel: [ val(meta), [ vcf ] ] + regions // file: regions.txt + targets // file: targets.txt + samples // file: samples.txt + + main: + + ch_versions = Channel.empty() + + TABIX_TABIX ( + vcf + ) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) + + BCFTOOLS_STATS ( + vcf.join(TABIX_TABIX.out.tbi, by: [0]), + regions, + targets, + samples, + [ [:], [] ], + [ [:], [] ] + ) + ch_versions = 
ch_versions.mix(BCFTOOLS_STATS.out.versions.first()) + + emit: + tbi = TABIX_TABIX.out.tbi // channel: [ val(meta), [ tbi ] ] + csi = TABIX_TABIX.out.csi // channel: [ val(meta), [ csi ] ] + + stats = BCFTOOLS_STATS.out.stats // channel: [ val(meta), [ txt ] ] + + versions = ch_versions // channel: [ versions.yml ] + +} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/main.nf b/subworkflows/nf-core/bam_markduplicates_picard/main.nf new file mode 100644 index 00000000..2de059b8 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/main.nf @@ -0,0 +1,54 @@ +// +// Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats +// + +include { PICARD_MARKDUPLICATES } from '../../../modules/nf-core/picard/markduplicates/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_MARKDUPLICATES_PICARD { + + take: + ch_reads // channel: [ val(meta), path(reads) ] + ch_fasta // channel: [ path(fasta) ] + ch_fai // channel: [ path(fai) ] + + main: + + ch_versions = Channel.empty() + + PICARD_MARKDUPLICATES ( ch_reads, ch_fasta, ch_fai ) + ch_versions = ch_versions.mix(PICARD_MARKDUPLICATES.out.versions.first()) + + ch_markdup = PICARD_MARKDUPLICATES.out.bam.mix(PICARD_MARKDUPLICATES.out.cram) + + SAMTOOLS_INDEX ( ch_markdup ) + ch_versions = ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + ch_reads_index = ch_markdup + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.crai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map{meta, reads, bai, crai, csi -> + if (bai) [ meta, reads, bai ] + else if (crai) [ meta, reads, crai ] + else [ meta, reads, csi ] + } + + BAM_STATS_SAMTOOLS ( ch_reads_index, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = PICARD_MARKDUPLICATES.out.bam // channel: [ val(meta), 
path(bam) ] + cram = PICARD_MARKDUPLICATES.out.cram // channel: [ val(meta), path(cram) ] + metrics = PICARD_MARKDUPLICATES.out.metrics // channel: [ val(meta), path(metrics) ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), path(bai) ] + crai = SAMTOOLS_INDEX.out.crai // channel: [ val(meta), path(crai) ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), path(csi) ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/meta.yml b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml new file mode 100644 index 00000000..433d35b2 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/meta.yml @@ -0,0 +1,71 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_markduplicates_picard" +description: Picard MarkDuplicates, index BAM file and run samtools stats, flagstat and idxstats +keywords: + - markduplicates + - bam + - sam + - cram +components: + - picard/markduplicates + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - ch_reads: + description: | + Sequence reads in BAM/CRAM/SAM format + Structure: [ val(meta), path(reads) ] + - ch_fasta: + description: | + Reference genome fasta file required for CRAM input + Structure: [ path(fasta) ] + - ch_fai: + description: | + Index of the reference genome fasta file + Structure: [ path(fai) ] +output: + - bam: + description: | + processed BAM/SAM file + Structure: [ val(meta), path(bam) ] + - bai: + description: | + BAM/SAM samtools index + Structure: [ val(meta), path(bai) ] + - cram: + description: | + processed CRAM file + Structure: [ 
val(meta), path(cram) ] + - crai: + description: | + CRAM samtools index + Structure: [ val(meta), path(crai) ] + - csi: + description: | + CSI samtools index + Structure: [ val(meta), path(csi) ] + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats) ] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@dmarron" + - "@drpatelh" +maintainers: + - "@dmarron" + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test new file mode 100644 index 00000000..5ef337dc --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test @@ -0,0 +1,93 @@ +nextflow_workflow { + + name "Test Workflow BAM_MARKDUPLICATES_PICARD" + script "../main.nf" + workflow "BAM_MARKDUPLICATES_PICARD" + + tag "picard" + tag "picard/markduplicates" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_markduplicates_picard" + tag "subworkflows/bam_markduplicates_picard" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/index" + tag "samtools/stats" + + test("sarscov2 - bam") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end: false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + path(workflow.out.bam[0][1]), + path(workflow.out.bai[0][1]), + path(workflow.out.flagstat[0][1]), + path(workflow.out.idxstats[0][1]), + path(workflow.out.stats[0][1]), + ).match("sarscov2 - bam") }, + { assert path(workflow.out.metrics.get(0).get(1)).getText().contains("97") } + ) + } + } + + test("homo_sapiens - cram") { + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + input[2] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.cram[0][1]).name, + path(workflow.out.crai[0][1]), + path(workflow.out.flagstat[0][1]), + path(workflow.out.idxstats[0][1]), + path(workflow.out.stats[0][1]), + ).match("homo_sapiens - cram") }, + { assert path(workflow.out.metrics.get(0).get(1)).getText().contains("0.999986") } + ) + } + } + +} diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap new file mode 100644 index 00000000..caf4ac8a --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/main.nf.test.snap @@ -0,0 +1,30 @@ +{ + "homo_sapiens - cram": { + "content": [ + "test.cram", + "test.cram.crai:md5,78d47ba01ac4e05f3ae1e353902a989e", + "test.flagstat:md5,93b0ef463df947ede1f42ff60396c34d", + 
"test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15", + "test.stats:md5,c2f74a4d9b2377bcf4f4f184da3801af" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T20:45:38.364189" + }, + "sarscov2 - bam": { + "content": [ + "test.bam:md5,3091fe6ba1b7530f382fe40b9fd8f45b", + "test.bam.bai:md5,4d3ae8d013444b55e17aa0149a2ab404", + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783", + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2", + "test.stats:md5,d7796222a087b9bb97f631f1c21b9c95" + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-21T11:38:08.434529" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_markduplicates_picard/tests/tags.yml b/subworkflows/nf-core/bam_markduplicates_picard/tests/tags.yml new file mode 100644 index 00000000..10b85270 --- /dev/null +++ b/subworkflows/nf-core/bam_markduplicates_picard/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_markduplicates_picard: + - subworkflows/nf-core/bam_markduplicates_picard/** diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/main.nf b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf new file mode 100644 index 00000000..b716375b --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/main.nf @@ -0,0 +1,50 @@ +// +// Sort, index BAM file and run samtools stats, flagstat and idxstats +// + +include { SAMTOOLS_SORT } from '../../../modules/nf-core/samtools/sort/main' +include { SAMTOOLS_INDEX } from '../../../modules/nf-core/samtools/index/main' +include { BAM_STATS_SAMTOOLS } from '../bam_stats_samtools/main' + +workflow BAM_SORT_STATS_SAMTOOLS { + take: + ch_bam // channel: [ val(meta), [ bam ] ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + + ch_versions = Channel.empty() + + SAMTOOLS_SORT ( ch_bam, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_SORT.out.versions.first()) + + SAMTOOLS_INDEX ( SAMTOOLS_SORT.out.bam ) + ch_versions = 
ch_versions.mix(SAMTOOLS_INDEX.out.versions.first()) + + SAMTOOLS_SORT.out.bam + .join(SAMTOOLS_INDEX.out.bai, by: [0], remainder: true) + .join(SAMTOOLS_INDEX.out.csi, by: [0], remainder: true) + .map { + meta, bam, bai, csi -> + if (bai) { + [ meta, bam, bai ] + } else { + [ meta, bam, csi ] + } + } + .set { ch_bam_bai } + + BAM_STATS_SAMTOOLS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(BAM_STATS_SAMTOOLS.out.versions) + + emit: + bam = SAMTOOLS_SORT.out.bam // channel: [ val(meta), [ bam ] ] + bai = SAMTOOLS_INDEX.out.bai // channel: [ val(meta), [ bai ] ] + csi = SAMTOOLS_INDEX.out.csi // channel: [ val(meta), [ csi ] ] + + stats = BAM_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml new file mode 100644 index 00000000..e01f9ccf --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/meta.yml @@ -0,0 +1,70 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_sort_stats_samtools +description: Sort SAM/BAM/CRAM file +keywords: + - sort + - bam + - sam + - cram +components: + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - bam: + type: file + description: BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - fasta: + type: file + description: Reference genome fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - bam: + type: file + description: Sorted BAM/CRAM/SAM file + pattern: "*.{bam,cram,sam}" + - bai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - crai: + type: file + description: BAM/CRAM/SAM index file + pattern: "*.{bai,crai,sai}" + - stats: + type: file + description: File containing samtools stats output + pattern: "*.{stats}" + - flagstat: + type: file + description: File containing samtools flagstat output + pattern: "*.{flagstat}" + - idxstats: + type: file + description: File containing samtools idxstats output + pattern: "*.{idxstats}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@drpatelh" + - "@ewels" +maintainers: + - "@drpatelh" + - "@ewels" diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..75b5b934 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test @@ -0,0 +1,82 @@ +nextflow_workflow { + + name "Test Workflow BAM_SORT_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_SORT_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/bam_sort_stats_samtools" + tag "bam_sort_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "bam_stats_samtools" + tag "samtools" + tag "samtools/index" + tag "samtools/sort" + tag "samtools/stats" + tag "samtools/idxstats" + tag "samtools/flagstat" + + test("test_bam_sort_stats_samtools_single_end") { + + when { + 
params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_single_end_idxstats") } + ) + } + } + + test("test_bam_sort_stats_samtools_paired_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert workflow.out.bam.get(0).get(1) ==~ ".*.bam"}, + { assert workflow.out.bai.get(0).get(1) ==~ ".*.bai"}, + { assert snapshot(workflow.out.stats).match("test_bam_sort_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_sort_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_sort_stats_samtools_paired_end_idxstats") } + ) + } + } +} diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap 
b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..6645a092 --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,110 @@ +{ + "test_bam_sort_stats_samtools_paired_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-10-22T20:25:03.687121177" + }, + "test_bam_sort_stats_samtools_paired_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-10-22T20:25:03.709648916" + }, + "test_bam_sort_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,cb0bf2b79de52fdf0c61e80efcdb0bb4" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:44:38.553256801" + }, + "test_bam_sort_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d7796222a087b9bb97f631f1c21b9c95" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:44:48.355870518" + }, + "test_bam_sort_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:10:02.84631" + }, + "test_bam_sort_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": 
"24.01.0" + }, + "timestamp": "2024-01-18T17:10:02.829756" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml new file mode 100644 index 00000000..30b69d6a --- /dev/null +++ b/subworkflows/nf-core/bam_sort_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_sort_stats_samtools: + - subworkflows/nf-core/bam_sort_stats_samtools/** diff --git a/subworkflows/nf-core/bam_stats_samtools/main.nf b/subworkflows/nf-core/bam_stats_samtools/main.nf new file mode 100644 index 00000000..44d4c010 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/main.nf @@ -0,0 +1,32 @@ +// +// Run SAMtools stats, flagstat and idxstats +// + +include { SAMTOOLS_STATS } from '../../../modules/nf-core/samtools/stats/main' +include { SAMTOOLS_IDXSTATS } from '../../../modules/nf-core/samtools/idxstats/main' +include { SAMTOOLS_FLAGSTAT } from '../../../modules/nf-core/samtools/flagstat/main' + +workflow BAM_STATS_SAMTOOLS { + take: + ch_bam_bai // channel: [ val(meta), path(bam), path(bai) ] + ch_fasta // channel: [ val(meta), path(fasta) ] + + main: + ch_versions = Channel.empty() + + SAMTOOLS_STATS ( ch_bam_bai, ch_fasta ) + ch_versions = ch_versions.mix(SAMTOOLS_STATS.out.versions) + + SAMTOOLS_FLAGSTAT ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_FLAGSTAT.out.versions) + + SAMTOOLS_IDXSTATS ( ch_bam_bai ) + ch_versions = ch_versions.mix(SAMTOOLS_IDXSTATS.out.versions) + + emit: + stats = SAMTOOLS_STATS.out.stats // channel: [ val(meta), path(stats) ] + flagstat = SAMTOOLS_FLAGSTAT.out.flagstat // channel: [ val(meta), path(flagstat) ] + idxstats = SAMTOOLS_IDXSTATS.out.idxstats // channel: [ val(meta), path(idxstats) ] + + versions = ch_versions // channel: [ path(versions.yml) ] +} diff --git a/subworkflows/nf-core/bam_stats_samtools/meta.yml b/subworkflows/nf-core/bam_stats_samtools/meta.yml new file mode 100644 index 00000000..809bf736 --- 
/dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/meta.yml @@ -0,0 +1,43 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: bam_stats_samtools +description: Produces comprehensive statistics from SAM/BAM/CRAM file +keywords: + - statistics + - counts + - bam + - sam + - cram +components: + - samtools/stats + - samtools/idxstats + - samtools/flagstat +input: + - ch_bam_bai: + description: | + The input channel containing the BAM/CRAM and it's index + Structure: [ val(meta), path(bam), path(bai) ] + - ch_fasta: + description: | + Reference genome fasta file + Structure: [ path(fasta) ] +output: + - stats: + description: | + File containing samtools stats output + Structure: [ val(meta), path(stats) ] + - flagstat: + description: | + File containing samtools flagstat output + Structure: [ val(meta), path(flagstat) ] + - idxstats: + description: | + File containing samtools idxstats output + Structure: [ val(meta), path(idxstats)] + - versions: + description: | + Files containing software versions + Structure: [ path(versions.yml) ] +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test new file mode 100644 index 00000000..c8b21f28 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test @@ -0,0 +1,108 @@ +nextflow_workflow { + + name "Test Workflow BAM_STATS_SAMTOOLS" + script "../main.nf" + workflow "BAM_STATS_SAMTOOLS" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_stats_samtools" + tag "subworkflows/bam_stats_samtools" + tag "samtools" + tag "samtools/flagstat" + tag "samtools/idxstats" + tag "samtools/stats" + + test("test_bam_stats_samtools_single_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.single_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_single_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_single_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_single_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:true ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_idxstats") } + ) + } + } + + test("test_bam_stats_samtools_paired_end_cram") { + + when { + params { + outdir = "$outputDir" + } + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true) + ]) + input[1] = Channel.of([ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.stats).match("test_bam_stats_samtools_paired_end_cram_stats") }, + { assert snapshot(workflow.out.flagstat).match("test_bam_stats_samtools_paired_end_cram_flagstats") }, + { assert snapshot(workflow.out.idxstats).match("test_bam_stats_samtools_paired_end_cram_idxstats") } + ) + } + } + +} diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap new file mode 100644 index 00000000..bf0b0c69 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/main.nf.test.snap @@ -0,0 +1,164 @@ +{ + "test_bam_stats_samtools_paired_end_cram_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.flagstat:md5,a53f3d26e2e9851f7d528442bbfe9781" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:31:26.194017574" + }, + "test_bam_stats_samtools_paired_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,ddaf8f33fe9c1ebe9b06933213aec8ed" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:45:06.230091746" + }, + "test_bam_stats_samtools_paired_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,4f7ffd1e6a5e85524d443209ac97d783" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:17:27.717482" + }, + 
"test_bam_stats_samtools_single_end_flagstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.flagstat:md5,2191911d72575a2358b08b1df64ccb53" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:26:10.340046381" + }, + "test_bam_stats_samtools_paired_end_cram_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.idxstats:md5,e179601fa7b8ebce81ac3765206f6c15" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:31:26.207052003" + }, + "test_bam_stats_samtools_single_end_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.stats:md5,dc178e1a4956043aba8abc83e203521b" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:44:57.442208382" + }, + "test_bam_stats_samtools_paired_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,df60a8c8d6621100d05178c93fb053a2" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-01-18T17:17:27.726719" + }, + "test_bam_stats_samtools_single_end_idxstats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": true + }, + "test.idxstats:md5,613e048487662c694aa4a2f73ca96a20" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2023-11-06T09:26:10.349439801" + }, + "test_bam_stats_samtools_paired_end_cram_stats": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.stats:md5,d3345c4887f4a9ea4f7f56405b495db0" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.01.0" + }, + "timestamp": "2024-02-13T16:45:14.997164209" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml new file mode 100644 index 
00000000..ec2f2d68 --- /dev/null +++ b/subworkflows/nf-core/bam_stats_samtools/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_stats_samtools: + - subworkflows/nf-core/bam_stats_samtools/** diff --git a/subworkflows/nf-core/bam_variant_demix_boot_freyja/main.nf b/subworkflows/nf-core/bam_variant_demix_boot_freyja/main.nf new file mode 100644 index 00000000..ad1fdc08 --- /dev/null +++ b/subworkflows/nf-core/bam_variant_demix_boot_freyja/main.nf @@ -0,0 +1,81 @@ +include { FREYJA_VARIANTS } from '../../../modules/nf-core/freyja/variants' +include { FREYJA_UPDATE } from '../../../modules/nf-core/freyja/update' +include { FREYJA_DEMIX } from '../../../modules/nf-core/freyja/demix' +include { FREYJA_BOOT } from '../../../modules/nf-core/freyja/boot' + +workflow BAM_VARIANT_DEMIX_BOOT_FREYJA { + + take: + ch_bam // channel: [ val(meta), path(bam) ] + ch_fasta // channel: [ path(fasta) ] + val_skip_boot // value skip_boot + val_repeats // value repeats + val_db_name // string db_name + ch_barcodes // channel: [ path(barcodes)] + ch_lineages_meta // channel: [ path(lineages_meta)] + + main: + ch_versions = Channel.empty() + + // + // Variant calling + // + FREYJA_VARIANTS ( + ch_bam, + ch_fasta + ) + ch_freyja_variants = FREYJA_VARIANTS.out.variants + ch_versions = ch_versions.mix(FREYJA_VARIANTS.out.versions.first()) + + // + // Update the database if none are given. 
+ // + if (!ch_barcodes || !ch_lineages_meta) { + FREYJA_UPDATE ( + val_db_name + ) + ch_barcodes = FREYJA_UPDATE.out.barcodes + ch_lineages_meta = FREYJA_UPDATE.out.lineages_meta + ch_versions = ch_versions.mix(FREYJA_UPDATE.out.versions.first()) + } + + + // + // demix and define minimum variant abundances + // + FREYJA_DEMIX ( + ch_freyja_variants, + ch_barcodes, + ch_lineages_meta + ) + ch_freyja_demix = FREYJA_DEMIX.out.demix + ch_versions = ch_versions.mix(FREYJA_DEMIX.out.versions.first()) + + + // + // Perform bootstrapping to get more accurate estimates of abundancies + // + ch_lineages = Channel.empty() + ch_summarized = Channel.empty() + if (!val_skip_boot){ + FREYJA_BOOT ( + ch_freyja_variants, + val_repeats, + ch_barcodes, + ch_lineages_meta + ) + ch_lineages = FREYJA_BOOT.out.lineages + ch_summarized = FREYJA_BOOT.out.summarized + ch_versions = ch_versions.mix(FREYJA_BOOT.out.versions.first()) + } + + emit: + variants = FREYJA_VARIANTS.out.variants // channel: [ val(meta), path(variants_tsv), path(depths_tsv) ] + demix = FREYJA_DEMIX.out.demix // channel: [ val(meta), path(demix_tsv) ] + lineages = ch_lineages // channel: [ val(meta), path(lineages_csv) ] + summarized = ch_summarized // channel: [ val(meta), path(summarized_csv) ] + barcodes = ch_barcodes // channel: [ path(barcodes) ] + lineages_meta = ch_lineages_meta // channel: [ path(lineages_meta) ] + versions = ch_versions // channel: [ path(versions.yml) ] + } + diff --git a/subworkflows/nf-core/bam_variant_demix_boot_freyja/meta.yml b/subworkflows/nf-core/bam_variant_demix_boot_freyja/meta.yml new file mode 100644 index 00000000..3afe864a --- /dev/null +++ b/subworkflows/nf-core/bam_variant_demix_boot_freyja/meta.yml @@ -0,0 +1,84 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: "bam_variant_demix_boot_freyja" +description: Recover relative lineage abundances from mixed SARS-CoV-2 samples from a sequencing 
dataset (BAM aligned to the Hu-1 reference) +keywords: + - bam + - variants + - cram +components: + - freyja/variants + - freyja/demix + - freyja/update + - freyja/boot +input: + - ch_bam: + type: file + description: | + Structure: [ val(meta), path(bam) ] + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] and sorted BAM file + - ch_fasta: + type: file + description: | + Structure: [ val(meta), path(fasta) ] + Groovy Map containing sample information e.g. [ id:'test', single_end:false ] and the fasta reference used for the sorted BAM file + - val_repeats: + type: value (int) + description: Number of bootstrap repeats to perform + - val_db_name: + type: value (string) + description: Name of the dir where UShER's files will be stored + - ch_barcodes: + type: file + description: | + Structure: path(barcodes) + File containing lineage defining barcodes + - ch_lineages_meta: + type: file + description: | + Structure: path(lineages_meta) + File containing lineage metadata that correspond to barcodes +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test' ] + - variants: + type: file + description: | + Structure: [ val(meta), path(variants) ] + File containing identified variants in a gff-like format + - depths: + type: file + description: | + Structure: [ val(meta), path(depths) ] + File containing depth of the variants + - demix: + type: file + description: | + Structure: [ val(meta), path(demix) ] + a tsv file that includes the lineages present, their corresponding abundances, and summarization by constellation + - lineages: + type: file + description: | + Structure: [ val(meta), path(lineages) ] + a csv file that includes the lineages present and their corresponding abundances + - summarized: + type: file + description: | + Structure: [ val(meta), path(summarized) ] + a csv file that includes the lineages present but summarized by constellation and their corresponding abundances + - barcodes: + type: file + description: path(barcodes) a file containing the lineage-defining barcodes + - lineages_meta: + type: file + description: path(lineages_meta) a file containing lineage metadata that corresponds to the barcodes + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@Joon-Klaps" +maintainers: + - "@Joon-Klaps" diff --git a/subworkflows/nf-core/bam_variant_demix_boot_freyja/tests/main.nf.test b/subworkflows/nf-core/bam_variant_demix_boot_freyja/tests/main.nf.test new file mode 100644 index 00000000..8ea5c8ba --- /dev/null +++ b/subworkflows/nf-core/bam_variant_demix_boot_freyja/tests/main.nf.test @@ -0,0 +1,127 @@ +nextflow_workflow { + + name "Test Workflow BAM_VARIANT_DEMIX_BOOT_FREYJA" + script "../main.nf" + workflow "BAM_VARIANT_DEMIX_BOOT_FREYJA" + tag "subworkflows" + tag "subworkflows_nfcore" + tag "bam_variant_demix_boot_freyja" + tag "subworkflows/bam_variant_demix_boot_freyja" + tag "freyja" + tag "freyja/variants" + 
tag "freyja/update" + tag "freyja/demix" + tag "freyja/boot" + + test("sarscov2 - bam - val - nodb"){ + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ]) + input[1] = Channel.of([file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + input[2] = false + input[3] = 3 + input[4] = 'freyja_db' + input[5] = [] + input[6] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.variants, + workflow.out.versions).match() }, + // All depend on a database that gets updated regularly + { assert path(workflow.out.demix.get(0).get(1)).getText().contains("summarized") }, + { assert path(workflow.out.summarized.get(0).get(1)).getText().contains("0.025,") }, + { assert path(workflow.out.lineages.get(0).get(1)).getText().contains("0.025,") }, + { assert path(workflow.out.barcodes.get(0)).exists() }, + { assert path(workflow.out.lineages_meta.get(0)).exists() }, + ) + } + } + + test("sarscov2 - bam - val - db"){ + + setup { + run("FREYJA_UPDATE") { + script "../../../../modules/nf-core/freyja/update/main.nf" + process { + """ + input[0] = "test_db" + """ + } + } + } + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ]) + input[1] = Channel.of([file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + input[2] = false + input[3] = 3 + input[4] = 'test_db' + input[5] = FREYJA_UPDATE.out.barcodes + input[6] = FREYJA_UPDATE.out.lineages_meta + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.variants, + workflow.out.versions).match() 
}, + // All depend on a database that gets updated regularly + { assert path(workflow.out.demix.get(0).get(1)).getText().contains("summarized") }, + { assert path(workflow.out.summarized.get(0).get(1)).getText().contains("0.025,") }, + { assert path(workflow.out.lineages.get(0).get(1)).getText().contains("0.025,") }, + { assert path(workflow.out.barcodes.get(0)).exists() }, + { assert path(workflow.out.lineages_meta.get(0)).exists() }, + ) + } + } + + test("sarscov2 - bam - skip - val - nodb"){ + + when { + workflow { + """ + input[0] = Channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + ]) + input[1] = Channel.of([file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]) + input[2] = true + input[3] = 0 + input[4] = 'test_db' + input[5] = [] + input[6] = [] + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot(workflow.out.variants, + workflow.out.versions).match() }, + // All depend on a database that gets updated regularly + { assert path(workflow.out.demix.get(0).get(1)).getText().contains("summarized") }, + { assert path(workflow.out.barcodes.get(0)).exists() }, + { assert path(workflow.out.lineages_meta.get(0)).exists() }, + ) + } + } + +} diff --git a/subworkflows/nf-core/bam_variant_demix_boot_freyja/tests/main.nf.test.snap b/subworkflows/nf-core/bam_variant_demix_boot_freyja/tests/main.nf.test.snap new file mode 100644 index 00000000..c16d8793 --- /dev/null +++ b/subworkflows/nf-core/bam_variant_demix_boot_freyja/tests/main.nf.test.snap @@ -0,0 +1,75 @@ +{ + "sarscov2 - bam - val - db": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.variants.tsv:md5,966450bae4d9abae278572927b821983", + "test.depth.tsv:md5,27f79b28a365a8af915895b484d1153e" + ] + ], + [ + "versions.yml:md5,d3e911c025e35de092f7ef0073ccf807", + 
"versions.yml:md5,df54ea89f3bcb7e5952e7e3dae1d22a4", + "versions.yml:md5,e66aeb30f8646d583bec6fbf4fe02426" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-02T16:12:55.505491641" + }, + "sarscov2 - bam - skip - val - nodb": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.variants.tsv:md5,966450bae4d9abae278572927b821983", + "test.depth.tsv:md5,27f79b28a365a8af915895b484d1153e" + ] + ], + [ + "versions.yml:md5,4b7dccb03c75e8e94d7c6bdefaa90c95", + "versions.yml:md5,df54ea89f3bcb7e5952e7e3dae1d22a4", + "versions.yml:md5,e66aeb30f8646d583bec6fbf4fe02426" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-02T16:13:29.413908034" + }, + "sarscov2 - bam - val - nodb": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test.variants.tsv:md5,966450bae4d9abae278572927b821983", + "test.depth.tsv:md5,27f79b28a365a8af915895b484d1153e" + ] + ], + [ + "versions.yml:md5,4b7dccb03c75e8e94d7c6bdefaa90c95", + "versions.yml:md5,d3e911c025e35de092f7ef0073ccf807", + "versions.yml:md5,df54ea89f3bcb7e5952e7e3dae1d22a4", + "versions.yml:md5,e66aeb30f8646d583bec6fbf4fe02426" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-02T16:12:09.796799525" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/bam_variant_demix_boot_freyja/tests/tags.yml b/subworkflows/nf-core/bam_variant_demix_boot_freyja/tests/tags.yml new file mode 100644 index 00000000..f0c88761 --- /dev/null +++ b/subworkflows/nf-core/bam_variant_demix_boot_freyja/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/bam_variant_demix_boot_freyja: + - subworkflows/nf-core/bam_variant_demix_boot_freyja/** diff --git a/subworkflows/nf-core/fastq_align_bowtie2/main.nf b/subworkflows/nf-core/fastq_align_bowtie2/main.nf new file mode 100644 index 00000000..cafaa9bf --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/main.nf @@ 
-0,0 +1,45 @@ +// +// Alignment with Bowtie2 +// + +include { BOWTIE2_ALIGN } from '../../../modules/nf-core/bowtie2/align/main' +include { BAM_SORT_STATS_SAMTOOLS } from '../bam_sort_stats_samtools/main' + +workflow FASTQ_ALIGN_BOWTIE2 { + take: + ch_reads // channel: [ val(meta), [ reads ] ] + ch_index // channel: /path/to/bowtie2/index/ + save_unaligned // val + sort_bam // val + ch_fasta // channel: /path/to/reference.fasta + + main: + + ch_versions = Channel.empty() + + // + // Map reads with Bowtie2 + // + BOWTIE2_ALIGN ( ch_reads, ch_index, ch_fasta, save_unaligned, sort_bam ) + ch_versions = ch_versions.mix(BOWTIE2_ALIGN.out.versions) + + // + // Sort, index BAM file and run samtools stats, flagstat and idxstats + // + BAM_SORT_STATS_SAMTOOLS ( BOWTIE2_ALIGN.out.bam, ch_fasta ) + ch_versions = ch_versions.mix(BAM_SORT_STATS_SAMTOOLS.out.versions) + + emit: + bam_orig = BOWTIE2_ALIGN.out.bam // channel: [ val(meta), aligned ] + log_out = BOWTIE2_ALIGN.out.log // channel: [ val(meta), log ] + fastq = BOWTIE2_ALIGN.out.fastq // channel: [ val(meta), fastq ] + + bam = BAM_SORT_STATS_SAMTOOLS.out.bam // channel: [ val(meta), [ bam ] ] + bai = BAM_SORT_STATS_SAMTOOLS.out.bai // channel: [ val(meta), [ bai ] ] + csi = BAM_SORT_STATS_SAMTOOLS.out.csi // channel: [ val(meta), [ csi ] ] + stats = BAM_SORT_STATS_SAMTOOLS.out.stats // channel: [ val(meta), [ stats ] ] + flagstat = BAM_SORT_STATS_SAMTOOLS.out.flagstat // channel: [ val(meta), [ flagstat ] ] + idxstats = BAM_SORT_STATS_SAMTOOLS.out.idxstats // channel: [ val(meta), [ idxstats ] ] + + versions = ch_versions // channel: [ versions.yml ] +} diff --git a/subworkflows/nf-core/fastq_align_bowtie2/meta.yml b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml new file mode 100644 index 00000000..b18e4054 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/meta.yml @@ -0,0 +1,67 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json 
+name: fastq_align_bowtie2 +description: Align reads to a reference genome using bowtie2 then sort with samtools +keywords: + - align + - fasta + - genome + - reference +components: + - bowtie2/align + - samtools/sort + - samtools/index + - samtools/stats + - samtools/idxstats + - samtools/flagstat + - bam_sort_stats_samtools +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ch_reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. + - ch_index: + type: file + description: Bowtie2 genome index files + pattern: "*.bt2" + - save_unaligned: + type: boolean + description: | + Save reads that do not map to the reference (true) or discard them (false) + (default: false) + - sort_bam: + type: boolean + description: | + Use samtools sort (true) or samtools view (false) + default: false + - ch_fasta: + type: file + description: Reference fasta file + pattern: "*.{fasta,fa}" +# TODO Update when we decide on a standard for subworkflow docs +output: + - bam: + type: file + description: Output BAM file containing read alignments + pattern: "*.{bam}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" + - fastq: + type: file + description: Unaligned FastQ files + pattern: "*.fastq.gz" + - log: + type: file + description: Alignment log + pattern: "*.log" +authors: + - "@drpatelh" +maintainers: + - "@drpatelh" diff --git a/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test b/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test new file mode 100644 index 00000000..b5e84f51 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test @@ -0,0 +1,99 @@ +nextflow_workflow { + + name "Test Subworkflow FASTQ_ALIGN_BOWTIE2" + script "../main.nf" + config "./nextflow.config" + workflow "FASTQ_ALIGN_BOWTIE2" + + tag "subworkflows" + tag 
"subworkflows_nfcore" + tag "subworkflows/fastq_align_bowtie2" + tag "subworkflows/bam_sort_stats_samtools" + tag "bowtie2" + tag "bowtie2/build" + tag "bowtie2/align" + + test("test_align_bowtie2_single_end") { + setup { + run("BOWTIE2_BUILD") { + script "../../../../modules/nf-core/bowtie2/build/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:true ], [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]]) + input[1] = BOWTIE2_BUILD.out.index + input[2] = false + input[3] = false + input[4] = Channel.value([ [ id:'genome' ],file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.bam_orig[0][1]).name, + workflow.out.fastq, + workflow.out.log_out, + file(workflow.out.bam[0][1]).name, + file(workflow.out.bai[0][1]).name, + workflow.out.csi, + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions, + ).match()} + ) + } + } + + test("test_align_bowtie2_paired_end") { + setup { + run("BOWTIE2_BUILD") { + script "../../../../modules/nf-core/bowtie2/build/main.nf" + process { + """ + input[0] = Channel.value([ [ id:'genome' ],file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)]) + """ + } + } + } + when { + workflow { + """ + input[0] = Channel.of([[ id:'test', single_end:false ], [file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true), file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)]]) + input[1] = BOWTIE2_BUILD.out.index + input[2] = false + input[3] = false + input[4] = Channel.value([ [ id:'genome' ],file(params.test_data['sarscov2']['genome']['genome_fasta'], 
checkIfExists: true)]) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + file(workflow.out.bam_orig[0][1]).name, + workflow.out.fastq, + workflow.out.log_out, + file(workflow.out.bam[0][1]).name, + file(workflow.out.bai[0][1]).name, + workflow.out.csi, + workflow.out.stats, + workflow.out.flagstat, + workflow.out.idxstats, + workflow.out.versions, + ).match()} + ) + } + } +} diff --git a/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap new file mode 100644 index 00000000..c0f3f8bf --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/tests/main.nf.test.snap @@ -0,0 +1,126 @@ +{ + "test_align_bowtie2_single_end": { + "content": [ + "test.bam", + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.bowtie2.log:md5,7b8a9e61b7646da1089b041333c41a87" + ] + ], + "test.sorted.bam", + "test.sorted.bam.bai", + [ + + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.stats:md5,9a65272e49581873b1ea211f738e992f" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.flagstat:md5,e9ce9093133116bc54fd335cfe698372" + ] + ], + [ + [ + { + "id": "test", + "single_end": true + }, + "test.sorted.bam.idxstats:md5,e16eb632f7f462514b0873c7ac8ac905" + ] + ], + [ + "versions.yml:md5,5d5ab1d650a93d8bb5ed142943798a6a", + "versions.yml:md5,666dbae2343fc479e483656c35d3d8a1", + "versions.yml:md5,aab337e63eac9055aadb9a35cec16053", + "versions.yml:md5,c27f74d9c37fbb3365c437a9f7e81c27", + "versions.yml:md5,eb9364a9f1745d6a345b8b4b03aebe25", + "versions.yml:md5,f982efa9031f340ace29f76dd47a8ce1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T14:40:54.318808117" + }, + "test_align_bowtie2_paired_end": { + "content": [ + "test.bam", + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.bowtie2.log:md5,bd89ce1b28c93bf822bae391ffcedd19" + ] + ], + "test.sorted.bam", + "test.sorted.bam.bai", + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.stats:md5,1086d408391af2a5c80c6dee0efa7e59" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.flagstat:md5,49f3d51a8804ce58fe9cecd2549d279b" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test.sorted.bam.idxstats:md5,29ff2fa56d35b2a47625b8f517f1a947" + ] + ], + [ + "versions.yml:md5,5d5ab1d650a93d8bb5ed142943798a6a", + "versions.yml:md5,666dbae2343fc479e483656c35d3d8a1", + "versions.yml:md5,aab337e63eac9055aadb9a35cec16053", + "versions.yml:md5,c27f74d9c37fbb3365c437a9f7e81c27", + "versions.yml:md5,eb9364a9f1745d6a345b8b4b03aebe25", + "versions.yml:md5,f982efa9031f340ace29f76dd47a8ce1" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-18T14:41:11.243874685" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config b/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config new file mode 100644 index 00000000..2f85e807 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/tests/nextflow.config @@ -0,0 +1,8 @@ +process { + withName: '.*:BAM_SORT_STATS_SAMTOOLS:SAMTOOLS_.*' { + ext.prefix = { "${meta.id}.sorted" } + } + withName: '.*:BAM_SORT_STATS_SAMTOOLS:BAM_STATS_SAMTOOLS:.*' { + ext.prefix = { "${meta.id}.sorted.bam" } + } +} diff --git a/subworkflows/nf-core/fastq_align_bowtie2/tests/tags.yml b/subworkflows/nf-core/fastq_align_bowtie2/tests/tags.yml new file mode 100644 index 00000000..267bcc77 --- /dev/null +++ b/subworkflows/nf-core/fastq_align_bowtie2/tests/tags.yml @@ -0,0 +1,2 @@ +subworkflows/fastq_align_bowtie2: + - subworkflows/nf-core/fastq_align_bowtie2/** diff --git a/tower.yml b/tower.yml index 787aedfe..68f9e947 100644 --- a/tower.yml +++ b/tower.yml @@ -1,5 +1,17 @@ reports: multiqc_report.html: 
display: "MultiQC HTML report" - samplesheet.csv: - display: "Auto-created samplesheet with collated metadata and FASTQ paths" + summary_variants_metrics_mqc.csv: + display: "Summary variant calling metrics CSV file" + summary_assembly_metrics_mqc.csv: + display: "Summary assembly metrics CSV file" + variants_long_table.csv: + display: "Variants long table with functional effect prediction and lineage analysis" + all_samples.mosdepth.heatmap.pdf: + display: "All samples amplicon coverage heatmap PDF file" + report.pdf: + display: "QUAST PDF report" + "*.mosdepth.coverage.pdf": + display: "Per-sample amplicon coverage PDF file" + "**/bandage/*.png": + display: "Assembly BANDAGE image" diff --git a/workflows/illumina.nf b/workflows/illumina.nf new file mode 100644 index 00000000..21df7cc8 --- /dev/null +++ b/workflows/illumina.nf @@ -0,0 +1,727 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + PRINT PARAMS SUMMARY +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +include { paramsSummaryLog } from 'plugin/nf-schema' +include { paramsSummaryMap } from 'plugin/nf-schema' +include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' +include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_viralrecon_pipeline' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + VALIDATE INPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +def valid_params = [ + protocols : ['metagenomic', 'amplicon'], + variant_callers : ['ivar', 'bcftools'], + consensus_callers : ['ivar', 'bcftools'], + assemblers : ['spades', 'unicycler', 'minia'], + spades_modes : ['rnaviral', 'corona', 'metaviral', 'meta', 'metaplasmid', 'plasmid', 'isolate', 'rna', 'bio'] +] + +// Check input path parameters to see if they exist 
+
+def checkPathParamList = [
+    params.input, params.fasta, params.gff, params.bowtie2_index,
+    params.kraken2_db, params.primer_bed, params.primer_fasta,
+    params.blast_db, params.spades_hmm, params.multiqc_config,
+    params.freyja_barcodes, params.freyja_lineages, params.additional_annotation
+]
+for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
+
+if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet file not specified!' }
+if (params.spades_hmm) { ch_spades_hmm = file(params.spades_hmm) } else { ch_spades_hmm = [] }
+if (params.additional_annotation) { ch_additional_gtf = file(params.additional_annotation) } else { ch_additional_gtf = [] }
+
+def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : []
+
+def variant_caller = params.variant_caller
+if (!variant_caller) { variant_caller = params.protocol == 'amplicon' ? 'ivar' : 'bcftools' }
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    CONFIG FILES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+ch_multiqc_config = file("$projectDir/assets/multiqc_config_illumina.yml", checkIfExists: true)
+ch_multiqc_custom_config = params.multiqc_config ?
file(params.multiqc_config) : [] + +// Header files +ch_blast_outfmt6_header = file("$projectDir/assets/headers/blast_outfmt6_header.txt", checkIfExists: true) +ch_blast_filtered_outfmt6_header = file("$projectDir/assets/headers/blast_filtered_outfmt6_header.txt", checkIfExists: true) +ch_ivar_variants_header_mqc = file("$projectDir/assets/headers/ivar_variants_header_mqc.txt", checkIfExists: true) + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Loaded from modules/local/ +// +include { CUTADAPT } from '../modules/local/cutadapt' +include { MULTIQC } from '../modules/local/multiqc_illumina' +include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_GENOME } from '../modules/local/plot_mosdepth_regions' +include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../modules/local/plot_mosdepth_regions' + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_illumina' +include { VARIANTS_IVAR } from '../subworkflows/local/variants_ivar' +include { VARIANTS_BCFTOOLS } from '../subworkflows/local/variants_bcftools' +include { CONSENSUS_IVAR } from '../subworkflows/local/consensus_ivar' +include { CONSENSUS_BCFTOOLS } from '../subworkflows/local/consensus_bcftools' +include { VARIANTS_LONG_TABLE } from '../subworkflows/local/variants_long_table' +include { ADDITIONAL_ANNOTATION } from '../subworkflows/local/additional_annotation' +include { ASSEMBLY_SPADES } from '../subworkflows/local/assembly_spades' +include { ASSEMBLY_UNICYCLER } from '../subworkflows/local/assembly_unicycler' +include { ASSEMBLY_MINIA } from '../subworkflows/local/assembly_minia' +include { BAM_TRIM_PRIMERS_IVAR } from '../subworkflows/local/bam_trim_primers_ivar' +include { FASTQ_TRIM_FASTP_FASTQC } from 
'../subworkflows/local/fastq_trim_fastp_fastqc' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from nf-core/modules +// +include { CAT_FASTQ } from '../modules/nf-core/cat/fastq/main' +include { FASTQC } from '../modules/nf-core/fastqc/main' +include { KRAKEN2_KRAKEN2 } from '../modules/nf-core/kraken2/kraken2/main' +include { PICARD_COLLECTMULTIPLEMETRICS } from '../modules/nf-core/picard/collectmultiplemetrics/main' +include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/mosdepth/main' +include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/mosdepth/main' + +// +// SUBWORKFLOW: Consisting entirely of nf-core/modules +// +include { FASTQ_ALIGN_BOWTIE2 } from '../subworkflows/nf-core/fastq_align_bowtie2/main' +include { BAM_MARKDUPLICATES_PICARD } from '../subworkflows/nf-core/bam_markduplicates_picard/main' +include { BAM_VARIANT_DEMIX_BOOT_FREYJA } from '../subworkflows/nf-core/bam_variant_demix_boot_freyja/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def pass_mapped_reads = [:] +def fail_mapped_reads = [:] + +workflow ILLUMINA { + + take: + ch_samplesheet // channel: samplesheet read in from --input + ch_genome_fasta + ch_genome_gff + ch_primer_bed + ch_bowtie2_index + ch_nextclade_dataset + ch_nextclade_dataset_name + ch_nextclade_dataset_reference + ch_nextclade_dataset_tag + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + multiqc_report = Channel.empty() + + // + // SUBWORKFLOW: Uncompress and prepare reference genome files + // + PREPARE_GENOME ( + ch_genome_fasta, + ch_genome_gff, + ch_primer_bed, + ch_bowtie2_index, + ch_nextclade_dataset, 
+ ch_nextclade_dataset_name, + ch_nextclade_dataset_reference, + ch_nextclade_dataset_tag + ) + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + + // Check genome fasta only contains a single contig + PREPARE_GENOME + .out + .fasta + .map { WorkflowIllumina.isMultiFasta(it, log) } + + if (params.protocol == 'amplicon' && !params.skip_variants) { + // Check primer BED file only contains suffixes provided --primer_left_suffix / --primer_right_suffix + PREPARE_GENOME + .out + .primer_bed + .map { WorkflowCommons.checkPrimerSuffixes(it, params.primer_left_suffix, params.primer_right_suffix, log) } + + // Check whether the contigs in the primer BED file are present in the reference genome + PREPARE_GENOME + .out + .primer_bed + .map { [ WorkflowCommons.getColFromFile(it, col=0, uniqify=true, sep='\t') ] } + .set { ch_bed_contigs } + + PREPARE_GENOME + .out + .fai + .map { [ WorkflowCommons.getColFromFile(it, col=0, uniqify=true, sep='\t') ] } + .concat(ch_bed_contigs) + .collect() + .map { fai, bed -> WorkflowCommons.checkContigsInBED(fai, bed, log) } + + // Check whether the primer BED file supplied to the pipeline is from the SWIFT/SNAP protocol + if (!params.ivar_trim_offset) { + PREPARE_GENOME + .out + .primer_bed + .map { WorkflowIllumina.checkIfSwiftProtocol(it, 'covid19genome', log) } + } + } + + // + // MODULE: Concatenate FastQ files from same sample if required + // + CAT_FASTQ ( + ch_samplesheet + ) + .reads + .set { ch_cat_fastq } + ch_versions = ch_versions.mix(CAT_FASTQ.out.versions.first()) + + // + // SUBWORKFLOW: Read QC and trim adapters + // + FASTQ_TRIM_FASTP_FASTQC ( + ch_cat_fastq, + [], + false, + params.save_trimmed_fail, + false + ) + ch_variants_fastq = FASTQ_TRIM_FASTP_FASTQC.out.reads + ch_versions = ch_versions.mix(FASTQ_TRIM_FASTP_FASTQC.out.versions) + + // + // Filter empty FastQ files after adapter trimming + // + ch_fail_reads_multiqc = Channel.empty() + if (!params.skip_fastp) { + ch_variants_fastq + 
.join(FASTQ_TRIM_FASTP_FASTQC.out.trim_json) + .map { + meta, reads, json -> + pass = WorkflowIllumina.getFastpReadsAfterFiltering(json) > 0 + [ meta, reads, json, pass ] + } + .set { ch_pass_fail_reads } + + ch_pass_fail_reads + .map { meta, reads, json, pass -> if (pass) [ meta, reads ] } + .set { ch_variants_fastq } + + ch_pass_fail_reads + .map { + meta, reads, json, pass -> + if (!pass) { + fail_mapped_reads[meta.id] = 0 + num_reads = WorkflowIllumina.getFastpReadsBeforeFiltering(json) + return [ "$meta.id\t$num_reads" ] + } + } + .collect() + .map { + tsv_data -> + def header = ['Sample', 'Reads before trimming'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_fail_reads_multiqc } + } + + // + // MODULE: Run Kraken2 for removal of host reads + // + ch_assembly_fastq = ch_variants_fastq + ch_kraken2_multiqc = Channel.empty() + if (!params.skip_kraken2) { + KRAKEN2_KRAKEN2 ( + ch_variants_fastq, + PREPARE_GENOME.out.kraken2_db, + params.kraken2_variants_host_filter || params.kraken2_assembly_host_filter, + params.kraken2_variants_host_filter || params.kraken2_assembly_host_filter + ) + ch_kraken2_multiqc = KRAKEN2_KRAKEN2.out.report + ch_versions = ch_versions.mix(KRAKEN2_KRAKEN2.out.versions.first()) + + if (params.kraken2_variants_host_filter) { + ch_variants_fastq = KRAKEN2_KRAKEN2.out.unclassified_reads_fastq + } + + if (params.kraken2_assembly_host_filter) { + ch_assembly_fastq = KRAKEN2_KRAKEN2.out.unclassified_reads_fastq + } + } + + // + // SUBWORKFLOW: Alignment with Bowtie2 + // + ch_bam = Channel.empty() + ch_bai = Channel.empty() + ch_bowtie2_multiqc = Channel.empty() + ch_bowtie2_flagstat_multiqc = Channel.empty() + if (!params.skip_variants) { + FASTQ_ALIGN_BOWTIE2 ( + ch_variants_fastq, + PREPARE_GENOME.out.bowtie2_index, + params.save_unaligned, + false, + PREPARE_GENOME.out.fasta.map { [ [:], it ] } + ) + ch_bam = FASTQ_ALIGN_BOWTIE2.out.bam + ch_bai = FASTQ_ALIGN_BOWTIE2.out.bai + ch_bowtie2_multiqc = 
FASTQ_ALIGN_BOWTIE2.out.log_out + ch_bowtie2_flagstat_multiqc = FASTQ_ALIGN_BOWTIE2.out.flagstat + ch_versions = ch_versions.mix(FASTQ_ALIGN_BOWTIE2.out.versions) + } + + // + // Filter channels to get samples that passed Bowtie2 minimum mapped reads threshold + // + ch_fail_mapping_multiqc = Channel.empty() + if (!params.skip_variants) { + ch_bowtie2_flagstat_multiqc + .map { meta, flagstat -> [ meta ] + WorkflowIllumina.getFlagstatMappedReads(flagstat, params) } + .set { ch_mapped_reads } + + ch_bam + .join(ch_mapped_reads, by: [0]) + .map { meta, ofile, mapped, pass -> if (pass) [ meta, ofile ] } + .set { ch_bam } + + ch_bai + .join(ch_mapped_reads, by: [0]) + .map { meta, ofile, mapped, pass -> if (pass) [ meta, ofile ] } + .set { ch_bai } + + ch_mapped_reads + .branch { meta, mapped, pass -> + pass: pass + pass_mapped_reads[meta.id] = mapped + return [ "$meta.id\t$mapped" ] + fail: !pass + fail_mapped_reads[meta.id] = mapped + return [ "$meta.id\t$mapped" ] + } + .set { ch_pass_fail_mapped } + + ch_pass_fail_mapped + .fail + .collect() + .map { + tsv_data -> + def header = ['Sample', 'Mapped reads'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_fail_mapping_multiqc } + } + + // + // SUBWORKFLOW: Trim primer sequences from reads with iVar + // + ch_ivar_trim_flagstat_multiqc = Channel.empty() + if (!params.skip_variants && !params.skip_ivar_trim && params.protocol == 'amplicon') { + BAM_TRIM_PRIMERS_IVAR ( + ch_bam.join(ch_bai, by: [0]), + PREPARE_GENOME.out.primer_bed, + PREPARE_GENOME.out.fasta.map { [ [:], it ] } + ) + ch_bam = BAM_TRIM_PRIMERS_IVAR.out.bam + ch_bai = BAM_TRIM_PRIMERS_IVAR.out.bai + ch_ivar_trim_flagstat_multiqc = BAM_TRIM_PRIMERS_IVAR.out.flagstat + ch_versions = ch_versions.mix(BAM_TRIM_PRIMERS_IVAR.out.versions) + } + + // + // SUBWORKFLOW: Mark duplicate reads + // + ch_markduplicates_flagstat_multiqc = Channel.empty() + if (!params.skip_variants && !params.skip_markduplicates) { + BAM_MARKDUPLICATES_PICARD ( + 
ch_bam, + PREPARE_GENOME.out.fasta.map { [ [:], it ] }, + PREPARE_GENOME.out.fai + ) + ch_bam = BAM_MARKDUPLICATES_PICARD.out.bam + ch_bai = BAM_MARKDUPLICATES_PICARD.out.bai + ch_markduplicates_flagstat_multiqc = BAM_MARKDUPLICATES_PICARD.out.flagstat + ch_versions = ch_versions.mix(BAM_MARKDUPLICATES_PICARD.out.versions) + } + + // + // MODULE: Picard metrics + // + if (!params.skip_variants && !params.skip_picard_metrics) { + PICARD_COLLECTMULTIPLEMETRICS ( + ch_bam.join(ch_bai, by: [0]), + PREPARE_GENOME.out.fasta.map { [ [:], it ] }, + [ [:], [] ] + ) + ch_versions = ch_versions.mix(PICARD_COLLECTMULTIPLEMETRICS.out.versions.first()) + } + + // + // MODULE: Genome-wide and amplicon-specific coverage QC plots + // + ch_mosdepth_multiqc = Channel.empty() + ch_amplicon_heatmap_multiqc = Channel.empty() + if (!params.skip_variants && !params.skip_mosdepth) { + MOSDEPTH_GENOME ( + ch_bam + .join(ch_bai, by: [0]) + .map { meta, bam, bai -> [ meta, bam, bai, [] ] }, + [ [:], [] ], + ) + ch_mosdepth_multiqc = MOSDEPTH_GENOME.out.global_txt + ch_versions = ch_versions.mix(MOSDEPTH_GENOME.out.versions.first()) + + PLOT_MOSDEPTH_REGIONS_GENOME ( + MOSDEPTH_GENOME.out.regions_bed.collect { it[1] } + ) + ch_versions = ch_versions.mix(PLOT_MOSDEPTH_REGIONS_GENOME.out.versions) + + if (params.protocol == 'amplicon') { + MOSDEPTH_AMPLICON ( + ch_bam + .join(ch_bai, by: [0]) + .combine(PREPARE_GENOME.out.primer_collapsed_bed), + [ [:], [] ], + ) + ch_versions = ch_versions.mix(MOSDEPTH_AMPLICON.out.versions.first()) + + PLOT_MOSDEPTH_REGIONS_AMPLICON ( + MOSDEPTH_AMPLICON.out.regions_bed.collect { it[1] } + ) + ch_amplicon_heatmap_multiqc = PLOT_MOSDEPTH_REGIONS_AMPLICON.out.heatmap_tsv + ch_versions = ch_versions.mix(PLOT_MOSDEPTH_REGIONS_AMPLICON.out.versions) + } + } + + // + // SUBWORKFLOW: Call variants with IVar + // + ch_vcf = Channel.empty() + ch_tbi = Channel.empty() + ch_ivar_counts_multiqc = Channel.empty() + ch_bcftools_stats_multiqc = Channel.empty() + 
ch_snpsift_txt = Channel.empty() + ch_snpeff_multiqc = Channel.empty() + if (!params.skip_variants && variant_caller == 'ivar') { + VARIANTS_IVAR ( + ch_bam, + PREPARE_GENOME.out.fasta, + (params.protocol == 'amplicon' || !params.skip_asciigenome || !params.skip_markduplicates) ? PREPARE_GENOME.out.fai : [], + (params.protocol == 'amplicon' || !params.skip_asciigenome || !params.skip_markduplicates) ? PREPARE_GENOME.out.chrom_sizes : [], + ch_genome_gff ? PREPARE_GENOME.out.gff : [], + (params.protocol == 'amplicon' && ch_primer_bed) ? PREPARE_GENOME.out.primer_bed : [], + PREPARE_GENOME.out.snpeff_db, + PREPARE_GENOME.out.snpeff_config, + ch_ivar_variants_header_mqc + ) + ch_vcf = VARIANTS_IVAR.out.vcf + ch_tbi = VARIANTS_IVAR.out.tbi + ch_ivar_counts_multiqc = VARIANTS_IVAR.out.multiqc_tsv + ch_bcftools_stats_multiqc = VARIANTS_IVAR.out.stats + ch_snpeff_multiqc = VARIANTS_IVAR.out.snpeff_csv + ch_snpsift_txt = VARIANTS_IVAR.out.snpsift_txt + ch_versions = ch_versions.mix(VARIANTS_IVAR.out.versions) + } + + // + // SUBWORKFLOW: Call variants with BCFTools + // + if (!params.skip_variants && variant_caller == 'bcftools') { + VARIANTS_BCFTOOLS ( + ch_bam, + PREPARE_GENOME.out.fasta, + (params.protocol == 'amplicon' || !params.skip_asciigenome || !params.skip_markduplicates) ? PREPARE_GENOME.out.chrom_sizes : [], + ch_genome_gff ? PREPARE_GENOME.out.gff : [], + (params.protocol == 'amplicon' && ch_primer_bed) ? 
PREPARE_GENOME.out.primer_bed : [], + PREPARE_GENOME.out.snpeff_db, + PREPARE_GENOME.out.snpeff_config + ) + ch_vcf = VARIANTS_BCFTOOLS.out.vcf + ch_tbi = VARIANTS_BCFTOOLS.out.tbi + ch_bcftools_stats_multiqc = VARIANTS_BCFTOOLS.out.stats + ch_snpeff_multiqc = VARIANTS_BCFTOOLS.out.snpeff_csv + ch_snpsift_txt = VARIANTS_BCFTOOLS.out.snpsift_txt + ch_versions = ch_versions.mix(VARIANTS_BCFTOOLS.out.versions) + } + + // + // SUBWORKFLOW: Determine variants with Freyja + // + ch_freyja_multiqc = Channel.empty() + if (!params.skip_variants && !params.skip_freyja) { + BAM_VARIANT_DEMIX_BOOT_FREYJA( + ch_bam, + PREPARE_GENOME.out.fasta, + params.skip_freyja_boot, + params.freyja_repeats, + params.freyja_db_name, + params.freyja_barcodes, + params.freyja_lineages, + ) + ch_versions = ch_versions.mix(BAM_VARIANT_DEMIX_BOOT_FREYJA.out.versions) + ch_freyja_multiqc = BAM_VARIANT_DEMIX_BOOT_FREYJA.out.demix + } + + // + // SUBWORKFLOW: Call consensus with iVar and downstream QC + // + ch_quast_multiqc = Channel.empty() + ch_pangolin_multiqc = Channel.empty() + ch_nextclade_report = Channel.empty() + if (!params.skip_consensus && params.consensus_caller == 'ivar') { + CONSENSUS_IVAR ( + ch_bam, + PREPARE_GENOME.out.fasta, + ch_genome_gff ? PREPARE_GENOME.out.gff.map { [ [:], it ] } : [ [:], [] ], + PREPARE_GENOME.out.nextclade_db + ) + + ch_quast_multiqc = CONSENSUS_IVAR.out.quast_tsv + ch_pangolin_multiqc = CONSENSUS_IVAR.out.pangolin_report + ch_nextclade_report = CONSENSUS_IVAR.out.nextclade_report + ch_versions = ch_versions.mix(CONSENSUS_IVAR.out.versions) + } + + // + // SUBWORKFLOW: Call consensus with BCFTools + // + if (!params.skip_consensus && params.consensus_caller == 'bcftools' && variant_caller) { + CONSENSUS_BCFTOOLS ( + ch_bam, + ch_vcf, + ch_tbi, + PREPARE_GENOME.out.fasta, + ch_genome_gff ? 
PREPARE_GENOME.out.gff.map { [ [:], it ] } : [ [:], [] ], + PREPARE_GENOME.out.nextclade_db + ) + + ch_quast_multiqc = CONSENSUS_BCFTOOLS.out.quast_tsv + ch_pangolin_multiqc = CONSENSUS_BCFTOOLS.out.pangolin_report + ch_nextclade_report = CONSENSUS_BCFTOOLS.out.nextclade_report + ch_versions = ch_versions.mix(CONSENSUS_BCFTOOLS.out.versions) + } + + // + // MODULE: Get Nextclade clade information for MultiQC report + // + ch_nextclade_multiqc = Channel.empty() + if (!params.skip_nextclade) { + ch_nextclade_report + .map { meta, csv -> + def clade = WorkflowCommons.getNextcladeFieldMapFromCsv(csv)['clade'] + return [ "$meta.id\t$clade" ] + } + .collect() + .map { + tsv_data -> + def header = ['Sample', 'clade'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_nextclade_multiqc } + } + + // + // SUBWORKFLOW: Create variants long table report + // + if (!params.skip_variants && !params.skip_variants_long_table && ch_genome_gff && !params.skip_snpeff) { + VARIANTS_LONG_TABLE ( + ch_vcf, + ch_tbi, + ch_snpsift_txt, + ch_pangolin_multiqc + ) + ch_versions = ch_versions.mix(VARIANTS_LONG_TABLE.out.versions) + } + + // + // SUBWORKFLOW: Create variants long table report for additional annotation file + // + if (params.additional_annotation) { + ADDITIONAL_ANNOTATION ( + ch_vcf, + ch_tbi, + PREPARE_GENOME.out.fasta, + ch_additional_gtf, + ch_pangolin_multiqc + + ) + ch_versions = ch_versions.mix(ADDITIONAL_ANNOTATION.out.versions) + } + + // + // MODULE: Primer trimming with Cutadapt + // + ch_cutadapt_multiqc = Channel.empty() + if (params.protocol == 'amplicon' && !params.skip_assembly && !params.skip_cutadapt) { + CUTADAPT ( + ch_assembly_fastq, + PREPARE_GENOME.out.primer_fasta.collect { it[1] } + ) + ch_assembly_fastq = CUTADAPT.out.reads + ch_cutadapt_multiqc = CUTADAPT.out.log + ch_versions = ch_versions.mix(CUTADAPT.out.versions.first()) + + if (!params.skip_fastqc) { + FASTQC ( + CUTADAPT.out.reads + ) + ch_versions = 
ch_versions.mix(FASTQC.out.versions.first()) + } + } + + // + // SUBWORKFLOW: Run SPAdes assembly and downstream analysis + // + ch_spades_quast_multiqc = Channel.empty() + if (!params.skip_assembly && 'spades' in assemblers) { + ASSEMBLY_SPADES ( + ch_assembly_fastq.map { meta, fastq -> [ meta, fastq, [], [] ] }, + params.spades_mode, + ch_spades_hmm, + PREPARE_GENOME.out.fasta, + ch_genome_gff ? PREPARE_GENOME.out.gff.map { [ [:], it ] } : [ [:], [] ], + PREPARE_GENOME.out.blast_db, + ch_blast_outfmt6_header, + ch_blast_filtered_outfmt6_header + ) + ch_spades_quast_multiqc = ASSEMBLY_SPADES.out.quast_tsv + ch_versions = ch_versions.mix(ASSEMBLY_SPADES.out.versions) + } + + // + // SUBWORKFLOW: Run Unicycler assembly and downstream analysis + // + ch_unicycler_quast_multiqc = Channel.empty() + if (!params.skip_assembly && 'unicycler' in assemblers) { + ASSEMBLY_UNICYCLER ( + ch_assembly_fastq.map { meta, fastq -> [ meta, fastq, [] ] }, + PREPARE_GENOME.out.fasta, + ch_genome_gff ? PREPARE_GENOME.out.gff.map { [ [:], it ] } : [ [:], [] ], + PREPARE_GENOME.out.blast_db, + ch_blast_outfmt6_header, + ch_blast_filtered_outfmt6_header + ) + ch_unicycler_quast_multiqc = ASSEMBLY_UNICYCLER.out.quast_tsv + ch_versions = ch_versions.mix(ASSEMBLY_UNICYCLER.out.versions) + } + + // + // SUBWORKFLOW: Run minia assembly and downstream analysis + // + ch_minia_quast_multiqc = Channel.empty() + if (!params.skip_assembly && 'minia' in assemblers) { + ASSEMBLY_MINIA ( + ch_assembly_fastq, + PREPARE_GENOME.out.fasta, + ch_genome_gff ? 
PREPARE_GENOME.out.gff.map { [ [:], it ] } : [ [:], [] ], + PREPARE_GENOME.out.blast_db, + ch_blast_outfmt6_header, + ch_blast_filtered_outfmt6_header + ) + ch_minia_quast_multiqc = ASSEMBLY_MINIA.out.quast_tsv + ch_versions = ch_versions.mix(ASSEMBLY_MINIA.out.versions) + } + + // + // Collate and save software versions + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } + + // + // MODULE: MultiQC + // + if (!params.skip_multiqc) { + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? + file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_logo = params.multiqc_logo ? 
+ Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: false)) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config, + ch_multiqc_custom_config, + ch_multiqc_logo.toList(), + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), + ch_fail_reads_multiqc.collectFile(name: 'fail_mapped_reads_mqc.tsv').ifEmpty([]), + ch_fail_mapping_multiqc.collectFile(name: 'fail_mapped_samples_mqc.tsv').ifEmpty([]), + ch_amplicon_heatmap_multiqc.ifEmpty([]), + FASTQ_TRIM_FASTP_FASTQC.out.fastqc_raw_zip.collect{it[1]}.ifEmpty([]), + FASTQ_TRIM_FASTP_FASTQC.out.trim_json.collect{it[1]}.ifEmpty([]), + ch_kraken2_multiqc.collect{it[1]}.ifEmpty([]), + ch_bowtie2_flagstat_multiqc.collect{it[1]}.ifEmpty([]), + ch_bowtie2_multiqc.collect{it[1]}.ifEmpty([]), + ch_ivar_trim_flagstat_multiqc.collect{it[1]}.ifEmpty([]), + ch_markduplicates_flagstat_multiqc.collect{it[1]}.ifEmpty([]), + ch_mosdepth_multiqc.collect{it[1]}.ifEmpty([]), + ch_ivar_counts_multiqc.collect{it[1]}.ifEmpty([]), + ch_bcftools_stats_multiqc.collect{it[1]}.ifEmpty([]), + ch_snpeff_multiqc.collect{it[1]}.ifEmpty([]), + ch_quast_multiqc.collect{it[1]}.ifEmpty([]), + ch_pangolin_multiqc.collect{it[1]}.ifEmpty([]), + ch_nextclade_multiqc.collectFile(name: 'nextclade_clade_mqc.tsv').ifEmpty([]), + ch_cutadapt_multiqc.collect{it[1]}.ifEmpty([]), + ch_spades_quast_multiqc.collect{it[1]}.ifEmpty([]), + ch_unicycler_quast_multiqc.collect{it[1]}.ifEmpty([]), + ch_minia_quast_multiqc.collect{it[1]}.ifEmpty([]), + ch_freyja_multiqc.collect{it[1]}.ifEmpty([]), + ) + + multiqc_report = MULTIQC.out.report.toList() + } + + emit: + multiqc_report // channel: /path/to/multiqc_report.html + 
versions = ch_versions // channel: [ path(versions.yml) ]
+
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    THE END
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
diff --git a/workflows/nanopore.nf b/workflows/nanopore.nf
new file mode 100644
index 00000000..096825e8
--- /dev/null
+++ b/workflows/nanopore.nf
@@ -0,0 +1,637 @@
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    PRINT PARAMS SUMMARY
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { paramsSummaryLog } from 'plugin/nf-schema'
+include { paramsSummaryMap } from 'plugin/nf-schema'
+include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
+include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_viralrecon_pipeline'
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    VALIDATE INPUTS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+def valid_params = [
+    artic_minion_caller : ['nanopolish', 'medaka'],
+    artic_minion_aligner : ['minimap2', 'bwa']
+]
+
+def checkPathParamList = [
+    params.input, params.fastq_dir, params.fast5_dir,
+    params.sequencing_summary, params.gff,
+    params.freyja_barcodes, params.freyja_lineages, params.additional_annotation
+]
+for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }
+
+if (params.fast5_dir) { ch_fast5_dir = file(params.fast5_dir) } else { ch_fast5_dir = [] }
+if (params.sequencing_summary) { ch_sequencing_summary = file(params.sequencing_summary) } else { ch_sequencing_summary = [] }
+if (params.additional_annotation) { ch_additional_gtf = file(params.additional_annotation) } else { ch_additional_gtf = [] }
+
+// Need to stage medaka model properly depending on whether it is a
string or a file +ch_medaka_model = Channel.empty() +if (params.artic_minion_caller == 'medaka') { + if (file(params.artic_minion_medaka_model).exists()) { + ch_medaka_model = Channel.fromPath(params.artic_minion_medaka_model) + } +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + CONFIG FILES +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +ch_multiqc_config = file("$projectDir/assets/multiqc_config_nanopore.yml", checkIfExists: true) +ch_multiqc_custom_config = params.multiqc_config ? file(params.multiqc_config) : [] + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT LOCAL MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Loaded from modules/local/ +// +include { ASCIIGENOME } from '../modules/local/asciigenome' +include { MULTIQC } from '../modules/local/multiqc_nanopore' +include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_GENOME } from '../modules/local/plot_mosdepth_regions' +include { PLOT_MOSDEPTH_REGIONS as PLOT_MOSDEPTH_REGIONS_AMPLICON } from '../modules/local/plot_mosdepth_regions' + +// +// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules +// +include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome_nanopore' +include { SNPEFF_SNPSIFT } from '../subworkflows/local/snpeff_snpsift' +include { ADDITIONAL_ANNOTATION } from '../subworkflows/local/additional_annotation' +include { VARIANTS_LONG_TABLE } from '../subworkflows/local/variants_long_table' +include { FILTER_BAM_SAMTOOLS } from '../subworkflows/local/filter_bam_samtools' +include { BAM_VARIANT_DEMIX_BOOT_FREYJA } from '../subworkflows/nf-core/bam_variant_demix_boot_freyja/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + IMPORT NF-CORE MODULES/SUBWORKFLOWS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// +// MODULE: Installed directly from 
nf-core/modules +// +include { PYCOQC } from '../modules/nf-core/pycoqc/main' +include { NANOPLOT } from '../modules/nf-core/nanoplot/main' +include { ARTIC_GUPPYPLEX } from '../modules/nf-core/artic/guppyplex/main' +include { ARTIC_MINION } from '../modules/nf-core/artic/minion/main' +include { VCFLIB_VCFUNIQ } from '../modules/nf-core/vcflib/vcfuniq/main' +include { TABIX_TABIX } from '../modules/nf-core/tabix/tabix/main' +include { BCFTOOLS_STATS } from '../modules/nf-core/bcftools/stats/main' +include { QUAST } from '../modules/nf-core/quast/main' +include { PANGOLIN } from '../modules/nf-core/pangolin/main' +include { NEXTCLADE_RUN } from '../modules/nf-core/nextclade/run/main' +include { MOSDEPTH as MOSDEPTH_GENOME } from '../modules/nf-core/mosdepth/main' +include { MOSDEPTH as MOSDEPTH_AMPLICON } from '../modules/nf-core/mosdepth/main' + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + RUN MAIN WORKFLOW +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +// Info required for completion email and summary +def pass_barcode_reads = [:] +def fail_barcode_reads = [:] + +workflow NANOPORE { + + take: + ch_samplesheet // channel: samplesheet read in from --input + ch_genome_fasta + ch_genome_gff + ch_primer_bed + ch_artic_scheme + ch_bowtie2_index + ch_nextclade_dataset + ch_nextclade_dataset_name + ch_nextclade_dataset_reference + ch_nextclade_dataset_tag + + main: + ch_versions = Channel.empty() + ch_multiqc_files = Channel.empty() + multiqc_report = Channel.empty() + + // + // MODULE: PycoQC on sequencing summary file + // + ch_pycoqc_multiqc = Channel.empty() + if (params.sequencing_summary && !params.skip_pycoqc) { + PYCOQC ( + Channel.of(ch_sequencing_summary).map { [ [:], it ] } + ) + ch_pycoqc_multiqc = PYCOQC.out.json + ch_versions = ch_versions.mix(PYCOQC.out.versions) + } + + // + // SUBWORKFLOW: Uncompress and prepare reference genome files + // + PREPARE_GENOME ( + ch_genome_fasta, + 
ch_genome_gff, + ch_primer_bed, + ch_bowtie2_index, + ch_nextclade_dataset, + ch_nextclade_dataset_name, + ch_nextclade_dataset_reference, + ch_nextclade_dataset_tag + ) + ch_versions = ch_versions.mix(PREPARE_GENOME.out.versions) + + // Check primer BED file only contains suffixes provided --primer_left_suffix / --primer_right_suffix + PREPARE_GENOME + .out + .primer_bed + .map { WorkflowCommons.checkPrimerSuffixes(it, params.primer_left_suffix, params.primer_right_suffix, log) } + + // Check whether the contigs in the primer BED file are present in the reference genome + PREPARE_GENOME + .out + .primer_bed + .map { [ WorkflowCommons.getColFromFile(it, col=0, uniqify=true, sep='\t') ] } + .set { ch_bed_contigs } + + PREPARE_GENOME + .out + .fai + .map { [ WorkflowCommons.getColFromFile(it, col=0, uniqify=true, sep='\t') ] } + .concat(ch_bed_contigs) + .collect() + .map { fai, bed -> WorkflowCommons.checkContigsInBED(fai, bed, log) } + + barcode_dirs = file("${params.fastq_dir}/barcode*", type: 'dir' , maxdepth: 1) + single_barcode_dir = file("${params.fastq_dir}/*.fastq" , type: 'file', maxdepth: 1) + ch_custom_no_sample_name_multiqc = Channel.empty() + ch_custom_no_barcodes_multiqc = Channel.empty() + if (barcode_dirs) { + Channel + .fromPath( barcode_dirs ) + .filter( ~/.*barcode[0-9]{1,4}$/ ) + .map { dir -> + def count = 0 + for (x in dir.listFiles()) { + if (x.isFile() && x.toString().contains('.fastq')) { + count += x.countFastq() + } + } + return [ dir.baseName , dir, count ] + } + .set { ch_fastq_dirs } + + // + // SUBWORKFLOW: Read in samplesheet containing sample to barcode mappings + // + if (params.input) { + ch_samplesheet + .join(ch_fastq_dirs, remainder: true) + .set { ch_fastq_dirs } + + // + // MODULE: Create custom content file for MultiQC to report barcodes were allocated reads >= params.min_barcode_reads but no sample name in samplesheet + // + ch_fastq_dirs + .filter { it[1] == null } + .filter { it[-1] >= params.min_barcode_reads } + .map { 
it -> [ "${it[0]}\t${it[-1]}" ] } + .collect() + .map { + tsv_data -> + def header = ['Barcode', 'Read count'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_custom_no_sample_name_multiqc } + + // + // MODULE: Create custom content file for MultiQC to report samples that were in samplesheet but have no barcodes + // + ch_fastq_dirs + .filter { it[-1] == null } + .map { it -> [ "${it[1]}\t${it[0]}" ] } + .collect() + .map { + tsv_data -> + def header = ['Sample', 'Missing barcode'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_custom_no_barcodes_multiqc } + + ch_fastq_dirs + .filter { (it[1] != null) } + .filter { (it[-1] != null) } + .set { ch_fastq_dirs } + + } else { + ch_fastq_dirs + .map { barcode, dir, count -> [ barcode, barcode, dir, count ] } + .set { ch_fastq_dirs } + } + } else if (single_barcode_dir) { + Channel + .fromPath("${params.fastq_dir}", type: 'dir', maxDepth: 1) + .map { it -> [ 'SAMPLE_1', 'single_barcode', it, 10000000 ] } + .set{ ch_fastq_dirs } + } else { + log.error "Please specify a valid folder containing ONT basecalled, barcoded fastq files generated by guppy_barcoder or guppy_basecaller e.g. 
'--fastq_dir ./20191023_1522_MC-110615_0_FAO93606_12bf9b4f/fastq_pass/" + System.exit(1) + } + + // + // MODULE: Create custom content file for MultiQC to report samples with reads < params.min_barcode_reads + // + ch_fastq_dirs + .branch { barcode, sample, dir, count -> + pass: count > params.min_barcode_reads + pass_barcode_reads[sample] = count + return [ "$sample\t$count" ] + fail: count < params.min_barcode_reads + fail_barcode_reads[sample] = count + return [ "$sample\t$count" ] + } + .set { ch_pass_fail_barcode_count } + + ch_pass_fail_barcode_count + .fail + .collect() + .map { + tsv_data -> + def header = ['Sample', 'Barcode count'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_custom_fail_barcodes_count_multiqc } + + // Re-arrange channels to have meta map of information for sample + ch_fastq_dirs + .filter { it[-1] > params.min_barcode_reads } + .map { barcode, sample, dir, count -> [ [ id: sample, barcode:barcode ], dir ] } + .set { ch_fastq_dirs } + + // + // MODULE: Run Artic Guppyplex + // + ARTIC_GUPPYPLEX ( + ch_fastq_dirs + ) + ch_versions = ch_versions.mix(ARTIC_GUPPYPLEX.out.versions.first()) + + // + // MODULE: Create custom content file for MultiQC to report samples with reads < params.min_guppyplex_reads + // + ARTIC_GUPPYPLEX + .out + .fastq + .branch { meta, fastq -> + def count = fastq.countFastq() + pass: count > params.min_guppyplex_reads + return [ "$meta.id\t$count" ] + fail: count < params.min_guppyplex_reads + return [ "$meta.id\t$count" ] + } + .set { ch_pass_fail_guppyplex_count } + + ch_pass_fail_guppyplex_count + .fail + .collect() + .map { + tsv_data -> + def header = ['Sample', 'Read count'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_custom_fail_guppyplex_count_multiqc } + + // + // MODULE: Nanoplot QC for FastQ files + // + if (!params.skip_nanoplot) { + NANOPLOT ( + ARTIC_GUPPYPLEX.out.fastq + ) + ch_versions = ch_versions.mix(NANOPLOT.out.versions.first()) + } + + // + // 
MODULE: Run Artic minion + // + ARTIC_MINION ( + ARTIC_GUPPYPLEX.out.fastq.filter { it[-1].countFastq() > params.min_guppyplex_reads }, + ch_fast5_dir, + ch_sequencing_summary, + PREPARE_GENOME.out.fasta.collect(), + PREPARE_GENOME.out.primer_bed.collect(), + ch_medaka_model.collect().ifEmpty([]), + params.artic_minion_medaka_model ?: '', + ch_artic_scheme, + params.primer_set_version + ) + ch_versions = ch_versions.mix(ARTIC_MINION.out.versions.first()) + + // + // MODULE: Remove duplicate variants + // + VCFLIB_VCFUNIQ ( + ARTIC_MINION.out.vcf.join(ARTIC_MINION.out.tbi, by: [0]), + ) + ch_versions = ch_versions.mix(VCFLIB_VCFUNIQ.out.versions.first()) + + // + // MODULE: Index VCF file + // + TABIX_TABIX ( + VCFLIB_VCFUNIQ.out.vcf + ) + ch_versions = ch_versions.mix(TABIX_TABIX.out.versions.first()) + + // + // MODULE: VCF stats with bcftools stats + // + BCFTOOLS_STATS ( + VCFLIB_VCFUNIQ.out.vcf.join(TABIX_TABIX.out.tbi, by: [0]), + [ [:], [] ], + [ [:], [] ], + [ [:], [] ], + [ [:], [] ], + [ [:], [] ] + ) + ch_versions = ch_versions.mix(BCFTOOLS_STATS.out.versions.first()) + + // + // SUBWORKFLOW: Filter unmapped reads from BAM + // + FILTER_BAM_SAMTOOLS ( + ARTIC_MINION.out.bam.join(ARTIC_MINION.out.bai, by: [0]), + [ [:], [] ] + ) + ch_versions = ch_versions.mix(FILTER_BAM_SAMTOOLS.out.versions) + + // + // MODULE: Genome-wide and amplicon-specific coverage QC plots + // + ch_mosdepth_multiqc = Channel.empty() + ch_amplicon_heatmap_multiqc = Channel.empty() + if (!params.skip_mosdepth) { + + MOSDEPTH_GENOME ( + ARTIC_MINION.out.bam_primertrimmed + .join(ARTIC_MINION.out.bai_primertrimmed, by: [0]) + .map { meta, bam, bai -> [ meta, bam, bai, [] ] }, + [ [:], [] ] + ) + ch_mosdepth_multiqc = MOSDEPTH_GENOME.out.global_txt + ch_versions = ch_versions.mix(MOSDEPTH_GENOME.out.versions.first()) + + PLOT_MOSDEPTH_REGIONS_GENOME ( + MOSDEPTH_GENOME.out.regions_bed.collect { it[1] } + ) + ch_versions = ch_versions.mix(PLOT_MOSDEPTH_REGIONS_GENOME.out.versions) + + 
MOSDEPTH_AMPLICON ( + ARTIC_MINION.out.bam_primertrimmed.join(ARTIC_MINION.out.bai_primertrimmed, by: [0]).join(PREPARE_GENOME.out.primer_collapsed_bed), + [ [:], [] ] + ) + ch_versions = ch_versions.mix(MOSDEPTH_AMPLICON.out.versions.first()) + + PLOT_MOSDEPTH_REGIONS_AMPLICON ( + MOSDEPTH_AMPLICON.out.regions_bed.collect { it[1] } + ) + ch_amplicon_heatmap_multiqc = PLOT_MOSDEPTH_REGIONS_AMPLICON.out.heatmap_tsv + ch_versions = ch_versions.mix(PLOT_MOSDEPTH_REGIONS_AMPLICON.out.versions) + } + + // + // MODULE: Lineage analysis with Pangolin + // + ch_pangolin_multiqc = Channel.empty() + if (!params.skip_pangolin) { + PANGOLIN ( + ARTIC_MINION.out.fasta + ) + ch_pangolin_multiqc = PANGOLIN.out.report + ch_versions = ch_versions.mix(PANGOLIN.out.versions.first()) + } + + // + // MODULE: Clade assignment, mutation calling, and sequence quality checks with Nextclade + // + ch_nextclade_multiqc = Channel.empty() + if (!params.skip_nextclade) { + NEXTCLADE_RUN ( + ARTIC_MINION.out.fasta, + PREPARE_GENOME.out.nextclade_db.collect() + ) + ch_versions = ch_versions.mix(NEXTCLADE_RUN.out.versions.first()) + + // + // MODULE: Get Nextclade clade information for MultiQC report + // + NEXTCLADE_RUN + .out + .csv + .map { + meta, csv -> + def clade = WorkflowCommons.getNextcladeFieldMapFromCsv(csv)['clade'] + return [ "$meta.id\t$clade" ] + } + .collect() + .map { + tsv_data -> + def header = ['Sample', 'clade'] + WorkflowCommons.multiqcTsvFromList(tsv_data, header) + } + .set { ch_nextclade_multiqc } + } + + // + // SUBWORKFLOW: Determine variants with Freyja + // + ch_freyja_multiqc = Channel.empty() + if (!params.skip_freyja) { + BAM_VARIANT_DEMIX_BOOT_FREYJA( + ARTIC_MINION.out.bam_primertrimmed, + PREPARE_GENOME.out.fasta, + params.skip_freyja_boot, + params.freyja_repeats, + params.freyja_db_name, + params.freyja_barcodes, + params.freyja_lineages, + ) + ch_versions = ch_versions.mix(BAM_VARIANT_DEMIX_BOOT_FREYJA.out.versions) + ch_freyja_multiqc = 
BAM_VARIANT_DEMIX_BOOT_FREYJA.out.demix + } + + // + // MODULE: Consensus QC across all samples with QUAST + // + ch_quast_multiqc = Channel.empty() + if (!params.skip_variants_quast) { + ARTIC_MINION.out.fasta + .collect{ it[1] } + .map { consensus_collect -> tuple([id: "quast"], consensus_collect) } + .set { ch_to_quast } + QUAST ( + ch_to_quast, + PREPARE_GENOME.out.fasta.collect().map { [ [:], it ] }, + ch_genome_gff ? PREPARE_GENOME.out.gff.map { [ [:], it ] } : [ [:], [] ], + ) + ch_quast_multiqc = QUAST.out.tsv + ch_versions = ch_versions.mix(QUAST.out.versions) + } + + // + // SUBWORKFLOW: Annotate variants with snpEff + // + ch_snpeff_multiqc = Channel.empty() + ch_snpsift_txt = Channel.empty() + if (ch_genome_gff && !params.skip_snpeff) { + SNPEFF_SNPSIFT ( + VCFLIB_VCFUNIQ.out.vcf, + PREPARE_GENOME.out.snpeff_db.collect(), + PREPARE_GENOME.out.snpeff_config.collect(), + PREPARE_GENOME.out.fasta.collect() + ) + ch_snpeff_multiqc = SNPEFF_SNPSIFT.out.csv + ch_snpsift_txt = SNPEFF_SNPSIFT.out.snpsift_txt + ch_versions = ch_versions.mix(SNPEFF_SNPSIFT.out.versions) + } + + // + // MODULE: Variant screenshots with ASCIIGenome + // + if (!params.skip_asciigenome) { + ARTIC_MINION + .out + .bam_primertrimmed + .join(VCFLIB_VCFUNIQ.out.vcf, by: [0]) + .join(BCFTOOLS_STATS.out.stats, by: [0]) + .map { meta, bam, vcf, stats -> + if (WorkflowCommons.getNumVariantsFromBCFToolsStats(stats) > 0) { + return [ meta, bam, vcf ] + } + } + .set { ch_asciigenome } + + ASCIIGENOME ( + ch_asciigenome, + PREPARE_GENOME.out.fasta.collect(), + PREPARE_GENOME.out.chrom_sizes.collect(), + ch_genome_gff ? 
PREPARE_GENOME.out.gff : [], + PREPARE_GENOME.out.primer_bed.collect(), + params.asciigenome_window_size, + params.asciigenome_read_depth + ) + ch_versions = ch_versions.mix(ASCIIGENOME.out.versions.first()) + } + + // + // SUBWORKFLOW: Create variants long table report + // + if (!params.skip_variants_long_table && ch_genome_gff && !params.skip_snpeff) { + VARIANTS_LONG_TABLE ( + VCFLIB_VCFUNIQ.out.vcf, + TABIX_TABIX.out.tbi, + ch_snpsift_txt, + ch_pangolin_multiqc + ) + ch_versions = ch_versions.mix(VARIANTS_LONG_TABLE.out.versions) + } + + // + // SUBWORKFLOW: Create variants long table report for additional annotation file + // + if (params.additional_annotation) { + ADDITIONAL_ANNOTATION ( + VCFLIB_VCFUNIQ.out.vcf, + TABIX_TABIX.out.tbi, + PREPARE_GENOME.out.fasta, + ch_additional_gtf, + ch_pangolin_multiqc + + ) + ch_versions = ch_versions.mix(ADDITIONAL_ANNOTATION.out.versions) + } + + // + // MODULE: Pipeline reporting + // + softwareVersionsToYAML(ch_versions) + .collectFile( + storeDir: "${params.outdir}/pipeline_info", + name: 'nf_core_pipeline_software_mqc_versions.yml', + sort: true, + newLine: true + ).set { ch_collated_versions } + + + // + // MODULE: MultiQC + // + if (!params.skip_multiqc) { + summary_params = paramsSummaryMap( + workflow, parameters_schema: "nextflow_schema.json") + ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) + ch_multiqc_custom_methods_description = params.multiqc_methods_description ? + file(params.multiqc_methods_description, checkIfExists: true) : + file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) + ch_methods_description = Channel.value( + methodsDescriptionText(ch_multiqc_custom_methods_description)) + + ch_multiqc_logo = params.multiqc_logo ? 
+ Channel.fromPath(params.multiqc_logo, checkIfExists: true) : + Channel.empty() + + ch_multiqc_files = ch_multiqc_files.mix( + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) + ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) + ch_multiqc_files = ch_multiqc_files.mix( + ch_methods_description.collectFile( + name: 'methods_description_mqc.yaml', + sort: false)) + + MULTIQC ( + ch_multiqc_files.collect(), + ch_multiqc_config, + ch_multiqc_custom_config, + ch_multiqc_logo.toList(), + ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'), + ch_custom_no_sample_name_multiqc.collectFile(name: 'fail_barcodes_no_sample_mqc.tsv').ifEmpty([]), + ch_custom_no_barcodes_multiqc.collectFile(name: 'fail_no_barcode_samples_mqc.tsv').ifEmpty([]), + ch_custom_fail_barcodes_count_multiqc.collectFile(name: 'fail_barcode_count_samples_mqc.tsv').ifEmpty([]), + ch_custom_fail_guppyplex_count_multiqc.collectFile(name: 'fail_guppyplex_count_samples_mqc.tsv').ifEmpty([]), + ch_amplicon_heatmap_multiqc.ifEmpty([]), + ch_pycoqc_multiqc.collect{it[1]}.ifEmpty([]), + ARTIC_MINION.out.json.collect{it[1]}.ifEmpty([]), + FILTER_BAM_SAMTOOLS.out.flagstat.collect{it[1]}.ifEmpty([]), + BCFTOOLS_STATS.out.stats.collect{it[1]}.ifEmpty([]), + ch_mosdepth_multiqc.collect{it[1]}.ifEmpty([]), + ch_quast_multiqc.collect{it[1]}.ifEmpty([]), + ch_snpeff_multiqc.collect{it[1]}.ifEmpty([]), + ch_pangolin_multiqc.collect{it[1]}.ifEmpty([]), + ch_nextclade_multiqc.collectFile(name: 'nextclade_clade_mqc.tsv').ifEmpty([]), + ch_freyja_multiqc.collect{it[1]}.ifEmpty([]), + ) + + multiqc_report = MULTIQC.out.report.toList() + } + + emit: + multiqc_report // channel: /path/to/multiqc_report.html + versions = ch_versions // channel: [ path(versions.yml) ] + +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + THE END +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ diff --git a/workflows/viralrecon.nf 
b/workflows/viralrecon.nf deleted file mode 100644 index 7ea56421..00000000 --- a/workflows/viralrecon.nf +++ /dev/null @@ -1,97 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ -include { FASTQC } from '../modules/nf-core/fastqc/main' -include { MULTIQC } from '../modules/nf-core/multiqc/main' -include { paramsSummaryMap } from 'plugin/nf-schema' -include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' -include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_viralrecon_pipeline' - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - RUN MAIN WORKFLOW -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ - -workflow VIRALRECON { - - take: - ch_samplesheet // channel: samplesheet read in from --input - main: - - ch_versions = Channel.empty() - ch_multiqc_files = Channel.empty() - // - // MODULE: Run FastQC - // - FASTQC ( - ch_samplesheet - ) - ch_multiqc_files = ch_multiqc_files.mix(FASTQC.out.zip.collect{it[1]}) - ch_versions = ch_versions.mix(FASTQC.out.versions.first()) - - // - // Collate and save software versions - // - softwareVersionsToYAML(ch_versions) - .collectFile( - storeDir: "${params.outdir}/pipeline_info", - name: 'nf_core_' + 'pipeline_software_' + 'mqc_' + 'versions.yml', - sort: true, - newLine: true - ).set { ch_collated_versions } - - - // - // MODULE: MultiQC - // - ch_multiqc_config = Channel.fromPath( - "$projectDir/assets/multiqc_config.yml", checkIfExists: true) - ch_multiqc_custom_config = params.multiqc_config ? 
- Channel.fromPath(params.multiqc_config, checkIfExists: true) : - Channel.empty() - ch_multiqc_logo = params.multiqc_logo ? - Channel.fromPath(params.multiqc_logo, checkIfExists: true) : - Channel.empty() - - summary_params = paramsSummaryMap( - workflow, parameters_schema: "nextflow_schema.json") - ch_workflow_summary = Channel.value(paramsSummaryMultiqc(summary_params)) - ch_multiqc_files = ch_multiqc_files.mix( - ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml')) - ch_multiqc_custom_methods_description = params.multiqc_methods_description ? - file(params.multiqc_methods_description, checkIfExists: true) : - file("$projectDir/assets/methods_description_template.yml", checkIfExists: true) - ch_methods_description = Channel.value( - methodsDescriptionText(ch_multiqc_custom_methods_description)) - - ch_multiqc_files = ch_multiqc_files.mix(ch_collated_versions) - ch_multiqc_files = ch_multiqc_files.mix( - ch_methods_description.collectFile( - name: 'methods_description_mqc.yaml', - sort: true - ) - ) - - MULTIQC ( - ch_multiqc_files.collect(), - ch_multiqc_config.toList(), - ch_multiqc_custom_config.toList(), - ch_multiqc_logo.toList(), - [], - [] - ) - - emit:multiqc_report = MULTIQC.out.report.toList() // channel: /path/to/multiqc_report.html - versions = ch_versions // channel: [ path(versions.yml) ] - -} - -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - THE END -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/