nextflow.config

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    ebi-metagenomics/miassembler Nextflow config file
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Default config options for all compute environments
----------------------------------------------------------------------------------------
*/

// Pipeline-wide parameter defaults; these values are also referenced from the other configs
params {

    // ---- Input options ----
    study_accession = null
    reads_accession = null
    private_study   = false

    // Samplesheet, for data that has already been fetched
    samplesheet = null

    // The pipeline uses the metadata from ENA (obtained by the fetch_tool).
    // That metadata can be incorrect, so the following parameters are
    // provided to "force" the values instead.
    single_end       = null
    library_layout   = null
    library_strategy = null
    platform         = null

    // ---- QC filtering ----

    // Short reads QC filtering options
    short_reads_filter_ratio_threshold    = 0.1
    short_reads_low_reads_count_threshold = 1000

    // Long reads options
    long_reads_min_read_length = 200

    // Short reads reference databases (name to be selected from list)
    bwamem2_reference_genomes_folder = ""
    blast_reference_genomes_folder   = ""

    // Long reads reference genome
    reference_genome = null

    // Short-read sequences and assemblies are automatically cleaned of
    // human and phiX sequences; both the blast and the bwa indices are needed
    remove_human_phix             = true
    human_phix_blast_index_name   = "human_phix"
    human_phix_bwamem2_index_name = "human_phix"

    // Long-read assemblies require neither phiX nor indices, just a fasta file
    reference_genomes_folder = ""
    remove_human             = true
    human_fasta_prefix       = "human"

    // ---- Assembly ----

    /*
     * When --assembler is not set the pipeline picks:
     *   - metaspades for paired-end short reads
     *   - megahit for single-end short reads
     *   - flye for long reads
     *
     * Setting --assembler forces the assembler:
     *
     *   - spades:     use for assembling single-end reads
     *
     *   - metaspades: use for assembling paired-end reads with moderate
     *                 memory and runtime requirements
     *
     *   - megahit:    use when the memory or runtime requirements of
     *                 metaspades are prohibitively high, such as:
     *                   - Memory >1TB
     *                   - Runtime >3-4 days
     *
     *   - flye:       use for any long-read assembly. long_reads_assembler_config
     *                 should be selected depending on the input data (ONT or
     *                 pacbio, and whether the data quality is high or low)
     */
    assembler = null

    // Assembly options
    spades_only_assembler               = true
    short_reads_min_contig_length       = 500
    short_reads_min_contig_length_metat = 200
    short_reads_contig_threshold        = 2
    long_reads_assembler_config         = null
    assembly_memory                     = 100

    // MultiQC options
    multiqc_config              = null
    multiqc_title               = null
    multiqc_logo                = null
    max_multiqc_email_size      = '25.MB'
    multiqc_methods_description = null

    // Boilerplate options
    outdir           = "results"
    publish_dir_mode = 'copy'
    email            = null
    email_on_fail    = null
    plaintext_email  = false
    monochrome_logs  = false
    hook_url         = null
    help             = false
    version          = false

    // Retry limits for the assemblers
    max_spades_retries  = 3
    max_megahit_retries = 3

    // Assembler versions
    spades_version  = "3.15.5"
    megahit_version = "1.2.9"
    flye_version    = "2.9"

}

// Parameter validation and help-message settings.
// NOTE(review): presumably read by the nf-schema plugin declared in the `plugins` scope — confirm.
validation {
    failUnrecognisedParams = true
    lenientMode            = false
    help {
        enabled = true
        showHidden = false
        // Example command line; presumably printed as part of the help message — confirm
        command = "nextflow run ebi-metagenomics/miassembler --samplesheet samplesheet.csv --outdir output"
    }
}

// Always load conf/base.config, independent of the selected profile
includeConfig 'conf/base.config'


// Configuration profiles, selected on the command line with -profile <name>[,<name>...]
profiles {

    // Debugging aids
    debug {
        cleanup                = false
        dumpHashes             = true
        process.beforeScript   = 'echo $HOSTNAME'
        nextflow.enable.configProcessNamesValidation = true
    }

    // ---- Software engines ----
    // Each profile below switches one engine on and explicitly switches the others off.

    conda {
        conda.enabled          = true
        apptainer.enabled      = false
        charliecloud.enabled   = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        singularity.enabled    = false
    }

    // Same as conda, with conda.useMamba switched on
    mamba {
        conda.enabled          = true
        conda.useMamba         = true
        apptainer.enabled      = false
        charliecloud.enabled   = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        singularity.enabled    = false
    }

    docker {
        docker.enabled         = true
        // Run the containers with the uid:gid of the invoking user
        docker.runOptions      = '-u $(id -u):$(id -g)'
        apptainer.enabled      = false
        charliecloud.enabled   = false
        conda.enabled          = false
        podman.enabled         = false
        shifter.enabled        = false
        singularity.enabled    = false
    }

    // Docker run options that additionally request the linux/amd64 platform
    arm {
        docker.runOptions      = '-u $(id -u):$(id -g) --platform=linux/amd64'
    }

    singularity {
        singularity.enabled    = true
        singularity.autoMounts = true
        apptainer.enabled      = false
        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
    }

    podman {
        podman.enabled         = true
        apptainer.enabled      = false
        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
        shifter.enabled        = false
        singularity.enabled    = false
    }

    shifter {
        shifter.enabled        = true
        apptainer.enabled      = false
        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        singularity.enabled    = false
    }

    charliecloud {
        charliecloud.enabled   = true
        apptainer.enabled      = false
        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        singularity.enabled    = false
    }

    apptainer {
        apptainer.enabled      = true
        apptainer.autoMounts   = true
        charliecloud.enabled   = false
        conda.enabled          = false
        docker.enabled         = false
        podman.enabled         = false
        shifter.enabled        = false
        singularity.enabled    = false
    }

    // ---- Execution environments ----

    // Local executor limited to 4 CPUs and 8 GB of memory
    gitpod {
        executor.name          = 'local'
        executor.cpus          = 4
        executor.memory        = 8.GB
    }

    // Profiles whose settings live in their own files under conf/
    test { includeConfig 'conf/test.config' }
    codon_slurm {
        includeConfig 'conf/codon_slurm.config'
    }
}

// The test configuration is deliberately NOT included at the top level: an unconditional
// include would apply its settings to every run. It is loaded only through `-profile test`
// (see the `test` entry in the profiles block above).

// Default container registry for Apptainer, Docker, Podman and Singularity, set independently of -profile.
// These values are not used unless the corresponding engine (Apptainer / Docker / Podman / Singularity) is enabled.
// Point them to your own registry if you keep a mirror of the containers.
apptainer.registry   = 'quay.io'
docker.registry      = 'quay.io'
podman.registry      = 'quay.io'
singularity.registry = 'quay.io'

// Nextflow plugins, pinned to an exact version
plugins {
    id 'nf-schema@2.2.0'
}

// Export these variables to prevent local Python/R libraries from conflicting with those in the container.
// The Julia depot path is set to the fixed path `/usr/local/share/julia`, which must be used for packages in the container.
// See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details. This can be adjusted once there is a common
// agreement on where to keep Julia packages.

env {
    PYTHONNOUSERSITE = 1
    R_PROFILE_USER   = "/.Rprofile"
    R_ENVIRON_USER   = "/.Renviron"
    JULIA_DEPOT_PATH = "/usr/local/share/julia"
}

// Capture exit codes from upstream processes when piping:
// bash runs with -e (exit on error), -u (unset variables are errors) and -o pipefail
process.shell = ['/bin/bash', '-euo', 'pipefail']

// Disable process selector warnings by default. The debug profile sets this back to true to enable the warnings.
nextflow.enable.configProcessNamesValidation = false

// Execution reports: all of them are enabled and written to <outdir>/pipeline_info,
// with the timestamp computed here embedded in every file name.
def trace_timestamp = new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')

dag {
    file    = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html"
    enabled = true
}

report {
    file    = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"
    enabled = true
}

timeline {
    file    = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html"
    enabled = true
}

trace {
    file    = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
    enabled = true
}

// Pipeline metadata
manifest {
    name            = 'ebi-metagenomics/miassembler'
    author          = """Microbiome Informatics Team - EMBL-EBI"""
    homePage        = 'https://github.com/ebi-metagenomics/miassembler'
    description     = """Microbiome Informatics metagenomes assembly pipeline"""
    mainScript      = 'main.nf'
    // Minimum Nextflow version; NOTE(review): the leading '!' presumably makes the requirement strict — confirm against the Nextflow docs
    nextflowVersion = '!>=24.04.0'
    version         = '1.1.0'
    doi             = ''
}

// Load conf/modules.config, which holds the DSL2 module-specific options
includeConfig 'conf/modules.config'

// Function to return the study / reads folder SRPXXXX/SRPXXXXXX/SRRYYYY/SRRYYYYYY, i.e.
//   <study_folder(meta)>/<first 7 characters of the reads accession>/<reads accession>
//
// meta: optional map. meta.id is used as the reads accession when it is set,
//       otherwise params.reads_accession is used.
// Throws IllegalArgumentException when no reads accession is available from either source.
def study_reads_folder( meta = null ) {
    // The reads accession is used as the id for samplesheets
    def reads_accession = meta?.id ?: params.reads_accession
    if ( !reads_accession ) {
        throw new IllegalArgumentException(
            "Cannot build the reads folder: no reads accession found in meta.id or params.reads_accession"
        )
    }

    // take(7) returns the whole value when it is shorter than 7 characters,
    // whereas substring(0, 7) would throw StringIndexOutOfBoundsException
    return study_folder( meta ) + "/" + [
        reads_accession.take(7),
        reads_accession,
    ].join("/")
}

// Function to return the study folder SRPXXXX/SRPXXXXXX, i.e.
//   <first 7 characters of the study accession>/<study accession>
//
// meta: optional map. meta.study_accession is used when it is set,
//       otherwise params.study_accession is used.
// Throws IllegalArgumentException when no study accession is available from either source.
def study_folder( meta = null ) {
    def study_accession = meta?.study_accession ?: params.study_accession
    if ( !study_accession ) {
        throw new IllegalArgumentException(
            "Cannot build the study folder: no study accession found in meta.study_accession or params.study_accession"
        )
    }

    // take(7) returns the whole value when it is shorter than 7 characters,
    // whereas substring(0, 7) would throw StringIndexOutOfBoundsException
    return [
        study_accession.take(7),
        study_accession,
    ].join("/")
}