nextflow.config

/**
 * # Parameters for nf-encyclopedia
 *
 * A Nextflow pipeline for analyzing data-independent acquisition proteomics
 * using chromatogram libraries with EncyclopeDIA.
 */
params {
    /** \group{Input/Output Parameters} */
    /** \type{string} \required
     *  A comma-separated values (CSV) file specifying the mass spectrometry data
     *  files to be analyzed. This file must have at least two columns: *file* and
     *  *chrlib*. The *file* column contains the paths to the mass
     *  spectrometry data files in formats supported by MSconvert. The *chrlib*
     *  column contains either `true` or `false` depending on whether a file
     *  should be considered part of a chromatogram library or not. The optional
     *  *group* column is used to specify which chromatogram library files should
     *  be used to analyze each quantitative run. Optionally, *condition* and
     *  *bioreplicate* columns can be included and will be used by MSstats. See the
     *  [MSstats documentation](https://msstats.org/msstats-2/) for details.
     *  Example:
     *  | file    | chrlib | condition |
     *  |---------|--------|-----------|
     *  | S01.raw | true   | lib       |
     *  | S02.raw | false  | A         |
     *  | S03.raw | false  | B         |
     */
    input = null

    /** \type{string} \required
     *  A spectral library in EncyclopeDIA's DLIB format. See the
     *  [EncyclopeDIA](https://bitbucket.org/searleb/encyclopedia/wiki/Home)
     *  documentation for details.
     */
    dlib = null

    /** \type{string} \required
     *  The FASTA file containing the subset of protein sequences for which to
     *  search.
     */
    fasta = null


    /** \type{string}
     *  Contrasts to test with MSstats provided as a CSV file. See the
     *  [MSstats documentation](https://msstats.org/msstats-2/) for more details.
     *  Example:
     *  |      |  A  | B   |
     *  |------|-----|-----|
     *  | AvsB |  1  | -1  |
     */
    contrasts = null

    result_dir = 'results' /** \type{str} Where results will be saved. */
    report_dir = 'reports' /** \type{str} Where reports will be saved. */
    mzml_dir = 'mzml'      /** \type{str} Where mzML files will be saved. */
    email = null           /** \type{str} An email to alert on completion. */

    /** \group{Grouping Parameters} */
    /** \type{boolean}
     *  Aggregate groups into a single analysis. If `true`, each group will
     *  be searched with EncyclopeDIA separately against their respective
     *  chromatogram library. These search results are subsequently aggregated
     *  and the false discovery rate (FDR) is estimated for the combined
     *  groups. If `false`, the groups are analyzed individually and FDR is
     *  estimated within each group.
     */
    aggregate = false

    /** \type{string} The file prefix to use for the aggregated data. */
    agg_name = 'aggregated'

    /** \group{MSconvert Parameters} */
    /** \type{boolean} Demultiplex overlapping DIA windows. */
    msconvert.demultiplex = true

    /** \type{boolean} Force existing mzML files to be reconverted. */
    msconvert.force = false

    /** \group{EncyclopeDIA Parameters} */
    /** \type{string} The suffix to append to chromatogram library result files. */
    encyclopedia.chrlib_suffix = 'chrlib'

    /** \type{string} The suffix to append to quantitative result files. */
    encyclopedia.quant_suffix = 'quant'

    /** \type{string}
     *  Command line arguments to pass to all EncyclopeDIA analyses.
     *  The default attempts to match the defaults from the graphical
     *  user interface.
     */
    encyclopedia.args = '-percolatorVersion v3-01 -quantifyAcrossSamples true -scoringBreadthType window'

    /** \type{string} Additional command line arguments to use when searching files. */
    encyclopedia.local.args = ''

    /** \type{string}
     *  Additional command line arguments to use when quantifying
     *  detected peptides across multiple runs.
     */
    encyclopedia.global.args = ''

    /** \type{string}
     *  The location of the EncyclopeDIA jar file. Use `null` if
     *  EncyclopeDIA was installed from bioconda.
     */
    encyclopedia.jar = '/code/encyclopedia.jar'

    /** \group{MSstats Parameters} */
    /** \type{boolean} Enable MSstats quantification. */
    msstats.enabled = true

    /** \type{string}
     *  The normalization method used by MSstats.
     *  Must be one of 'equalizeMedians', 'quantile', or 'none'.
     */
    msstats.normalization = 'equalizeMedians'

    /** \type{bool} Generate MSstats reports. */
    msstats.reports = false

    /** \group{Resources}
     *  Change these to reflect your compute environment.
     */
    max_memory = '128.GB' /** \type{string} The maximum memory allowed for each process. */
    max_cpus = 16         /** \type{integer} The maximum number of CPUs for each process. */
    max_time = '240.h'    /** \type{string} The maximum execution time for each process. */
}


// Containers
// Every process runs in the nf-encyclopedia image unless a more specific
// selector below overrides it.
// NOTE(review): both images are referenced by the mutable `latest` tag;
// consider pinning explicit versions so that runs are reproducible.
process {
    container = "ghcr.io/talusbio/nf-encyclopedia:latest"
    // The MSCONVERT process uses the ProteoWizard image instead of the
    // default pipeline image.
    withName: 'MSCONVERT' {
        container = 'chambm/pwiz-skyline-i-agree-to-the-vendor-licenses:latest'
    }
}


// Manifest
// Pipeline metadata reported by Nextflow.
manifest {
    name            = 'nf-encyclopedia'
    author          = 'William E Fondrie'
    // Point at the pipeline repository itself rather than a bare account URL.
    // NOTE(review): repository path inferred from the container namespace
    // (ghcr.io/talusbio/nf-encyclopedia) - confirm.
    homePage        = 'https://github.com/TalusBio/nf-encyclopedia'
    description     = 'Analyze GPF DIA proteomics data with EncyclopeDIA'
    mainScript      = 'main.nf'
    // The leading '!' makes the version requirement strict: Nextflow stops
    // instead of only warning when the running version does not satisfy it.
    nextflowVersion = '!>=21.10.3'
}


// Export these variables to prevent local Python/R libraries from conflicting
// with those in the container. The JULIA depot path has been adjusted to a
// fixed path, `/usr/local/share/julia`, that needs to be used for packages in
// the container. See https://apeltzer.github.io/post/03-julia-lang-nextflow/
// for details. Once there is a common agreement on where to keep Julia
// packages, this can be adjusted.
// Copied from the nf-core template.
env {
    PYTHONNOUSERSITE = 1
    R_PROFILE_USER   = "/.Rprofile"
    R_ENVIRON_USER   = "/.Renviron"
    JULIA_DEPOT_PATH = "/usr/local/share/julia"
}

// Capture exit codes from upstream processes when piping
// (`-o pipefail`); `-e` and `-u` additionally make task scripts stop on the
// first failing command and on use of an unset variable.
process.shell = ['/bin/bash', '-euo', 'pipefail']

// Timestamp taken when this config is evaluated. It is embedded in every
// report file name below so each run writes its own set of files.
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')

// Execution reports. All of them are enabled and written to
// `params.report_dir`.
timeline {
    enabled = true
    file    = "${params.report_dir}/execution_timeline_${trace_timestamp}.html"
}
report {
    enabled = true
    file    = "${params.report_dir}/execution_report_${trace_timestamp}.html"
}
trace {
    enabled = true
    file    = "${params.report_dir}/execution_trace_${trace_timestamp}.txt"
}
dag {
    enabled = true
    file    = "${params.report_dir}/pipeline_dag_${trace_timestamp}.html"
}

// Load base.config by default for all pipelines.
// NOTE(review): this include precedes the definition of `check_max` in this
// file; presumably conf/base.config only calls it from lazily evaluated
// closures - confirm before reordering.
includeConfig 'conf/base.config'

/**
 * Ensure that a resource request does not exceed the configured maximum.
 * Copied from the nf-core template.
 *
 * @param obj   The requested amount: a memory value for 'memory', a duration
 *              for 'time', or a number for 'cpus'.
 * @param type  Which limit to apply: 'memory', 'time', or 'cpus'.
 * @return      `obj` when it is within the limit, otherwise the corresponding
 *              `params.max_memory`, `params.max_time`, or `params.max_cpus`
 *              value. If the configured limit cannot be interpreted, an error
 *              line is printed and `obj` is returned unchanged. Any other
 *              `type` yields `null`.
 */
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            def limit = params.max_memory as nextflow.util.MemoryUnit
            // compareTo() is only guaranteed to return a positive number for
            // "greater than", not exactly 1, so test the sign.
            return obj.compareTo(limit) > 0 ? limit : obj
        } catch (all) {
            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            def limit = params.max_time as nextflow.util.Duration
            return obj.compareTo(limit) > 0 ? limit : obj
        } catch (all) {
            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
    // Unrecognized resource type: no limit is applied and nothing is returned,
    // matching the original fall-through behavior.
    return null
}