-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathnextflow.config
218 lines (190 loc) · 7.79 KB
/
nextflow.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
/**
* # Parameters for nf-encyclopedia
*
* A Nextflow pipeline for analyzing data-independent acquisition proteomics
* using chromatogram libraries with EncyclopeDIA.
*/
params {
/** \group{Input/Output Parameters} */
/** \type{string} \required
* A comma-separated values (CSV) file specifying the mass spectrometry data
* files to be analyzed. This file must have at least two columns: *file*,
* *chrlib*. The *file* column contains the paths to the mass
* spectrometry data files in formats supported by MSconvert. The *chrlib*
* column contains either `true` or `false` depending on whether a file
* should be considered part of a chromatogram library or not. The optional
* *group* column is used to specify which chromatogram library files should be used
* to analyze each quantitative run. Optionally, *condition* and *bioreplicate*
* columns can be included and will be used by MSstats. See the
* [MSstats documentation](https://msstats.org/msstats-2/) for details.
* Example:
* | file | chrlib | condition |
* |---------|--------|-----------|
* | S01.raw | true | lib |
* | S02.raw | false | A |
* | S03.raw | false | B |
*/
input = null
/** \type{string} \required
* A spectral library in EncyclopeDIA's DLIB format. See the
* [EncyclopeDIA](https://bitbucket.org/searleb/encyclopedia/wiki/Home)
* documentation for details.
*/
dlib = null
/** \type{string} \required
* The FASTA containing the subset of protein sequences for which to
* search.
*/
fasta = null
/** \type{string}
* Contrasts to test with MSstats provided as a CSV file. See the
* [MSstats documentation](https://msstats.org/msstats-2/) for more details.
* Example:
* | | A | B |
* |------|-----|-----|
* | AvsB | 1 | -1 |
*/
contrasts = null
result_dir = 'results' /** \type{str} Where results will be saved. */
report_dir = 'reports' /** \type{str} Where reports will be saved. */
mzml_dir = 'mzml' /** \type{str} Where mzML files will be saved. */
email = null /** \type{str} An email to alert on completion. */
/** \group{Grouping Parameters} */
/** \type{boolean}
* Aggregate groups into a single analysis. If `true`, each group will
* be searched with EncyclopeDIA separately against its respective
* chromatogram library. These search results are subsequently aggregated
* and the false discovery rate (FDR) is estimated for the combined
* groups. If `false`, the groups are analyzed individually and FDR is
* estimated within each group.
*/
aggregate = false
/** \type{string} The file prefix to use for the aggregated data.*/
agg_name = 'aggregated'
/** \group{MSconvert Parameters} */
/** \type{boolean} Demultiplex overlapping DIA windows. */
msconvert.demultiplex = true
/** \type{boolean} Force existing mzML files to be reconverted. */
msconvert.force = false
/** \group{EncyclopeDIA Parameters} */
/** \type{string} The suffix to append to chromatogram library result files */
encyclopedia.chrlib_suffix = 'chrlib'
/** \type{string} The suffix to append to quantitative result files */
encyclopedia.quant_suffix = 'quant'
/** \type{string}
* Command line arguments to pass to all EncyclopeDIA analyses.
* The default attempts to match the defaults from the graphical
* user interface.
*/
encyclopedia.args = '-percolatorVersion v3-01 -quantifyAcrossSamples true -scoringBreadthType window'
/** Additional command line arguments to use when searching files. */
encyclopedia.local.args = ''
/**
* Additional command line arguments to use when quantifying
* detected peptides across multiple runs.
*/
encyclopedia.global.args = ''
/**
* The location of the EncyclopeDIA jar file. Use `null` if
* EncyclopeDIA was installed from bioconda.
*/
encyclopedia.jar = '/code/encyclopedia.jar'
/** \group{MSstats Parameters} */
/** \type{boolean} Enable MSstats quantification */
msstats.enabled = true
/** \type{string}
* The normalization method used by MSstats.
* Must be one of 'equalizeMedians', 'quantile', or 'none'.
*/
msstats.normalization = 'equalizeMedians'
/** \type{bool} Generate MSstats reports */
msstats.reports = false
/** \group{Resources}
* Change these to reflect your compute environment.
*/
max_memory = '128.GB' /** \type{string} The maximum memory allowed for each process. */
max_cpus = 16 /** \type{integer} The maximum number of CPUs for each process. */
max_time = '240.h' /** \type{string} The maximum execution time for each process. */
}
// Containers
// Every process runs in the pipeline image by default; the MSCONVERT process
// overrides it with a dedicated pwiz image selected by process name.
process {
container = "ghcr.io/talusbio/nf-encyclopedia:latest"
withName: 'MSCONVERT' {
container = 'chambm/pwiz-skyline-i-agree-to-the-vendor-licenses:latest'
}
}
// Manifest
// Pipeline metadata reported by Nextflow. The leading '!' in nextflowVersion
// makes the version constraint strict: execution stops if it is not satisfied.
manifest {
    name = 'nf-encyclopedia'
    author = 'William E Fondrie'
    // Point at the repository itself rather than an account-level URL. The
    // organisation matches the one hosting the default container image
    // (ghcr.io/talusbio/nf-encyclopedia).
    homePage = 'https://github.com/TalusBio/nf-encyclopedia'
    description = 'Analyze GPF DIA proteomics data with EncyclopeDIA'
    mainScript = 'main.nf'
    nextflowVersion = '!>=21.10.3'
}
// Export these variables to prevent local Python/R libraries from conflicting
// with those in the container. The JULIA depot path has been adjusted to a
// fixed path `/usr/local/share/julia` that needs to be used for packages in
// the container. See https://apeltzer.github.io/post/03-julia-lang-nextflow/
// for details on that. Once we have a common agreement on where to keep Julia
// packages, this is adjustable.
// Copied from the nf-core template.
env {
PYTHONNOUSERSITE = 1
R_PROFILE_USER = "/.Rprofile"
R_ENVIRON_USER = "/.Renviron"
JULIA_DEPOT_PATH = "/usr/local/share/julia"
}
// Capture exit codes from upstream processes when piping.
// bash flags: -e aborts on the first failing command, -u treats unset
// variables as errors, and -o pipefail makes a pipeline fail if any stage fails.
process.shell = ['/bin/bash', '-euo', 'pipefail']
// Timestamp evaluated once when the config is parsed; it is embedded in every
// report file name below so the four files of a run share the same suffix.
def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
// Execution reports, all written under params.report_dir.
timeline {
enabled = true
file = "${params.report_dir}/execution_timeline_${trace_timestamp}.html"
}
report {
enabled = true
file = "${params.report_dir}/execution_report_${trace_timestamp}.html"
}
trace {
enabled = true
file = "${params.report_dir}/execution_trace_${trace_timestamp}.txt"
}
dag {
enabled = true
file = "${params.report_dir}/pipeline_dag_${trace_timestamp}.html"
}
// Load base.config by default for all pipelines
includeConfig 'conf/base.config'
// Function to ensure that resource requirements don't go beyond
// a maximum limit. Copied from the nf-core template.
//
// obj  - the requested resource: a memory amount for 'memory', a duration
//        for 'time', or a number for 'cpus'.
// type - which limit to apply: 'memory', 'time' or 'cpus'.
//
// Returns the smaller of `obj` and the matching params.max_* value. If the
// configured maximum cannot be parsed or compared, an error is printed and
// `obj` is returned unchanged. An unrecognised `type` also returns `obj`
// unchanged instead of silently yielding null.
def check_max(obj, type) {
    if (type == 'memory') {
        try {
            def limit = params.max_memory as nextflow.util.MemoryUnit
            // compareTo only guarantees a positive result for "greater than",
            // so test the sign rather than equality with 1.
            return obj.compareTo(limit) > 0 ? limit : obj
        } catch (all) {
            println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'time') {
        try {
            def limit = params.max_time as nextflow.util.Duration
            return obj.compareTo(limit) > 0 ? limit : obj
        } catch (all) {
            println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj"
            return obj
        }
    } else if (type == 'cpus') {
        try {
            return Math.min( obj, params.max_cpus as int )
        } catch (all) {
            println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
            return obj
        }
    }
    // Unknown resource type: pass the request through untouched.
    return obj
}