forked from qbic-pipelines/rnadeseq
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnextflow_schema.json
418 lines (418 loc) · 20.9 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/qbic-pipelines/rnadeseq/master/nextflow_schema.json",
"title": "qbic-pipelines/rnadeseq pipeline parameters",
"description": "Differential gene expression analysis and pathway analysis of RNAseq data",
"type": "object",
"definitions": {
"main_arguments": {
"title": "Main input arguments",
"type": "object",
"fa_icon": "fas fa-terminal",
"properties": {
"input_type": {
"type": "string",
"default": "featurecounts",
"description": "Which type of input is provided, one of featurecounts, rsem, salmon, smrnaseq."
},
"gene_counts": {
"type": "string",
"description": "If input_type = featurecounts: Raw count table (TSV). Columns are samples and rows are genes. 1st column Ensembl_ID, 2nd column gene_name. If input_type = rsem or input_type = salmon: Path to rsem/salmon output folder. If input_type = smrnaseq: Path to smrnaseq output folder containing hairpin and non-hairpin mature .sorted.idxstats files."
},
"input": {
"type": "string",
"format": "file-path",
"exists": true,
"description": "Metadata table/samplesheet (TSV). Rows are samples and columns contain sample grouping."
},
"model": {
"type": "string",
"format": "file-path",
"exists": true,
"description": "Linear model function to calculate the contrasts (TXT). Variable names should be columns in metadata file/samplesheet."
},
"gtf": {
"type": "string",
"description": "Not necessary if genome is set or input_type is featurecounts. GTF file to be used for DESeq if input is rsem or salmon."
}
},
"required": ["input_type", "gene_counts", "input", "model"]
},
"deseq2_arguments": {
"title": "DESeq2 arguments",
"type": "object",
"description": "",
"default": "",
"properties": {
"contrast_matrix": {
"type": "string",
"default": "DEFAULT",
"description": "Tsv indicating which contrasts to consider, one contrast per column. 1 or 0 for every coefficient of the linear model. Check contrasts docs."
},
"contrast_list": {
"type": "string",
"default": "DEFAULT1",
"description": "Tsv indicating list of the contrasts to calculate. 3 columns: factor name, contrast numerator and denominator. Check contrasts docs."
},
"contrast_pairs": {
"type": "string",
"default": "DEFAULT2",
"description": "Tsv indicating list of contrast pairs to calculate. 3 columns: contrast name, numerator and denominator. Check contrasts docs."
},
"genelist": {
"type": "string",
"default": "NO_FILE",
"description": "Txt file with list of genes (one per line) of which to plot heatmaps for normalized counts across all samples."
},
"relevel": {
"type": "string",
"default": "NO_FILE2",
"description": "Tsv indicating list of factors (conditions in the metadata table/samplesheet) and the new level on which to relevel the factor. Check contrasts docs."
},
"batch_effect": {
"type": "boolean",
"description": "Turn on this flag if you wish to consider batch effects. You need to add the batch effect to the linear model too!"
},
"logFC_threshold": {
"type": "integer",
"default": 0,
"description": "Threshold (int) to apply to Log 2 Fold Change to consider a gene as differentially expressed."
},
"adj_pval_threshold": {
"type": "number",
"default": 0.05,
"description": "p value (float) to consider a gene as differentially expressed. The default value is 0.05."
},
"norm_method": {
"type": "string",
"default": "rlog",
"enum": ["rlog", "vst", "vst-force"],
"description": "Which transformation to use during DE analysis; either rlog or vst or vst-force. If vst: the pipeline will override this and run rlog if it detects a strong variation in size factors in the input data (this will be stated in the report). To disable this behavior, set to vst-force."
},
"vst_genes_number": {
"type": "integer",
"default": 1000,
"description": "Integer indicating how many genes to subset to during vst transformation (when using rlog, will ignore this number). Default: 1000."
},
"round_DE": {
"type": "integer",
"default": -1,
"description": "Number of decimals to which to round result tables, default: -1 (this means no rounding)."
}
}
},
"pathway_arguments": {
"title": "Pathway analysis arguments",
"type": "object",
"description": "",
"default": "",
"properties": {
"organism": {
"type": "string",
"description": "Not necessary if genome is set. Which organism name to use for pathway analysis, e.g. `hsapiens`, not necessary if `--run_pathway_analysis = false`."
},
"species_library": {
"type": "string",
"description": "Not necessary if genome is set. Which bioconductor library to use for pathway analysis, e.g. org.Hs.eg.db, not necessary if --run_pathway_analysis = false."
},
"keytype": {
"type": "string",
"description": "Not necessary if genome is set. Which keytype to use for pathway analysis, e.g. ENSEMBL, not necessary if --run_pathway_analysis = false."
},
"run_pathway_analysis": {
"type": "boolean",
"description": "Turn on this flag if you wish to run pathway analysis."
},
"custom_gmt": {
"type": "string",
"default": "NO_FILE3",
"description": "Path to custom GMT file for gost query, this allows e.g. to query older versions of databases for pathway analysis, not necessary if --skip_pathway_analysis = true. If --datasources is set, the GMT file will be filtered for these datasources."
},
"set_background": {
"type": "boolean",
"default": true,
"description": "Whether to limit pathway analysis to a background list of genes (i.e. those genes expressed in at least one sample), default = true."
},
"custom_background": {
"type": "string",
"default": "NO_FILE7",
"description": "Path to custom background TXT file for gost query containing a gene ID in each line, not necessary if --skip_pathway_analysis = true or --set_background = false."
},
"min_DEG_pathway": {
"type": "integer",
"default": 1,
"description": "Integer indicating how many genes in a pathway must be differentially expressed to be considered as enriched, and report these pathways in tables and the final report. The default value is 1."
},
"datasources": {
"type": "string",
"description": "Which datasources to use for pathway analysis, comma-separated string like 'KEGG,REAC'. See param 'sources' on https://rdrr.io/cran/gprofiler2/man/gost.html for a list of available sources. If not set, will use all sources. If set while a --custom_gmt is provided, will filter the GMT for these datasources (will not filter for the GO subtypes like GO:BP, just for GO)."
},
"heatmaps_cluster_rows": {
"type": "boolean",
"default": true,
"description": "Whether to activate row clustering when generating heatmaps of gene expression in enriched pathways."
},
"heatmaps_cluster_cols": {
"type": "boolean",
"default": false,
"description": "Whether to activate column clustering when generating heatmaps of gene expression in enriched pathways."
},
"pathway_adj_pval_threshold": {
"type": "number",
"default": -1,
"description": "Adjusted p value (float) to use as threshold for pathway analysis. If omitted, will use the value of the parameter adj_pval_threshold (default 0.05)."
}
}
},
"report_arguments": {
"title": "Report arguments",
"type": "object",
"description": "",
"default": "",
"properties": {
"project_summary": {
"type": "string",
"description": "Project summary file downloaded from the qPortal."
},
"multiqc": {
"type": "string",
"default": "NO_FILE4",
"description": "multiqc.zip folder containing the multiQC plots and report."
},
"quote": {
"type": "string",
"default": "NO_FILE5",
"description": "Quote file to link in the report."
},
"software_versions": {
"type": "string",
"default": "NO_FILE6",
"description": "CSV/YML containing nf-core/rnaseq software versions."
},
"report_file": {
"type": "string",
"default": "${projectDir}/assets/RNAseq_report.Rmd",
"description": "Report Rmd file, should not be changed."
},
"references_file": {
"type": "string",
"default": "${projectDir}/assets/references.bibtex",
"description": "Reference bibtex file for the report, should not be changed."
},
"css": {
"type": "string",
"default": "${projectDir}/assets/corp-styles.css",
"description": "Stylesheet css file for the report, should not be changed."
},
"logo": {
"type": "string",
"default": "${projectDir}/assets/logo.png",
"description": "QBiC logo png file for the report, should not be changed."
}
}
},
"genome_options": {
"title": "Genome options",
"type": "object",
"description": "",
"default": "",
"properties": {
"genome": {
"type": "string",
"description": "Which genome to use for analysis, e.g. GRCh37; see /conf/igenomes.config for which genomes are available."
},
"igenomes_base": {
"type": "string",
"default": "s3://ngi-igenomes/igenomes",
"description": "Directory / URL base for iGenomes references."
},
"igenomes_ignore": {
"type": "boolean",
"description": "Do not load the iGenomes reference config."
}
}
},
"github_options": {
"title": "Github options",
"type": "object",
"description": "",
"default": "",
"properties": {
"citest": {
"type": "boolean",
"description": "Only enable for CI tests on the github servers",
"hidden": true
}
}
},
"nextflow_options": {
"title": "Nextflow options",
"type": "object",
"description": "",
"default": "",
"properties": {
"publish_dir_mode": {
"type": "string",
"description": "Method used to save pipeline results to output directory.",
"help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"hidden": true
},
"outdir": {
"type": "string",
"description": "The output directory where the results will be saved",
"default": "./results",
"fa_icon": "fas fa-folder-open"
}
}
},
"boilerplate_options": {
"title": "Boilerplate options",
"type": "object",
"description": "",
"default": "",
"properties": {
"email": {
"type": "string",
"description": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits"
},
"email_on_fail": {
"type": "string",
"description": "Email address for completion summary, only when pipeline fails.",
"fa_icon": "fas fa-exclamation-triangle",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
"help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.",
"hidden": true
},
"plaintext_email": {
"type": "boolean",
"description": "Send plain-text email instead of HTML.",
"fa_icon": "fas fa-remove-format",
"hidden": true
},
"monochrome_logs": {
"type": "boolean",
"description": "Do not use coloured log outputs.",
"hidden": true
},
"help": {
"type": "boolean",
"description": "Display help text.",
"fa_icon": "fas fa-question-circle"
},
"tracedir": {
"type": "string",
"default": "./results/pipeline_info",
"description": "Directory to keep pipeline Nextflow logs and reports.",
"hidden": true
},
"custom_config_version": {
"type": "string",
"default": "master",
"description": "Git commit id for Institutional configs.",
"hidden": true
},
"custom_config_base": {
"type": "string",
"default": "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}",
"description": "Base directory for Institutional configs.",
"hidden": true
},
"config_profile_description": {
"type": "string",
"description": "Institutional config description.",
"hidden": true
},
"config_profile_contact": {
"type": "string",
"description": "Institutional config contact information.",
"hidden": true
},
"config_profile_url": {
"type": "string",
"description": "Institutional config URL link.",
"hidden": true
},
"hook_url": {
"type": "string",
"description": "Incoming hook URL for messaging service",
"fa_icon": "fas fa-people-group",
"help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
"hidden": true
},
"validationShowHiddenParams": {
"type": "boolean",
"fa_icon": "far fa-eye-slash",
"description": "Show all params when using `--help`",
"hidden": true,
"help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
}
}
},
"max_job_request_options": {
"title": "Max job request options",
"type": "object",
"fa_icon": "fab fa-acquisitions-incorporated",
"description": "Set the top limit for requested resources for any single job.",
"help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.",
"properties": {
"max_cpus": {
"type": "integer",
"description": "Maximum number of CPUs that can be requested for any single job.",
"default": 16,
"fa_icon": "fas fa-microchip",
"hidden": true,
"help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
},
"max_memory": {
"type": "string",
"description": "Maximum amount of memory that can be requested for any single job.",
"default": "128.GB",
"fa_icon": "fas fa-memory",
"pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
"hidden": true,
"help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
},
"max_time": {
"type": "string",
"description": "Maximum amount of time that can be requested for any single job.",
"default": "240.h",
"fa_icon": "far fa-clock",
"pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$",
"hidden": true,
"help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/main_arguments"
},
{
"$ref": "#/definitions/deseq2_arguments"
},
{
"$ref": "#/definitions/pathway_arguments"
},
{
"$ref": "#/definitions/report_arguments"
},
{
"$ref": "#/definitions/github_options"
},
{
"$ref": "#/definitions/genome_options"
},
{
"$ref": "#/definitions/nextflow_options"
},
{
"$ref": "#/definitions/boilerplate_options"
},
{
"$ref": "#/definitions/max_job_request_options"
}
]
}