-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathnextflow_schema.json
340 lines (338 loc) · 16.2 KB
/
nextflow_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json",
"title": "epi2me-labs/wf-pore-c",
"workflow_title": "Pore-c Workflow",
"description": "Workflow for analysing Pore-c data for chromatin conformation capture.",
"demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-pore-c/wf-pore-c-demo.tar.gz",
"aws_demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-pore-c/wf-pore-c-demo/aws.nextflow.config",
"url": "https://github.com/epi2me-labs/wf-pore-c",
"type": "object",
"definitions": {
"input_options": {
"title": "Input Options",
"type": "object",
"fa_icon": "fas fa-terminal",
"description": "Parameters for finding and handling input data for analysis.",
"properties": {
"bam": {
"type": "string",
"format": "path",
"title": "Unaligned BAM",
"description": "An unaligned BAM file containing Pore-C concatemer sequences.",
"help_text": "This accepts one of three cases: (i) the path to a single BAM file; (ii) the path to a top-level directory containing BAM files; (iii) the path to a directory containing one level of sub-directories which in turn contain BAM files. In the first and second case, a sample name can be supplied with `--sample`. In the last case, the data is assumed to be multiplexed with the names of the sub-directories as barcodes. In this case, a sample sheet can be provided with `--sample_sheet`."
},
"fastq": {
"type": "string",
"format": "path",
"title": "FASTQ",
"description": "FASTQ files to use in the analysis.",
"help_text": "This accepts one of three cases: (i) the path to a single FASTQ file; (ii) the path to a top-level directory containing FASTQ files; (iii) the path to a directory containing one level of sub-directories which in turn contain FASTQ files. In the first and second case, a sample name can be supplied with `--sample`. In the last case, the data is assumed to be multiplexed with the names of the sub-directories as barcodes. In this case, a sample sheet can be provided with `--sample_sheet`."
},
"sample_sheet": {
"type": "string",
"format": "file-path",
"title": "Sample sheet",
"description": "A CSV file used to map barcodes to sample aliases and optionally provide per-sample parameters. The sample sheet can be provided when the input data is a directory containing sub-directories with FASTQ files.",
"help_text": "The sample sheet is a CSV file with, minimally, columns named `barcode` and `alias`. Optionally, a `cutter` column can contain the name of the enzyme used per sample (see the `--cutter` parameter for more details) and a `vcf` column can be used to provide a phased VCF file per sample if you require haplotagged alignments."
},
"sample": {
"type": "string",
"description": "A single sample name for non-multiplexed data. Permissible if passing a single .fastq(.gz) file or directory of .fastq(.gz) files."
},
"analyse_unclassified": {
"type": "boolean",
"default": false,
"description": "Analyse unclassified reads from input directory. By default the workflow will not process reads in the unclassified directory.",
"help_text": "If selected and if the input is a multiplex directory the workflow will also process the unclassified directory."
},
"ref": {
"type": "string",
"title": "Reference FASTA",
"format": "file-path",
"description": "A FASTA file containing the reference genome to map against."
},
"vcf": {
"type": "string",
"title": "VCF",
"format": "file-path",
"description": "An optional phased VCF file that will be used to haplotag alignments."
},
"cutter": {
"type": "string",
"default": "NlaIII",
"description": "The enzyme used in the restriction digest.",
"help_text": "Any enzyme from the Biopython restriction dictionary can be used. See `https://github.com/biopython/biopython/blob/master/Bio/Restriction/Restriction_Dictionary.py`. This can also be defined per sample: see `--sample_sheet` parameter."
}
},
"allOf": [
{
"required": [
"ref"
]
},
{
"oneOf": [
{
"required": [
"fastq"
]
},
{
"required": [
"bam"
]
}
]
}
]
},
"output_options": {
"title": "Output Options",
"type": "object",
"description": "Parameters for saving and naming workflow outputs.",
"default": "",
"properties": {
"out_dir": {
"type": "string",
"default": "output",
"format": "directory-path",
"description": "Directory for output of all user-facing files."
},
"hi_c": {
"type": "boolean",
"title": "Hi-C",
"default": false,
"description": "Output a Hi-C formatted file; will convert pairs format to a Hi-C (`.hic`) file which will be compatible with [juicer](https://github.com/aidenlab/juicer).",
"help_text": "Load this file with [Juice box](https://www.aidenlab.org/juicebox/) for an alternative contact map visualisation."
},
"bed": {
"type": "boolean",
"title": "BED",
"default": false,
"description": "Output a BED file of the paired-end BAM alignments for use with downstream tools. Setting this to true will also trigger creation of the paired-end BAM.",
"help_text": "Will use the paired-end BAM to create a BED file compatible with downstream tools including scaffolding tool [Yahs](https://github.com/c-zhou/yahs)."
}
}
},
"advanced_options": {
"title": "Advanced Options",
"type": "object",
"description": "Avanced options for configuring processes inside the workflow.",
"default": "",
"properties": {
"chunk_size": {
"type": "integer",
"default": 20000,
"description": "Process input in chunks of this number of reads.",
"help_text": "To reduce per-process memory requirements for large datasets, process the inputs in chunks of reads. Set to 0 to process entire dataset in one go."
},
"threads": {
"type": "integer",
"default": 4,
"description": "Set maximum number of threads to use for more intense processes (limited by config executor cpus). We recommend a minimum of 4, but if available 19.",
"help": "Increasing this will speed up some individual processes, but reduce the number of processes that can run in parallel, potentially increasing the time for the workflow to run overall."
}
},
"help_text": "These advanced options do not need to be changed for typical use, but allow fine tuning of workflows for users who want more control over the workflow."
},
"pore_c_tools_options": {
"title": "Pore-C Tools Options",
"type": "object",
"description": "Parameters to control the pore-c tools",
"properties": {
"minimap2_settings": {
"type": "string",
"default": "-x map-ont",
"description": "The minimap2 settings for mapping monomers."
},
"max_monomers": {
"type": "integer",
"title": "Maximum monomers",
"default": 250,
"description": "The maximum number of monomers allowed for a read to be included in downstream analysis.",
"help": "Any reads that have more than this number will be filtered out, and output in a per sample filtered_bam file."
},
"coverage": {
"type": "boolean",
"default": false,
"description": "Calculate restriction-fragment coverage using mosdepth."
},
"summary_json": {
"type": "boolean",
"title": "Summary JSON",
"default": true,
"description": "Output pore-c-py annotation summary in json format."
}
}
},
"chromunity_options": {
"title": "Chromunity Options",
"type": "object",
"description": "Create files for Chromunity analyses.",
"properties": {
"chromunity": {
"type": "boolean",
"default": false,
"description": "Create parquet files for Chromunity.",
"help_text": "See the chromunity documentation for further details 'https://github.com/mskilab/chromunity'."
},
"chromunity_merge_distance": {
"type": "integer",
"default": -1,
"description": "Merge colinear alignments separated by less than this base pair distance into a single monomer."
}
}
},
"4dn_files_options": {
"title": "4DN files Options",
"type": "object",
"description": "Create files for the 4D nucleome toolset.",
"properties": {
"pairs": {
"type": "boolean",
"default": false,
"description": "Create a 4DN-format pairs file (also calculate stats).",
"help_text": "Outputs a directory with a pairs stats report and a pairs file which can be used for downstream anaylsis."
},
"pairtools_chunksize": {
"type": "integer",
"default": 100000,
"description": "Number of pairs to be processed in each chunk in the prepare_hic process which uses the pairtools dedup tool.",
"help_text": "Reduce for lower memory footprint. Below 10,000 performance starts suffering significantly."
},
"mcool": {
"type": "boolean",
"default": false,
"title": "Multi-resolution cooler file (mcool)",
"description": "Create a multi-resolution cooler file. Will output the cooler formatted file which you can load with cooler.",
"help_text": "See 'https://open2c.github.io/cooler' for more details."
},
"cool_bin_size": {
"type": "integer",
"title": "Cooler file bin size",
"default": 1000,
"description": "The bin size of the cooler output file in base pairs.",
"help_text": "See 'https://open2c.github.io/cooler' for more details."
},
"mcool_resolutions": {
"type": "string",
"default": "1000,2000,5000N",
"description": "The resolutions of the mcool file in pixels (see cooler documentation for details).",
"help_text": "Comma-separated list of target resolutions. Use suffixes B or N to specify a progression: B for binary (geometric steps of factor 2), N for nice (geometric steps of factor 10 interleaved with steps of 2 and 5). This is the equivalent of the `--resolutions` flag in cooler; see an example here 'https://cooler.readthedocs.io/en/latest/cli.html'."
}
}
},
"paired_end_bam_options": {
"title": "Paired-end BAM Options",
"type": "object",
"description": "Create mock paired-end BAM files for legacy tools.",
"properties": {
"paired_end": {
"type": "boolean",
"title": "Paired-end BAM",
"description": "Create mock paired-end BAM files.",
"default": false
},
"filter_pairs": {
"type": "boolean",
"default": false,
"description": "Filter paired-end reads using minimum and maximum distance parameters."
},
"paired_end_minimum_distance": {
"type": "integer",
"default": -1,
"description": "Remove trans/cis pairs separated by a distance shorter than this."
},
"paired_end_maximum_distance": {
"type": "integer",
"default": -1,
"description": "Remove trans/cis pairs separated by a distance greater than this."
}
}
},
"misc": {
"title": "Misc",
"type": "object",
"description": "",
"default": "",
"properties": {
"help": {
"type": "boolean",
"description": "Display help text.",
"fa_icon": "fas fa-question-circle",
"hidden": true,
"default": false
},
"disable_ping": {
"type": "boolean",
"default": false,
"description": "Enable to prevent sending a workflow ping."
},
"version": {
"type": "boolean",
"default": false,
"hidden": true
}
}
}
},
"allOf": [
{
"$ref": "#/definitions/input_options"
},
{
"$ref": "#/definitions/output_options"
},
{
"$ref": "#/definitions/advanced_options"
},
{
"$ref": "#/definitions/pore_c_tools_options"
},
{
"$ref": "#/definitions/chromunity_options"
},
{
"$ref": "#/definitions/4dn_files_options"
},
{
"$ref": "#/definitions/paired_end_bam_options"
},
{
"$ref": "#/definitions/misc"
}
],
"properties": {
"aws_image_prefix": {
"type": "string",
"hidden": true
},
"aws_queue": {
"type": "string",
"hidden": true
},
"monochrome_logs": {
"type": "boolean"
},
"validate_params": {
"type": "boolean",
"default": true
},
"show_hidden_params": {
"type": "boolean"
}
},
"resources": {
"recommended": {
"cpus": 64,
"memory": "128GB"
},
"minimum": {
"cpus": 8,
"memory": "32GB"
},
"run_time": "12 hours for 100GB input BAM using the recommended resources, this will vary depending on number of monomers found per read.",
"arm_support": false
}
}