nextflow_schema.json

{
    "$schema": "http://json-schema.org/draft-07/schema",
    "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json",
    "title": "epi2me-labs/wf-pore-c",
    "workflow_title": "Pore-c Workflow",
    "description": "Workflow for analysing Pore-c data for chromatin conformation capture.",
    "demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-pore-c/wf-pore-c-demo.tar.gz",
    "aws_demo_url": "https://ont-exd-int-s3-euwst1-epi2me-labs.s3.amazonaws.com/wf-pore-c/wf-pore-c-demo/aws.nextflow.config",
    "url": "https://github.com/epi2me-labs/wf-pore-c",
    "type": "object",
    "definitions": {
        "input_options": {
            "title": "Input Options",
            "type": "object",
            "fa_icon": "fas fa-terminal",
            "description": "Parameters for finding and handling input data for analysis.",
            "properties": {
                "bam": {
                    "type": "string",
                    "format": "path",
                    "title": "Unaligned BAM",
                    "description": "An unaligned BAM file containing Pore-C concatemer sequences.",
                    "help_text": "This accepts one of three cases: (i) the path to a single BAM file; (ii) the path to a top-level directory containing BAM files; (iii) the path to a directory containing one level of sub-directories which in turn contain BAM files. In the first and second case, a sample name can be supplied with `--sample`. In the last case, the data is assumed to be multiplexed with the names of the sub-directories as barcodes. In this case, a sample sheet can be provided with `--sample_sheet`."
                },
                "fastq": {
                    "type": "string",
                    "format": "path",
                    "title": "FASTQ",
                    "description": "FASTQ files to use in the analysis.",
                    "help_text": "This accepts one of three cases: (i) the path to a single FASTQ file; (ii) the path to a top-level directory containing FASTQ files; (iii) the path to a directory containing one level of sub-directories which in turn contain FASTQ files. In the first and second case, a sample name can be supplied with `--sample`. In the last case, the data is assumed to be multiplexed with the names of the sub-directories as barcodes. In this case, a sample sheet can be provided with `--sample_sheet`."
                },
                "sample_sheet": {
                    "type": "string",
                    "format": "file-path",
                    "title": "Sample sheet",
                    "description": "A CSV file used to map barcodes to sample aliases and optionally provide per-sample parameters. The sample sheet can be provided when the input data is a directory containing sub-directories with FASTQ or BAM files.",
                    "help_text": "The sample sheet is a CSV file with, minimally, columns named `barcode` and `alias`. Optionally, a `cutter` column can contain the name of the enzyme used per sample (see the `--cutter` parameter for more details) and a `vcf` column can be used to provide a phased VCF file per sample if you require haplotagged alignments."
                },
                "sample": {
                    "type": "string",
                    "description": "A single sample name for non-multiplexed data. Permissible if passing a single .fastq(.gz) or BAM file, or a directory of such files."
                },
                "analyse_unclassified": {
                    "type": "boolean",
                    "default": false,
                    "description": "Analyse unclassified reads from input directory. By default the workflow will not process reads in the unclassified directory.",
                    "help_text": "If selected and if the input is a multiplex directory the workflow will also process the unclassified directory."
                },
                "ref": {
                    "type": "string",
                    "title": "Reference FASTA",
                    "format": "file-path",
                    "description": "A FASTA file containing the reference genome to map against."
                },
                "vcf": {
                    "type": "string",
                    "title": "VCF",
                    "format": "file-path",
                    "description": "An optional phased VCF file that will be used to haplotag alignments."
                },
                "cutter": {
                    "type": "string",
                    "default": "NlaIII",
                    "description": "The enzyme used in the restriction digest.",
                    "help_text": "Any enzyme from the Biopython restriction dictionary can be used. See `https://github.com/biopython/biopython/blob/master/Bio/Restriction/Restriction_Dictionary.py`. This can also be defined per sample: see `--sample_sheet` parameter."
                }
            },
            "allOf": [
                {
                    "required": [
                        "ref"
                    ]
                },
                {
                    "oneOf": [
                        {
                            "required": [
                                "fastq"
                            ]
                        },
                        {
                            "required": [
                                "bam"
                            ]
                        }
                    ]
                }
            ]
        },
        "output_options": {
            "title": "Output Options",
            "type": "object",
            "description": "Parameters for saving and naming workflow outputs.",
            "default": "",
            "properties": {
                "out_dir": {
                    "type": "string",
                    "default": "output",
                    "format": "directory-path",
                    "description": "Directory for output of all user-facing files."
                },
                "hi_c": {
                    "type": "boolean",
                    "title": "Hi-C",
                    "default": false,
                    "description": "Output a Hi-C formatted file; will convert pairs format to a Hi-C (`.hic`) file which will be compatible with [juicer](https://github.com/aidenlab/juicer).",
                    "help_text": "Load this file with [Juicebox](https://www.aidenlab.org/juicebox/) for an alternative contact map visualisation."
                },
                "bed": {
                    "type": "boolean",
                    "title": "BED",
                    "default": false,
                    "description": "Output a BED file of the paired-end BAM alignments for use with downstream tools. Setting this to true will also trigger creation of the paired-end BAM.",
                    "help_text": "Will use the paired-end BAM to create a BED file compatible with downstream tools, including the scaffolding tool [YaHS](https://github.com/c-zhou/yahs)."
                }
            }
        },
        "advanced_options": {
            "title": "Advanced Options",
            "type": "object",
            "description": "Advanced options for configuring processes inside the workflow.",
            "default": "",
            "properties": {
                "chunk_size": {
                    "type": "integer",
                    "default": 20000,
                    "description": "Process input in chunks of this number of reads.",
                    "help_text": "To reduce per-process memory requirements for large datasets, process the inputs in chunks of reads. Set to 0 to process entire dataset in one go."
                },
                "threads": {
                    "type": "integer",
                    "default": 4,
                    "description": "Set the maximum number of threads to use for more intensive processes (limited by config executor cpus). We recommend a minimum of 4, or 19 if available.",
                    "help_text": "Increasing this will speed up some individual processes, but reduce the number of processes that can run in parallel, potentially increasing the time for the workflow to run overall."
                }
            },
            "help_text": "These advanced options do not need to be changed for typical use, but allow fine tuning of workflows for users who want more control over the workflow."
        },
        "pore_c_tools_options": {
            "title": "Pore-C Tools Options",
            "type": "object",
            "description": "Parameters to control the pore-c tools.",
            "properties": {
                "minimap2_settings": {
                    "type": "string",
                    "default": "-x map-ont",
                    "description": "The minimap2 settings for mapping monomers."
                },
                "max_monomers": {
                    "type": "integer",
                    "title": "Maximum monomers",
                    "default": 250,
                    "description": "The maximum number of monomers allowed for a read to be included in downstream analysis.",
                    "help_text": "Any reads that have more than this number will be filtered out, and output in a per-sample filtered_bam file."
                },
                "coverage": {
                    "type": "boolean",
                    "default": false,
                    "description": "Calculate restriction-fragment coverage using mosdepth."
                },
                "summary_json": {
                    "type": "boolean",
                    "title": "Summary JSON",
                    "default": true,
                    "description": "Output pore-c-py annotation summary in JSON format."
                }
            }
        },
        "chromunity_options": {
            "title": "Chromunity Options",
            "type": "object",
            "description": "Create files for Chromunity analyses.",
            "properties": {
                "chromunity": {
                    "type": "boolean",
                    "default": false,
                    "description": "Create parquet files for Chromunity.",
                    "help_text": "See the chromunity documentation for further details 'https://github.com/mskilab/chromunity'."
                },
                "chromunity_merge_distance": {
                    "type": "integer",
                    "default": -1,
                    "description": "Merge colinear alignments separated by less than this base pair distance into a single monomer."
                }
            }
        },
        "4dn_files_options": {
            "title": "4DN files Options",
            "type": "object",
            "description": "Create files for the 4D nucleome toolset.",
            "properties": {
                "pairs": {
                    "type": "boolean",
                    "default": false,
                    "description": "Create a 4DN-format pairs file (also calculate stats).",
                    "help_text": "Outputs a directory with a pairs stats report and a pairs file which can be used for downstream analysis."
                },
                "pairtools_chunksize": {
                    "type": "integer",
                    "default": 100000,
                    "description": "Number of pairs to be processed in each chunk in the prepare_hic process which uses the pairtools dedup tool.",
                    "help_text": "Reduce for lower memory footprint. Below 10,000 performance starts suffering significantly."
                },
                "mcool": {
                    "type": "boolean",
                    "default": false,
                    "title": "Multi-resolution cooler file (mcool)",
                    "description": "Create a multi-resolution cooler file. Will output the cooler formatted file which you can load with cooler.",
                    "help_text": "See 'https://open2c.github.io/cooler' for more details."
                },
                "cool_bin_size": {
                    "type": "integer",
                    "title": "Cooler file bin size",
                    "default": 1000,
                    "description": "The bin size of the cooler output file in base pairs.",
                    "help_text": "See 'https://open2c.github.io/cooler' for more details."
                },
                "mcool_resolutions": {
                    "type": "string",
                    "default": "1000,2000,5000N",
                    "description": "The resolutions of the mcool file in pixels (see cooler documentation for details).",
                    "help_text": "Comma-separated list of target resolutions. Use suffixes B or N to specify a progression: B for binary (geometric steps of factor 2), N for nice (geometric steps of factor 10 interleaved with steps of 2 and 5). This is the equivalent of the `--resolutions` flag in cooler; see an example here 'https://cooler.readthedocs.io/en/latest/cli.html'."
                }
            }
        },
        "paired_end_bam_options": {
            "title": "Paired-end BAM Options",
            "type": "object",
            "description": "Create mock paired-end BAM files for legacy tools.",
            "properties": {
                "paired_end": {
                    "type": "boolean",
                    "title": "Paired-end BAM",
                    "description": "Create mock paired-end BAM files.",
                    "default": false
                },
                "filter_pairs": {
                    "type": "boolean",
                    "default": false,
                    "description": "Filter paired-end reads using minimum and maximum distance parameters."
                },
                "paired_end_minimum_distance": {
                    "type": "integer",
                    "default": -1,
                    "description": "Remove trans/cis pairs separated by a distance shorter than this."
                },
                "paired_end_maximum_distance": {
                    "type": "integer",
                    "default": -1,
                    "description": "Remove trans/cis pairs separated by a distance greater than this."
                }
            }
        },
        "misc": {
            "title": "Misc",
            "type": "object",
            "description": "",
            "default": "",
            "properties": {
                "help": {
                    "type": "boolean",
                    "description": "Display help text.",
                    "fa_icon": "fas fa-question-circle",
                    "hidden": true,
                    "default": false
                },
                "disable_ping": {
                    "type": "boolean",
                    "default": false,
                    "description": "Enable to prevent sending a workflow ping."
                },
                "version": {
                    "type": "boolean",
                    "default": false,
                    "hidden": true
                }
            }
        }
    },
    "allOf": [
        {
            "$ref": "#/definitions/input_options"
        },
        {
            "$ref": "#/definitions/output_options"
        },
        {
            "$ref": "#/definitions/advanced_options"
        },
        {
            "$ref": "#/definitions/pore_c_tools_options"
        },
        {
            "$ref": "#/definitions/chromunity_options"
        },
        {
            "$ref": "#/definitions/4dn_files_options"
        },
        {
            "$ref": "#/definitions/paired_end_bam_options"
        },
        {
            "$ref": "#/definitions/misc"
        }
    ],
    "properties": {
        "aws_image_prefix": {
            "type": "string",
            "hidden": true
        },
        "aws_queue": {
            "type": "string",
            "hidden": true
        },
        "monochrome_logs": {
            "type": "boolean"
        },
        "validate_params": {
            "type": "boolean",
            "default": true
        },
        "show_hidden_params": {
            "type": "boolean"
        }
    },
    "resources": {
        "recommended": {
            "cpus": 64,
            "memory": "128GB"
        },
        "minimum": {
            "cpus": 8,
            "memory": "32GB"
        },
        "run_time": "12 hours for 100GB input BAM using the recommended resources; this will vary depending on the number of monomers found per read.",
        "arm_support": false
    }
}