diff --git a/nextflow_schema.json b/nextflow_schema.json index ea2f96c70e..fcd324a3a2 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -82,18 +82,6 @@ "description": "Specify how many reads each split of a FastQ file contains. Set 0 to turn off splitting at all.", "help_text": "Use the the tool FastP to split FASTQ file by number of reads. This parallelizes across fastq file shards speeding up mapping. Note although the minimum value is 250 reads, if you have fewer than 250 reads a single FASTQ shard will still be created." }, - "wes": { - "type": "boolean", - "fa_icon": "fas fa-dna", - "description": "Enable when exome or panel data is provided.", - "help_text": "With this parameter flags in various tools are set for targeted sequencing data. It is recommended to enable for whole-exome and panel data analysis." - }, - "intervals": { - "type": "string", - "fa_icon": "fas fa-file-alt", - "help_text": "To speed up preprocessing and variant calling processes, the execution is parallelized across a reference chopped into smaller pieces.\n\nParts of preprocessing and variant calling are done by these intervals, the different resulting files are then merged.\nThis can parallelize processes, and push down wall clock time significantly.\n\nWe are aligning to the whole genome, and then run Base Quality Score Recalibration and Variant Calling on the supplied regions.\n\n**Whole Genome Sequencing:**\n\nThe (provided) intervals are chromosomes cut at their centromeres (so each chromosome arm processed separately) also additional unassigned contigs.\n\nWe are ignoring the `hs37d5` contig that contains concatenated decoy sequences.\n\nThe calling intervals can be defined using a .list or a BED file.\nA .list file contains one interval per line in the format `chromosome:start-end` (1-based coordinates).\nA BED file must be a tab-separated text file with one interval per line.\nThere must be at least three columns: chromosome, start, and end (0-based 
coordinates).\nAdditionally, the score column of the BED file can be used to provide an estimate of how many seconds it will take to call variants on that interval.\nThe fourth column remains unused.\n\n```\n|chr1|10000|207666|NA|47.3|\n```\nThis indicates that variant calling on the interval chr1:10001-207666 takes approximately 47.3 seconds.\n\nThe runtime estimate is used in two different ways.\nFirst, when there are multiple consecutive intervals in the file that take little time to compute, they are processed as a single job, thus reducing the number of processes that needs to be spawned.\nSecond, the jobs with largest processing time are started first, which reduces wall-clock time.\nIf no runtime is given, a time of 200000 nucleotides per second is assumed. See `--nucleotides_per_second` on how to customize this.\nActual figures vary from 2 nucleotides/second to 30000 nucleotides/second.\nIf you prefer, you can specify the full path to your reference genome when you run the pipeline:\n\n> **NB** If none provided, will be generated automatically from the FASTA reference\n> **NB** Use --no_intervals to disable automatic generation.\n\n**Targeted Sequencing:**\n\nThe recommended flow for targeted sequencing data is to use the workflow as it is, but also provide a `BED` file containing targets for all steps using the `--intervals` option. In addition, the parameter `--wes` should be set.\nIt is advised to pad the variant calling regions (exons or target) to some extent before submitting to the workflow.\n\nThe procedure is similar to whole genome sequencing, except that only BED file are accepted. See above for formatting description.\nAdding every exon as an interval in case of `WES` can generate >200K processes or jobs, much more forks, and similar number of directories in the Nextflow work directory. These are appropriately grouped together to reduce number of processes run in parallel (see above and `--nucleotides_per_second` for details). 
\nFurthermore, primers and/or baits are not 100% specific, (certainly not for MHC and KIR, etc.), quite likely there going to be reads mapping to multiple locations.\nIf you are certain that the target is unique for your genome (all the reads will certainly map to only one location), and aligning to the whole genome is an overkill, it is actually better to change the reference itself.", - "description": "Path to target bed file in case of whole exome or targeted sequencing or intervals file." - }, "nucleotides_per_second": { "type": "integer", "fa_icon": "fas fa-clock", @@ -101,12 +89,24 @@ "help_text": "Intervals are parts of the chopped up genome used to speed up preprocessing and variant calling. See `--intervals` for more info. \n\nChanging this parameter, changes the number of intervals that are grouped and processed together. Bed files from target sequencing can contain thousands or small intervals. Spinning up a new process for each can be quite resource intensive. Instead it can be desired to process small intervals together on larger nodes. \nIn order to make use of this parameter, no runtime estimate can be present in the bed file (column 5). 
", "default": 200000 }, + "intervals": { + "type": "string", + "fa_icon": "fas fa-file-alt", + "help_text": "To speed up preprocessing and variant calling processes, the execution is parallelized across a reference chopped into smaller pieces.\n\nParts of preprocessing and variant calling are done by these intervals, the different resulting files are then merged.\nThis can parallelize processes, and push down wall clock time significantly.\n\nWe are aligning to the whole genome, and then run Base Quality Score Recalibration and Variant Calling on the supplied regions.\n\n**Whole Genome Sequencing:**\n\nThe (provided) intervals are chromosomes cut at their centromeres (so each chromosome arm processed separately) also additional unassigned contigs.\n\nWe are ignoring the `hs37d5` contig that contains concatenated decoy sequences.\n\nThe calling intervals can be defined using a .list or a BED file.\nA .list file contains one interval per line in the format `chromosome:start-end` (1-based coordinates).\nA BED file must be a tab-separated text file with one interval per line.\nThere must be at least three columns: chromosome, start, and end (0-based coordinates).\nAdditionally, the score column of the BED file can be used to provide an estimate of how many seconds it will take to call variants on that interval.\nThe fourth column remains unused.\n\n```\n|chr1|10000|207666|NA|47.3|\n```\nThis indicates that variant calling on the interval chr1:10001-207666 takes approximately 47.3 seconds.\n\nThe runtime estimate is used in two different ways.\nFirst, when there are multiple consecutive intervals in the file that take little time to compute, they are processed as a single job, thus reducing the number of processes that needs to be spawned.\nSecond, the jobs with largest processing time are started first, which reduces wall-clock time.\nIf no runtime is given, a time of 200000 nucleotides per second is assumed. 
See `--nucleotides_per_second` on how to customize this.\nActual figures vary from 2 nucleotides/second to 30000 nucleotides/second.\nIf you prefer, you can specify the full path to your reference genome when you run the pipeline:\n\n> **NB** If none provided, will be generated automatically from the FASTA reference\n> **NB** Use --no_intervals to disable automatic generation.\n\n**Targeted Sequencing:**\n\nThe recommended flow for targeted sequencing data is to use the workflow as it is, but also provide a `BED` file containing targets for all steps using the `--intervals` option. In addition, the parameter `--wes` should be set.\nIt is advised to pad the variant calling regions (exons or target) to some extent before submitting to the workflow.\n\nThe procedure is similar to whole genome sequencing, except that only BED files are accepted. See above for formatting description.\nAdding every exon as an interval in case of `WES` can generate >200K processes or jobs, much more forks, and similar number of directories in the Nextflow work directory. These are appropriately grouped together to reduce number of processes run in parallel (see above and `--nucleotides_per_second` for details). \nFurthermore, primers and/or baits are not 100% specific, (certainly not for MHC and KIR, etc.), quite likely there are going to be reads mapping to multiple locations.\nIf you are certain that the target is unique for your genome (all the reads will certainly map to only one location), and aligning to the whole genome is an overkill, it is actually better to change the reference itself.", + "description": "Path to target bed file in case of whole exome or targeted sequencing or intervals file." + }, "no_intervals": { "type": "boolean", "fa_icon": "fas fa-ban", "description": "Disable usage of intervals.", "help_text": "Intervals are parts of the chopped up genome used to speed up preprocessing and variant calling. See `--intervals` for more info. 
\n\nIf `--no_intervals` is set no intervals will be taken into account for speed up or data processing." }, + "wes": { + "type": "boolean", + "fa_icon": "fas fa-dna", + "description": "Enable when exome or panel data is provided.", + "help_text": "With this parameter flags in various tools are set for targeted sequencing data. It is recommended to enable for whole-exome and panel data analysis." + }, "tools": { "type": "string", "fa_icon": "fas fa-toolbox", @@ -142,54 +142,47 @@ "default": 0, "fa_icon": "fas fa-cut", "description": "Remove bp from the 5' end of read 1", - "help_text": "This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. Corresponds to the FastP flag `--trim_front1`.", - "hidden": true + "help_text": "This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. Corresponds to the FastP flag `--trim_front1`." }, "clip_r2": { "type": "integer", "default": 0, "description": "Remove bp from the 5' end of read 2", "help_text": "This may be useful if the qualities were very poor, or if there is some sort of unwanted bias at the 5' end. Corresponds to the FastP flag `--trim_front2`.", - "fa_icon": "fas fa-cut", - "hidden": true + "fa_icon": "fas fa-cut" }, "three_prime_clip_r1": { "type": "integer", "default": 0, "fa_icon": "fas fa-cut", "description": "Remove bp from the 3' end of read 1", - "help_text": "This may remove some unwanted bias from the 3'. Corresponds to the FastP flag `--trim_tail1`.", - "hidden": true + "help_text": "This may remove some unwanted bias from the 3'. Corresponds to the FastP flag `--trim_tail1`." }, "three_prime_clip_r2": { "type": "integer", "default": 0, "fa_icon": "fas fa-cut", "description": "Remove bp from the 3' end of read 2", - "help_text": "This may remove some unwanted bias from the 3' end. 
Corresponds to the FastP flag `--trim_tail2`.", - "hidden": true + "help_text": "This may remove some unwanted bias from the 3' end. Corresponds to the FastP flag `--trim_tail2`." }, "trim_nextseq": { "type": "integer", "default": 0, "fa_icon": "fas fa-cut", "description": "Removing poly-G tails.", - "help_text": "DetectS polyG in read tails and trim them. Corresponds to the FastP flag `--trim_poly_g`.", - "hidden": true + "help_text": "Detects polyG in read tails and trim them. Corresponds to the FastP flag `--trim_poly_g`." }, "length_required": { "type": "integer", "default": 15, "fa_icon": "fas fa-cut", "description": "Minimum length of reads to keep", - "help_text": "This is the minimum length of reads to keep after trimming. Corresponds to the FastP flag `--length_required` (default in FastP is 15bp).", - "hidden": true + "help_text": "This is the minimum length of reads to keep after trimming. Corresponds to the FastP flag `--length_required` (default in FastP is 15bp)." }, "save_trimmed": { "type": "boolean", "fa_icon": "fas fa-save", - "description": "Save trimmed FastQ file intermediates.", - "hidden": true + "description": "Save trimmed FastQ file intermediates." }, "umi_read_structure": { "type": "string", @@ -202,14 +195,12 @@ "default": "Adjacency", "fa_icon": "fas fa-tape", "description": "Default strategy with UMI", - "hidden": true, "help_text": "Available values: Identity, Edit, Adjacency, Paired" }, "save_split_fastqs": { "type": "boolean", "fa_icon": "fas fa-vial", - "description": "If set, publishes split FASTQ files. Intended for testing purposes.", - "hidden": true + "description": "If set, publishes split FASTQ files. Intended for testing purposes." 
} } }, @@ -255,35 +246,17 @@ "default": "", "fa_icon": "fas fa-toolbox", "properties": { - "concatenate_vcfs": { - "type": "boolean", - "fa_icon": "fas fa-merge", - "description": "Option for concatenating germline vcf-files.", - "help_text": "Concatenating the germline vcf-files from each applied variant-caller into one vcf-file using bfctools concat." - }, "only_paired_variant_calling": { "type": "boolean", "fa_icon": "fas fa-forward", "description": "If true, skips germline variant calling for matched normal to tumor sample. Normal samples without matched tumor will still be processed through germline variant calling tools.", "help_text": "This can speed up computation for somatic variant calling with matched normal samples. If false, all normal samples are processed as well through the germline variantcalling tools. If true, only somatic variant calling is done." }, - "joint_germline": { - "type": "boolean", - "fa_icon": "fas fa-toolbox", - "description": "Turn on the joint germline variant calling for GATK haplotypecaller", - "help_text": "Uses all normal germline samples (as designated by `status` in the input csv) in the joint germline variant calling process." - }, - "joint_mutect2": { - "type": "boolean", - "fa_icon": "fas fa-angle-double-right", - "description": "Runs Mutect2 in joint (multi-sample) mode for better concordance among variant calls of tumor samples from the same patient. Mutect2 outputs will be stored in a subfolder named with patient ID under `variant_calling/mutect2/` folder. Only a single normal sample per patient is allowed. Tumor-only mode is also supported." 
- }, "ascat_min_base_qual": { "type": "integer", "default": 20, "fa_icon": "fas fa-greater-than", "description": "Overwrite Ascat min base quality required for a read to be counted.", - "hidden": true, "help_text": "For more details see [here](https://raw.githubusercontent.com/VanLoo-lab/ascat/master/man/ASCAT-manual.pdf)" }, "ascat_min_counts": { @@ -291,7 +264,6 @@ "default": 10, "fa_icon": "fas fa-align-center", "description": "Overwrite Ascat minimum depth required in the normal for a SNP to be considered.", - "hidden": true, "help_text": "For more details, see [here](https://raw.githubusercontent.com/VanLoo-lab/ascat/master/man/ASCAT-manual.pdf)." }, "ascat_min_map_qual": { @@ -299,43 +271,37 @@ "default": 35, "fa_icon": "fas fa-balance-scale-left", "description": "Overwrite Ascat min mapping quality required for a read to be counted.", - "hidden": true, "help_text": "For more details, see [here](https://raw.githubusercontent.com/VanLoo-lab/ascat/master/man/ASCAT-manual.pdf)." }, "ascat_ploidy": { "type": "number", "fa_icon": "fas fa-bacon", "help_text": "ASCAT: optional argument to override ASCAT optimization and supply psi parameter (expert parameter, do not adapt unless you know what you are doing). See [here](https://raw.githubusercontent.com/VanLoo-lab/ascat/master/man/ASCAT-manual.pdf)", - "hidden": true, "description": "Overwrite ASCAT ploidy." }, "ascat_purity": { "type": "number", "fa_icon": "fas fa-broom", "description": "Overwrite ASCAT purity.", - "help_text": "Overwrites ASCAT's `rho_manual` parameter. Expert use only, see [here](https://raw.githubusercontent.com/VanLoo-lab/ascat/master/man/ASCAT-manual.pdf) for details.\nRequires that `--ascat_ploidy` is set.", - "hidden": true + "help_text": "Overwrites ASCAT's `rho_manual` parameter. Expert use only, see [here](https://raw.githubusercontent.com/VanLoo-lab/ascat/master/man/ASCAT-manual.pdf) for details.\nRequires that `--ascat_ploidy` is set." 
}, "cf_chrom_len": { "type": "string", "fa_icon": "fas fa-ruler-horizontal", "description": "Specify a custom chromosome length file.", - "help_text": "Control-FREEC requires a file containing all chromosome lenghts. By default the fasta.fai is used. If the fasta.fai file contains chromosomes not present in the intervals, it fails (see: https://github.com/BoevaLab/FREEC/issues/106).\n\nIn this case, a custom chromosome length can be specified. It must be of the same format as the fai, but only contain the relevant chromosomes.\n\n\n\n", - "hidden": true + "help_text": "Control-FREEC requires a file containing all chromosome lengths. By default the fasta.fai is used. If the fasta.fai file contains chromosomes not present in the intervals, it fails (see: https://github.com/BoevaLab/FREEC/issues/106).\n\nIn this case, a custom chromosome length can be specified. It must be of the same format as the fai, but only contain the relevant chromosomes.\n\n\n\n" }, "cf_coeff": { "type": "number", "default": 0.05, "fa_icon": "fas fa-wrench", "description": "Overwrite Control-FREEC coefficientOfVariation", - "hidden": true, "help_text": "Details, see [ControlFREEC manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html)." }, "cf_contamination_adjustment": { "type": "boolean", "fa_icon": "fas fa-broom", "description": "Overwrite Control-FREEC contaminationAdjustement", - "hidden": true, "help_text": "Details, see [ControlFREEC manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html)." }, "cf_contamination": { @@ -343,14 +309,12 @@ "default": 0, "fa_icon": "fas fa-broom", "description": "Design known contamination value for Control-FREEC", - "hidden": true, "help_text": "Details, see [ControlFREEC manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html)." 
}, "cf_minqual": { "type": "integer", "default": 0, "fa_icon": "fas fa-greater-than", - "hidden": true, "description": "Minimal sequencing quality for a position to be considered in BAF analysis.", "help_text": "Details, see [ControlFREEC manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html)." }, @@ -358,7 +322,6 @@ "type": "integer", "default": 0, "fa_icon": "fas fa-align-center", - "hidden": true, "description": "Minimal read coverage for a position to be considered in BAF analysis.", "help_text": "Details, see [ControlFREEC manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html)." }, @@ -367,29 +330,36 @@ "default": 2, "fa_icon": "fas fa-bacon", "help_text": "In case of doubt, you can set different values and Control-FREEC will select the one that explains most observed CNAs Example: ploidy=2 , ploidy=2,3,4. For more details, see the [manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html).", - "description": "Genome ploidy used by ControlFREEC", - "hidden": true + "description": "Genome ploidy used by ControlFREEC" }, "cf_window": { "type": "number", "fa_icon": "fas fa-window-maximize", "description": "Overwrite Control-FREEC window size.", - "help_text": "Details, see [ControlFREEC manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html).", - "hidden": true + "help_text": "Details, see [ControlFREEC manual](http://boevalab.inf.ethz.ch/FREEC/tutorial.html)." }, "cnvkit_reference": { "type": "string", "fa_icon": "fas fa-file", "help_text": "https://cnvkit.readthedocs.io/en/stable/pipeline.html?highlight=reference.cnn#batch", - "description": "Copy-number reference for CNVkit", - "hidden": true + "description": "Copy-number reference for CNVkit" + }, + "joint_germline": { + "type": "boolean", + "fa_icon": "fas fa-toolbox", + "description": "Turn on the joint germline variant calling for GATK haplotypecaller", + "help_text": "Uses all normal germline samples (as designated by `status` in the input csv) in the joint germline variant calling process." 
+ }, + "joint_mutect2": { + "type": "boolean", + "fa_icon": "fas fa-angle-double-right", + "description": "Runs Mutect2 in joint (multi-sample) mode for better concordance among variant calls of tumor samples from the same patient. Mutect2 outputs will be stored in a subfolder named with patient ID under `variant_calling/mutect2/` folder. Only a single normal sample per patient is allowed. Tumor-only mode is also supported." }, "ignore_soft_clipped_bases": { "type": "boolean", "fa_icon": "fas fa-ban", "description": "Do not analyze soft clipped bases in the reads for GATK Mutect2.", - "help_text": "use the `--dont-use-soft-clipped-bases` params with GATK Mutect2.", - "hidden": true + "help_text": "use the `--dont-use-soft-clipped-bases` params with GATK Mutect2." }, "sentieon_haplotyper_emit_mode": { "type": "string", @@ -397,14 +367,12 @@ "description": "Option for selecting output and emit-mode of Sentieon's Haplotyper.", "fa_icon": "fas fa-toolbox", "help_text": "The option `--sentieon_haplotyper_emit_mode` can be set to the same string values as the Haplotyper's `--emit_mode`. To output both a vcf and a gvcf, specify both a vcf-option (currently, `all`, `confident` and `variant`) and `gvcf`. For example, to obtain a vcf and gvcf one could set `--sentieon_haplotyper_emit_mode` to `variant, gvcf`.", - "hidden": true, "pattern": "^(all|confident|gvcf|variant|gvcf,all|gvcf,confident|gvcf,variant|all,gvcf|confident,gvcf|variant,gvcf)(? **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.", - "hidden": true + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--bwa false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs." 
}, "bwamem2": { "type": "string", "fa_icon": "fas fa-copy", "description": "Path to bwa-mem2 mem indices.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--bwamem2 false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner bwa-mem2` is specified. Combine with `--save_reference` to save for future runs.", - "hidden": true + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--bwamem2 false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner bwa-mem2` is specified. Combine with `--save_reference` to save for future runs." }, "chr_dir": { "type": "string", "fa_icon": "fas fa-folder-open", "description": "Path to chromosomes folder used with ControLFREEC.", - "hidden": true, "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "dbsnp": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to dbsnp file.", - "hidden": true, "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "dbsnp_tbi": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to dbsnp index.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the dbsnp file. Combine with `--save_reference` to save for future runs.", - "hidden": true + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the dbsnp file. Combine with `--save_reference` to save for future runs." 
}, "dbsnp_vqsr": { "type": "string", @@ -659,15 +646,13 @@ "type": "string", "fa_icon": "fas fa-file", "description": "Path to FASTA dictionary file.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs.", - "hidden": true + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference. Combine with `--save_reference` to save for future runs." }, "dragmap": { "type": "string", "fa_icon": "fas fa-copy", "description": "Path to dragmap indices.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs.", - "hidden": true + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nIf you wish to recompute indices available on igenomes, set `--dragmap false`.\n\n> **NB** If none provided, will be generated automatically from the FASTA reference, if `--aligner dragmap` is specified. Combine with `--save_reference` to save for future runs." 
}, "fasta": { "type": "string", @@ -677,7 +662,7 @@ "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$", "description": "Path to FASTA genome file.", "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nThis parameter is *mandatory* if `--genome` is not specified.", - "fa_icon": "far fa-file-code" + "fa_icon": "fas fa-file" }, "fasta_fai": { "type": "string", @@ -689,33 +674,29 @@ "type": "string", "fa_icon": "fas fa-file", "description": "Path to GATK Mutect2 Germline Resource File.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nThe germline resource VCF file (bgzipped and tabixed) needed by GATK4 Mutect2 is a collection of calls that are likely present in the sample, with allele frequencies.\nThe AF info field must be present.\nYou can find a smaller, stripped gnomAD VCF file (most of the annotation is removed and only calls signed by PASS are stored) in the AWS iGenomes Annotation/GermlineResource folder.", - "hidden": true + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nThe germline resource VCF file (bgzipped and tabixed) needed by GATK4 Mutect2 is a collection of calls that are likely present in the sample, with allele frequencies.\nThe AF info field must be present.\nYou can find a smaller, stripped gnomAD VCF file (most of the annotation is removed and only calls signed by PASS are stored) in the AWS iGenomes Annotation/GermlineResource folder." }, "germline_resource_tbi": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to GATK Mutect2 Germline Resource Index.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided. 
Combine with `--save_reference` to save for future runs.", - "hidden": true + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the Germline Resource file, if provided. Combine with `--save_reference` to save for future runs." }, "known_indels": { "type": "string", "fa_icon": "fas fa-copy", "description": "Path to known indels file.", - "hidden": true, "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "known_indels_tbi": { "type": "string", "fa_icon": "fas fa-copy", "description": "Path to known indels file index.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs.", - "hidden": true + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n> **NB** If none provided, will be generated automatically from the known index file, if provided. Combine with `--save_reference` to save for future runs." 
}, "known_indels_vqsr": { "type": "string", - "fa_icon": "fas fa-copy", + "fa_icon": "fas fa-book", "description": "If you use AWS iGenomes, this has already been set for you appropriately.\n\n1st label string for VariantRecalibration (haplotypecaller joint variant calling)" }, "known_snps": { @@ -731,14 +712,13 @@ }, "known_snps_vqsr": { "type": "string", - "fa_icon": "fas fa-copy", + "fa_icon": "fas fa-book", "description": "If you use AWS iGenomes, this has already been set for you appropriately.\n\nlabel string for VariantRecalibration (haplotypecaller joint variant calling)" }, "mappability": { "type": "string", "fa_icon": "fas fa-file", "description": "Path to Control-FREEC mappability file.", - "hidden": true, "help_text": "If you use AWS iGenomes, this has already been set for you appropriately." }, "ngscheckmate_bed": { @@ -751,40 +731,41 @@ "type": "string", "fa_icon": "fas fa-file", "description": "Panel-of-normals VCF (bgzipped) for GATK Mutect2", - "help_text": "Without PON, there will be no calls with PASS in the INFO field, only an unfiltered VCF is written.\nIt is highly recommended to make your own PON, as it depends on sequencer and library preparation.\n\nThe pipeline is shipped with a panel-of-normals for `--genome GATK.GRCh38` provided by [GATK](https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-). \n\nSee [PON documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360042479112-CreateSomaticPanelOfNormals-BETA)\n> **NB** PON file should be bgzipped.", - "hidden": true + "help_text": "Without PON, there will be no calls with PASS in the INFO field, only an unfiltered VCF is written.\nIt is highly recommended to make your own PON, as it depends on sequencer and library preparation.\n\nThe pipeline is shipped with a panel-of-normals for `--genome GATK.GRCh38` provided by [GATK](https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-). 
\n\nSee [PON documentation](https://gatk.broadinstitute.org/hc/en-us/articles/360042479112-CreateSomaticPanelOfNormals-BETA)\n> **NB** PON file should be bgzipped." }, "pon_tbi": { "type": "string", "fa_icon": "fas fa-file", "description": "Index of PON panel-of-normals VCF.", - "help_text": "If none provided, will be generated automatically from the PON bgzipped VCF file.", - "hidden": true + "help_text": "If none provided, will be generated automatically from the PON bgzipped VCF file." }, "sentieon_dnascope_model": { "type": "string", - "fa_icon": "fas fa-database", - "hidden": true, + "fa_icon": "fas fa-file", "description": "Machine learning model for Sentieon Dnascope.", "help_text": " It is recommended to use DNAscope with a machine learning model to perform variant calling with higher accuracy by improving the candidate detection and filtering. Sentieon can provide you with a model trained using a subset of the data from the GiAB truth-set found in https://github.com/genome-in-a-bottle. In addition, Sentieon can assist you in the creation of models using your own data, which will calibrate the specifics of your sequencing and bio-informatics processing." }, + "snpeff_cache": { + "type": "string", + "format": "directory-path", + "fa_icon": "fas fa-cloud-download-alt", + "default": "s3://annotation-cache/snpeff_cache/", + "description": "Path to snpEff cache.", + "help_text": "Path to snpEff cache which should contain the relevant genome and build directory in the path ${snpeff_species}.${snpeff_version}" + }, "snpeff_db": { "type": "string", "fa_icon": "fas fa-database", "description": "snpEff DB version.", "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the database to be use to annotate with.\nAlternatively databases' names can be listed with the `snpEff databases`." 
}, - "vep_genome": { - "type": "string", - "fa_icon": "fas fa-microscope", - "description": "VEP genome.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when looking for local cache, or cloud based cache." - }, - "vep_species": { + "vep_cache": { "type": "string", - "fa_icon": "fas fa-microscope", - "description": "VEP species.", - "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nAlternatively species listed in Ensembl Genomes caches can be used." + "format": "directory-path", + "fa_icon": "fas fa-cloud-download-alt", + "default": "s3://annotation-cache/vep_cache/", + "description": "Path to VEP cache.", + "help_text": "Path to VEP cache which should contain the relevant species, genome and build directories at the path ${vep_species}/${vep_genome}_${vep_cache_version}" }, "vep_cache_version": { "type": "string", @@ -792,52 +773,17 @@ "description": "VEP cache version.", "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nAlternatively cache version can be use to specify the correct Ensembl Genomes version number as these differ from the concurrent Ensembl/VEP version numbers" }, - "save_reference": { - "type": "boolean", - "fa_icon": "fas fa-download", - "description": "Save built references.", - "help_text": "Set this parameter, if you wish to save all computed reference files. This is useful to avoid re-computation on future runs." - }, - "build_only_index": { - "type": "boolean", - "fa_icon": "fas fa-download", - "description": "Only built references.", - "help_text": "Set this parameter, if you wish to compute and save all computed reference files. No alignment or any other downstream steps will be performed." 
- }, - "download_cache": { - "type": "boolean", - "fa_icon": "fas fa-download", - "description": "Download annotation cache.", - "help_text": "Set this parameter, if you wish to download annotation cache.\nUsing this parameter will download cache even if --snpeff_cache and --vep_cache are provided." - }, - "igenomes_base": { - "type": "string", - "format": "directory-path", - "description": "The base path to the igenomes reference files", - "fa_icon": "fas fa-ban", - "default": "s3://ngi-igenomes/igenomes/" - }, - "igenomes_ignore": { - "type": "boolean", - "description": "Do not load the iGenomes reference config.", - "fa_icon": "fas fa-ban", - "help_text": "Do not load `igenomes.config` when running the pipeline. You may choose this option if you observe clashes between custom parameters and those supplied in `igenomes.config`. **NB** You can then run `Sarek` by specifying at least a FASTA genome file" - }, - "vep_cache": { + "vep_genome": { "type": "string", - "format": "directory-path", - "fa_icon": "fas fa-cloud-download-alt", - "default": "s3://annotation-cache/vep_cache/", - "description": "Path to VEP cache.", - "help_text": "Path to VEP cache which should contain the relevant species, genome and build directories at the path ${vep_species}/${vep_genome}_${vep_cache_version}" + "fa_icon": "fas fa-microscope", + "description": "VEP genome.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nThis is used to specify the genome when looking for local cache, or cloud based cache." 
}, - "snpeff_cache": { + "vep_species": { "type": "string", - "format": "directory-path", - "fa_icon": "fas fa-cloud-download-alt", - "default": "s3://annotation-cache/snpeff_cache/", - "description": "Path to snpEff cache.", - "help_text": "Path to snpEff cache which should contain the relevant genome and build directory in the path ${snpeff_species}.${snpeff_version}" + "fa_icon": "fas fa-microscope", + "description": "VEP species.", + "help_text": "If you use AWS iGenomes, this has already been set for you appropriately.\nAlternatively species listed in Ensembl Genomes caches can be used." } }, "help_text": "The pipeline config files come bundled with paths to the Illumina iGenomes reference index files.\nThe configuration is set up to use the AWS-iGenomes resource\ncf https://ewels.github.io/AWS-iGenomes/." @@ -1039,6 +985,9 @@ { "$ref": "#/$defs/annotation" }, + { + "$ref": "#/$defs/general_reference_genome_options" + }, { "$ref": "#/$defs/reference_genome_options" },