Skip to content

Commit

Permalink
Split vep plugins into vep cache and vep plugins
Browse files Browse the repository at this point in the history
  • Loading branch information
fellen31 committed Aug 14, 2024
1 parent ced1328 commit d87c379
Show file tree
Hide file tree
Showing 11 changed files with 83 additions and 8 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#266](https://github.com/genomic-medicine-sweden/nallo/pull/266) - Added CADD to dynamically calculate indel CADD-scores
- [#270](https://github.com/genomic-medicine-sweden/nallo/pull/270) - Added SNV phasing stats to MultiQC
- [#271](https://github.com/genomic-medicine-sweden/nallo/pull/271) - Added a `--skip_aligned_read_qc` parameter to skip the qc aligned reads subworkflow
- [#314](https://github.com/genomic-medicine-sweden/nallo/pull/314) - Added a `--vep_plugin_files` parameter to separate VEP plugins from cache

### `Changed`

Expand Down Expand Up @@ -93,6 +94,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
| `--extra_gvcfs` | |
| `--extra_snfs` | |
| `--dipcall_par` | `--par_regions` |
| | `--vep_plugin_files` |

> [!NOTE]
> Parameter has been updated if both old and new parameter information is present.
Expand Down
26 changes: 26 additions & 0 deletions assets/vep_plugin_files_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"$schema": "http://json-schema.org/draft-07/schema",
"$id": "https://raw.githubusercontent.com/genomic-medicine-sweden/nallo/master/assets/vep_plugin_files_schema.json",
"title": "Schema for VEP plugin files and their indices",
"description": "Schema for VEP plugin files and their indices",
"type": "array",
"items": {
"type": "object",
"properties": {
"vep_files": {
"type": "string",
"anyOf": [
{
"format": "file-path"
},
{
"format": "directory-path"
}
],
"exists": true,
"description": "Path to vep plugin files and their indices"
}
},
"required": ["vep_files"]
}
}
8 changes: 4 additions & 4 deletions conf/modules/snv_annotation.config
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ process {
withName: '.*:SNV_ANNOTATION:ENSEMBLVEP_VEP' {
ext.prefix = { "${meta.id}_vep" }
ext.args = { [
"--dir_plugins ${cache}/Plugins",
"--plugin LoFtool,${cache}/LoFtool_scores.txt",
"--plugin pLI,${cache}/pLI_values.txt",
"--plugin SpliceAI,snv=${cache}/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=${cache}/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz",
"--dir_plugins .",
"--plugin LoFtool,LoFtool_scores.txt",
"--plugin pLI,pLI_values.txt",
"--plugin SpliceAI,snv=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz,indel=spliceai_21_scores_raw_snv_-v1.3-.vcf.gz",
'--distance 5000',
'--buffer_size 20000',
'--format vcf --max_sv_size 248387328',
Expand Down
5 changes: 3 additions & 2 deletions conf/test.config
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ params {
variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json'

// SNV Annotation
vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv'
vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv'
snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv'

// Rank variants
reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv'
Expand Down
18 changes: 18 additions & 0 deletions docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,23 @@ Some workflows require additional files:
- If running without `--skip_repeat_annotation`, download a json variant catalog, (e.g. [variant_catalog_grch38.json](https://github.com/Clinical-Genomics/stranger/raw/main/stranger/resources/variant_catalog_grch38.json)) matching your reference genome to supply with `--variant_catalog`.

- If running without `--skip_snv_annotation`, download [VEP cache](https://ftp.ensembl.org/pub/release-110/variation/vep/homo_sapiens_vep_110_GRCh38.tar.gz) to supply with `--vep_cache` and prepare a samplesheet with annotation databases ([`echtvar encode`](https://github.com/brentp/echtvar)) to supply with `--snp_db`:
- If running without `--skip_snv_annotation`, you will also need to download VEP plugin files and list their paths in a CSV file to supply with `--vep_plugin_files` (see [example](https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugin_files.csv)). The pLI, LoFtool and SpliceAI plugin files are required.

```
vep_files
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/SpliceAI.pm
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/LoFtool.pm
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/dbNSFP.pm
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/plugin_config.txt
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz.tbi
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_indel_-v1.3-.vcf.gz
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/spliceai_21_scores_raw_snv_-v1.3-.vcf.gz.tbi
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/pLI_values.txt
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/pLI.pm
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/MaxEntScan.pm
https://raw.githubusercontent.com/genomic-medicine-sweden/test-datasets/nallo/reference/vep_plugins/LoFtool_scores.txt
```

```
sample,file
Expand Down Expand Up @@ -247,6 +264,7 @@ Different processes may need extra input files
| `trgt_repeats` | BED-file for repeats to be genotyped | `string` | | | |
| `snp_db` | Extra echtvar-databases to annotate SNVs with | `string` | | | |
| `vep_cache` | Path to directory of vep_cache | `string` | | | |
| `vep_plugin_files` | A CSV file with paths to VEP plugin files. pLI, LoFtool and SpliceAI are required. | `string` | | | |
| `bed` | BED file with regions of interest | `string` | | | |
| `hificnv_xy` | | `string` | | | |
| `hificnv_xx` | | `string` | | | |
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ params {
snp_db = null
variant_consequences_snv = null
vep_cache = null
vep_plugin_files = null
hificnv_xy = null
hificnv_xx = null
hificnv_exclude = null
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,12 @@
"default": 110,
"description": "VEP cache version"
},
"vep_plugin_files": {
"type": "string",
"mimetype": "text/csv",
"description": "A csv file with paths to vep plugin files, pLI, LoFtool and SpliceAI is required.",
"schema": "assets/vep_plugin_files_schema.json"
},
"deepvariant_model_type": {
"type": "string",
"default": "PACBIO",
Expand Down
3 changes: 2 additions & 1 deletion subworkflows/local/snv_annotation/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ workflow SNV_ANNOTATION {
ch_fai // channel: [mandatory] [ val(meta), path(fai) ]
ch_vep_cache // channel: [mandatory] [ path(cache) ]
val_vep_cache_version // string: [mandatory] default: 110
ch_vep_extra_files // channel: [mandatory] [ path(files) ]
val_annotate_cadd // bool: [mandatory]
ch_cadd_header // channel: [mandatory] [ path(txt) ]
ch_cadd_resources // channel: [mandatory] [ path(annotation) ]
Expand Down Expand Up @@ -59,7 +60,7 @@ workflow SNV_ANNOTATION {
val_vep_cache_version,
ch_vep_cache,
ch_fasta,
[]
ch_vep_extra_files
)
ch_versions = ch_versions.mix(ENSEMBLVEP_VEP.out.versions)

Expand Down
2 changes: 1 addition & 1 deletion subworkflows/local/utils_nfcore_nallo_pipeline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def fileDependencies = [
mapping : ["fasta", "somalier_sites"],
assembly : ["fasta", "par_regions"], // The assembly workflow should be split into two - assembly and variant calling (requires ref)
snv_calling : ["fasta", "par_regions"],
snv_annotation : ["snp_db", "vep_cache", "reduced_penetrance", "score_config_snv", "variant_consequences_snv"],
snv_annotation : ["snp_db", "vep_cache", "vep_plugin_files", "reduced_penetrance", "score_config_snv", "variant_consequences_snv"],
cnv_calling : ["hificnv_xy", "hificnv_xx", "hificnv_exclude"],
repeat_calling : ["trgt_repeats"],
repeat_annotation: ["variant_catalog"],
Expand Down
2 changes: 2 additions & 0 deletions tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ nextflow_pipeline {
trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed'
variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json'
vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv'
snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv'
somalier_sites = params.pipelines_testdata_base_path + 'nallo/reference/somalier_sites.vcf.gz'
reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv'
Expand Down Expand Up @@ -155,6 +156,7 @@ nextflow_pipeline {
trgt_repeats = params.pipelines_testdata_base_path + 'nallo/reference/pathogenic_repeats.hg38.bed'
variant_catalog = params.pipelines_testdata_base_path + 'nallo/reference/variant_catalog_grch38.json'
vep_cache = params.pipelines_testdata_base_path + 'nallo/reference/vep_cache_test_data.tar.gz'
vep_plugin_files = params.pipelines_testdata_base_path + 'nallo/reference/vep_plugin_files.csv'
snp_db = params.pipelines_testdata_base_path + 'nallo/testdata/snp_dbs.csv'
somalier_sites = params.pipelines_testdata_base_path + 'nallo/reference/somalier_sites.vcf.gz'
reduced_penetrance = params.pipelines_testdata_base_path + 'nallo/reference/reduced_penetrance.tsv'
Expand Down
18 changes: 18 additions & 0 deletions workflows/nallo.nf
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ workflow NALLO {
: Channel.value([])
ch_vep_cache_unprocessed = params.vep_cache ? Channel.fromPath(params.vep_cache).map { it -> [ [ id:'vep_cache' ], it ] }.collect()
: Channel.value([[],[]])
ch_vep_extra_files_unsplit = params.vep_plugin_files ? Channel.fromPath(params.vep_plugin_files).collect()
: ''
ch_expected_xy_bed = params.hificnv_xy ? Channel.fromPath(params.hificnv_xy).collect()
: ''
ch_expected_xx_bed = params.hificnv_xx ? Channel.fromPath(params.hificnv_xx).collect()
Expand Down Expand Up @@ -121,6 +123,21 @@ workflow NALLO {
.collect()
.set { ch_pedfile }

// Read the CSV supplied with --vep_plugin_files (header `vep_files`, one path
// per row) and gather every listed plugin file/directory into one channel item.
if (params.vep_plugin_files) {
    ch_vep_extra_files_unsplit.splitCsv ( header:true )
        .map { row ->
            // `def` keeps `f` local to this closure. Without it the variable is
            // script-global and shared between closure invocations.
            // NOTE(review): the `[0]` index is kept from the original; confirm the
            // row shape produced by splitCsv on the collected path list.
            def f = file(row.vep_files[0])
            if(f.isFile() || f.isDirectory()){
                return [f]
            } else {
                error("\nVep database file ${f} does not exist.")
            }
        }
        .collect()
        .set {ch_vep_extra_files}
} else {
    // Keep the channel defined when no plugin CSV is given, so that later
    // references to ch_vep_extra_files do not hit an undefined variable.
    ch_vep_extra_files = Channel.value([])
}

//
// Convert BAM files to FASTQ and vice versa
//
Expand Down Expand Up @@ -341,6 +358,7 @@ workflow NALLO {
fai.map { name, fai -> [ [ id: name ], fai ] },
ch_vep_cache,
params.vep_cache_version,
ch_vep_extra_files,
(params.cadd_resources && params.cadd_prescored),
ch_cadd_header,
ch_cadd_resources,
Expand Down

0 comments on commit d87c379

Please sign in to comment.