Skip to content

Commit

Permalink
Adding Snippy to count SNPs in the finished assembly
Browse files Browse the repository at this point in the history
  • Loading branch information
marchoeppner committed Oct 2, 2024
1 parent 60ad4c9 commit 19c89f8
Show file tree
Hide file tree
Showing 46 changed files with 2,337 additions and 6 deletions.
16 changes: 16 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,14 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: SAMTOOLS_MERGE {
    // Publishing is switched off for this process (enabled: false); the
    // remaining settings only take effect if it is switched on.
    publishDir = [
        path: { "${params.outdir}/samples/${meta.sample_id}/bam" },
        mode: params.publish_dir_mode,
        enabled: false,
        // A null return from saveAs means the file is not published,
        // which keeps versions.yml out of the output directory.
        saveAs: { fname -> fname.equals('versions.yml') ? null : fname }
    ]
}
withName: 'BWAMEM2_MEM|BWAMEM2_INDEX' {
publishDir = [
path: { "${params.outdir}/samples/${meta.sample_id}/bwamem" },
Expand Down Expand Up @@ -175,6 +183,14 @@ process {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
withName: 'SNIPPY_RUN' {
    // Publish the outputs of this process into the per-sample
    // "variants" directory below params.outdir.
    publishDir = [
        path: { "${params.outdir}/samples/${meta.sample_id}/variants" },
        mode: params.publish_dir_mode,
        enabled: true,
        // A null return from saveAs means the file is not published,
        // which keeps versions.yml out of the output directory.
        saveAs: { fname -> fname.equals('versions.yml') ? null : fname }
    ]
}
withName: FASTQC {
publishDir = [
path: { "${params.outdir}/samples/${meta.sample_id}/qc/fastqc" },
Expand Down
8 changes: 8 additions & 0 deletions modules/bcftools/stats/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
---
# Conda environment for the bcftools/stats module.
name: bcftools_stats
channels:
  # Channel order follows the Bioconda recommendation (conda-forge, then
  # bioconda). The Anaconda "defaults" channel is intentionally not listed:
  # both packages below are pinned to bioconda and their dependencies
  # resolve from conda-forge.
  - conda-forge
  - bioconda
dependencies:
  # Keep bcftools and htslib on the same release.
  - bioconda::bcftools=1.20
  - bioconda::htslib=1.20
46 changes: 46 additions & 0 deletions modules/bcftools/stats/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
 * BCFTOOLS_STATS
 *
 * Runs `bcftools stats` on a VCF file and writes the report to
 * `<prefix>.bcftools_stats.txt`.
 *
 * Input:
 *   meta - sample map; `meta.sample_id` is used for the task tag and as the
 *          default output prefix
 *   vcf  - VCF file passed to `bcftools stats`
 *   tbi  - index file staged next to the VCF; it is not referenced on the
 *          command line
 *
 * Output:
 *   stats    - tuple of meta and the file matching `*stats.txt`
 *   versions - versions.yml recording the bcftools version
 *
 * Configuration:
 *   task.ext.args   - extra command line arguments for `bcftools stats`
 *   task.ext.prefix - overrides the output prefix (default: meta.sample_id)
 */
process BCFTOOLS_STATS {
    tag "$meta.sample_id"
    label 'short_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/bcftools:1.20--h8b25389_0':
        'quay.io/biocontainers/bcftools:1.20--h8b25389_0' }"

    input:
    tuple val(meta), path(vcf), path(tbi)

    output:
    tuple val(meta), path("*stats.txt"), emit: stats
    path "versions.yml"                , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args   = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.sample_id}"

    """
    bcftools stats \\
        $args \\
        $vcf > ${prefix}.bcftools_stats.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """

    stub:
    // Use the same key as the script section and the tag. The sample map is
    // read via `sample_id`; falling back to `meta.id` here would name the
    // stub output "null.bcftools_stats.txt" when that key is absent.
    def prefix = task.ext.prefix ?: "${meta.sample_id}"

    """
    touch ${prefix}.bcftools_stats.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
    END_VERSIONS
    """
}
77 changes: 77 additions & 0 deletions modules/bcftools/stats/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
# Module description for bcftools/stats.
# Only the inputs declared by the process (meta, vcf, tbi) are listed;
# further bcftools options are supplied through `ext.args`.
name: bcftools_stats
description: Generates stats from VCF files
keywords:
  - variant calling
  - stats
  - VCF
tools:
  - stats:
      description: |
        Parses VCF or BCF and produces text file stats which is suitable for
        machine processing and can be plotted using plot-vcfstats.
      homepage: http://samtools.github.io/bcftools/bcftools.html
      documentation: http://www.htslib.org/doc/bcftools.html
      doi: "10.1093/bioinformatics/btp352"
      licence: ["MIT"]
input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information.
        The process reads the `sample_id` key for the task tag and the
        default output prefix.
        e.g. [ sample_id:'test' ]
  - vcf:
      type: file
      description: VCF input file
      pattern: "*.{vcf,vcf.gz}"
  - tbi:
      type: file
      description: |
        The tab index for the VCF file to be inspected. Optional: only
        needed when bcftools is asked to restrict the analysis to regions
        (e.g. `-r`/`-R` passed through `ext.args`).
      pattern: "*.tbi"
output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. [ sample_id:'test' ]
  - stats:
      type: file
      description: Text output file containing stats
      pattern: "*stats.txt"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
authors:
  - "@joseespinosa"
  - "@drpatelh"
  - "@SusiJo"
  - "@TCLamnidis"
maintainers:
  - "@joseespinosa"
  - "@drpatelh"
  - "@SusiJo"
  - "@TCLamnidis"
182 changes: 182 additions & 0 deletions modules/bcftools/stats/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// nf-test suite for BCFTOOLS_STATS.
//
// The process under test declares a single input channel,
// `tuple val(meta), path(vcf), path(tbi)`, so every test assigns input[0]
// only. The meta map carries `sample_id`, which the process uses for its tag
// and output prefix; `id` is kept alongside it for nf-core style consumers.
//
// NOTE(review): snapshots must be regenerated after this change
// (`nf-test test --update-snapshot`).
nextflow_process {

    name "Test Process BCFTOOLS_STATS"
    script "../main.nf"
    process "BCFTOOLS_STATS"

    tag "modules"
    tag "modules_nfcore"
    tag "bcftools"
    tag "bcftools/stats"

    // VCF without an index: the tbi slot of the tuple is left empty.
    test("sarscov2 - vcf_gz") {

        when {
            process {
                """
                input[0] = [
                    [ id:'test', sample_id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
                    []
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out.versions).match("versions") },
                // Only the first six lines of the report are compared.
                { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }
            )
        }

    }

    // VCF together with its tabix index.
    test("sarscov2 - vcf_gz - tbi") {

        when {
            process {
                """
                input[0] = [
                    [ id:'tbi_test', sample_id:'tbi_test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true)
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out.versions).match("tbi_versions") },
                { assert snapshot(file(process.out.stats.get(0).get(1)).readLines()[0..5]).match() }
            )
        }

    }

    // Stub run: only checks that the declared outputs are produced.
    test("sarscov2 - vcf_gz - stub") {

        options "-stub"

        when {
            process {
                """
                input[0] = [
                    [ id:'test', sample_id:'test' ], // meta map
                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true),
                    []
                ]
                """
            }
        }

        then {
            assertAll(
                { assert process.success },
                { assert snapshot(process.out).match() }
            )
        }

    }

}
Loading

0 comments on commit 19c89f8

Please sign in to comment.