Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sourmash search #135

Open
wants to merge 2 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,42 @@ else {
barcode_metadata_folder = "barcode_metadata"
}


////////////////////////////////////////////////////
/* -- Parse Sourmash Search Parameters -- */
////////////////////////////////////////////////////

// Optional DNA cell type database.
// When --celltype_sbt_db_dna is set, ch_celltype_db_sbt_dna emits one
// ("dna", ksize, database file) tuple per matching path; otherwise it is empty.
// The error message is a single string and names the parameter that was
// actually requested, so the missing path is visible to the user.
if (params.celltype_sbt_db_dna) {
  Channel.fromPath(params.celltype_sbt_db_dna, checkIfExists: true)
    .ifEmpty { exit 1, "Reference cell label DNA k-mer signatures file not found: ${params.celltype_sbt_db_dna}" }
    .map { tuple("dna", params.celltype_sbt_db_dna_ksize, it) }
    .set { ch_celltype_db_sbt_dna }
} else {
  ch_celltype_db_sbt_dna = Channel.empty()
}

// Optional protein cell type database.
// When --celltype_sbt_db_protein is set, ch_celltype_db_sbt_protein emits one
// ("protein", ksize, database file) tuple per matching path; otherwise it is empty.
// The error message is a single string and names the parameter that was
// actually requested, so the missing path is visible to the user.
if (params.celltype_sbt_db_protein) {
  Channel.fromPath(params.celltype_sbt_db_protein, checkIfExists: true)
    .ifEmpty { exit 1, "Reference cell label Protein k-mer signatures file not found: ${params.celltype_sbt_db_protein}" }
    .map { tuple("protein", params.celltype_sbt_db_protein_ksize, it) }
    .set { ch_celltype_db_sbt_protein }
} else {
  ch_celltype_db_sbt_protein = Channel.empty()
}

// Optional Dayhoff-encoded cell type database.
// When --celltype_sbt_db_dayhoff is set, ch_celltype_db_sbt_dayhoff emits one
// ("dayhoff", ksize, database file) tuple per matching path; otherwise it is empty.
// The error message is a single string and names the parameter that was
// actually requested, so the missing path is visible to the user.
if (params.celltype_sbt_db_dayhoff) {
  Channel.fromPath(params.celltype_sbt_db_dayhoff, checkIfExists: true)
    .ifEmpty { exit 1, "Reference cell label Dayhoff k-mer signatures file not found: ${params.celltype_sbt_db_dayhoff}" }
    .map { tuple("dayhoff", params.celltype_sbt_db_dayhoff_ksize, it) }
    .set { ch_celltype_db_sbt_dayhoff }
} else {
  ch_celltype_db_sbt_dayhoff = Channel.empty()
}


// Has the run name been specified by the user?
// this has the bonus effect of catching both -name and --name
custom_runName = params.name
Expand Down Expand Up @@ -1648,6 +1684,41 @@ if (!params.split_kmer && !params.skip_compare && !params.skip_compute) {
}
}

// If at least one cell type database was supplied, search every sketch
// against the database built for the same molecule type and k-mer size.
// The guard checks the three per-molecule parameters declared in
// nextflow.config; there is no single `celltype_sbt_db` parameter.
if (params.celltype_sbt_db_dna || params.celltype_sbt_db_protein || params.celltype_sbt_db_dayhoff) {

  // Gather the per-molecule database channels into one stream of
  // (molecule, ksize, database) tuples. Keys are converted to plain strings
  // so that an integer ksize from the config matches a ksize that arrives
  // as a string on the sketch channel.
  ch_celltype_db_sbt_dna
    .mix(ch_celltype_db_sbt_protein, ch_celltype_db_sbt_dayhoff)
    .map { molecule, ksize, db -> tuple(molecule.toString(), ksize.toString(), db) }
    .set { ch_celltype_dbs_keyed }

  // Pair each sketch with the database sharing its (molecule, ksize) key.
  // NOTE(review): assumes ch_sourmash_sketches_to_search emits
  // (molecule, ksize, signature file) with molecule in {dna, protein, dayhoff}
  // and one signature file per item -- confirm against where it is created.
  ch_sourmash_sketches_to_search
    .map { molecule, ksize, sig -> tuple(molecule.toString(), ksize.toString(), sig) }
    .combine(ch_celltype_dbs_keyed, by: [0, 1])
    .set { ch_sketches_with_celltype_db }

  process sourmash_search {
    // Query one sketch against one cell type database and write the hits as CSV
    tag "${sigs.simpleName}"
    // NOTE(review): results are published under "index" as in the original;
    // a dedicated "search" folder may be what was intended -- confirm.
    publishDir "${params.outdir}/index", mode: 'copy'

    input:
    set val(molecule), val(ksize), file(sigs), file(celltype_db_sbt_zip) from ch_sketches_with_celltype_db

    output:
    file(csv)

    script:
    // Name the output after both the query and the database so results from
    // different sketches do not overwrite each other when published
    csv = "${sigs.simpleName}__${celltype_db_sbt_zip.simpleName}.csv"
    // Parse sourmash search parameters
    containment_flag = params.containment ? "--containment" : ""
    threshold_flag = "--threshold ${params.search_threshold}"
    // sourmash search expects the query signature first, then the database(s)
    """
    sourmash search \\
      ${threshold_flag} \\
      ${containment_flag} \\
      --ksize ${ksize} \\
      --${molecule} \\
      --output ${csv} \\
      ${sigs} \\
      ${celltype_db_sbt_zip}
    """
  }
}


/*
* STEP 16 - MultiQC
Expand Down
13 changes: 13 additions & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,19 @@ params {
// Comparing sketches
skip_compare = false

// Query per-cell k-mer signatures for labels in a cell type database
// When true, main.nf adds `--containment` to the `sourmash search` command
containment = false
// Passed to `sourmash search` as `--threshold ${params.search_threshold}`
search_threshold = 1e-10
// DNA database: set to a path to enable (false = disabled), k_nuc size = 21
celltype_sbt_db_dna = false
celltype_sbt_db_dna_ksize = 21
// Protein database: set to a path to enable (false = disabled), k_nuc size = 30, k_aa size = 10
celltype_sbt_db_protein = false
celltype_sbt_db_protein_ksize = 30
// Dayhoff database: set to a path to enable (false = disabled), k_nuc size = 51, k_aa size = 17
celltype_sbt_db_dayhoff = false
celltype_sbt_db_dayhoff_ksize = 51

// Computing sketches
skip_compute = false

Expand Down