-
Notifications
You must be signed in to change notification settings - Fork 13
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
olgabot/sourmash sig merge #117
Merged
Merged
Changes from 3 commits
Commits
Show all changes
52 commits
Select commit
Hold shift + click to select a range
df31610
Update extract_per_cell_fastqs to not say __aligned__aligned and retr…
olgabot 700272d
Initial commit for adding sourmash sig merge on aligned/unaligned fro…
olgabot c4f9521
Update changelog
olgabot 7d3e215
Try to get grouptuple to work
olgabot 533dc94
Set minimum UMI per cell to be a default of 1000
olgabot da84ba4
Set test min UMI per cell as 5
olgabot 6d30732
Remove unused --shard_size option
olgabot 8fc0c33
Add option for skipping sig merge
olgabot 63273e5
Update Dockerfile
olgabot 054d8b3
Add test for --skip_sig_merge
olgabot f1304ca
Update changelog
olgabot a579175
Use more realistic scales and ksizes
olgabot ef43907
regular test doesn't fail anymore
olgabot 08c32ec
Merge branch 'dev' into olgabot/sourmash-sig-merge
pranathivemuri d0bec5c
Update bam config
olgabot 000d6ca
Add dump ch_sourmash_sketches_mixed
olgabot 6bce013
Update schema
olgabot 04e62d4
Merge remote-tracking branch 'origin' into olgabot/sourmash-sig-merge
olgabot ba765ff
Add params.ksizes to sketch output
olgabot ed5e72b
Add peptide_molecules
olgabot 1718502
add check for skip_compute in sig merge logic
olgabot 0029b51
Add header
olgabot c04a5a1
Only mix sketches if not skip_compute
olgabot 5893884
param --> params
olgabot 4118c6c
Add some projectdir stuff
olgabot baa96f8
More projectDir fixes
olgabot 8ba5db1
Do per-ksize sourmash sig merge
olgabot c27b2b4
Add sourmash describe csvs to multiqc
olgabot 05f6702
Update ProjectDir
olgabot 2fe4523
Properly save translate output
olgabot 27d95f0
Add dump of sourmash sketches
olgabot e68db00
Fixing sourmash sig merge
olgabot 7195eae
Add ch_sourmash_sig_describe_nucleotides
olgabot 9d090b4
more if/else
olgabot 5bea4c9
Update changelog
olgabot 9682b03
Getting "sig merge" to finally run
olgabot 76b2ed7
Add option to skip sig merge
olgabot 3c95f82
Update validate_sketch_value to only allow a single value
olgabot 1321968
Change sketch values to single value
olgabot dc0ecdd
peptide_molecule --> translate_peptide_molecule
olgabot 1fe32b1
add "translate_" to peptide ksize and jaccard threshold
olgabot 1297886
Do sig merge on individual moltypes
olgabot d8e764b
Add test_sig_merge
olgabot 232ac0d
Add test_sig_merge to CI
olgabot 253fa83
Don't allow multiple sketch values
olgabot 67aec5e
Reduce bloom filter table size
olgabot 8c99f9f
Sig merge is working!
olgabot fccec83
Make test params more realistic
olgabot 8de30e3
Update default ksizes, add track abundance true
olgabot 0aade17
Update variables in merge_renamed_sigs.pyh
olgabot ad36259
Get sourmash compare to happen on correct ksizes and moltypes
olgabot 43bbafd
Merge branch 'dev' into olgabot/sourmash-sig-merge
olgabot File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -894,9 +894,9 @@ if (params.tenx_tgz || params.bam) { | |
.set{ tenx_reads_with_good_barcodes_ch } | ||
|
||
process extract_per_cell_fastqs { | ||
tag "${is_aligned_channel_id}__${cell_barcode}" | ||
tag "${fastq_id}" | ||
label "low_memory" | ||
errorStrategy 'ignore' | ||
errorStrategy { task.exitStatus in [143,137,104,134,139] ? 'retry' : 'ignore' } | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this is cool! |
||
publishDir "${params.outdir}/10x-fastqs/per-cell/${channel_id}/", mode: 'copy', pattern: '*.fastq.gz', saveAs: { filename -> "${filename.replace("|", "-")}"} | ||
|
||
input: | ||
|
@@ -909,10 +909,8 @@ if (params.tenx_tgz || params.bam) { | |
set val(fastq_id), val(cell_id), val(is_aligned) into ch_fastq_id_to_cell_id_is_aligned | ||
|
||
script: | ||
is_aligned_channel_id = "${channel_id}__${is_aligned}" | ||
processes = "--processes ${task.cpus}" | ||
this_cell_barcode = tenx_cell_barcode_pattern.replace('([ACGT]+)', cell_barcode) | ||
fastq_id = "${is_aligned_channel_id}__${is_aligned}__${cell_barcode}" | ||
fastq_id = "${channel_id}__${is_aligned}__${cell_barcode}" | ||
cell_id = "${channel_id}__${cell_barcode}" | ||
this_cell_fastq_gz = "${fastq_id}.fastq.gz" | ||
""" | ||
|
@@ -1276,7 +1274,7 @@ if (!params.remove_ribo_rna) { | |
|
||
output: | ||
file(csv) into ch_sourmash_sig_describe_nucleotides | ||
set val(sketch_id), val("dna"), val(ksize), val(sketch_value), file(sig) into sourmash_sketches_all_nucleotide | ||
set val(sample_id), val("dna"), val(ksize), file(sig) into sourmash_sketches_all_nucleotide | ||
|
||
script: | ||
// Don't calculate DNA signature if this is protein, to minimize disk, | ||
|
@@ -1301,7 +1299,7 @@ if (!params.remove_ribo_rna) { | |
sourmash sig describe --csv ${csv} ${sig} | ||
""" | ||
} | ||
sourmash_sketches_nucleotide = sourmash_sketches_all_nucleotide.filter{ it[4].size() > 0 } | ||
sourmash_sketches_nucleotide = sourmash_sketches_all_nucleotide.filter{ it[3].size() > 0 } | ||
} | ||
} else { | ||
sourmash_sketches_nucleotide = Channel.empty() | ||
|
@@ -1344,7 +1342,7 @@ if (!params.skip_compute && (protein_input || params.reference_proteome_fasta)){ | |
|
||
output: | ||
file(csv) into ch_sourmash_sig_describe_peptides | ||
set val(sketch_id), val(molecule), val(ksize), val(sketch_value), file(sig) into sourmash_sketches_all_peptide | ||
set val(sample_id), val(molecule), val(ksize), file(sig) into sourmash_sketches_all_peptide | ||
|
||
script: | ||
sketch_id = make_sketch_id(molecule, ksize, sketch_value, track_abundance, sketch_style) | ||
|
@@ -1369,11 +1367,68 @@ if (!params.skip_compute && (protein_input || params.reference_proteome_fasta)){ | |
sourmash sig describe --csv ${csv} ${sig} | ||
""" | ||
} | ||
sourmash_sketches_peptide = sourmash_sketches_all_peptide.filter{ it[4].size() > 0 } | ||
sourmash_sketches_peptide = sourmash_sketches_all_peptide.filter{ it[3].size() > 0 } | ||
} else { | ||
sourmash_sketches_peptide = Channel.empty() | ||
} | ||
|
||
if (params.bam || params.tenx_tgz) { | ||
// Merge signatures from same sample id and sketch id | ||
|
||
sourmash_sketches_nucleotide | ||
.mix ( sourmash_sketches_peptide ) | ||
.set { ch_sourmash_sketches_mixed} | ||
|
||
ch_fastq_id_to_cell_id_is_aligned | ||
.combine ( ch_sourmash_sketches_mixed ) | ||
.dump( tag: 'fastq_id_to_cells__join__sketches' ) | ||
.groupTuple( by: 1 ) | ||
.dump( tag: 'fastq_id_to_cells__join__sketches__grouptuple' ) | ||
.set { ch_sourmash_sketches_to_merge } | ||
|
||
process sourmash_sig_merge { | ||
tag "${sig_id}" | ||
label "low_memory" | ||
publishDir "${params.outdir}/sketches_merged/${sketch_id}", mode: "${params.publish_dir_mode}", | ||
saveAs: {filename -> | ||
if (filename.indexOf(".csv") > 0) "description/$filename" | ||
else if (filename.indexOf(".sig") > 0) "sigs/$filename" | ||
else null | ||
} | ||
|
||
input: | ||
set val(molecule), val(ksize), val(sketch_style), val(sketch_value), val(sample_id), file(reads) from ch_sourmash_sketches_to_merge | ||
|
||
output: | ||
file(csv) into ch_sourmash_sig_describe_merged | ||
set val(sketch_id), val(molecule), val(ksize), val(sketch_value), file(sig) into sourmash_sketches | ||
|
||
script: | ||
// sketch_id = make_sketch_id(molecule, ksize, sketch_value, track_abundance, sketch_style) | ||
sketch_value_flag = make_sketch_value_flag(sketch_style, sketch_value) | ||
track_abundance_flag = track_abundance ? '--track-abundance' : '' | ||
processes = "--processes ${task.cpus}" | ||
sig_id = "${sample_id}__${sketch_id}" | ||
sig = "${sig_id}.sig" | ||
csv = "${sig_id}.csv" | ||
""" | ||
sourmash compute \\ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So are we running sourmash compute twice? Is this also dependent on the skip_compute flag? |
||
${sketch_value_flag} \\ | ||
--ksizes $ksize \\ | ||
--input-is-protein \\ | ||
--$molecule \\ | ||
--name '${sample_id}' \\ | ||
--no-dna \\ | ||
$processes \\ | ||
$track_abundance_flag \\ | ||
--output ${sig} \\ | ||
$reads | ||
sourmash sig describe --csv ${csv} ${sig} | ||
""" | ||
} | ||
|
||
} | ||
|
||
if (params.split_kmer){ | ||
process ska_compare_sketches { | ||
tag "${sketch_id}" | ||
|
@@ -1397,7 +1452,6 @@ if (params.split_kmer){ | |
if (!params.split_kmer && !params.skip_compare && !params.skip_compute) { | ||
process sourmash_compare_sketches { | ||
// Combine peptide and nucleotide sketches | ||
sourmash_sketches = sourmash_sketches_peptide.concat(sourmash_sketches_nucleotide) | ||
tag "${sketch_id}" | ||
publishDir "${params.outdir}/compare_sketches", mode: 'copy' | ||
|
||
|
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
could you write a detailed description? also
sourmash_sig_merge