Skip to content

Commit

Permalink
Should address #51, aligner.with_cigar_clipping(), a poor name, to ad…
Browse files Browse the repository at this point in the history
…d softclipping to the cigar vec
  • Loading branch information
jguhlin committed Mar 16, 2024
1 parent bfdd2af commit 6aca1fd
Show file tree
Hide file tree
Showing 2 changed files with 87 additions and 6 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ and/or:
* _breaking_ Curl is no longer a default option for htslib, please re-enable it as needed with cargo.toml features
* _breaking_ Now using needletail for map-files, enabled by default. However, compression algorithms are disabled. Please enable with cargo.toml features
* Experimental rayon support
* aligner.with_cigar_clipping() to add soft clipping to the CIGAR vec (with_cigar() still adds to only the string, following the minimap2 outputs for PAF)

### 0.1.16 minimap2 2.26
* Much better cross compilation support thanks to @Adoni5
Expand Down
92 changes: 86 additions & 6 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,9 @@ pub struct Aligner {

/// Index reader created by minimap2
pub idx_reader: Option<mm_idx_reader_t>,

/// Whether to add soft clipping to CIGAR result
pub cigar_clipping: bool,
}

/// Create a default aligner
Expand All @@ -313,6 +316,7 @@ impl Default for Aligner {
threads: 1,
idx: None,
idx_reader: None,
cigar_clipping: false,
}
}
}
Expand Down Expand Up @@ -501,6 +505,11 @@ impl Aligner {
self
}

pub fn with_cigar_clipping(mut self) -> Self {
self.cigar_clipping = true;
self
}

pub fn with_sam_out(mut self) -> Self {
// Make sure MM_F_CIGAR flag isn't already set
assert!((self.mapopt.flag & MM_F_OUT_SAM as i64) == 0);
Expand Down Expand Up @@ -860,7 +869,7 @@ impl Aligner {

// Create a vector of the cigar blocks
let (cigar, cigar_str) = if n_cigar > 0 {
let cigar = p
let mut cigar = p
.cigar
.as_slice(n_cigar as usize)
.to_vec()
Expand Down Expand Up @@ -917,13 +926,19 @@ impl Aligner {
// TODO: Support hard clipping
let clip_char = 'S';

// Append soft clip identifiers to start and end
// Pre and append soft clip identifiers to start and end
if clip_len0 > 0 {
cigar_str = format!("{}{}{}", clip_len0, cigar_str, clip_char);
if self.cigar_clipping {
cigar.insert(0, (clip_len0 as u32, 4_u8));
}
}

if clip_len1 > 0 {
cigar_str = format!("{}{}{}", cigar_str, clip_len1, clip_char);
if self.cigar_clipping {
cigar.push((clip_len1 as u32, 4_u8));
}
}

(Some(cigar), Some(cigar_str))
Expand Down Expand Up @@ -1180,7 +1195,6 @@ pub enum FileFormat {
#[cfg(test)]
mod tests {
use super::*;
use std::pin::Pin;

#[test]
fn aligner_between_threads() {
Expand Down Expand Up @@ -1270,14 +1284,13 @@ mod tests {
// Because I'm not sure how this will work with FFI + Threads, want a sanity check
use rayon::prelude::*;

let mut aligner = Aligner::builder().preset(Preset::MapOnt).with_threads(2);
let mut aligner = Aligner::builder().preset(Preset::MapOnt).with_threads(2).with_cigar();

aligner = aligner.with_index("yeast_ref.mmi", None).unwrap();

let sequences = vec![
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA",
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA",
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA","ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA",
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA",
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA",
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA",
Expand All @@ -1288,6 +1301,8 @@ mod tests {
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA",
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA",
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA",
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGGGTTTTCAGCGATGCAAAA",
"GTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAGGTGTTTGAAACACCAGCGACACCAGAAGGCTGTGGATGTTAAATCGTAGAACCTATAGACGAGTTCTAAAATATACTTTGGG",
"ACGGTAGAGAGGAAGAAGAAGGAATAGCGGACTTGTGTATTTTATCGTCATTCGTGGTTATCATATAGTTTATTGATTTGAAGACTACGTAAGTAATTTGAGGACTGATTAAAATTTTCTTTTTTAGCTTAGAGTCAATTAAAGAGGGCAAAATTTTCTCAAAAGACCATGGTGCATATGACGATAGCTTTAGTAGTATGGATTGGGCTCTTCTTTCATGGATGTTATTCAGAAGGAGTGATATATCGAG",
];

Expand Down Expand Up @@ -1332,6 +1347,7 @@ mod tests {
threads,
idx,
idx_reader,
cigar_clipping: false,
};
}

Expand Down Expand Up @@ -1445,7 +1461,7 @@ mod tests {

#[test]
fn test_mappy_output() {
let aligner = Aligner::builder()
let mut aligner = Aligner::builder()
.preset(Preset::MapOnt)
.with_threads(1)
.with_index("test_data/MT-human.fa", None)
Expand Down Expand Up @@ -1497,6 +1513,70 @@ mod tests {
))
);
assert_eq!(align.cs, Some(String::from(":14-cc:1*ct:2+atc:9*ag:12*tc:1*ac:7*tc:4-t:1*ag:48*ag:2*ag:21*tc*tc:8-t:2*ag:5*tc:2*ag:4*ct*ac*ct:2*tc*ct:2*ag:4*ag:17")));

aligner = aligner.with_cigar_clipping();
let mut mappings = aligner.map(
b"GTTTATGTAGCTTATTCTATCCAAAGCAATGCACTGAAAATGTCTCGACGGGCCCACACGCCCCATAAACAAATAGGTTTGGTCCTAGCCTTTCTATTAGCTCTTAGTGAGGTTACACATGCAAGCATCCCCGCCCCAGTGAGTCGCCCTCCAAGTCACTCTGACTAAGAGGAGCAAGCATCAAGCACGCAACAGCGCAG",
true, true, None, None).unwrap();
assert_eq!(mappings.len(), 1);

let observed = mappings.pop().unwrap();

assert_eq!(observed.target_name, Some(String::from("MT_human")));
assert_eq!(observed.target_start, 576);
assert_eq!(observed.target_end, 768);
assert_eq!(observed.query_start, 0);
assert_eq!(observed.query_end, 191);
assert_eq!(observed.mapq, 29);
assert_eq!(observed.match_len, 168);
assert_eq!(observed.block_len, 195);
assert_eq!(observed.strand, Strand::Forward);
assert_eq!(observed.is_primary, true);

let align = observed.alignment.as_ref().unwrap();
assert_eq!(align.nm, 27);
assert_eq!(
align.cigar,
Some(vec![
(14, 0),
(2, 2),
(4, 0),
(3, 1),
(37, 0),
(1, 2),
(85, 0),
(1, 2),
(48, 0),
(9, 4)
])
);
assert_eq!(
align.cigar_str,
Some(String::from("14M2D4M3I37M1D85M1D48M9S"))
);

let mut mappings = aligner.map(
b"TTTGGTCCTAGCCTTTCTATTAGCTCTTAGTGAGGTTACACATGCAAGCATCCCCGCCCCAGTGAGTCGCCCTCCAAGTCACTCTGACTAAGAGGAGCAAGCATCAAGCACGCAACAGCGCAG",
true, true, None, None).unwrap();
assert_eq!(mappings.len(), 1);

let observed = mappings.pop().unwrap();

assert_eq!(
align.cigar,
Some(vec![
(14, 0),
(2, 2),
(4, 0),
(3, 1),
(37, 0),
(1, 2),
(85, 0),
(1, 2),
(48, 0),
(9, 4)
])
);
}

#[test]
Expand Down

0 comments on commit 6aca1fd

Please sign in to comment.