From 32e2508fcbb1e93e532afea7d1ba7f5b8910bb94 Mon Sep 17 00:00:00 2001 From: jguhlin Date: Sun, 22 Jan 2023 13:10:55 +1300 Subject: [PATCH 1/2] Some minor changes to see if we can get this to work on google colab --- minimappers2/Cargo.toml | 2 +- minimappers2/pyproject.toml | 7 ++----- minimappers2/src/lib.rs | 6 +++--- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/minimappers2/Cargo.toml b/minimappers2/Cargo.toml index 2d84742..adce8b5 100644 --- a/minimappers2/Cargo.toml +++ b/minimappers2/Cargo.toml @@ -14,7 +14,7 @@ minimap2-sys = { version = "0.1.10", features = ["simde"] } #minimap2 = { path = "../", features = ["simde"] } #minimap2-sys = { path = "../minimap2-sys", features = ["simde"] } crossbeam = "0.8.2" -mimalloc = {version = "0.1.34", default-features = false } +# mimalloc = {version = "0.1.34", default-features = false } pyo3 = { version = "0.17.0", features = ["extension-module", "abi3-py37"] } polars = "0.26.1" diff --git a/minimappers2/pyproject.toml b/minimappers2/pyproject.toml index 4ecb6fb..98273d5 100644 --- a/minimappers2/pyproject.toml +++ b/minimappers2/pyproject.toml @@ -13,18 +13,15 @@ license = { file = "LICENSE" } description = "A Python wrapper for minimap2-rs" keywords = ["minimap2", "bioinformatics", "alignment", "mapping"] dependencies = [ - "polars==0.15.15", - "pyarrow==10.0.1", + "polars>=0.15.15", + "pyarrow>=10.0.1", ] [project.urls] homepage = "https://github.com/jguhlin/minimap2-rs" repository = "https://github.com/jguhlin/minimap2-rs" - [build-system] requires = ["maturin>=0.14,<0.15"] build-backend = "maturin" -[options] - diff --git a/minimappers2/src/lib.rs b/minimappers2/src/lib.rs index a5c1fc5..856c470 100644 --- a/minimappers2/src/lib.rs +++ b/minimappers2/src/lib.rs @@ -1,7 +1,7 @@ use std::num::NonZeroI32; use std::sync::{Mutex, Arc}; -use mimalloc::MiMalloc; +// use mimalloc::MiMalloc; use minimap2::*; use minimap2_sys::{mm_set_opt, MM_F_CIGAR}; use pyo3::prelude::*; @@ -10,8 +10,8 @@ use polars::{prelude::*, df}; use crossbeam::queue::ArrayQueue; use fffx::{Fasta, Fastq}; -#[global_allocator] -static GLOBAL: MiMalloc = MiMalloc; +// #[global_allocator] +// static GLOBAL: MiMalloc = MiMalloc; mod multithreading; From 94335fcc962d400e834e970ed0f8939037f6a46a Mon Sep 17 00:00:00 2001 From: jguhlin Date: Sun, 22 Jan 2023 13:13:19 +1300 Subject: [PATCH 2/2] Remove default test --- minimappers2/Cargo.lock | 20 ----- minimappers2/src/lib.rs | 127 +++++++++++++++-------------- minimappers2/src/multithreading.rs | 2 +- 3 files changed, 67 insertions(+), 82 deletions(-) diff --git a/minimappers2/Cargo.lock b/minimappers2/Cargo.lock index 061f297..87422e0 100644 --- a/minimappers2/Cargo.lock +++ b/minimappers2/Cargo.lock @@ -717,16 +717,6 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "348108ab3fba42ec82ff6e9564fc4ca0247bdccdc68dd8af9764bbc79c3c8ffb" -[[package]] -name = "libmimalloc-sys" -version = "0.1.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd8c7cbf8b89019683667e347572e6d55a7df7ea36b0c4ce69961b0cde67b174" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "libz-sys" version = "1.1.8" @@ -800,15 +790,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "mimalloc" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dcb174b18635f7561a0c6c9fc2ce57218ac7523cf72c50af80e2d79ab8f3ba1" -dependencies = [ - "libmimalloc-sys", -] - [[package]] name = "minimap2" version = "0.1.11" @@ -841,7 +822,6 @@ version = "0.1.1" dependencies = [ "crossbeam", "fffx", - "mimalloc", "minimap2", "minimap2-sys", "polars", diff --git a/minimappers2/src/lib.rs b/minimappers2/src/lib.rs index 856c470..e05e025 100644 --- a/minimappers2/src/lib.rs +++ b/minimappers2/src/lib.rs @@ -1,14 +1,14 @@ use std::num::NonZeroI32; -use std::sync::{Mutex, Arc}; +use std::sync::{Arc, Mutex}; // use mimalloc::MiMalloc; +use crossbeam::queue::ArrayQueue; +use fffx::{Fasta, Fastq}; use minimap2::*; use minimap2_sys::{mm_set_opt, MM_F_CIGAR}; +use polars::{df, prelude::*}; use pyo3::prelude::*; use pyo3_polars::{error::PyPolarsErr, PyDataFrame}; -use polars::{prelude::*, df}; -use crossbeam::queue::ArrayQueue; -use fffx::{Fasta, Fastq}; // #[global_allocator] // static GLOBAL: MiMalloc = MiMalloc; @@ -47,13 +47,15 @@ unsafe impl Send for Aligner {} #[pymethods] impl Aligner { - // Mapping functions /// Map a single sequence fn map1(&self, seq: &Sequence) -> PyResult { let mut mappings = Mappings::default(); - let results = self.aligner.map(&seq.sequence, true, true, None, None).unwrap(); + let results = self + .aligner + .map(&seq.sequence, true, true, None, None) + .unwrap(); results.into_iter().for_each(|mut r| { r.query_name = Some(seq.id.clone()); mappings.push(r) @@ -73,7 +75,10 @@ impl Aligner { let mut mappings = Mappings::default(); for seq in seqs { - let results = self.aligner.map(&seq.sequence, true, true, None, None).unwrap(); + let results = self + .aligner + .map(&seq.sequence, true, true, None, None) + .unwrap(); results.into_iter().for_each(|mut r| { r.query_name = Some(seq.id.clone()); mappings.push(r) @@ -86,7 +91,7 @@ impl Aligner { let work_queue = Arc::new(Mutex::new(seqs)); let results_queue = Arc::new(ArrayQueue::>>::new(128)); let mut thread_handles = Vec::new(); - for i in 0..(self.aligner.threads-1) { + for i in 0..(self.aligner.threads - 1) { let work_queue = Arc::clone(&work_queue); let results_queue = Arc::clone(&results_queue); @@ -127,13 +132,13 @@ impl Aligner { match result { Some(WorkQueue::Work(result)) => { result.into_iter().for_each(|r| mappings.push(r)); - }, + } Some(WorkQueue::Done) => { finished_count += 1; if finished_count == (self.aligner.threads - 1) { break; } - }, + } None => { // Probably should be backoff, but let's try this for now... std::thread::sleep(std::time::Duration::from_millis(100)); @@ -203,7 +208,7 @@ impl Aligner { self.preset(Preset::AvaPb); } - /// Configure aligner for Asm + /// Configure aligner for Asm fn asm(&mut self) { self.preset(Preset::Asm); } @@ -245,26 +250,26 @@ impl Aligner { } impl Aligner { - /// Create an aligner using a preset. - fn preset(&mut self, preset: Preset) { - let mut idxopt = IdxOpt::default(); - let mut mapopt = MapOpt::default(); - - unsafe { - // Set preset - mm_set_opt(preset.into(), &mut idxopt, &mut mapopt) - }; - - self.aligner.idxopt = idxopt; - self.aligner.mapopt = mapopt; - } + /// Create an aligner using a preset. + fn preset(&mut self, preset: Preset) { + let mut idxopt = IdxOpt::default(); + let mut mapopt = MapOpt::default(); + + unsafe { + // Set preset + mm_set_opt(preset.into(), &mut idxopt, &mut mapopt) + }; + + self.aligner.idxopt = idxopt; + self.aligner.mapopt = mapopt; + } } /* TODO - Destroy index when aligner is dropped or when new index is created impl Drop for Aligner { fn drop(&mut self) { - + } } */ @@ -431,66 +436,77 @@ impl Mappings { } pub fn to_df(self) -> Result { - // Convert strand to string + or - let strand: Vec = self.strand.iter().map(|x| x.to_string()).collect(); // Convert query len to Option // let query_len: Vec> = self.query_len.iter().map(|x| x.map(|y| y as u32.into())).collect(); - let query_len: Vec> = self.query_len.iter().map(|x| - match x { + let query_len: Vec> = self + .query_len + .iter() + .map(|x| match x { Some(y) => Some(y.get() as u32), None => None, - } - ).collect(); + }) + .collect(); - let nm: Vec> = self.alignment.iter().map(|x| - match x { + let nm: Vec> = self + .alignment + .iter() + .map(|x| match x { // These are ugly but it's early in the morning... Some(y) => Some(y.nm), None => None, - } - ).collect(); + }) + .collect(); - let cigar: Vec>> = self.alignment.iter().map(|x| - match x { + let cigar: Vec>> = self + .alignment + .iter() + .map(|x| match x { Some(y) => match &y.cigar { Some(z) => Some(z.clone()), None => None, }, None => None, - } - ).collect(); + }) + .collect(); - let cigar_str: Vec> = self.alignment.iter().map(|x| - match x { + let cigar_str: Vec> = self + .alignment + .iter() + .map(|x| match x { Some(y) => match &y.cigar_str { Some(z) => Some(z.clone()), None => None, }, None => None, - } - ).collect(); + }) + .collect(); - let md: Vec> = self.alignment.iter().map(|x| - match x { + let md: Vec> = self + .alignment + .iter() + .map(|x| match x { Some(y) => match &y.md { Some(z) => Some(z.clone()), None => None, }, None => None, - } - ).collect(); + }) + .collect(); - let cs: Vec> = self.alignment.iter().map(|x| - match x { + let cs: Vec> = self + .alignment + .iter() + .map(|x| match x { Some(y) => match &y.cs { Some(z) => Some(z.clone()), None => None, }, None => None, - } - ).collect(); + }) + .collect(); let query_name = Series::new("query_name", self.query_name); let query_len = Series::new("query_len", query_len); @@ -533,14 +549,3 @@ impl Mappings { ]) } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} diff --git a/minimappers2/src/multithreading.rs b/minimappers2/src/multithreading.rs index 7c592c0..009c003 100644 --- a/minimappers2/src/multithreading.rs +++ b/minimappers2/src/multithreading.rs @@ -3,4 +3,4 @@ pub enum WorkQueue { Work(T), Done, -} \ No newline at end of file +}