diff --git a/imspy/imspy/spectrum.py b/imspy/imspy/spectrum.py index 80baa757..12ad7fb4 100644 --- a/imspy/imspy/spectrum.py +++ b/imspy/imspy/spectrum.py @@ -64,6 +64,22 @@ def intensity(self) -> NDArray[np.float64]: """ return self.__spec_ptr.intensity + def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: float = 0.0, + intensity_max: float = 1e9) -> 'IndexedMzSpectrum': + """Filter the spectrum for a given m/z range and intensity range. + + Args: + mz_min (float): Minimum m/z value. + mz_max (float): Maximum m/z value. + intensity_min (float, optional): Minimum intensity value. Defaults to 0.0. + intensity_max (float, optional): Maximum intensity value. Defaults to 1e9. + + Returns: + IndexedMzSpectrum: Filtered spectrum. + """ + return IndexedMzSpectrum.from_py_indexed_mz_spectrum( + self.__spec_ptr.filter_ranged(mz_min, mz_max, intensity_min, intensity_max)) + @property def df(self) -> pd.DataFrame: """Data. diff --git a/imspy/pyproject.toml b/imspy/pyproject.toml index e33a348a..bf8889d1 100644 --- a/imspy/pyproject.toml +++ b/imspy/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "imspy" -version = "0.2.2" +version = "0.2.5" description = "" authors = ["theGreatHerrLebert "] readme = "README.md" @@ -11,7 +11,7 @@ pandas = ">=2.1" numpy = ">=1.21" tensorflow = ">=2.14" tensorflow-probability = ">=0.22.1" -imspy-connector = ">=0.2.0" +imspy-connector = ">=0.2.5" [build-system] requires = ["poetry-core"] diff --git a/imspy_connector/Cargo.toml b/imspy_connector/Cargo.toml index 83818278..4cf27a34 100644 --- a/imspy_connector/Cargo.toml +++ b/imspy_connector/Cargo.toml @@ -1,6 +1,6 @@ [package] -name = "imspy_connector" -version = "0.2.2" +name = "imspy-connector" +version = "0.2.5" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/imspy_connector/src/lib.rs b/imspy_connector/src/lib.rs index f27f65fa..9e2c8957 100644 --- a/imspy_connector/src/lib.rs +++ b/imspy_connector/src/lib.rs @@ -2,11 +2,12 @@ mod py_handle; mod py_mz_spectrum; mod py_tims_frame; mod py_tims_slice; +mod py_timstof_dda; use pyo3::prelude::*; use crate::py_handle::PyTimsDataHandle; use crate::py_mz_spectrum::{PyMzSpectrum, PyIndexedMzSpectrum, PyTimsSpectrum, PyMzSpectrumVectorized}; -use crate::py_tims_frame::{PyTimsFrame, PyTimsFrameVectorized}; +use crate::py_tims_frame::{PyTimsFrame, PyTimsFrameVectorized, PyRawTimsFrame}; use crate::py_tims_slice::{PyTimsPlane, PyTimsSlice, PyTimsSliceVectorized}; #[pymodule] @@ -17,6 +18,7 @@ fn imspy_connector(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; diff --git a/imspy_connector/src/py_mz_spectrum.rs b/imspy_connector/src/py_mz_spectrum.rs index 8a53c75e..46e01577 100644 --- a/imspy_connector/src/py_mz_spectrum.rs +++ b/imspy_connector/src/py_mz_spectrum.rs @@ -154,6 +154,14 @@ impl PyIndexedMzSpectrum { pub fn intensity(&self, py: Python) -> Py> { self.inner.mz_spectrum.intensity.clone().into_pyarray(py).to_owned() } + + pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min: f64, intensity_max: f64) -> PyResult { + let filtered = self.inner.filter_ranged(mz_min, mz_max, intensity_min, intensity_max); + let py_filtered = PyIndexedMzSpectrum { + inner: filtered, + }; + Ok(py_filtered) + } } #[pyclass] diff --git a/imspy_connector/src/py_tims_frame.rs b/imspy_connector/src/py_tims_frame.rs index a5f391c5..6e0c593f 100644 --- a/imspy_connector/src/py_tims_frame.rs +++ b/imspy_connector/src/py_tims_frame.rs @@ -1,10 +1,72 @@ use pyo3::prelude::*; use numpy::{PyArray1, IntoPyArray}; -use mscore::{TimsFrame, ImsFrame, MsType, TimsFrameVectorized, ImsFrameVectorized, ToResolution, Vectorized}; +use mscore::{TimsFrame, ImsFrame, MsType, TimsFrameVectorized, ImsFrameVectorized, ToResolution, Vectorized, RawTimsFrame}; use pyo3::types::PyList; use crate::py_mz_spectrum::{PyIndexedMzSpectrum, PyMzSpectrum, PyTimsSpectrum}; +#[pyclass] +#[derive(Clone)] +pub struct PyRawTimsFrame { + pub inner: RawTimsFrame, +} + +#[pymethods] +impl PyRawTimsFrame { + #[new] + pub unsafe fn new(frame_id: i32, + ms_type: i32, + retention_time: f64, + scan: &PyArray1, + tof: &PyArray1, + intensity: &PyArray1) -> PyResult { + Ok(PyRawTimsFrame { + inner: RawTimsFrame { + frame_id, + retention_time, + ms_type: MsType::new(ms_type), + scan: scan.as_slice()?.to_vec(), + tof: tof.as_slice()?.to_vec(), + intensity: intensity.as_slice()?.to_vec(), + }, + }) + } + + #[getter] + pub fn intensity(&self, py: Python) -> Py> { + self.inner.intensity.clone().into_pyarray(py).to_owned() + } + #[getter] + pub fn scan(&self, py: Python) -> Py> { + self.inner.scan.clone().into_pyarray(py).to_owned() + } + + #[getter] + pub fn tof(&self, py: Python) -> Py> { + self.inner.tof.clone().into_pyarray(py).to_owned() + } + + #[getter] + pub fn frame_id(&self) -> i32 { + self.inner.frame_id + } + + #[getter] + pub fn ms_type_numeric(&self) -> i32 { + self.inner.ms_type.ms_type_numeric() + } + + #[getter] + pub fn ms_type(&self) -> String { + self.inner.ms_type.to_string() + } + + #[getter] + pub fn retention_time(&self) -> f64 { + self.inner.retention_time + } +} + #[pyclass] #[derive(Clone)] pub struct PyTimsFrame { diff --git a/imspy_connector/src/py_timstof_dda.rs b/imspy_connector/src/py_timstof_dda.rs new file mode 100644 index 00000000..55675181 --- /dev/null +++ b/imspy_connector/src/py_timstof_dda.rs @@ -0,0 +1,61 @@ +use pyo3::prelude::*; +use mscore::{ TimsDDAPrecursor }; + +#[pyclass] +#[derive(Clone)] +pub struct PyTimsDDAPrecursor { + pub inner: TimsDDAPrecursor, +} + +#[pymethods] +impl PyTimsDDAPrecursor { + #[new] + pub fn new( + _py: Python, + id: u32, + parent_id: u32, + scan_num: u32, + mz_average: f32, + mz_most_intense: f32, + intensity: f32, + mz_mono_isotopic: Option, + charge: Option, + ) -> PyResult { + let tims_dda_precursor = TimsDDAPrecursor { + id, + parent_id, + scan_num, + mz_average, + mz_most_intense, + intensity, + mz_mono_isotopic, + charge, + }; + + Ok(PyTimsDDAPrecursor { inner: tims_dda_precursor }) + } + + #[getter] + pub fn id(&self) -> u32 { self.inner.id } + + #[getter] + pub fn parent_id(&self) -> u32 { self.inner.parent_id } + + #[getter] + pub fn scan_num(&self) -> u32 { self.inner.scan_num } + + #[getter] + pub fn mz_average(&self) -> f32 { self.inner.mz_average } + + #[getter] + pub fn mz_most_intense(&self) -> f32 { self.inner.mz_most_intense } + + #[getter] + pub fn intensity(&self) -> f32 { self.inner.intensity } + + #[getter] + pub fn mz_mono_isotopic(&self) -> Option { self.inner.mz_mono_isotopic } + + #[getter] + pub fn charge(&self) -> Option { self.inner.charge } +} \ No newline at end of file diff --git a/mscore/src/chemistry.rs b/mscore/src/chemistry.rs new file mode 100644 index 00000000..3c737fb3 --- /dev/null +++ b/mscore/src/chemistry.rs @@ -0,0 +1,72 @@ +/// convert 1 over reduced ion mobility (1/k0) to CCS +/// +/// Arguments: +/// +/// * `one_over_k0` - 1 over reduced ion mobility (1/k0) +/// * `charge` - charge state of the ion +/// * `mz` - mass-over-charge of the ion +/// * `mass_gas` - mass of drift gas (N2) +/// * `temp` - temperature of the drift gas in C° +/// * `t_diff` - factor to translate from C° to K +/// +/// Returns: +/// +/// * `ccs` - collision cross-section +/// +/// # Examples +/// +/// ``` +/// use mscore::one_over_reduced_mobility_to_ccs; +/// +/// let ccs = one_over_reduced_mobility_to_ccs(0.5, 1000.0, 2, 28.013, 31.85, 273.15); +/// assert_eq!(ccs, 806.5918693771381); +/// ``` +pub fn one_over_reduced_mobility_to_ccs( + one_over_k0: f64, + mz: f64, + charge: u32, + mass_gas: f64, + temp: f64, + t_diff: f64, +) -> f64 { + let summary_constant = 18509.8632163405; + let reduced_mass = (mz * charge as f64 * mass_gas) / (mz * charge as f64 + mass_gas); + summary_constant * charge as f64 / (reduced_mass * (temp + t_diff)).sqrt() / one_over_k0 +} + + +/// convert CCS to 1 over reduced ion mobility (1/k0) +/// +/// Arguments: +/// +/// * `ccs` - collision cross-section +/// * `charge` - charge state of the ion +/// * `mz` - mass-over-charge of the ion +/// * `mass_gas` - mass of drift gas (N2) +/// * `temp` - temperature of the drift gas in C° +/// * `t_diff` - factor to translate from C° to K +/// +/// Returns: +/// +/// * `one_over_k0` - 1 over reduced ion mobility (1/k0) +/// +/// # Examples +/// +/// ``` +/// use mscore::ccs_to_reduced_mobility; +/// +/// let k0 = ccs_to_reduced_mobility(806.5918693771381, 1000.0, 2, 28.013, 31.85, 273.15); +/// assert_eq!(1.0 / k0, 0.5); +/// ``` +pub fn ccs_to_reduced_mobility( + ccs: f64, + mz: f64, + charge: u32, + mass_gas: f64, + temp: f64, + t_diff: f64, +) -> f64 { + let summary_constant = 18509.8632163405; + let reduced_mass = (mz * charge as f64 * mass_gas) / (mz * charge as f64 + mass_gas); + ((reduced_mass * (temp + t_diff)).sqrt() * ccs) / (summary_constant * charge as f64) +} diff --git a/mscore/src/lib.rs b/mscore/src/lib.rs index 05641038..a497cb7a 100644 --- a/mscore/src/lib.rs +++ b/mscore/src/lib.rs @@ -1,8 +1,14 @@ pub mod mz_spectrum; mod tims_frame; mod tims_slice; +mod timstof_dda; +mod chemistry; pub use { + + chemistry::one_over_reduced_mobility_to_ccs, + chemistry::ccs_to_reduced_mobility, + mz_spectrum::MsType, mz_spectrum::MzSpectrum, @@ -17,6 +23,7 @@ pub use { tims_frame::ImsFrame, tims_frame::ImsFrameVectorized, + tims_frame::RawTimsFrame, tims_frame::TimsFrame, tims_frame::TimsFrameVectorized, tims_frame::ToResolution, @@ -28,4 +35,6 @@ pub use { tims_slice::TimsSliceFlat, tims_slice::TimsPlane, + + timstof_dda::TimsDDAPrecursor, }; diff --git a/mscore/src/mz_spectrum.rs b/mscore/src/mz_spectrum.rs index 8d0aa497..816b5f5c 100644 --- a/mscore/src/mz_spectrum.rs +++ b/mscore/src/mz_spectrum.rs @@ -398,6 +398,21 @@ impl IndexedMzSpectrum { } } } + + pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min:f64, intensity_max: f64) -> Self { + let mut mz_vec: Vec = Vec::new(); + let mut intensity_vec: Vec = Vec::new(); + let mut index_vec: Vec = Vec::new(); + + for ((&mz, &intensity), &index) in self.mz_spectrum.mz.iter().zip(self.mz_spectrum.intensity.iter()).zip(self.index.iter()) { + if mz_min <= mz && mz <= mz_max && intensity >= intensity_min && intensity <= intensity_max { + mz_vec.push(mz); + intensity_vec.push(intensity); + index_vec.push(index); + } + } + IndexedMzSpectrum { index: index_vec, mz_spectrum: MzSpectrum { mz: mz_vec, intensity: intensity_vec } } + } } impl Display for IndexedMzSpectrum { diff --git a/mscore/src/tims_frame.rs b/mscore/src/tims_frame.rs index a93a5105..1f1d7ad2 100644 --- a/mscore/src/tims_frame.rs +++ b/mscore/src/tims_frame.rs @@ -13,6 +13,16 @@ pub trait Vectorized { fn vectorized(&self, resolution: i32) -> T; } +#[derive(Clone)] +pub struct RawTimsFrame { + pub frame_id: i32, + pub retention_time: f64, + pub ms_type: MsType, + pub scan: Vec, + pub tof: Vec, + pub intensity: Vec, +} + #[derive(Clone)] pub struct ImsFrame { pub retention_time: f64, diff --git a/mscore/src/timstof_dda.rs b/mscore/src/timstof_dda.rs new file mode 100644 index 00000000..451f3b58 --- /dev/null +++ b/mscore/src/timstof_dda.rs @@ -0,0 +1,11 @@ +#[derive(Clone)] +pub struct TimsDDAPrecursor { + pub id: u32, + pub parent_id: u32, + pub scan_num: u32, + pub mz_average: f32, + pub mz_most_intense: f32, + pub intensity: f32, + pub mz_mono_isotopic: Option, + pub charge: Option, +} \ No newline at end of file diff --git a/rustdf/Cargo.toml b/rustdf/Cargo.toml index 70ae3b65..975ed72a 100644 --- a/rustdf/Cargo.toml +++ b/rustdf/Cargo.toml @@ -9,10 +9,10 @@ path = "src/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -libloading = "0.8" +libloading = "0.8.1" rusqlite = "0.29.0" lzf = "1.0.0" -zstd = "0.12.4" +zstd = "0.13.0" byteorder = "1.4.3" anyhow = "1.0.75" mscore = {path = "../mscore"} \ No newline at end of file diff --git a/rustdf/src/data/handle.rs b/rustdf/src/data/handle.rs index 67e5f35f..bb979fbb 100644 --- a/rustdf/src/data/handle.rs +++ b/rustdf/src/data/handle.rs @@ -8,7 +8,7 @@ use std::fs::File; use std::io::{Seek, SeekFrom, Cursor}; use byteorder::{LittleEndian, ByteOrder, ReadBytesExt}; -use mscore::{TimsFrame, ImsFrame, MsType, TimsSlice}; +use mscore::{TimsFrame, RawTimsFrame, ImsFrame, MsType, TimsSlice}; /// Decompresses a ZSTD compressed byte array /// @@ -269,6 +269,65 @@ impl TimsDataHandle { .into_iter()).collect() } + pub fn get_raw_frame(&self, frame_id: u32) -> Result> { + + let frame_index = (frame_id - 1) as usize; + let offset = self.tims_offset_values[frame_index] as u64; + + let mut file_path = PathBuf::from(&self.data_path); + file_path.push("analysis.tdf_bin"); + let mut infile = File::open(&file_path)?; + + infile.seek(SeekFrom::Start(offset))?; + + let mut bin_buffer = [0u8; 4]; + infile.read_exact(&mut bin_buffer)?; + let bin_size = Cursor::new(bin_buffer).read_i32::()?; + + infile.read_exact(&mut bin_buffer)?; + + match self.global_meta_data.tims_compression_type { + // TODO: implement + _ if self.global_meta_data.tims_compression_type == 1 => { + return Err("Decompression Type1 not implemented.".into()); + }, + + // Extract from ZSTD compressed binary + _ if self.global_meta_data.tims_compression_type == 2 => { + + let mut compressed_data = vec![0u8; bin_size as usize - 8]; + infile.read_exact(&mut compressed_data)?; + + let decompressed_bytes = zstd_decompress(&compressed_data)?; + + let (scan, tof, intensity) = parse_decompressed_bruker_binary_data(&decompressed_bytes)?; + + let ms_type_raw = self.frame_meta_data[frame_index].ms_ms_type; + + let ms_type = match ms_type_raw { + 0 => MsType::Precursor, + 8 => MsType::FragmentDda, + 9 => MsType::FragmentDia, + _ => MsType::Unknown, + }; + + Ok(RawTimsFrame { + frame_id: frame_id as i32, + retention_time: self.frame_meta_data[(frame_id - 1) as usize].time, + ms_type, + scan: scan.iter().map(|&x| x as i32).collect(), + tof: tof.iter().map(|&x| x as i32).collect(), + intensity: intensity.iter().map(|&x| x as f64).collect(), + }) + }, + + // Error on unknown compression algorithm + _ => { + return Err("TimsCompressionType is not 1 or 2.".into()); + } + } + } + /// get a frame from the tims dataset /// /// # Arguments diff --git a/rustdf/src/data/meta.rs b/rustdf/src/data/meta.rs index 55f1a218..eb407bc1 100644 --- a/rustdf/src/data/meta.rs +++ b/rustdf/src/data/meta.rs @@ -3,12 +3,36 @@ extern crate rusqlite; use rusqlite::{Connection, Result}; use std::path::Path; +#[derive(Debug, Clone)] +pub struct PasefMsMsMeta { + pub frame_id: i64, + pub scan_num_begin: i64, + pub scan_num_end: i64, + pub isolation_mz: f64, + pub isolation_width: f64, + pub collision_energy: f64, + pub precursor_id: i64, +} + +#[derive(Debug, Clone)] +pub struct DDAPrecursorMeta { + pub precursor_id: i64, + pub precursor_mz_highest_intensity: f64, + pub precursor_mz_average: f64, + pub precursor_mz_monoisotopic: Option, + pub precursor_charge: Option, + pub precursor_average_scan_number: f64, + pub precursor_total_intensity: f64, + pub precursor_frame_id: i64, +} + +#[derive(Debug, Clone)] pub struct DDAPrecursorInfo { pub precursor_id: i64, pub precursor_mz_highest_intensity: f64, pub precursor_mz_average: f64, - pub precursor_mz_monoisotopic: f64, - pub precursor_charge: i64, + pub precursor_mz_monoisotopic: Option, + pub precursor_charge: Option, pub precursor_average_scan_number: f64, pub precursor_total_intensity: f64, pub precursor_frame_id: i64, @@ -75,6 +99,58 @@ struct GlobalMetaInternal { value: String, } +pub fn read_dda_precursor_info(bruker_d_folder_name: &str) -> Result, Box> { + // Connect to the database + let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf"); + let conn = Connection::open(db_path)?; + + // prepare the query + let rows: Vec<&str> = vec!["Id", "LargestPeakMz", "AverageMz", "MonoisotopicMz", "Charge", "ScanNumber", "Intensity", "Parent"]; + let query = format!("SELECT {} FROM Precursors", rows.join(", ")); + + // execute the query + let frames_rows: Result, _> = conn.prepare(&query)?.query_map([], |row| { + Ok(DDAPrecursorInfo { + precursor_id: row.get(0)?, + precursor_mz_highest_intensity: row.get(1)?, + precursor_mz_average: row.get(2)?, + precursor_mz_monoisotopic: row.get(3)?, // Now using Option + precursor_charge: row.get(4)?, // Now using Option + precursor_average_scan_number: row.get(5)?, + precursor_total_intensity: row.get(6)?, + precursor_frame_id: row.get(7)?, + }) + })?.collect(); + + // return the frames + Ok(frames_rows?) +} + +pub fn read_pasef_frame_ms_ms_info(bruker_d_folder_name: &str) -> Result, Box> { + // Connect to the database + let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf"); + let conn = Connection::open(db_path)?; + + // prepare the query + let rows: Vec<&str> = vec!["Frame", "ScanNumBegin", "ScanNumEnd", "IsolationMz", "IsolationWidth", "CollisionEnergy", "Precursor"]; + let query = format!("SELECT {} FROM PasefFrameMsMsInfo", rows.join(", ")); + + // execute the query + let frames_rows: Result, _> = conn.prepare(&query)?.query_map([], |row| { + Ok(PasefMsMsMeta { + frame_id: row.get(0)?, + scan_num_begin: row.get(1)?, + scan_num_end: row.get(2)?, + isolation_mz: row.get(3)?, + isolation_width: row.get(4)?, + collision_energy: row.get(5)?, + precursor_id: row.get(6)?, }) + })?.collect(); + + // return the frames + Ok(frames_rows?) +} + // Read the global meta data from the analysis.tdf file pub fn read_global_meta_sql(bruker_d_folder_name: &str) -> Result> { diff --git a/rustdf/src/main.rs b/rustdf/src/main.rs index 97c3914c..3b349776 100644 --- a/rustdf/src/main.rs +++ b/rustdf/src/main.rs @@ -1,33 +1,21 @@ -use rustdf::data::handle::TimsDataHandle; use std::env; +use rustdf::data::meta::{read_dda_precursor_info}; -fn main() { - let args: Vec = env::args().collect(); - // args[0] is always the path to the program itself - if args.len() <= 1 { - eprintln!("Please provide a frame id."); - return; - } +fn main() { + let _args: Vec = env::args().collect(); - let frame_id: u32 = match args[1].parse() { - Ok(id) => id, - Err(_) => { - eprintln!("Invalid frame id provided."); - return; - } - }; + let data_path = "/media/hd01/CCSPred/M210115_001_Slot1-1_1_850.d"; - println!("Frame ID: {}", frame_id); + let result = read_dda_precursor_info(data_path); - let data_path = "/media/hd01/CCSPred/M210115_001_Slot1-1_1_850.d"; - let bruker_lib_path = "/home/administrator/Documents/promotion/ENV/lib/python3.8/site-packages/opentims_bruker_bridge/libtimsdata.so"; - let tims_data = TimsDataHandle::new(bruker_lib_path, data_path); - match tims_data { - Ok(tims_data) => { - let _frame = tims_data.get_frame(frame_id); + match result { + Ok(precursors) => { + println!("Precursors: {:?}", precursors); + }, + Err(e) => { + println!("Error: {:?}", e); } + } - Err(e) => println!("error: {}", e), - }; }