From fd485e197f6182e9522bab142b372d916f6ad9cf Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 11:23:14 +0100 Subject: [PATCH 01/29] adding precursor load --- imspy/imspy/timstof/dda.py | 72 ++++++++++++++++++++++++++++++++++ imspy_connector/pyproject.toml | 1 + imspy_connector/src/py_dda.rs | 59 ++++++++++++++++++++++++++++ mscore/src/timstof/frame.rs | 14 +++++++ rustdf/src/data/dda.rs | 16 ++++++++ 5 files changed, 162 insertions(+) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 5910a49a..135d3337 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -1,4 +1,6 @@ import sqlite3 +from typing import List + import pandas as pd from imspy.simulation.annotation import RustWrapperObject @@ -10,6 +12,62 @@ import warnings +class PrecursorDDA(RustWrapperObject): + def __init__(self, precursor_id: int, precursor_mz_highest_intensity: float, precursor_mz_average: float, + precursor_mz_monoisotopic: float, precursor_average_scan_number: int, precursor_total_intensity: float, + precursor_frame_id: int, precursor_charge: int): + self._precursor_ptr = ims.PyDDAPrecursorMeta(precursor_id, precursor_mz_highest_intensity, precursor_mz_average, + precursor_mz_monoisotopic, precursor_average_scan_number, + precursor_total_intensity, precursor_frame_id, precursor_charge) + + @classmethod + def from_py_ptr(cls, precursor: ims.PyDDAPrecursorMeta): + instance = cls.__new__(cls) + instance._precursor_ptr = precursor + return instance + + @property + def precursor_id(self) -> int: + return self._precursor_ptr.precursor_id + + @property + def precursor_mz_highest_intensity(self) -> float: + return self._precursor_ptr.precursor_mz_highest_intensity + + @property + def precursor_mz_average(self) -> float: + return self._precursor_ptr.precursor_mz_average + + @property + def precursor_mz_monoisotopic(self) -> float: + return self._precursor_ptr.precursor_mz_monoisotopic + + @property + def precursor_charge(self) -> int: + return self._precursor_ptr.precursor_charge + + @property + def precursor_average_scan_number(self) -> int: + return self._precursor_ptr.precursor_average_scan_number + + @property + def precursor_total_intensity(self) -> float: + return self._precursor_ptr.precursor_total_intensity + + @property + def precursor_frame_id(self) -> int: + return self._precursor_ptr.precursor_frame_id + + def get_py_ptr(self): + return self._precursor_ptr + + def __repr__(self): + return f"PrecursorDDA(precursor_id={self.precursor_id}, precursor_mz_highest_intensity={self.precursor_mz_highest_intensity}, " \ + f"precursor_mz_average={self.precursor_mz_average}, precursor_mz_monoisotopic={self.precursor_mz_monoisotopic}, " \ + f"precursor_charge={self.precursor_charge}, precursor_average_scan_number={self.precursor_average_scan_number}, " \ + f"precursor_total_intensity={self.precursor_total_intensity}, precursor_frame_id={self.precursor_frame_id})" + + class TimsDatasetDDA(TimsDataset, RustWrapperObject): def __init__(self, data_path: str, in_memory: bool = False, use_bruker_sdk: bool = True): @@ -100,6 +158,20 @@ def get_pasef_fragments(self, num_threads: int = 1) -> pd.DataFrame: return pd.merge(time, B, left_on=['frame_id'], right_on=['frame_id'], how='inner') + def get_precursor_frames(self, min_intensity: float, max_peaks: int, num_threads: int) -> List[TimsFrame]: + """ + Get precursor frames. + Args: + min_intensity: minimum intensity a peak must have to be considered + max_peaks: maximum number of peaks to consider, frames will be sorted by intensity and only the top max_peaks will be considered + num_threads: number of threads to use for processing + + Returns: + List[TimsFrame]: List of all precursor frames + """ + precursor_frames = [TimsFrame.from_py_ptr(frame) for frame in self.__dataset.get_precursor_frames(min_intensity, max_peaks, num_threads)] + return precursor_frames + def __repr__(self): return (f"TimsDatasetDDA(data_path={self.data_path}, num_frames={self.frame_count}, " f"fragmented_precursors={self.fragmented_precursors.shape[0]})") diff --git a/imspy_connector/pyproject.toml b/imspy_connector/pyproject.toml index 20f27caa..932b6e29 100644 --- a/imspy_connector/pyproject.toml +++ b/imspy_connector/pyproject.toml @@ -7,6 +7,7 @@ name = "imspy_connector" dependencies = [ "opentims-bruker-bridge>=1.1.0", ] +version = "0.3.7" requires-python = ">=3.11" classifiers = [ "Programming Language :: Rust", diff --git a/imspy_connector/src/py_dda.rs b/imspy_connector/src/py_dda.rs index 19d2c356..caba56c7 100644 --- a/imspy_connector/src/py_dda.rs +++ b/imspy_connector/src/py_dda.rs @@ -2,9 +2,58 @@ use pyo3::prelude::*; use rustdf::data::dda::{PASEFDDAFragment, TimsDatasetDDA}; use rustdf::data::handle::TimsData; +use rustdf::data::meta::DDAPrecursorMeta; use crate::py_tims_frame::PyTimsFrame; use crate::py_tims_slice::PyTimsSlice; +#[pyclass] +pub struct PyDDAPrecursorMeta { + inner: DDAPrecursorMeta, +} + +#[pymethods] +impl PyDDAPrecursorMeta { + #[new] + #[pyo3(signature = (precursor_id, precursor_mz_highest_intensity, precursor_mz_average, precursor_mz_monoisotopic, precursor_average_scan_number, precursor_total_intensity, precursor_frame_id, precursor_charge=None))] + pub fn new(precursor_id: i64, precursor_mz_highest_intensity: f64, precursor_mz_average: f64, precursor_mz_monoisotopic: Option, precursor_average_scan_number: f64, precursor_total_intensity: f64, precursor_frame_id: i64, precursor_charge: Option) -> Self { + let precursor_meta = DDAPrecursorMeta { + precursor_id, + precursor_mz_highest_intensity, + precursor_mz_average, + precursor_mz_monoisotopic, + precursor_charge, + precursor_average_scan_number, + precursor_total_intensity, + precursor_frame_id, + }; + PyDDAPrecursorMeta { inner: precursor_meta } + } + + #[getter] + pub fn precursor_id(&self) -> i64 { self.inner.precursor_id } + + #[getter] + pub fn precursor_mz_highest_intensity(&self) -> f64 { self.inner.precursor_mz_highest_intensity } + + #[getter] + pub fn precursor_mz_average(&self) -> f64 { self.inner.precursor_mz_average } + + #[getter] + pub fn precursor_mz_monoisotopic(&self) -> Option { self.inner.precursor_mz_monoisotopic } + + #[getter] + pub fn precursor_charge(&self) -> Option { self.inner.precursor_charge } + + #[getter] + pub fn precursor_average_scan_number(&self) -> f64 { self.inner.precursor_average_scan_number } + + #[getter] + pub fn precursor_total_intensity(&self) -> f64 { self.inner.precursor_total_intensity } + + #[getter] + pub fn precursor_frame_id(&self) -> i64 { self.inner.precursor_frame_id } +} + #[pyclass] pub struct PyTimsDatasetDDA { inner: TimsDatasetDDA, @@ -41,6 +90,16 @@ impl PyTimsDatasetDDA { let pasef_fragments = self.inner.get_pasef_fragments(num_threads); pasef_fragments.iter().map(|pasef_fragment| PyTimsFragmentDDA { inner: pasef_fragment.clone() }).collect() } + + pub fn get_selected_precursors(&self) -> Vec { + let pasef_precursor_meta = self.inner.get_selected_precursors(); + pasef_precursor_meta.iter().map(|precursor_meta| PyDDAPrecursorMeta { inner: precursor_meta.clone() }).collect() + } + + pub fn get_precursor_frames(&self, min_intensity: f64, max_peaks: usize, num_threads: usize) -> Vec { + let precursor_frames = self.inner.get_precursor_frames(min_intensity, max_peaks, num_threads); + precursor_frames.iter().map(|frame| PyTimsFrame { inner: frame.clone() }).collect() + } } #[pyclass] diff --git a/mscore/src/timstof/frame.rs b/mscore/src/timstof/frame.rs index 46c9a6c5..7ce9f76c 100644 --- a/mscore/src/timstof/frame.rs +++ b/mscore/src/timstof/frame.rs @@ -248,6 +248,20 @@ impl TimsFrame { TimsFrame::new(self.frame_id, self.ms_type.clone(), self.ims_frame.retention_time, scan_vec, mobility_vec, tof_vec, mz_vec, intensity_vec) } + pub fn top_n(&self, n: usize) -> TimsFrame { + let mut indices: Vec = (0..self.ims_frame.intensity.len()).collect(); + indices.sort_by(|a, b| self.ims_frame.intensity[*b].partial_cmp(&self.ims_frame.intensity[*a]).unwrap()); + indices.truncate(n); + + let scan = indices.iter().map(|&i| self.scan[i]).collect(); + let mobility = indices.iter().map(|&i| self.ims_frame.mobility[i]).collect(); + let tof = indices.iter().map(|&i| self.tof[i]).collect(); + let mz = indices.iter().map(|&i| self.ims_frame.mz[i]).collect(); + let intensity = indices.iter().map(|&i| self.ims_frame.intensity[i]).collect(); + + TimsFrame::new(self.frame_id, self.ms_type.clone(), self.ims_frame.retention_time, scan, mobility, tof, mz, intensity) + } + pub fn to_windows_indexed(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> (Vec, Vec, Vec) { // split by scan (ion mobility) let spectra = self.to_tims_spectra(); diff --git a/rustdf/src/data/dda.rs b/rustdf/src/data/dda.rs index c84c1cd5..1cfd383f 100644 --- a/rustdf/src/data/dda.rs +++ b/rustdf/src/data/dda.rs @@ -45,6 +45,22 @@ impl TimsDatasetDDA { read_dda_precursor_meta(&self.loader.get_data_path()).unwrap() } + pub fn get_precursor_frames(&self, min_intensity: f64, max_num_peaks: usize, num_threads: usize) -> Vec { + // get all precursor frames + let meta_data = read_meta_data_sql(&self.loader.get_data_path()).unwrap(); + + // get the precursor frames + let precursor_frames = meta_data.iter().filter(|x| x.ms_ms_type == 0); + + let tims_silce = self.get_slice(precursor_frames.map(|x| x.id as u32).collect(), num_threads); + + let result: Vec<_> = tims_silce.frames.par_iter().map(|frame| { + frame.filter_ranged(0.0, 2000.0, 0, 2000, 0.0, 5.0, 1.0, min_intensity).top_n(max_num_peaks) + }).collect(); + + result + } + pub fn get_pasef_frame_ms_ms_info(&self) -> Vec { read_pasef_frame_ms_ms_info(&self.loader.get_data_path()).unwrap() } From 22cda86ce171dbd373d47ea0836df212866eac7c Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 11:52:32 +0100 Subject: [PATCH 02/29] adding PyPrecursor --- imspy_connector/src/py_dda.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/imspy_connector/src/py_dda.rs b/imspy_connector/src/py_dda.rs index caba56c7..db20e28f 100644 --- a/imspy_connector/src/py_dda.rs +++ b/imspy_connector/src/py_dda.rs @@ -139,5 +139,6 @@ impl PyTimsFragmentDDA { pub fn py_dda(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } \ No newline at end of file From 2a2b9246a1e27e5b6b60e4f14847963b39b3d544 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 11:56:04 +0100 Subject: [PATCH 03/29] adding precursor load --- imspy/imspy/timstof/dda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 135d3337..86820235 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -158,7 +158,7 @@ def get_pasef_fragments(self, num_threads: int = 1) -> pd.DataFrame: return pd.merge(time, B, left_on=['frame_id'], right_on=['frame_id'], how='inner') - def get_precursor_frames(self, min_intensity: float, max_peaks: int, num_threads: int) -> List[TimsFrame]: + def get_precursor_frames(self, min_intensity: float = 75, max_peaks: int = 500, num_threads: int = 4) -> List[TimsFrame]: """ Get precursor frames. Args: From 2d35c81bccbb38dac9ec2ec167dd503239fcab49 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 11:59:40 +0100 Subject: [PATCH 04/29] fixing argument order --- rustdf/src/data/dda.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rustdf/src/data/dda.rs b/rustdf/src/data/dda.rs index 1cfd383f..54a26ee8 100644 --- a/rustdf/src/data/dda.rs +++ b/rustdf/src/data/dda.rs @@ -55,7 +55,7 @@ impl TimsDatasetDDA { let tims_silce = self.get_slice(precursor_frames.map(|x| x.id as u32).collect(), num_threads); let result: Vec<_> = tims_silce.frames.par_iter().map(|frame| { - frame.filter_ranged(0.0, 2000.0, 0, 2000, 0.0, 5.0, 1.0, min_intensity).top_n(max_num_peaks) + frame.filter_ranged(0.0, 2000.0, 0, 2000, 0.0, 5.0, min_intensity, 1e9).top_n(max_num_peaks) }).collect(); result From b14ef222c5fe298709954803574e967f4ec2f2b8 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 12:08:22 +0100 Subject: [PATCH 05/29] adding class return --- imspy/imspy/timstof/dda.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 86820235..1cec6189 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -172,6 +172,14 @@ def get_precursor_frames(self, min_intensity: float = 75, max_peaks: int = 500, precursor_frames = [TimsFrame.from_py_ptr(frame) for frame in self.__dataset.get_precursor_frames(min_intensity, max_peaks, num_threads)] return precursor_frames + def get_selected_precursors_meta(self) -> List[PrecursorDDA]: + """ + Get meta data for all selected precursors + Returns: + List[PrecursorDDA]: List of all selected precursors + """ + return [PrecursorDDA.from_py_ptr(precursor) for precursor in self.__dataset.get_selected_precursors_meta()] + def __repr__(self): return (f"TimsDatasetDDA(data_path={self.data_path}, num_frames={self.frame_count}, " f"fragmented_precursors={self.fragmented_precursors.shape[0]})") From e876b2e2d0b1d98c9ae000b9d01595ea0388db10 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 12:09:24 +0100 Subject: [PATCH 06/29] adding PyPrecursor --- imspy/imspy/timstof/dda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 1cec6189..fc9e5b1a 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -178,7 +178,7 @@ def get_selected_precursors_meta(self) -> List[PrecursorDDA]: Returns: List[PrecursorDDA]: List of all selected precursors """ - return [PrecursorDDA.from_py_ptr(precursor) for precursor in self.__dataset.get_selected_precursors_meta()] + return [PrecursorDDA.from_py_ptr(precursor) for precursor in self.__dataset.get_selected_precursors()] def __repr__(self): return (f"TimsDatasetDDA(data_path={self.data_path}, num_frames={self.frame_count}, " From 154b61e9b8ebf48ae7ad8e1cb66f2f1fbc9ed279 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 12:15:34 +0100 Subject: [PATCH 07/29] adding to sage precursor --- imspy/imspy/timstof/dda.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index fc9e5b1a..1b250830 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -7,6 +7,8 @@ from imspy.timstof.data import TimsDataset from imspy.timstof.frame import TimsFrame +from sagepy.core import Precursor, Tolerance + import imspy_connector ims = imspy_connector.py_dda import warnings @@ -58,6 +60,16 @@ def precursor_total_intensity(self) -> float: def precursor_frame_id(self) -> int: return self._precursor_ptr.precursor_frame_id + def to_sage_precursor(self, isolation_window: Tolerance = Tolerance(-3.0, 3.0,)) -> Precursor: + return Precursor( + mz=self.precursor_mz_monoisotopic, + intensity=self.precursor_total_intensity, + charge=self.precursor_charge, + spectrum_ref=str(self.precursor_frame_id), + inverse_ion_mobility=self.precursor_average_scan_number, + isolation_window=isolation_window, + ) + def get_py_ptr(self): return self._precursor_ptr From 93f72a5225bee3058fce1535733c35c4b0aa8eb3 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 12:16:39 +0100 Subject: [PATCH 08/29] update --- imspy/imspy/timstof/dda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 1b250830..b76bdc25 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -60,7 +60,7 @@ def precursor_total_intensity(self) -> float: def precursor_frame_id(self) -> int: return self._precursor_ptr.precursor_frame_id - def to_sage_precursor(self, isolation_window: Tolerance = Tolerance(-3.0, 3.0,)) -> Precursor: + def to_sage_precursor(self, isolation_window: Tolerance = Tolerance(da=(-3.0, 3.0,))) -> Precursor: return Precursor( mz=self.precursor_mz_monoisotopic, intensity=self.precursor_total_intensity, From 46eaf12732776af13cf3ac0411d0d156b2ec44ce Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 12:56:58 +0100 Subject: [PATCH 09/29] testing --- imspy/imspy/timstof/dda.py | 43 +++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index b76bdc25..ef43c5ea 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -1,13 +1,17 @@ import sqlite3 from typing import List +import numpy as np import pandas as pd +from examples.imspy_dda import sage_precursor +from sagepy.core.spectrum import Peak + from imspy.simulation.annotation import RustWrapperObject from imspy.timstof.data import TimsDataset from imspy.timstof.frame import TimsFrame -from sagepy.core import Precursor, Tolerance +from sagepy.core import Precursor, Tolerance, ProcessedSpectrum import imspy_connector ims = imspy_connector.py_dda @@ -184,6 +188,43 @@ def get_precursor_frames(self, min_intensity: float = 75, max_peaks: int = 500, precursor_frames = [TimsFrame.from_py_ptr(frame) for frame in self.__dataset.get_precursor_frames(min_intensity, max_peaks, num_threads)] return precursor_frames + def get_sage_processed_precursors(self, file_id: int = 0) -> List[ProcessedSpectrum]: + precursor_meta = self.get_selected_precursors_meta() + + precursor_dict = {} + + for precursor in precursor_meta: + if precursor.precursor_frame_id not in precursor_dict: + precursor_dict[precursor.precursor_frame_id] = [] + precursor_dict[precursor.precursor_frame_id].append(precursor) + + precursor_frames = self.get_precursor_frames() + + processed_spectra = [] + + for frame in precursor_frames: + if frame.frame_id in precursor_dict: + + peaks = [Peak(mz, i) for mz, i in zip(frame.mz, frame.intensity)] + + precursors = [p.to_sage_precursor() for p in precursor_dict[frame.frame_id]] + + processed_spectrum = ProcessedSpectrum( + id = str(frame.frame_id), + level=1, + file_id=file_id, + scan_start_time=frame.retention_time, + ion_injection_time=frame.retention_time, + precursors=precursors, + total_ion_current=np.sum(frame.intensity), + peaks=peaks + ) + + processed_spectra.append(processed_spectrum) + + return processed_spectra + + def get_selected_precursors_meta(self) -> List[PrecursorDDA]: """ Get meta data for all selected precursors From 1965eaa510fd0fb9891a696043cd24baec42086b Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 12:57:47 +0100 Subject: [PATCH 10/29] testing --- imspy/imspy/timstof/dda.py | 1 - 1 file changed, 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index ef43c5ea..b684a6d6 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -4,7 +4,6 @@ import numpy as np import pandas as pd -from examples.imspy_dda import sage_precursor from sagepy.core.spectrum import Peak from imspy.simulation.annotation import RustWrapperObject From 4511b830e46d23db7fc37f9ff92eabec57bd5a9c Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 13:01:37 +0100 Subject: [PATCH 11/29] testing --- imspy/imspy/timstof/dda.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index b684a6d6..b2b8e33d 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -64,8 +64,15 @@ def precursor_frame_id(self) -> int: return self._precursor_ptr.precursor_frame_id def to_sage_precursor(self, isolation_window: Tolerance = Tolerance(da=(-3.0, 3.0,))) -> Precursor: + + # check if mz precursor_mz_monoisotopic is not None, if it is, use precursor_mz_average + if self.precursor_mz_monoisotopic is None: + mz = self.precursor_mz_average + else: + mz = self.precursor_mz_monoisotopic + return Precursor( - mz=self.precursor_mz_monoisotopic, + mz=mz, intensity=self.precursor_total_intensity, charge=self.precursor_charge, spectrum_ref=str(self.precursor_frame_id), From 181b966e34d852318896e89dc8845b395ac6375d Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 13:13:06 +0100 Subject: [PATCH 12/29] adding free mem --- imspy/imspy/timstof/dda.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index b2b8e33d..55411a3f 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -194,7 +194,7 @@ def get_precursor_frames(self, min_intensity: float = 75, max_peaks: int = 500, precursor_frames = [TimsFrame.from_py_ptr(frame) for frame in self.__dataset.get_precursor_frames(min_intensity, max_peaks, num_threads)] return precursor_frames - def get_sage_processed_precursors(self, file_id: int = 0) -> List[ProcessedSpectrum]: + def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: int = 500, file_id: int = 0, num_threads: int = 4) -> List[ProcessedSpectrum]: precursor_meta = self.get_selected_precursors_meta() precursor_dict = {} @@ -204,7 +204,7 @@ def get_sage_processed_precursors(self, file_id: int = 0) -> List[ProcessedSpect precursor_dict[precursor.precursor_frame_id] = [] precursor_dict[precursor.precursor_frame_id].append(precursor) - precursor_frames = self.get_precursor_frames() + precursor_frames = self.get_precursor_frames(min_intensity, max_peaks, num_threads) processed_spectra = [] @@ -225,9 +225,11 @@ def get_sage_processed_precursors(self, file_id: int = 0) -> List[ProcessedSpect total_ion_current=np.sum(frame.intensity), peaks=peaks ) - processed_spectra.append(processed_spectrum) + # delete precursor_frames to free memory + del precursor_frames + return processed_spectra From bf6be923f653b362af6409e7b238165400073179 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 14:07:39 +0100 Subject: [PATCH 13/29] testing --- imspy/imspy/timstof/dda.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 55411a3f..4b87ea69 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -194,7 +194,13 @@ def get_precursor_frames(self, min_intensity: float = 75, max_peaks: int = 500, precursor_frames = [TimsFrame.from_py_ptr(frame) for frame in self.__dataset.get_precursor_frames(min_intensity, max_peaks, num_threads)] return precursor_frames - def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: int = 500, file_id: int = 0, num_threads: int = 4) -> List[ProcessedSpectrum]: + def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: int = 500, file_id: int = 0, num_threads: int = 16) -> List[ProcessedSpectrum]: + + if self.use_bruker_sdk: + warnings.warn("Using multiple threads is currently not supported when using Bruker SDK, " + "setting num_threads to 1.") + num_threads = 1 + precursor_meta = self.get_selected_precursors_meta() precursor_dict = {} From edc4a5c410995778b27e477c47a3ad060800f694 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 14:08:55 +0100 Subject: [PATCH 14/29] adding max peaks --- imspy/imspy/timstof/dda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 4b87ea69..4c183b38 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -194,7 +194,7 @@ def get_precursor_frames(self, min_intensity: float = 75, max_peaks: int = 500, precursor_frames = [TimsFrame.from_py_ptr(frame) for frame in self.__dataset.get_precursor_frames(min_intensity, max_peaks, num_threads)] return precursor_frames - def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: int = 500, file_id: int = 0, num_threads: int = 16) -> List[ProcessedSpectrum]: + def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: int = int(1e6), file_id: int = 0, num_threads: int = 16) -> List[ProcessedSpectrum]: if self.use_bruker_sdk: warnings.warn("Using multiple threads is currently not supported when using Bruker SDK, " From e9258cdef67d3bbb38307c50dcc780c6d0288b57 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 14:12:22 +0100 Subject: [PATCH 15/29] adding --- imspy/imspy/timstof/dda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 4c183b38..ec07089c 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -194,7 +194,7 @@ def get_precursor_frames(self, min_intensity: float = 75, max_peaks: int = 500, precursor_frames = [TimsFrame.from_py_ptr(frame) for frame in self.__dataset.get_precursor_frames(min_intensity, max_peaks, num_threads)] return precursor_frames - def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: int = int(1e6), file_id: int = 0, num_threads: int = 16) -> List[ProcessedSpectrum]: + def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: int = 5000, file_id: int = 0, num_threads: int = 16) -> List[ProcessedSpectrum]: if self.use_bruker_sdk: warnings.warn("Using multiple threads is currently not supported when using Bruker SDK, " From 59b6f506aa87ff35d85a717651b0ea8db15bccd9 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 14:20:24 +0100 Subject: [PATCH 16/29] adding spectrum processor --- imspy/imspy/timstof/dda.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index ec07089c..e1d565ae 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -10,7 +10,7 @@ from imspy.timstof.data import TimsDataset from imspy.timstof.frame import TimsFrame -from sagepy.core import Precursor, Tolerance, ProcessedSpectrum +from sagepy.core import Precursor, Tolerance, ProcessedSpectrum, RawSpectrum, Representation, SpectrumProcessor import imspy_connector ims = imspy_connector.py_dda @@ -214,6 +214,12 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in processed_spectra = [] + spectrum_processor = SpectrumProcessor( + take_top_n=max_peaks, + min_deisotope_mz=0.0, + deisotope=False, + ) + for frame in precursor_frames: if frame.frame_id in precursor_dict: @@ -221,16 +227,20 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in precursors = [p.to_sage_precursor() for p in precursor_dict[frame.frame_id]] - processed_spectrum = ProcessedSpectrum( - id = str(frame.frame_id), - level=1, + raw_spectrum = RawSpectrum( file_id=file_id, + spec_id=str(frame.frame_id), + total_ion_current=np.sum(frame.intensity), + precursors=precursors, + mz=frame.mz, + intensity=frame.intensity, + representation=Representation(representation="centroid"), scan_start_time=frame.retention_time, ion_injection_time=frame.retention_time, - precursors=precursors, - total_ion_current=np.sum(frame.intensity), - peaks=peaks + ms_level=1, ) + + processed_spectrum = spectrum_processor.process(raw_spectrum) processed_spectra.append(processed_spectrum) # delete precursor_frames to free memory From 6f60f1d0cd2bdc7e8818f7b5d46239b2f8e622f7 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 14:23:15 +0100 Subject: [PATCH 17/29] fixing --- imspy/imspy/timstof/dda.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index e1d565ae..607e0299 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -232,8 +232,8 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in spec_id=str(frame.frame_id), total_ion_current=np.sum(frame.intensity), precursors=precursors, - mz=frame.mz, - intensity=frame.intensity, + mz=frame.mz.astype(np.float32), + intensity=frame.intensity.astype(np.float32), representation=Representation(representation="centroid"), scan_start_time=frame.retention_time, ion_injection_time=frame.retention_time, From 12f44606ac54be7a65ff4319b6256b6437841170 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 15:06:04 +0100 Subject: [PATCH 18/29] testing --- imspy/imspy/timstof/dda.py | 46 +++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 20 deletions(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 607e0299..21d733dd 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -201,8 +201,10 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in "setting num_threads to 1.") num_threads = 1 + # get selected precursors meta precursor_meta = self.get_selected_precursors_meta() + # create a dictionary with frame_id as key and a list of precursors as value precursor_dict = {} for precursor in precursor_meta: @@ -210,38 +212,42 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in precursor_dict[precursor.precursor_frame_id] = [] precursor_dict[precursor.precursor_frame_id].append(precursor) + # get precursor frames, filtered by min_intensity and max_peaks precursor_frames = self.get_precursor_frames(min_intensity, max_peaks, num_threads) - processed_spectra = [] - spectrum_processor = SpectrumProcessor( take_top_n=max_peaks, min_deisotope_mz=0.0, deisotope=False, ) + processed_spectra = [] + + # for each precursor frame, get the corresponding precursors and process the spectrum for frame in precursor_frames: if frame.frame_id in precursor_dict: - - peaks = [Peak(mz, i) for mz, i in zip(frame.mz, frame.intensity)] - precursors = [p.to_sage_precursor() for p in precursor_dict[frame.frame_id]] - raw_spectrum = RawSpectrum( - file_id=file_id, - spec_id=str(frame.frame_id), - total_ion_current=np.sum(frame.intensity), - precursors=precursors, - mz=frame.mz.astype(np.float32), - intensity=frame.intensity.astype(np.float32), - representation=Representation(representation="centroid"), - scan_start_time=frame.retention_time, - ion_injection_time=frame.retention_time, - ms_level=1, - ) - - processed_spectrum = spectrum_processor.process(raw_spectrum) - processed_spectra.append(processed_spectrum) + # currently, need to emit one spectrum per precursor + for p in precursors: + + # TODO: this could also be done in the outer loop, but would require a change in the Sage API + raw_spectrum = RawSpectrum( + file_id=file_id, + spec_id=str(frame.frame_id), + total_ion_current=0.0, + precursors=[p], + mz=frame.mz.astype(np.float32), + intensity=frame.intensity.astype(np.float32), + representation=Representation(representation="centroid"), + scan_start_time=frame.retention_time / 60, + ion_injection_time=frame.retention_time / 60, + ms_level=1, + ) + + # process the spectrum + processed_spectrum = spectrum_processor.process(raw_spectrum) + processed_spectra.append(processed_spectrum) # delete precursor_frames to free memory del precursor_frames From de8d0b3bc1d8df97a18ffdbf928cd8a8bb021099 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 15:42:45 +0100 Subject: [PATCH 19/29] reverting --- imspy/imspy/timstof/dda.py | 44 +++++++++++++++----------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 21d733dd..c958a285 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -201,10 +201,8 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in "setting num_threads to 1.") num_threads = 1 - # get selected precursors meta precursor_meta = self.get_selected_precursors_meta() - # create a dictionary with frame_id as key and a list of precursors as value precursor_dict = {} for precursor in precursor_meta: @@ -212,42 +210,34 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in precursor_dict[precursor.precursor_frame_id] = [] precursor_dict[precursor.precursor_frame_id].append(precursor) - # get precursor frames, filtered by min_intensity and max_peaks precursor_frames = self.get_precursor_frames(min_intensity, max_peaks, num_threads) + processed_spectra = [] + spectrum_processor = SpectrumProcessor( take_top_n=max_peaks, min_deisotope_mz=0.0, deisotope=False, ) - processed_spectra = [] - - # for each precursor frame, get the corresponding precursors and process the spectrum for frame in precursor_frames: if frame.frame_id in precursor_dict: precursors = [p.to_sage_precursor() for p in precursor_dict[frame.frame_id]] - - # currently, need to emit one spectrum per precursor - for p in precursors: - - # TODO: this could also be done in the outer loop, but would require a change in the Sage API - raw_spectrum = RawSpectrum( - file_id=file_id, - spec_id=str(frame.frame_id), - total_ion_current=0.0, - precursors=[p], - mz=frame.mz.astype(np.float32), - intensity=frame.intensity.astype(np.float32), - representation=Representation(representation="centroid"), - scan_start_time=frame.retention_time / 60, - ion_injection_time=frame.retention_time / 60, - ms_level=1, - ) - - # process the spectrum - processed_spectrum = spectrum_processor.process(raw_spectrum) - processed_spectra.append(processed_spectrum) + raw_spectrum = RawSpectrum( + file_id=file_id, + spec_id=str(frame.frame_id), + total_ion_current=np.sum(frame.intensity), + precursors=precursors, + mz=frame.mz.astype(np.float32), + intensity=frame.intensity.astype(np.float32), + representation=Representation(representation="centroid"), + scan_start_time=frame.retention_time / 60, + ion_injection_time=frame.retention_time / 60, + ms_level=1, + ) + + processed_spectrum = spectrum_processor.process(raw_spectrum) + processed_spectra.append(processed_spectrum) # delete precursor_frames to free memory del precursor_frames From 386ab15602059c516203ea39910891cac4d9d294 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 15:47:40 +0100 Subject: [PATCH 20/29] push --- imspy/imspy/timstof/dda.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index c958a285..0296cb00 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -1,11 +1,9 @@ import sqlite3 -from typing import List +from typing import List, Optional import numpy as np import pandas as pd -from sagepy.core.spectrum import Peak - from imspy.simulation.annotation import RustWrapperObject from imspy.timstof.data import TimsDataset from imspy.timstof.frame import TimsFrame @@ -63,7 +61,9 @@ def precursor_total_intensity(self) -> float: def precursor_frame_id(self) -> int: return self._precursor_ptr.precursor_frame_id - def to_sage_precursor(self, isolation_window: Tolerance = Tolerance(da=(-3.0, 3.0,))) -> Precursor: + def to_sage_precursor(self, + isolation_window: Tolerance = Tolerance(da=(-3.0, 3.0,)), + handle: Optional['TimsDatasetDDA'] = None) -> Precursor: # check if mz precursor_mz_monoisotopic is not None, if it is, use precursor_mz_average if self.precursor_mz_monoisotopic is None: @@ -71,12 +71,21 @@ def to_sage_precursor(self, isolation_window: Tolerance = Tolerance(da=(-3.0, 3. else: mz = self.precursor_mz_monoisotopic + if handle is not None: + inverse_ion_mobility = handle.scan_to_inverse_mobility( + frame_id=self.precursor_frame_id, + scan_values=np.array([int(self.precursor_average_scan_number)]) + ) + else: + inverse_ion_mobility = self.precursor_average_scan_number + + return Precursor( mz=mz, intensity=self.precursor_total_intensity, charge=self.precursor_charge, spectrum_ref=str(self.precursor_frame_id), - inverse_ion_mobility=self.precursor_average_scan_number, + inverse_ion_mobility=inverse_ion_mobility, isolation_window=isolation_window, ) @@ -222,7 +231,7 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in for frame in precursor_frames: if frame.frame_id in precursor_dict: - precursors = [p.to_sage_precursor() for p in precursor_dict[frame.frame_id]] + precursors = [p.to_sage_precursor(self) for p in precursor_dict[frame.frame_id]] raw_spectrum = RawSpectrum( file_id=file_id, spec_id=str(frame.frame_id), From faafc125518b592edbf3d22e880b80af55a3e245 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 15:51:32 +0100 Subject: [PATCH 21/29] p --- imspy/imspy/timstof/dda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 0296cb00..2ed4d6c8 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -231,7 +231,7 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in for frame in precursor_frames: if frame.frame_id in precursor_dict: - precursors = [p.to_sage_precursor(self) for p in precursor_dict[frame.frame_id]] + precursors = [p.to_sage_precursor(handle=self) for p in precursor_dict[frame.frame_id]] raw_spectrum = RawSpectrum( file_id=file_id, spec_id=str(frame.frame_id), From 81beb0ff146d600a9dbf01d34ddb9bbb592955af Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 15:53:11 +0100 Subject: [PATCH 22/29] adding ims --- imspy/imspy/timstof/dda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 2ed4d6c8..9ee5f695 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -75,7 +75,7 @@ def to_sage_precursor(self, inverse_ion_mobility = handle.scan_to_inverse_mobility( frame_id=self.precursor_frame_id, scan_values=np.array([int(self.precursor_average_scan_number)]) - ) + )[0] else: inverse_ion_mobility = self.precursor_average_scan_number From c1419e3f03a874f80924d6f0dc4304fd8243a07c Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 15:54:38 +0100 Subject: [PATCH 23/29] tesint --- imspy/imspy/timstof/dda.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index 9ee5f695..ee733886 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -79,7 +79,6 @@ def to_sage_precursor(self, else: inverse_ion_mobility = self.precursor_average_scan_number - return Precursor( mz=mz, intensity=self.precursor_total_intensity, @@ -210,17 +209,20 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in "setting num_threads to 1.") num_threads = 1 + # get all selected precursors precursor_meta = self.get_selected_precursors_meta() + # create a dictionary with frame_id as key and a list of precursors as value precursor_dict = {} - for precursor in precursor_meta: if precursor.precursor_frame_id not in precursor_dict: precursor_dict[precursor.precursor_frame_id] = [] precursor_dict[precursor.precursor_frame_id].append(precursor) + # get all precursor frames precursor_frames = self.get_precursor_frames(min_intensity, max_peaks, num_threads) + # process all precursor frames processed_spectra = [] spectrum_processor = SpectrumProcessor( @@ -229,6 +231,7 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in deisotope=False, ) + # associate precursors with frames, precursor will be associated with the frame with the same frame_id for frame in precursor_frames: if frame.frame_id in precursor_dict: precursors = [p.to_sage_precursor(handle=self) for p in precursor_dict[frame.frame_id]] @@ -245,6 +248,7 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in ms_level=1, ) + # process the spectrum processed_spectrum = spectrum_processor.process(raw_spectrum) processed_spectra.append(processed_spectrum) From 3a7f1a263da4c45dbc46ff3c58faa0eddc30ea63 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Sun, 12 Jan 2025 20:15:07 +0100 Subject: [PATCH 24/29] adding precursorDDA --- rustdf/src/data/meta.rs | 78 +++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 31 deletions(-) diff --git a/rustdf/src/data/meta.rs b/rustdf/src/data/meta.rs index c4963e58..7c8e93f6 100644 --- a/rustdf/src/data/meta.rs +++ b/rustdf/src/data/meta.rs @@ -42,6 +42,22 @@ pub struct DDAPrecursorMeta { pub precursor_frame_id: i64, } +#[derive(Debug, Clone)] +pub struct DDAPrecursor { + pub frame_id: i64, + pub precursor_id: i64, + pub mono_mz: f64, + pub highest_intensity_mz: f64, + pub average_mz: f64, + pub charge: Option, + pub inverse_ion_mobility: f64, + pub collision_energy: f64, + pub precuror_total_intensity: f64, + pub isolation_mz: f64, + pub isolation_width: f64, +} + +#[derive(Debug, Clone)] pub struct DDAFragmentInfo { pub frame_id: i64, pub scan_begin: i64, @@ -138,14 +154,14 @@ pub fn read_pasef_frame_ms_ms_info(bruker_d_folder_name: &str) -> Result, _> = conn.prepare(&query)?.query_map([], |row| { Ok(PasefMsMsMeta { - frame_id: row.get(0)?, - scan_num_begin: row.get(1)?, - scan_num_end: row.get(2)?, - isolation_mz: row.get(3)?, - isolation_width: row.get(4)?, - collision_energy: row.get(5)?, - precursor_id: row.get(6)?, }) - })?.collect(); + frame_id: row.get(0)?, + scan_num_begin: row.get(1)?, + scan_num_end: row.get(2)?, + isolation_mz: row.get(3)?, + isolation_width: row.get(4)?, + collision_energy: row.get(5)?, + precursor_id: row.get(6)?, }) + })?.collect(); // return the frames Ok(frames_rows?) @@ -163,8 +179,8 @@ pub fn read_global_meta_sql(bruker_d_folder_name: &str) -> Result Result Result, let conn = Connection::open(db_path)?; // prepare the query - let rows: Vec<&str> = vec!["Id", "Time", "ScanMode", "Polarity", "MsMsType", "TimsId", "MaxIntensity", "SummedIntensities", - "NumScans", "NumPeaks", "MzCalibration", "T1", "T2", "TimsCalibration", "PropertyGroup", "AccumulationTime", "RampTime"]; + let rows: Vec<&str> = vec!["Id", "Time", "ScanMode", "Polarity", "MsMsType", "TimsId", "MaxIntensity", "SummedIntensities", + "NumScans", "NumPeaks", "MzCalibration", "T1", "T2", "TimsCalibration", "PropertyGroup", "AccumulationTime", "RampTime"]; let query = format!("SELECT {} FROM Frames", rows.join(", ")); // execute the query let frames_rows: Result, _> = conn.prepare(&query)?.query_map([], |row| { - Ok(FrameMeta { - id: row.get(0)?, - time: row.get(1)?, - scan_mode: row.get(2)?, - polarity: row.get(3)?, - ms_ms_type: row.get(4)?, - tims_id: row.get(5)?, - max_intensity: row.get(6)?, - sum_intensity: row.get(7)?, - num_scans: row.get(8)?, - num_peaks: row.get(9)?, - mz_calibration: row.get(10)?, - t_1: row.get(11)?, - t_2: row.get(12)?, - tims_calibration: row.get(13)?, - property_group: row.get(14)?, - accumulation_time: row.get(15)?, - ramp_time: row.get(16)?, + Ok(FrameMeta { + id: row.get(0)?, + time: row.get(1)?, + scan_mode: row.get(2)?, + polarity: row.get(3)?, + ms_ms_type: row.get(4)?, + tims_id: row.get(5)?, + max_intensity: row.get(6)?, + sum_intensity: row.get(7)?, + num_scans: row.get(8)?, + num_peaks: row.get(9)?, + mz_calibration: row.get(10)?, + t_1: row.get(11)?, + t_2: row.get(12)?, + tims_calibration: row.get(13)?, + property_group: row.get(14)?, + accumulation_time: row.get(15)?, + ramp_time: row.get(16)?, }) })?.collect(); From 7bacab2c639b422b46d8bd0b4c9c06c005d9dec3 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Mon, 13 Jan 2025 09:40:07 +0100 Subject: [PATCH 25/29] fixed return of precursor info, added prototype of LFQ --- imspy/imspy/timstof/dda.py | 118 +++++++++++++++++----------------- imspy_connector/src/py_dda.rs | 74 ++++++++++++++------- rustdf/src/data/dda.rs | 49 +++++++++++++- rustdf/src/data/meta.rs | 2 +- 4 files changed, 156 insertions(+), 87 deletions(-) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py index ee733886..32c993d7 100644 --- a/imspy/imspy/timstof/dda.py +++ b/imspy/imspy/timstof/dda.py @@ -15,87 +15,89 @@ import warnings + class PrecursorDDA(RustWrapperObject): - def __init__(self, precursor_id: int, precursor_mz_highest_intensity: float, precursor_mz_average: float, - precursor_mz_monoisotopic: float, precursor_average_scan_number: int, precursor_total_intensity: float, - precursor_frame_id: int, precursor_charge: int): - self._precursor_ptr = ims.PyDDAPrecursorMeta(precursor_id, precursor_mz_highest_intensity, precursor_mz_average, - precursor_mz_monoisotopic, precursor_average_scan_number, - precursor_total_intensity, precursor_frame_id, precursor_charge) + def __init__(self, frame_id: int, precursor_id: int, highest_intensity_mz: float, average_mz: float, + inverse_ion_mobility: float, collision_energy: float, precuror_total_intensity: float, + isolation_mz: float, isolation_width: float, mono_mz: Optional[float] = None, charge: Optional[int] = None): + self._precursor_ptr = ims.PyDDAPrecursor( + frame_id, precursor_id, highest_intensity_mz, average_mz, inverse_ion_mobility, collision_energy, + precuror_total_intensity, isolation_mz, isolation_width, mono_mz, charge + ) @classmethod - def from_py_ptr(cls, precursor: ims.PyDDAPrecursorMeta): + def from_py_ptr(cls, precursor: ims.PyDDAPrecursor): instance = cls.__new__(cls) instance._precursor_ptr = precursor return instance + @property + def frame_id(self) -> int: + return self._precursor_ptr.frame_id + @property def precursor_id(self) -> int: return self._precursor_ptr.precursor_id @property - def precursor_mz_highest_intensity(self) -> float: - return self._precursor_ptr.precursor_mz_highest_intensity + def mono_mz(self) -> Optional[float]: + return self._precursor_ptr.mono_mz @property - def precursor_mz_average(self) -> float: - return self._precursor_ptr.precursor_mz_average + def highest_intensity_mz(self) -> float: + return self._precursor_ptr.highest_intensity_mz @property - def precursor_mz_monoisotopic(self) -> float: - return self._precursor_ptr.precursor_mz_monoisotopic + def average_mz(self) -> float: + return self._precursor_ptr.average_mz @property - def precursor_charge(self) -> int: - return self._precursor_ptr.precursor_charge + def charge(self) -> Optional[int]: + return self._precursor_ptr.charge @property - def precursor_average_scan_number(self) -> int: - return self._precursor_ptr.precursor_average_scan_number + def inverse_ion_mobility(self) -> float: + return self._precursor_ptr.inverse_ion_mobility @property - def precursor_total_intensity(self) -> float: - return self._precursor_ptr.precursor_total_intensity + def collision_energy(self) -> float: + return self._precursor_ptr.collision_energy @property - def precursor_frame_id(self) -> int: - return self._precursor_ptr.precursor_frame_id - - def to_sage_precursor(self, - isolation_window: Tolerance = Tolerance(da=(-3.0, 3.0,)), - handle: Optional['TimsDatasetDDA'] = None) -> Precursor: - - # check if mz precursor_mz_monoisotopic is not None, if it is, use precursor_mz_average - if self.precursor_mz_monoisotopic is None: - mz = self.precursor_mz_average - else: - mz = self.precursor_mz_monoisotopic - - if handle is not None: - inverse_ion_mobility = handle.scan_to_inverse_mobility( - frame_id=self.precursor_frame_id, - scan_values=np.array([int(self.precursor_average_scan_number)]) - )[0] - else: - inverse_ion_mobility = self.precursor_average_scan_number + def precuror_total_intensity(self) -> float: + return self._precursor_ptr.precuror_total_intensity - return Precursor( - mz=mz, - intensity=self.precursor_total_intensity, - charge=self.precursor_charge, - spectrum_ref=str(self.precursor_frame_id), - inverse_ion_mobility=inverse_ion_mobility, - isolation_window=isolation_window, - ) + @property + def isolation_mz(self) -> float: + return self._precursor_ptr.isolation_mz + + @property + def isolation_width(self) -> float: + return self._precursor_ptr.isolation_width + + def __repr__(self): + return (f"DDAPrecursor(frame_id={self.frame_id}, precursor_id={self.precursor_id}, " + f"highest_intensity_mz={self.highest_intensity_mz}, average_mz={self.average_mz}, " + f"inverse_ion_mobility={self.inverse_ion_mobility}, collision_energy={self.collision_energy}, " + f"precuror_total_intensity={self.precuror_total_intensity}, isolation_mz={self.isolation_mz}, " + f"isolation_width={self.isolation_width}, mono_mz={self.mono_mz}, charge={self.charge})") def get_py_ptr(self): return self._precursor_ptr - def __repr__(self): - return f"PrecursorDDA(precursor_id={self.precursor_id}, precursor_mz_highest_intensity={self.precursor_mz_highest_intensity}, " \ - f"precursor_mz_average={self.precursor_mz_average}, precursor_mz_monoisotopic={self.precursor_mz_monoisotopic}, " \ - f"precursor_charge={self.precursor_charge}, precursor_average_scan_number={self.precursor_average_scan_number}, " \ - f"precursor_total_intensity={self.precursor_total_intensity}, precursor_frame_id={self.precursor_frame_id})" + def to_sage_precursor(self) -> Precursor: + + mz = self.mono_mz if self.mono_mz is not None else self.average_mz + + return Precursor( + mz=mz, + intensity=self.precuror_total_intensity, + charge=self.charge, + spectrum_ref=str(self.frame_id), + isolation_window=Tolerance(da=(-self.isolation_width / 2, self.isolation_width / 2)), + inverse_ion_mobility=self.inverse_ion_mobility, + collision_energy=self.collision_energy, + ) class TimsDatasetDDA(TimsDataset, RustWrapperObject): @@ -210,14 +212,14 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in num_threads = 1 # get all selected precursors - precursor_meta = self.get_selected_precursors_meta() + precursor_meta = self.get_selected_precursors() # create a dictionary with frame_id as key and a list of precursors as value precursor_dict = {} for precursor in precursor_meta: - if precursor.precursor_frame_id not in precursor_dict: - precursor_dict[precursor.precursor_frame_id] = [] - precursor_dict[precursor.precursor_frame_id].append(precursor) + if precursor.frame_id not in precursor_dict: + precursor_dict[precursor.frame_id] = [] + precursor_dict[precursor.frame_id].append(precursor) # get all precursor frames precursor_frames = self.get_precursor_frames(min_intensity, max_peaks, num_threads) @@ -234,7 +236,7 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in # associate precursors with frames, precursor will be associated with the frame with the same frame_id for frame in precursor_frames: if frame.frame_id in precursor_dict: - precursors = [p.to_sage_precursor(handle=self) for p in precursor_dict[frame.frame_id]] + precursors = [p.to_sage_precursor() for p in precursor_dict[frame.frame_id]] raw_spectrum = RawSpectrum( file_id=file_id, spec_id=str(frame.frame_id), @@ -258,7 +260,7 @@ def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: in return processed_spectra - def get_selected_precursors_meta(self) -> List[PrecursorDDA]: + def get_selected_precursors(self) -> List[PrecursorDDA]: """ Get meta data for all selected precursors Returns: diff --git a/imspy_connector/src/py_dda.rs b/imspy_connector/src/py_dda.rs index db20e28f..0a8f96f2 100644 --- a/imspy_connector/src/py_dda.rs +++ b/imspy_connector/src/py_dda.rs @@ -2,56 +2,80 @@ use pyo3::prelude::*; use rustdf::data::dda::{PASEFDDAFragment, TimsDatasetDDA}; use rustdf::data::handle::TimsData; -use rustdf::data::meta::DDAPrecursorMeta; +use rustdf::data::meta::{DDAPrecursor}; use crate::py_tims_frame::PyTimsFrame; use crate::py_tims_slice::PyTimsSlice; #[pyclass] -pub struct PyDDAPrecursorMeta { - inner: DDAPrecursorMeta, +pub struct PyDDAPrecursor { + inner: DDAPrecursor, } #[pymethods] -impl PyDDAPrecursorMeta { +impl PyDDAPrecursor { #[new] - #[pyo3(signature = (precursor_id, precursor_mz_highest_intensity, precursor_mz_average, precursor_mz_monoisotopic, precursor_average_scan_number, precursor_total_intensity, precursor_frame_id, precursor_charge=None))] - pub fn new(precursor_id: i64, precursor_mz_highest_intensity: f64, precursor_mz_average: f64, precursor_mz_monoisotopic: Option, precursor_average_scan_number: f64, precursor_total_intensity: f64, precursor_frame_id: i64, precursor_charge: Option) -> Self { - let precursor_meta = DDAPrecursorMeta { + #[pyo3(signature = (frame_id, precursor_id, highest_intensity_mz, average_mz, inverse_ion_mobility, collision_energy, precuror_total_intensity, isolation_mz, isolation_width, mono_mz=None, charge=None))] + pub fn new( + frame_id: i64, + precursor_id: i64, + highest_intensity_mz: f64, + average_mz: f64, + inverse_ion_mobility: f64, + collision_energy: f64, + precuror_total_intensity: f64, + isolation_mz: f64, + isolation_width: f64, + mono_mz: Option, + charge: Option, + ) -> Self { + let precursor = DDAPrecursor { + frame_id, precursor_id, - precursor_mz_highest_intensity, - precursor_mz_average, - precursor_mz_monoisotopic, - precursor_charge, - precursor_average_scan_number, - precursor_total_intensity, - precursor_frame_id, + mono_mz, + highest_intensity_mz, + average_mz, + charge, + inverse_ion_mobility, + collision_energy, + precuror_total_intensity, + isolation_mz, + isolation_width, }; - PyDDAPrecursorMeta { inner: precursor_meta } + PyDDAPrecursor { inner: precursor } } + #[getter] + pub fn frame_id(&self) -> i64 { self.inner.frame_id } + #[getter] pub fn precursor_id(&self) -> i64 { self.inner.precursor_id } #[getter] - pub fn precursor_mz_highest_intensity(&self) -> f64 { self.inner.precursor_mz_highest_intensity } + pub fn mono_mz(&self) -> Option { self.inner.mono_mz } #[getter] - pub fn precursor_mz_average(&self) -> f64 { self.inner.precursor_mz_average } + pub fn highest_intensity_mz(&self) -> f64 { self.inner.highest_intensity_mz } #[getter] - pub fn precursor_mz_monoisotopic(&self) -> Option { self.inner.precursor_mz_monoisotopic } + pub fn average_mz(&self) -> f64 { self.inner.average_mz } #[getter] - pub fn precursor_charge(&self) -> Option { self.inner.precursor_charge } + pub fn charge(&self) -> Option { self.inner.charge } + + #[getter] + pub fn inverse_ion_mobility(&self) -> f64 { self.inner.inverse_ion_mobility } + + #[getter] + pub fn collision_energy(&self) -> f64 { self.inner.collision_energy } #[getter] - pub fn precursor_average_scan_number(&self) -> f64 { self.inner.precursor_average_scan_number } + pub fn precuror_total_intensity(&self) -> f64 { self.inner.precuror_total_intensity } #[getter] - pub fn precursor_total_intensity(&self) -> f64 { self.inner.precursor_total_intensity } + pub fn isolation_mz(&self) -> f64 { self.inner.isolation_mz } #[getter] - pub fn precursor_frame_id(&self) -> i64 { self.inner.precursor_frame_id } + pub fn isolation_width(&self) -> f64 { self.inner.isolation_width } } #[pyclass] @@ -91,9 +115,9 @@ impl PyTimsDatasetDDA { pasef_fragments.iter().map(|pasef_fragment| PyTimsFragmentDDA { inner: pasef_fragment.clone() }).collect() } - pub fn get_selected_precursors(&self) -> Vec { + pub fn get_selected_precursors(&self) -> Vec { let pasef_precursor_meta = self.inner.get_selected_precursors(); - pasef_precursor_meta.iter().map(|precursor_meta| PyDDAPrecursorMeta { inner: precursor_meta.clone() }).collect() + pasef_precursor_meta.iter().map(|precursor_meta| PyDDAPrecursor { inner: precursor_meta.clone() }).collect() } pub fn get_precursor_frames(&self, min_intensity: f64, max_peaks: usize, num_threads: usize) -> Vec { @@ -139,6 +163,6 @@ impl PyTimsFragmentDDA { pub fn py_dda(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; - m.add_class::()?; + m.add_class::()?; Ok(()) } \ No newline at end of file diff --git a/rustdf/src/data/dda.rs b/rustdf/src/data/dda.rs index 54a26ee8..714fe9c4 100644 --- a/rustdf/src/data/dda.rs +++ b/rustdf/src/data/dda.rs @@ -1,10 +1,11 @@ +use std::collections::BTreeMap; use mscore::timstof::frame::{RawTimsFrame, TimsFrame}; use mscore::timstof::slice::TimsSlice; use rayon::prelude::*; use rayon::ThreadPoolBuilder; use crate::data::acquisition::AcquisitionMode; use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader}; -use crate::data::meta::{DDAPrecursorMeta, PasefMsMsMeta, read_dda_precursor_meta, read_pasef_frame_ms_ms_info, read_global_meta_sql, read_meta_data_sql}; +use crate::data::meta::{PasefMsMsMeta, read_dda_precursor_meta, read_pasef_frame_ms_ms_info, read_global_meta_sql, read_meta_data_sql, DDAPrecursor}; #[derive(Clone)] pub struct PASEFDDAFragment { @@ -41,8 +42,50 @@ impl TimsDatasetDDA { TimsDatasetDDA { loader } } - pub fn get_selected_precursors(&self) -> Vec { - read_dda_precursor_meta(&self.loader.get_data_path()).unwrap() + /* + #[derive(Debug, Clone)] +pub struct DDAPrecursor { + pub frame_id: i64, + pub precursor_id: i64, + pub mono_mz: f64, + pub highest_intensity_mz: f64, + pub average_mz: f64, + pub charge: Option, + pub inverse_ion_mobility: f64, + pub collision_energy: f64, + pub precuror_total_intensity: f64, + pub isolation_mz: f64, + pub isolation_width: f64, +} + */ + + + + pub fn get_selected_precursors(&self) -> Vec { + let precursor_meta = read_dda_precursor_meta(&self.loader.get_data_path()).unwrap(); + let pasef_meta = self.get_pasef_frame_ms_ms_info(); + + let precursor_id_to_pasef_meta: BTreeMap = pasef_meta.iter().map(|x| (x.precursor_id as i64, x)).collect(); + + // go over all precursors and get the precursor meta data + let result: Vec<_> = precursor_meta.iter().map(|precursor| { + let pasef_meta = precursor_id_to_pasef_meta.get(&precursor.precursor_id).unwrap(); + DDAPrecursor { + frame_id: precursor.precursor_frame_id, + precursor_id: precursor.precursor_id, + mono_mz: precursor.precursor_mz_monoisotopic, + highest_intensity_mz: precursor.precursor_mz_highest_intensity, + average_mz: precursor.precursor_mz_average, + charge: precursor.precursor_charge, + inverse_ion_mobility: self.scan_to_inverse_mobility(precursor.precursor_frame_id as u32, &vec![precursor.precursor_average_scan_number as u32])[0], + collision_energy: pasef_meta.collision_energy, + precuror_total_intensity: precursor.precursor_total_intensity, + isolation_mz: pasef_meta.isolation_mz, + isolation_width: pasef_meta.isolation_width, + } + }).collect(); + + result } pub fn get_precursor_frames(&self, min_intensity: f64, max_num_peaks: usize, num_threads: usize) -> Vec { diff --git a/rustdf/src/data/meta.rs b/rustdf/src/data/meta.rs index 7c8e93f6..9e3e7c8c 100644 --- a/rustdf/src/data/meta.rs +++ b/rustdf/src/data/meta.rs @@ -46,7 +46,7 @@ pub struct DDAPrecursorMeta { pub struct DDAPrecursor { pub frame_id: i64, pub precursor_id: i64, - pub mono_mz: f64, + pub mono_mz: Option, pub highest_intensity_mz: f64, pub average_mz: f64, pub charge: Option, From 2a4b1d78cf6d543e9ae998660099056df3fba0b2 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Mon, 13 Jan 2025 09:56:15 +0100 Subject: [PATCH 26/29] updated Cargo.toml for crates.io --- mscore/Cargo.toml | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/mscore/Cargo.toml b/mscore/Cargo.toml index 36d44b5c..a52494ef 100644 --- a/mscore/Cargo.toml +++ b/mscore/Cargo.toml @@ -2,18 +2,45 @@ name = "mscore" version = "0.2.0" edition = "2021" +authors = ["David Teschner "] +description = "A Rust library providing core operations for computational mass spectrometry proteomics." +license = "MIT" +repository = "https://github.com/theGreatHerrLebert/rustims" +documentation = "https://docs.rs/mscore" +readme = "README.md" +keywords = ["statistics", "matrix", "scoring", "parallel"] +categories = ["math", "science", "data-structures"] +rust-version = "1.84" + +[lib] +name = "mscore" +path = "src/lib.rs" [dependencies] +# Statistical functions statrs = "0.18.0" +# Iterator utilities itertools = "0.14.0" +# Parallelism rayon = "1.10.0" +# Matrix operations nalgebra = "0.33.2" +# Serialization serde = { version = "1.0.217", features = ["derive"] } +# Regular expressions regex = "1.11.1" +# Random number generation rand = "0.8.5" +# Ordered floats ordered-float = "4.6.0" +# Binary serialization bincode = "2.0.0-rc.3" -[lib] -name = "mscore" -path = "src/lib.rs" +[profile.release] +debug = true +overflow-checks = true +lto = "thin" +panic = "abort" + +[package.metadata.docs.rs] +features = ["all"] From 5b6dc57a6add3631a531534082af0640769d5280 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Mon, 13 Jan 2025 10:01:01 +0100 Subject: [PATCH 27/29] fixed keyword not recognized --- mscore/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mscore/Cargo.toml b/mscore/Cargo.toml index a52494ef..2814b8b3 100644 --- a/mscore/Cargo.toml +++ b/mscore/Cargo.toml @@ -9,7 +9,7 @@ repository = "https://github.com/theGreatHerrLebert/rustims" documentation = "https://docs.rs/mscore" readme = "README.md" keywords = ["statistics", "matrix", "scoring", "parallel"] -categories = ["math", "science", "data-structures"] +categories = ["mathematics", "science", "data-structures"] rust-version = "1.84" [lib] From 2b91d1984d4304d25b8386020a50dbdc986762a6 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Mon, 13 Jan 2025 10:08:12 +0100 Subject: [PATCH 28/29] used fmt to fix formatting --- mscore/src/algorithm/isotope.rs | 234 ++++++++--- mscore/src/algorithm/peptide.rs | 100 +++-- rustdf/Cargo.toml | 34 +- rustdf/src/data/acquisition.rs | 2 +- rustdf/src/data/dataset.rs | 64 ++- rustdf/src/data/dda.rs | 233 +++++++---- rustdf/src/data/dia.rs | 142 +++++-- rustdf/src/data/handle.rs | 284 +++++++++----- rustdf/src/data/meta.rs | 263 +++++++++---- rustdf/src/data/mod.rs | 6 +- rustdf/src/data/raw.rs | 96 +++-- rustdf/src/data/utility.rs | 116 ++++-- rustdf/src/lib.rs | 2 +- rustdf/src/main.rs | 27 +- rustdf/src/sim/containers.rs | 72 +++- rustdf/src/sim/dia.rs | 677 ++++++++++++++++++++++++++------ rustdf/src/sim/handle.rs | 386 +++++++++++------- rustdf/src/sim/mod.rs | 6 +- rustdf/src/sim/precursor.rs | 205 +++++++--- rustdf/src/sim/utility.rs | 27 +- 20 files changed, 2149 insertions(+), 827 deletions(-) diff --git a/mscore/src/algorithm/isotope.rs b/mscore/src/algorithm/isotope.rs index b1b254bf..3d25dc81 100644 --- a/mscore/src/algorithm/isotope.rs +++ b/mscore/src/algorithm/isotope.rs @@ -1,15 +1,15 @@ extern crate statrs; -use std::collections::{BTreeMap, HashMap, HashSet}; use rayon::prelude::*; use rayon::ThreadPoolBuilder; +use std::collections::{BTreeMap, HashMap, HashSet}; -use statrs::distribution::{Continuous, Normal}; use crate::chemistry::constants::{MASS_NEUTRON, MASS_PROTON}; use crate::chemistry::elements::{atoms_isotopic_weights, isotopic_abundance}; use crate::data::peptide::PeptideIon; use crate::data::spectrum::MzSpectrum; use crate::data::spectrum::ToResolution; +use statrs::distribution::{Continuous, Normal}; /// convolve two distributions of masses and abundances /// @@ -35,8 +35,13 @@ use crate::data::spectrum::ToResolution; /// let result = convolve(&dist_a, &dist_b, 1e-6, 1e-12, 200); /// assert_eq!(result, vec![(200.0, 0.25), (201.0, 0.5), (202.0, 0.25)]); /// ``` -pub fn convolve(dist_a: &Vec<(f64, f64)>, dist_b: &Vec<(f64, f64)>, mass_tolerance: f64, abundance_threshold: f64, max_results: usize) -> Vec<(f64, f64)> { - +pub fn convolve( + dist_a: &Vec<(f64, f64)>, + dist_b: &Vec<(f64, f64)>, + mass_tolerance: f64, + abundance_threshold: f64, + max_results: usize, +) -> Vec<(f64, f64)> { let mut result: Vec<(f64, f64)> = Vec::new(); for (mass_a, abundance_a) in dist_a { @@ -50,7 +55,10 @@ pub fn convolve(dist_a: &Vec<(f64, f64)>, dist_b: &Vec<(f64, f64)>, mass_toleran } // Insert or update the combined mass in the result distribution - if let Some(entry) = result.iter_mut().find(|(m, _)| (*m - combined_mass).abs() < mass_tolerance) { + if let Some(entry) = result + .iter_mut() + .find(|(m, _)| (*m - combined_mass).abs() < mass_tolerance) + { entry.1 += combined_abundance; } else { result.push((combined_mass, combined_abundance)); @@ -110,7 +118,13 @@ pub fn convolve_pow(dist: &Vec<(f64, f64)>, n: i32) -> Vec<(f64, f64)> { // If n is not a power of 2, recursively fill in the remainder if power / 2 < n { - result = convolve(&result, &convolve_pow(dist, n - power / 2, ), 1e-6, 1e-12, 200); + result = convolve( + &result, + &convolve_pow(dist, n - power / 2), + 1e-6, + 1e-12, + 200, + ); } result @@ -146,19 +160,33 @@ pub fn generate_isotope_distribution( atomic_composition: &HashMap, mass_tolerance: f64, abundance_threshold: f64, - max_result: i32 + max_result: i32, ) -> Vec<(f64, f64)> { - let mut cumulative_distribution: Option> = None; - let atoms_isotopic_weights: HashMap> = atoms_isotopic_weights().iter().map(|(k, v)| (k.to_string(), v.clone())).collect(); - let atomic_isotope_abundance: HashMap> = isotopic_abundance().iter().map(|(k, v)| (k.to_string(), v.clone())).collect(); + let atoms_isotopic_weights: HashMap> = atoms_isotopic_weights() + .iter() + .map(|(k, v)| (k.to_string(), v.clone())) + .collect(); + let atomic_isotope_abundance: HashMap> = isotopic_abundance() + .iter() + .map(|(k, v)| (k.to_string(), v.clone())) + .collect(); for (element, &count) in atomic_composition.iter() { - let elemental_isotope_weights = atoms_isotopic_weights.get(element).expect("Element not found in isotopic weights table").clone(); - let elemental_isotope_abundance = atomic_isotope_abundance.get(element).expect("Element not found in isotopic abundance table").clone(); - - let element_distribution: Vec<(f64, f64)> = elemental_isotope_weights.iter().zip(elemental_isotope_abundance.iter()).map(|(&mass, &abundance - )| (mass, abundance)).collect(); + let elemental_isotope_weights = atoms_isotopic_weights + .get(element) + .expect("Element not found in isotopic weights table") + .clone(); + let elemental_isotope_abundance = atomic_isotope_abundance + .get(element) + .expect("Element not found in isotopic abundance table") + .clone(); + + let element_distribution: Vec<(f64, f64)> = elemental_isotope_weights + .iter() + .zip(elemental_isotope_abundance.iter()) + .map(|(&mass, &abundance)| (mass, abundance)) + .collect(); let element_power_distribution = if count > 1 { convolve_pow(&element_distribution, count) @@ -167,30 +195,50 @@ pub fn generate_isotope_distribution( }; cumulative_distribution = match cumulative_distribution { - Some(cum_dist) => Some(convolve(&cum_dist, &element_power_distribution, mass_tolerance, abundance_threshold, max_result as usize)), + Some(cum_dist) => Some(convolve( + &cum_dist, + &element_power_distribution, + mass_tolerance, + abundance_threshold, + max_result as usize, + )), None => Some(element_power_distribution), }; } let final_distribution = cumulative_distribution.expect("Peptide has no elements"); // Normalize the distribution - let total_abundance: f64 = final_distribution.iter().map(|&(_, abundance)| abundance).sum(); - let result: Vec<_> = final_distribution.into_iter().map(|(mass, abundance)| (mass, abundance / total_abundance)).collect(); + let total_abundance: f64 = final_distribution + .iter() + .map(|&(_, abundance)| abundance) + .sum(); + let result: Vec<_> = final_distribution + .into_iter() + .map(|(mass, abundance)| (mass, abundance / total_abundance)) + .collect(); let mut sort_map: BTreeMap = BTreeMap::new(); let quantize = |mz: f64| -> i64 { (mz * 1_000_000.0).round() as i64 }; for (mz, intensity) in result { let key = quantize(mz); - sort_map.entry(key).and_modify(|e| *e += intensity).or_insert(intensity); + sort_map + .entry(key) + .and_modify(|e| *e += intensity) + .or_insert(intensity); } - let mz: Vec = sort_map.keys().map(|&key| key as f64 / 1_000_000.0).collect(); + let mz: Vec = sort_map + .keys() + .map(|&key| key as f64 / 1_000_000.0) + .collect(); let intensity: Vec = sort_map.values().map(|&intensity| intensity).collect(); - mz.iter().zip(intensity.iter()).map(|(&mz, &intensity)| (mz, intensity)).collect() + mz.iter() + .zip(intensity.iter()) + .map(|(&mz, &intensity)| (mz, intensity)) + .collect() } - /// calculate the normal probability density function /// /// Arguments: @@ -241,11 +289,14 @@ pub fn factorial(n: i32) -> f64 { pub fn weight(mass: f64, peak_nums: Vec, normalize: bool) -> Vec { let lam_val = lam(mass, 0.000594, -0.03091); let factorials: Vec = peak_nums.iter().map(|&k| factorial(k)).collect(); - let mut weights: Vec = peak_nums.iter().map(|&k| { - let pow = lam_val.powi(k); - let exp = (-lam_val).exp(); - exp * pow / factorials[k as usize] - }).collect(); + let mut weights: Vec = peak_nums + .iter() + .map(|&k| { + let pow = lam_val.powi(k); + let exp = (-lam_val).exp(); + exp * pow / factorials[k as usize] + }) + .collect(); if normalize { let sum: f64 = weights.iter().sum(); @@ -295,10 +346,28 @@ pub fn lam(mass: f64, slope: f64, intercept: f64) -> f64 { /// /// * `Vec` - isotope pattern /// -pub fn iso(x: &Vec, mass: f64, charge: f64, sigma: f64, amp: f64, k: usize, step_size: f64) -> Vec { +pub fn iso( + x: &Vec, + mass: f64, + charge: f64, + sigma: f64, + amp: f64, + k: usize, + step_size: f64, +) -> Vec { let k_range: Vec = (0..k).collect(); - let means: Vec = k_range.iter().map(|&k_val| (mass + MASS_NEUTRON * k_val as f64) / charge).collect(); - let weights = weight(mass, k_range.iter().map(|&k_val| k_val as i32).collect::>(), true); + let means: Vec = k_range + .iter() + .map(|&k_val| (mass + MASS_NEUTRON * k_val as f64) / charge) + .collect(); + let weights = weight( + mass, + k_range + .iter() + .map(|&k_val| k_val as i32) + .collect::>(), + true, + ); let mut intensities = vec![0.0; x.len()]; for (i, x_val) in x.iter().enumerate() { @@ -307,7 +376,10 @@ pub fn iso(x: &Vec, mass: f64, charge: f64, sigma: f64, amp: f64, k: usize, } intensities[i] *= step_size; } - intensities.iter().map(|&intensity| intensity * amp).collect() + intensities + .iter() + .map(|&intensity| intensity * amp) + .collect() } /// generate the isotope pattern for a given mass and charge @@ -334,13 +406,27 @@ pub fn iso(x: &Vec, mass: f64, charge: f64, sigma: f64, amp: f64, k: usize, /// /// let (mzs, intensities) = generate_isotope_pattern(1500.0, 1510.0, 3000.0, 2.0, 1e4, 10, 1.0, 3); /// ``` -pub fn generate_isotope_pattern(lower_bound: f64, upper_bound: f64, mass: f64, charge: f64, amp: f64, k: usize, sigma: f64, resolution: i32) -> (Vec, Vec) { +pub fn generate_isotope_pattern( + lower_bound: f64, + upper_bound: f64, + mass: f64, + charge: f64, + amp: f64, + k: usize, + sigma: f64, + resolution: i32, +) -> (Vec, Vec) { let step_size = f64::min(sigma / 10.0, 1.0 / 10f64.powi(resolution)); let size = ((upper_bound - lower_bound) / step_size).ceil() as usize; - let mzs: Vec = (0..size).map(|i| lower_bound + step_size * i as f64).collect(); + let mzs: Vec = (0..size) + .map(|i| lower_bound + step_size * i as f64) + .collect(); let intensities = iso(&mzs, mass, charge, sigma, amp, k, step_size); - (mzs.iter().map(|&mz| mz + MASS_PROTON).collect(), intensities) + ( + mzs.iter().map(|&mz| mz + MASS_PROTON).collect(), + intensities, + ) } /// generate the averagine spectrum for a given mass and charge @@ -373,7 +459,7 @@ pub fn generate_averagine_spectrum( k: i32, resolution: i32, centroid: bool, - amp: Option + amp: Option, ) -> MzSpectrum { let amp = amp.unwrap_or(1e4); let lb = mass / charge as f64 - 0.2; @@ -390,10 +476,16 @@ pub fn generate_averagine_spectrum( resolution, ); - let spectrum = MzSpectrum::new(mz, intensities).to_resolution(resolution).filter_ranged(lb, ub, min_intensity as f64, 1e9); + let spectrum = MzSpectrum::new(mz, intensities) + .to_resolution(resolution) + .filter_ranged(lb, ub, min_intensity as f64, 1e9); if centroid { - spectrum.to_centroid(std::cmp::max(min_intensity, 1), 1.0 / 10f64.powi(resolution - 1), true) + spectrum.to_centroid( + std::cmp::max(min_intensity, 1), + 1.0 / 10f64.powi(resolution - 1), + true, + ) } else { spectrum } @@ -434,16 +526,31 @@ pub fn generate_averagine_spectra( resolution: i32, centroid: bool, num_threads: usize, - amp: Option + amp: Option, ) -> Vec { let amp = amp.unwrap_or(1e5); let mut spectra: Vec = Vec::new(); - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); thread_pool.install(|| { - spectra = masses.par_iter().zip(charges.par_iter()).map(|(&mass, &charge)| { - generate_averagine_spectrum(mass, charge, min_intensity, k, resolution, centroid, Some(amp)) - }).collect(); + spectra = masses + .par_iter() + .zip(charges.par_iter()) + .map(|(&mass, &charge)| { + generate_averagine_spectrum( + mass, + charge, + min_intensity, + k, + resolution, + centroid, + Some(amp), + ) + }) + .collect(); }); spectra @@ -461,7 +568,11 @@ pub fn generate_averagine_spectra( /// /// * `MzSpectrum` - precursor spectrum /// -pub fn generate_precursor_spectrum(sequence: &str, charge: i32, peptide_id: Option) -> MzSpectrum { +pub fn generate_precursor_spectrum( + sequence: &str, + charge: i32, + peptide_id: Option, +) -> MzSpectrum { let peptide_ion = PeptideIon::new(sequence.to_string(), charge, 1.0, peptide_id); peptide_ion.calculate_isotopic_spectrum(1e-3, 1e-9, 200, 1e-6) } @@ -478,21 +589,38 @@ pub fn generate_precursor_spectrum(sequence: &str, charge: i32, peptide_id: Opti /// /// * `Vec` - list of precursor spectra /// -pub fn generate_precursor_spectra(sequences: &Vec<&str>, charges: &Vec, num_threads: usize, peptide_ids: Vec>) -> Vec { - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); +pub fn generate_precursor_spectra( + sequences: &Vec<&str>, + charges: &Vec, + num_threads: usize, + peptide_ids: Vec>, +) -> Vec { + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); // need to zip sequences and charges and peptide_ids let result = thread_pool.install(|| { - sequences.par_iter().zip(charges.par_iter()).zip(peptide_ids.par_iter()).map(|((&sequence, &charge), &peptide_id)| { - generate_precursor_spectrum(sequence, charge, peptide_id) - }).collect() + sequences + .par_iter() + .zip(charges.par_iter()) + .zip(peptide_ids.par_iter()) + .map(|((&sequence, &charge), &peptide_id)| { + generate_precursor_spectrum(sequence, charge, peptide_id) + }) + .collect() }); result } // Calculates the isotope distribution for a fragment given the isotope distribution of the fragment, the isotope distribution of the complementary fragment, and the transmitted precursor isotopes // implemented based on OpenMS: "https://github.com/OpenMS/OpenMS/blob/079143800f7ed036a7c68ea6e124fe4f5cfc9569/src/openms/source/CHEMISTRY/ISOTOPEDISTRIBUTION/CoarseIsotopePatternGenerator.cpp#L415" -pub fn calculate_transmission_dependent_fragment_ion_isotope_distribution(fragment_isotope_dist: &Vec<(f64, f64)>, comp_fragment_isotope_dist: &Vec<(f64, f64)>, precursor_isotopes: &HashSet, max_isotope: usize) -> Vec<(f64, f64)> { - +pub fn calculate_transmission_dependent_fragment_ion_isotope_distribution( + fragment_isotope_dist: &Vec<(f64, f64)>, + comp_fragment_isotope_dist: &Vec<(f64, f64)>, + precursor_isotopes: &HashSet, + max_isotope: usize, +) -> Vec<(f64, f64)> { if fragment_isotope_dist.is_empty() || comp_fragment_isotope_dist.is_empty() { return Vec::new(); } @@ -502,7 +630,9 @@ pub fn calculate_transmission_dependent_fragment_ion_isotope_distribution(fragme r_max = max_isotope; } - let mut result = (0..r_max).map(|i| (fragment_isotope_dist[0].0 + i as f64, 0.0)).collect::>(); + let mut result = (0..r_max) + .map(|i| (fragment_isotope_dist[0].0 + i as f64, 0.0)) + .collect::>(); // Calculation of dependent isotope distribution for (i, &(_mz, intensity)) in fragment_isotope_dist.iter().enumerate().take(r_max) { @@ -516,4 +646,4 @@ pub fn calculate_transmission_dependent_fragment_ion_isotope_distribution(fragme } result -} \ No newline at end of file +} diff --git a/mscore/src/algorithm/peptide.rs b/mscore/src/algorithm/peptide.rs index 20df0726..bcbb7f74 100644 --- a/mscore/src/algorithm/peptide.rs +++ b/mscore/src/algorithm/peptide.rs @@ -1,14 +1,16 @@ -use std::collections::HashMap; -use rayon::prelude::*; -use rayon::ThreadPoolBuilder; -use regex::Regex; -use statrs::distribution::{Binomial, Discrete}; use crate::chemistry::amino_acid::{amino_acid_composition, amino_acid_masses}; use crate::chemistry::constants::{MASS_CO, MASS_NH3, MASS_PROTON, MASS_WATER}; use crate::chemistry::formulas::calculate_mz; -use crate::chemistry::unimod::{modification_atomic_composition, unimod_modifications_mass_numerical}; +use crate::chemistry::unimod::{ + modification_atomic_composition, unimod_modifications_mass_numerical, +}; use crate::chemistry::utility::{find_unimod_patterns, unimod_sequence_to_tokens}; use crate::data::peptide::{FragmentType, PeptideProductIon, PeptideSequence}; +use rayon::prelude::*; +use rayon::ThreadPoolBuilder; +use regex::Regex; +use statrs::distribution::{Binomial, Discrete}; +use std::collections::HashMap; /// calculate the monoisotopic mass of a peptide sequence /// @@ -54,8 +56,16 @@ pub fn calculate_peptide_mono_isotopic_mass(peptide_sequence: &PeptideSequence) } // Mass of amino acids and modifications - let mass_sequence: f64 = aa_counts.iter().map(|(aa, &count)| amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0) * count as f64).sum(); - let mass_modifications: f64 = modifications.iter().map(|&mod_id| modifications_mz_numerical.get(&mod_id).unwrap_or(&0.0)).sum(); + let mass_sequence: f64 = aa_counts + .iter() + .map(|(aa, &count)| { + amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0) * count as f64 + }) + .sum(); + let mass_modifications: f64 = modifications + .iter() + .map(|&mod_id| modifications_mz_numerical.get(&mod_id).unwrap_or(&0.0)) + .sum(); mass_sequence + mass_modifications + MASS_WATER } @@ -80,7 +90,6 @@ pub fn calculate_peptide_mono_isotopic_mass(peptide_sequence: &PeptideSequence) /// assert_eq!(mass, 936.4188766862999); /// ``` pub fn calculate_peptide_product_ion_mono_isotopic_mass(sequence: &str, kind: FragmentType) -> f64 { - let (sequence, modifications) = find_unimod_patterns(sequence); // Return mz of empty sequence @@ -91,7 +100,8 @@ pub fn calculate_peptide_product_ion_mono_isotopic_mass(sequence: &str, kind: Fr let amino_acid_masses = amino_acid_masses(); // Add up raw amino acid masses and potential modifications - let mass_sequence: f64 = sequence.chars() + let mass_sequence: f64 = sequence + .chars() .map(|aa| amino_acid_masses.get(&aa.to_string()[..]).unwrap_or(&0.0)) .sum(); @@ -171,8 +181,9 @@ pub fn calculate_amino_acid_composition(sequence: &str) -> HashMap } /// calculate the atomic composition of a peptide sequence -pub fn peptide_sequence_to_atomic_composition(peptide_sequence: &PeptideSequence) -> HashMap<&'static str, i32> { - +pub fn peptide_sequence_to_atomic_composition( + peptide_sequence: &PeptideSequence, +) -> HashMap<&'static str, i32> { let token_sequence = unimod_sequence_to_tokens(peptide_sequence.sequence.as_str(), false); let mut collection: HashMap<&'static str, i32> = HashMap::new(); @@ -217,7 +228,6 @@ pub fn peptide_sequence_to_atomic_composition(peptide_sequence: &PeptideSequence /// /// * `Vec<(&str, i32)>` - a vector of tuples representing the atomic composition of the product ion pub fn atomic_product_ion_composition(product_ion: &PeptideProductIon) -> Vec<(&str, i32)> { - let mut composition = peptide_sequence_to_atomic_composition(&product_ion.ion.sequence); match product_ion.kind { @@ -226,31 +236,29 @@ pub fn atomic_product_ion_composition(product_ion: &PeptideProductIon) -> Vec<(& *composition.entry("H").or_insert(0) -= 2; *composition.entry("O").or_insert(0) -= 2; *composition.entry("C").or_insert(0) -= 1; - }, + } FragmentType::B => { // B: peptide_mass - Water *composition.entry("H").or_insert(0) -= 2; *composition.entry("O").or_insert(0) -= 1; - }, + } FragmentType::C => { // C: peptide_mass + NH3 - Water *composition.entry("H").or_insert(0) += 1; *composition.entry("N").or_insert(0) += 1; *composition.entry("O").or_insert(0) -= 1; - }, + } FragmentType::X => { // X: peptide_mass + CO *composition.entry("C").or_insert(0) += 1; // Add 1 for CO *composition.entry("O").or_insert(0) += 1; // Add 1 for CO *composition.entry("H").or_insert(0) -= 2; // Subtract 2 for 2 protons - }, - FragmentType::Y => { - () - }, + } + FragmentType::Y => (), FragmentType::Z => { *composition.entry("H").or_insert(0) -= 3; *composition.entry("N").or_insert(0) -= 1; - }, + } } composition.iter().map(|(k, v)| (*k, *v)).collect() @@ -265,12 +273,25 @@ pub fn atomic_product_ion_composition(product_ion: &PeptideProductIon) -> Vec<(& /// /// * `Vec>` - a vector of vectors of tuples representing the atomic composition of each product ion /// -pub fn fragments_to_composition(product_ions: Vec, num_threads: usize) -> Vec> { - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); +pub fn fragments_to_composition( + product_ions: Vec, + num_threads: usize, +) -> Vec> { + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let result = thread_pool.install(|| { - product_ions.par_iter().map(|ion| atomic_product_ion_composition(ion)).map(|composition| { - composition.iter().map(|(k, v)| (k.to_string(), *v)).collect() - }).collect() + product_ions + .par_iter() + .map(|ion| atomic_product_ion_composition(ion)) + .map(|composition| { + composition + .iter() + .map(|(k, v)| (k.to_string(), *v)) + .collect() + }) + .collect() }); result } @@ -325,7 +346,11 @@ pub fn get_num_protonizable_sites(sequence: &str) -> usize { /// let sequence = "PEPTIDEH"; /// let charge_state_probs = simulate_charge_state_for_sequence(sequence, None, None); /// assert_eq!(charge_state_probs, vec![0.25, 0.5, 0.25, 0.0, 0.0]); -pub fn simulate_charge_state_for_sequence(sequence: &str, max_charge: Option, charged_probability: Option) -> Vec { +pub fn simulate_charge_state_for_sequence( + sequence: &str, + max_charge: Option, + charged_probability: Option, +) -> Vec { let charged_prob = charged_probability.unwrap_or(0.5); let max_charge = max_charge.unwrap_or(5); let num_protonizable_sites = get_num_protonizable_sites(sequence); @@ -363,11 +388,22 @@ pub fn simulate_charge_state_for_sequence(sequence: &str, max_charge: Option, num_threads: usize, max_charge: Option, charged_probability: Option) -> Vec> { - let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); +pub fn simulate_charge_states_for_sequences( + sequences: Vec<&str>, + num_threads: usize, + max_charge: Option, + charged_probability: Option, +) -> Vec> { + let pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); pool.install(|| { - sequences.par_iter() - .map(|sequence| simulate_charge_state_for_sequence(sequence, max_charge, charged_probability)) + sequences + .par_iter() + .map(|sequence| { + simulate_charge_state_for_sequence(sequence, max_charge, charged_probability) + }) .collect() }) -} \ No newline at end of file +} diff --git a/rustdf/Cargo.toml b/rustdf/Cargo.toml index eabad56e..127dc059 100644 --- a/rustdf/Cargo.toml +++ b/rustdf/Cargo.toml @@ -2,22 +2,46 @@ name = "rustdf" version = "0.3.0" edition = "2021" +authors = ["David Teschner "] # Add your name and email +description = "A Rust library for interacting with Bruker TDF formatted Raw Data." +license = "MIT" +repository = "https://github.com/theGreatHerrLebert/rustims" +documentation = "https://docs.rs/rustdf" +readme = "README.md" +keywords = ["dataframe", "sql", "compression", "parallel"] +categories = ["data-structures", "science", "utilities"] +rust-version = "1.84" [lib] path = "src/lib.rs" -[profile.release] -debug = true - [dependencies] -clap = { version = "4.5.24", features = ["derive"] } +# Command-line argument parsing +clap = { version = "4.5.26", features = ["derive"] } +# Dynamic library loading libloading = "0.8.6" +# SQLite with bundled binaries rusqlite = { version = "0.32.1", features = ["bundled"] } +# Compression libraries lzf = "1.0.0" zstd = "0.13.2" +# Byte order utilities byteorder = "1.5.0" -mscore = {path = "../mscore"} +# Core library for computational proteomics +mscore = { version = "0.2.0" } +# Parallelism rayon = "1.10.0" +# Serialization serde = { version = "1.0.217", features = ["derive"] } serde_json = "1.0.135" +# Random number generation rand = "0.8.5" + +[profile.release] +debug = true +overflow-checks = true +lto = "thin" +panic = "abort" + +[package.metadata.docs.rs] +features = ["all"] diff --git a/rustdf/src/data/acquisition.rs b/rustdf/src/data/acquisition.rs index 20d9086e..28b5305a 100644 --- a/rustdf/src/data/acquisition.rs +++ b/rustdf/src/data/acquisition.rs @@ -59,4 +59,4 @@ impl From<&str> for AcquisitionMode { _ => AcquisitionMode::Unknown, } } -} \ No newline at end of file +} diff --git a/rustdf/src/data/dataset.rs b/rustdf/src/data/dataset.rs index f7cdc4cf..f34a82e5 100644 --- a/rustdf/src/data/dataset.rs +++ b/rustdf/src/data/dataset.rs @@ -1,31 +1,55 @@ -use mscore::timstof::frame::{RawTimsFrame, TimsFrame}; -use mscore::timstof::slice::TimsSlice; use crate::data::acquisition::AcquisitionMode; use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader}; use crate::data::meta::{read_global_meta_sql, read_meta_data_sql}; +use mscore::timstof::frame::{RawTimsFrame, TimsFrame}; +use mscore::timstof::slice::TimsSlice; pub struct TimsDataset { pub loader: TimsDataLoader, } impl TimsDataset { - pub fn new(bruker_lib_path: &str, data_path: &str, in_memory: bool, use_bruker_sdk: bool) -> Self { - + pub fn new( + bruker_lib_path: &str, + data_path: &str, + in_memory: bool, + use_bruker_sdk: bool, + ) -> Self { // TODO: error handling let global_meta_data = read_global_meta_sql(data_path).unwrap(); let meta_data = read_meta_data_sql(data_path).unwrap(); - + let scan_max_index = meta_data.iter().map(|x| x.num_scans).max().unwrap() as u32; let im_lower = global_meta_data.one_over_k0_range_lower; let im_upper = global_meta_data.one_over_k0_range_upper; - + let tof_max_index = global_meta_data.tof_max_index; let mz_lower = global_meta_data.mz_acquisition_range_lower; let mz_upper = global_meta_data.mz_acquisition_range_upper; let loader = match in_memory { - true => TimsDataLoader::new_in_memory(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper), - false => TimsDataLoader::new_lazy(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper), + true => TimsDataLoader::new_in_memory( + bruker_lib_path, + data_path, + use_bruker_sdk, + scan_max_index, + im_lower, + im_upper, + tof_max_index, + mz_lower, + mz_upper, + ), + false => TimsDataLoader::new_lazy( + bruker_lib_path, + data_path, + use_bruker_sdk, + scan_max_index, + im_lower, + im_upper, + tof_max_index, + mz_lower, + mz_upper, + ), }; TimsDataset { loader } @@ -61,18 +85,30 @@ impl TimsData for TimsDataset { impl IndexConverter for TimsDataset { fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec) -> Vec { - self.loader.get_index_converter().tof_to_mz(frame_id, tof_values) + self.loader + .get_index_converter() + .tof_to_mz(frame_id, tof_values) } // convert m/z values to TOF values given a valid data handle and frame id fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec) -> Vec { - self.loader.get_index_converter().mz_to_tof(frame_id, mz_values) + self.loader + .get_index_converter() + .mz_to_tof(frame_id, mz_values) } // convert inverse mobility values to scan values given a valid data handle and frame id fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec) -> Vec { - self.loader.get_index_converter().scan_to_inverse_mobility(frame_id, scan_values) + self.loader + .get_index_converter() + .scan_to_inverse_mobility(frame_id, scan_values) } // convert scan values to inverse mobility values given a valid data handle and frame id - fn inverse_mobility_to_scan(&self, frame_id: u32, inverse_mobility_values: &Vec) -> Vec { - self.loader.get_index_converter().inverse_mobility_to_scan(frame_id, inverse_mobility_values) + fn inverse_mobility_to_scan( + &self, + frame_id: u32, + inverse_mobility_values: &Vec, + ) -> Vec { + self.loader + .get_index_converter() + .inverse_mobility_to_scan(frame_id, inverse_mobility_values) } -} \ No newline at end of file +} diff --git a/rustdf/src/data/dda.rs b/rustdf/src/data/dda.rs index 714fe9c4..75a31e12 100644 --- a/rustdf/src/data/dda.rs +++ b/rustdf/src/data/dda.rs @@ -1,11 +1,14 @@ -use std::collections::BTreeMap; +use crate::data::acquisition::AcquisitionMode; +use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader}; +use crate::data::meta::{ + read_dda_precursor_meta, read_global_meta_sql, read_meta_data_sql, read_pasef_frame_ms_ms_info, + DDAPrecursor, PasefMsMsMeta, +}; use mscore::timstof::frame::{RawTimsFrame, TimsFrame}; use mscore::timstof::slice::TimsSlice; use rayon::prelude::*; use rayon::ThreadPoolBuilder; -use crate::data::acquisition::AcquisitionMode; -use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader}; -use crate::data::meta::{PasefMsMsMeta, read_dda_precursor_meta, read_pasef_frame_ms_ms_info, read_global_meta_sql, read_meta_data_sql, DDAPrecursor}; +use std::collections::BTreeMap; #[derive(Clone)] pub struct PASEFDDAFragment { @@ -20,9 +23,12 @@ pub struct TimsDatasetDDA { } impl TimsDatasetDDA { - - pub fn new(bruker_lib_path: &str, data_path: &str, in_memory: bool, use_bruker_sdk: bool) -> Self { - + pub fn new( + bruker_lib_path: &str, + data_path: &str, + in_memory: bool, + use_bruker_sdk: bool, + ) -> Self { // TODO: error handling let global_meta_data = read_global_meta_sql(data_path).unwrap(); let meta_data = read_meta_data_sql(data_path).unwrap(); @@ -34,72 +40,113 @@ impl TimsDatasetDDA { let tof_max_index = global_meta_data.tof_max_index; let mz_lower = global_meta_data.mz_acquisition_range_lower; let mz_upper = global_meta_data.mz_acquisition_range_upper; - + let loader = match in_memory { - true => TimsDataLoader::new_in_memory(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper), - false => TimsDataLoader::new_lazy(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper), + true => TimsDataLoader::new_in_memory( + bruker_lib_path, + data_path, + use_bruker_sdk, + scan_max_index, + im_lower, + im_upper, + tof_max_index, + mz_lower, + mz_upper, + ), + false => TimsDataLoader::new_lazy( + bruker_lib_path, + data_path, + use_bruker_sdk, + scan_max_index, + im_lower, + im_upper, + tof_max_index, + mz_lower, + mz_upper, + ), }; TimsDatasetDDA { loader } } /* - #[derive(Debug, Clone)] -pub struct DDAPrecursor { - pub frame_id: i64, - pub precursor_id: i64, - pub mono_mz: f64, - pub highest_intensity_mz: f64, - pub average_mz: f64, - pub charge: Option, - pub inverse_ion_mobility: f64, - pub collision_energy: f64, - pub precuror_total_intensity: f64, - pub isolation_mz: f64, - pub isolation_width: f64, -} - */ - - + #[derive(Debug, Clone)] + pub struct DDAPrecursor { + pub frame_id: i64, + pub precursor_id: i64, + pub mono_mz: f64, + pub highest_intensity_mz: f64, + pub average_mz: f64, + pub charge: Option, + pub inverse_ion_mobility: f64, + pub collision_energy: f64, + pub precuror_total_intensity: f64, + pub isolation_mz: f64, + pub isolation_width: f64, + } + */ pub fn get_selected_precursors(&self) -> Vec { let precursor_meta = read_dda_precursor_meta(&self.loader.get_data_path()).unwrap(); let pasef_meta = self.get_pasef_frame_ms_ms_info(); - let precursor_id_to_pasef_meta: BTreeMap = pasef_meta.iter().map(|x| (x.precursor_id as i64, x)).collect(); + let precursor_id_to_pasef_meta: BTreeMap = pasef_meta + .iter() + .map(|x| (x.precursor_id as i64, x)) + .collect(); // go over all precursors and get the precursor meta data - let result: Vec<_> = precursor_meta.iter().map(|precursor| { - let pasef_meta = precursor_id_to_pasef_meta.get(&precursor.precursor_id).unwrap(); - DDAPrecursor { - frame_id: precursor.precursor_frame_id, - precursor_id: precursor.precursor_id, - mono_mz: precursor.precursor_mz_monoisotopic, - highest_intensity_mz: precursor.precursor_mz_highest_intensity, - average_mz: precursor.precursor_mz_average, - charge: precursor.precursor_charge, - inverse_ion_mobility: self.scan_to_inverse_mobility(precursor.precursor_frame_id as u32, &vec![precursor.precursor_average_scan_number as u32])[0], - collision_energy: pasef_meta.collision_energy, - precuror_total_intensity: precursor.precursor_total_intensity, - isolation_mz: pasef_meta.isolation_mz, - isolation_width: pasef_meta.isolation_width, - } - }).collect(); + let result: Vec<_> = precursor_meta + .iter() + .map(|precursor| { + let pasef_meta = precursor_id_to_pasef_meta + .get(&precursor.precursor_id) + .unwrap(); + DDAPrecursor { + frame_id: precursor.precursor_frame_id, + precursor_id: precursor.precursor_id, + mono_mz: precursor.precursor_mz_monoisotopic, + highest_intensity_mz: precursor.precursor_mz_highest_intensity, + average_mz: precursor.precursor_mz_average, + charge: precursor.precursor_charge, + inverse_ion_mobility: self.scan_to_inverse_mobility( + precursor.precursor_frame_id as u32, + &vec![precursor.precursor_average_scan_number as u32], + )[0], + collision_energy: pasef_meta.collision_energy, + precuror_total_intensity: precursor.precursor_total_intensity, + isolation_mz: pasef_meta.isolation_mz, + isolation_width: pasef_meta.isolation_width, + } + }) + .collect(); result } - pub fn get_precursor_frames(&self, min_intensity: f64, max_num_peaks: usize, num_threads: usize) -> Vec { + pub fn get_precursor_frames( + &self, + min_intensity: f64, + max_num_peaks: usize, + num_threads: usize, + ) -> Vec { // get all precursor frames let meta_data = read_meta_data_sql(&self.loader.get_data_path()).unwrap(); // get the precursor frames let precursor_frames = meta_data.iter().filter(|x| x.ms_ms_type == 0); - let tims_silce = self.get_slice(precursor_frames.map(|x| x.id as u32).collect(), num_threads); + let tims_silce = + self.get_slice(precursor_frames.map(|x| x.id as u32).collect(), num_threads); - let result: Vec<_> = tims_silce.frames.par_iter().map(|frame| { - frame.filter_ranged(0.0, 2000.0, 0, 2000, 0.0, 5.0, min_intensity, 1e9).top_n(max_num_peaks) - }).collect(); + let result: Vec<_> = tims_silce + .frames + .par_iter() + .map(|frame| { + frame + .filter_ranged(0.0, 2000.0, 0, 2000, 0.0, 5.0, min_intensity, 1e9) + .top_n(max_num_peaks) + }) + .collect(); result } @@ -113,38 +160,42 @@ pub struct DDAPrecursor { // extract fragment spectra information let pasef_info = self.get_pasef_frame_ms_ms_info(); - let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + let pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let filtered_frames = pool.install(|| { - - let result: Vec<_> = pasef_info.par_iter().map(|pasef_info| { - - // get the frame - let frame = self.loader.get_frame(pasef_info.frame_id as u32); - - // get five percent of the scan range - let scan_margin = (pasef_info.scan_num_end - pasef_info.scan_num_begin) / 20; - - // get the fragment spectrum by scan range - let filtered_frame = frame.filter_ranged( - 0.0, - 2000.0, - (pasef_info.scan_num_begin - scan_margin) as i32, - (pasef_info.scan_num_end + scan_margin) as i32, - 0.0, - 5.0, - 0.0, - 1e9, - ); - - PASEFDDAFragment { - frame_id: pasef_info.frame_id as u32, - precursor_id: pasef_info.precursor_id as u32, - collision_energy: pasef_info.collision_energy, - // flatten the spectrum - selected_fragment: filtered_frame, - } - }).collect(); + let result: Vec<_> = pasef_info + .par_iter() + .map(|pasef_info| { + // get the frame + let frame = self.loader.get_frame(pasef_info.frame_id as u32); + + // get five percent of the scan range + let scan_margin = (pasef_info.scan_num_end - pasef_info.scan_num_begin) / 20; + + // get the fragment spectrum by scan range + let filtered_frame = frame.filter_ranged( + 0.0, + 2000.0, + (pasef_info.scan_num_begin - scan_margin) as i32, + (pasef_info.scan_num_end + scan_margin) as i32, + 0.0, + 5.0, + 0.0, + 1e9, + ); + + PASEFDDAFragment { + frame_id: pasef_info.frame_id as u32, + precursor_id: pasef_info.precursor_id as u32, + collision_energy: pasef_info.collision_energy, + // flatten the spectrum + selected_fragment: filtered_frame, + } + }) + .collect(); result }); @@ -181,18 +232,30 @@ impl TimsData for TimsDatasetDDA { impl IndexConverter for TimsDatasetDDA { fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec) -> Vec { - self.loader.get_index_converter().tof_to_mz(frame_id, tof_values) + self.loader + .get_index_converter() + .tof_to_mz(frame_id, tof_values) } fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec) -> Vec { - self.loader.get_index_converter().mz_to_tof(frame_id, mz_values) + self.loader + .get_index_converter() + .mz_to_tof(frame_id, mz_values) } fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec) -> Vec { - self.loader.get_index_converter().scan_to_inverse_mobility(frame_id, scan_values) + self.loader + .get_index_converter() + .scan_to_inverse_mobility(frame_id, scan_values) } - fn inverse_mobility_to_scan(&self, frame_id: u32, inverse_mobility_values: &Vec) -> Vec { - self.loader.get_index_converter().inverse_mobility_to_scan(frame_id, inverse_mobility_values) + fn inverse_mobility_to_scan( + &self, + frame_id: u32, + inverse_mobility_values: &Vec, + ) -> Vec { + self.loader + .get_index_converter() + .inverse_mobility_to_scan(frame_id, inverse_mobility_values) } -} \ No newline at end of file +} diff --git a/rustdf/src/data/dia.rs b/rustdf/src/data/dia.rs index 617e6da4..632bc7bf 100644 --- a/rustdf/src/data/dia.rs +++ b/rustdf/src/data/dia.rs @@ -1,21 +1,28 @@ -use rand::prelude::IteratorRandom; -use mscore::timstof::frame::{RawTimsFrame, TimsFrame}; -use mscore::timstof::slice::TimsSlice; use crate::data::acquisition::AcquisitionMode; use crate::data::handle::{IndexConverter, TimsData, TimsDataLoader}; -use crate::data::meta::{read_dia_ms_ms_info, read_dia_ms_ms_windows, read_global_meta_sql, read_meta_data_sql, DiaMsMisInfo, DiaMsMsWindow, FrameMeta, GlobalMetaData}; +use crate::data::meta::{ + read_dia_ms_ms_info, read_dia_ms_ms_windows, read_global_meta_sql, read_meta_data_sql, + DiaMsMisInfo, DiaMsMsWindow, FrameMeta, GlobalMetaData, +}; +use mscore::timstof::frame::{RawTimsFrame, TimsFrame}; +use mscore::timstof::slice::TimsSlice; +use rand::prelude::IteratorRandom; pub struct TimsDatasetDIA { pub loader: TimsDataLoader, pub global_meta_data: GlobalMetaData, pub meta_data: Vec, pub dia_ms_mis_info: Vec, - pub dia_ms_ms_windows: Vec + pub dia_ms_ms_windows: Vec, } impl TimsDatasetDIA { - pub fn new(bruker_lib_path: &str, data_path: &str, in_memory: bool, use_bruker_sdk: bool) -> Self { - + pub fn new( + bruker_lib_path: &str, + data_path: &str, + in_memory: bool, + use_bruker_sdk: bool, + ) -> Self { // TODO: error handling let global_meta_data = read_global_meta_sql(data_path).unwrap(); let meta_data = read_meta_data_sql(data_path).unwrap(); @@ -29,63 +36,116 @@ impl TimsDatasetDIA { let tof_max_index = global_meta_data.tof_max_index; let mz_lower = global_meta_data.mz_acquisition_range_lower; let mz_upper = global_meta_data.mz_acquisition_range_upper; - - let loader = match in_memory { - true => TimsDataLoader::new_in_memory(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper), - false => TimsDataLoader::new_lazy(bruker_lib_path, data_path, use_bruker_sdk, scan_max_index, im_lower, im_upper, tof_max_index, mz_lower, mz_upper), + + let loader = match in_memory { + true => TimsDataLoader::new_in_memory( + bruker_lib_path, + data_path, + use_bruker_sdk, + scan_max_index, + im_lower, + im_upper, + tof_max_index, + mz_lower, + mz_upper, + ), + false => TimsDataLoader::new_lazy( + bruker_lib_path, + data_path, + use_bruker_sdk, + scan_max_index, + im_lower, + im_upper, + tof_max_index, + mz_lower, + mz_upper, + ), }; - - TimsDatasetDIA { loader, global_meta_data, meta_data, dia_ms_mis_info, dia_ms_ms_windows } + + TimsDatasetDIA { + loader, + global_meta_data, + meta_data, + dia_ms_mis_info, + dia_ms_ms_windows, + } } - - pub fn sample_precursor_signal(&self, num_frames: usize, max_intensity: f64, take_probability: f64) -> TimsFrame { + + pub fn sample_precursor_signal( + &self, + num_frames: usize, + max_intensity: f64, + take_probability: f64, + ) -> TimsFrame { // get all precursor frames let precursor_frames = self.meta_data.iter().filter(|x| x.ms_ms_type == 0); - + // randomly sample num_frames let mut rng = rand::thread_rng(); let mut sampled_frames: Vec = Vec::new(); - + // go through each frame and sample the data for frame in precursor_frames.choose_multiple(&mut rng, num_frames) { let frame_id = frame.id; - let frame_data = self.loader.get_frame(frame_id as u32).filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity).generate_random_sample(take_probability); + let frame_data = self + .loader + .get_frame(frame_id as u32) + .filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity) + .generate_random_sample(take_probability); sampled_frames.push(frame_data); } // get the first frame let mut sampled_frame = sampled_frames.remove(0); - + // sum all the other frames to the first frame for frame in sampled_frames { sampled_frame = sampled_frame + frame; } - + sampled_frame } - - pub fn sample_fragment_signal(&self, num_frames: usize, window_group: u32, max_intensity: f64, take_probability: f64) -> TimsFrame { + + pub fn sample_fragment_signal( + &self, + num_frames: usize, + window_group: u32, + max_intensity: f64, + take_probability: f64, + ) -> TimsFrame { // get all fragment frames, filter by window_group - let fragment_frames: Vec = self.dia_ms_mis_info.iter().filter(|x| x.window_group == window_group).map(|x| x.frame_id).collect(); - + let fragment_frames: Vec = self + .dia_ms_mis_info + .iter() + .filter(|x| x.window_group == window_group) + .map(|x| x.frame_id) + .collect(); + // randomly sample num_frames let mut rng = rand::thread_rng(); let mut sampled_frames: Vec = Vec::new(); - + // go through each frame and sample the data - for frame_id in fragment_frames.into_iter().choose_multiple(&mut rng, num_frames) { - let frame_data = self.loader.get_frame(frame_id).filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity).generate_random_sample(take_probability); + for frame_id in fragment_frames + .into_iter() + .choose_multiple(&mut rng, num_frames) + { + let frame_data = self + .loader + .get_frame(frame_id) + .filter_ranged(0.0, 2000.0, 0, 1000, 0.0, 5.0, 1.0, max_intensity) + .generate_random_sample(take_probability); sampled_frames.push(frame_data); } // get the first frame let mut sampled_frame = sampled_frames.remove(0); - + // sum all the other frames to the first frame for frame in sampled_frames { sampled_frame = sampled_frame + frame; } - + sampled_frame } } @@ -117,18 +177,30 @@ impl TimsData for TimsDatasetDIA { impl IndexConverter for TimsDatasetDIA { fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec) -> Vec { - self.loader.get_index_converter().tof_to_mz(frame_id, tof_values) + self.loader + .get_index_converter() + .tof_to_mz(frame_id, tof_values) } fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec) -> Vec { - self.loader.get_index_converter().mz_to_tof(frame_id, mz_values) + self.loader + .get_index_converter() + .mz_to_tof(frame_id, mz_values) } fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec) -> Vec { - self.loader.get_index_converter().scan_to_inverse_mobility(frame_id, scan_values) + self.loader + .get_index_converter() + .scan_to_inverse_mobility(frame_id, scan_values) } - fn inverse_mobility_to_scan(&self, frame_id: u32, inverse_mobility_values: &Vec) -> Vec { - self.loader.get_index_converter().inverse_mobility_to_scan(frame_id, inverse_mobility_values) + fn inverse_mobility_to_scan( + &self, + frame_id: u32, + inverse_mobility_values: &Vec, + ) -> Vec { + self.loader + .get_index_converter() + .inverse_mobility_to_scan(frame_id, inverse_mobility_values) } -} \ No newline at end of file +} diff --git a/rustdf/src/data/handle.rs b/rustdf/src/data/handle.rs index f266167f..33d3f6df 100644 --- a/rustdf/src/data/handle.rs +++ b/rustdf/src/data/handle.rs @@ -1,17 +1,19 @@ -use std::fs::File; -use std::io::{Read, Cursor, SeekFrom, Seek}; -use std::path::PathBuf; +use crate::data::meta::{read_global_meta_sql, read_meta_data_sql, FrameMeta, GlobalMetaData}; +use crate::data::raw::BrukerTimsDataLibrary; +use crate::data::utility::{ + flatten_scan_values, parse_decompressed_bruker_binary_data, zstd_decompress, +}; use byteorder::{LittleEndian, ReadBytesExt}; use mscore::data::spectrum::MsType; use mscore::timstof::frame::{ImsFrame, RawTimsFrame, TimsFrame}; use mscore::timstof::slice::TimsSlice; -use crate::data::meta::{FrameMeta, GlobalMetaData, read_global_meta_sql, read_meta_data_sql}; -use crate::data::raw::BrukerTimsDataLibrary; -use crate::data::utility::{flatten_scan_values, parse_decompressed_bruker_binary_data, zstd_decompress}; +use std::fs::File; +use std::io::{Cursor, Read, Seek, SeekFrom}; +use std::path::PathBuf; +use crate::data::acquisition::AcquisitionMode; use rayon::prelude::*; use rayon::ThreadPoolBuilder; -use crate::data::acquisition::AcquisitionMode; use std::error::Error; @@ -31,9 +33,8 @@ fn parse_decompressed_bruker_binary_type1( ) -> usize { // Interpret decompressed_bytes as a slice of i32 let int_count = decompressed_bytes.len() / 4; - let buffer = unsafe { - std::slice::from_raw_parts(decompressed_bytes.as_ptr() as *const i32, int_count) - }; + let buffer = + unsafe { std::slice::from_raw_parts(decompressed_bytes.as_ptr() as *const i32, int_count) }; let mut tof_index = 0i32; let mut previous_was_intensity = true; @@ -89,7 +90,10 @@ impl TimsRawDataLayout { } // get the tims offset values - let tims_offset_values = frame_meta_data.iter().map(|x| x.tims_id).collect::>(); + let tims_offset_values = frame_meta_data + .iter() + .map(|x| x.tims_id) + .collect::>(); // get the acquisition mode let acquisition_mode = match frame_meta_data[0].scan_mode { @@ -105,7 +109,7 @@ impl TimsRawDataLayout { max_scan_count, frame_id_ptr, tims_offset_values, - acquisition_mode + acquisition_mode, } } } @@ -123,7 +127,11 @@ pub trait IndexConverter { fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec) -> Vec; fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec) -> Vec; fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec) -> Vec; - fn inverse_mobility_to_scan(&self, frame_id: u32, inverse_mobility_values: &Vec) -> Vec; + fn inverse_mobility_to_scan( + &self, + frame_id: u32, + inverse_mobility_values: &Vec, + ) -> Vec; } pub struct BrukerLibTimsDataConverter { @@ -133,9 +141,7 @@ pub struct BrukerLibTimsDataConverter { impl BrukerLibTimsDataConverter { pub fn new(bruker_lib_path: &str, data_path: &str) -> Self { let bruker_lib = BrukerTimsDataLibrary::new(bruker_lib_path, data_path).unwrap(); - BrukerLibTimsDataConverter { - bruker_lib, - } + BrukerLibTimsDataConverter { bruker_lib } } } impl IndexConverter for BrukerLibTimsDataConverter { @@ -159,9 +165,11 @@ impl IndexConverter for BrukerLibTimsDataConverter { } let mut mz_values: Vec = Vec::new(); - mz_values.resize(tof.len(), 0.0); + mz_values.resize(tof.len(), 0.0); - self.bruker_lib.tims_index_to_mz(frame_id, &dbl_tofs, &mut mz_values).expect("Bruker binary call failed at: tims_index_to_mz;"); + self.bruker_lib + .tims_index_to_mz(frame_id, &dbl_tofs, &mut mz_values) + .expect("Bruker binary call failed at: tims_index_to_mz;"); mz_values } @@ -175,9 +183,11 @@ impl IndexConverter for BrukerLibTimsDataConverter { } let mut tof_values: Vec = Vec::new(); - tof_values.resize(mz.len(), 0.0); + tof_values.resize(mz.len(), 0.0); - self.bruker_lib.tims_mz_to_index(frame_id, &dbl_mz, &mut tof_values).expect("Bruker binary call failed at: tims_mz_to_index;"); + self.bruker_lib + .tims_mz_to_index(frame_id, &dbl_mz, &mut tof_values) + .expect("Bruker binary call failed at: tims_mz_to_index;"); tof_values.iter().map(|&x| x.round() as u32).collect() } @@ -204,7 +214,9 @@ impl IndexConverter for BrukerLibTimsDataConverter { let mut inv_mob: Vec = Vec::new(); inv_mob.resize(scan.len(), 0.0); - self.bruker_lib.tims_scan_to_inv_mob(frame_id, &dbl_scans, &mut inv_mob).expect("Bruker binary call failed at: tims_scannum_to_oneoverk0;"); + self.bruker_lib + .tims_scan_to_inv_mob(frame_id, &dbl_scans, &mut inv_mob) + .expect("Bruker binary call failed at: tims_scannum_to_oneoverk0;"); inv_mob } @@ -229,9 +241,11 @@ impl IndexConverter for BrukerLibTimsDataConverter { } let mut scan_values: Vec = Vec::new(); - scan_values.resize(inv_mob.len(), 0.0); + scan_values.resize(inv_mob.len(), 0.0); - self.bruker_lib.inv_mob_to_tims_scan(frame_id, &dbl_inv_mob, &mut scan_values).expect("Bruker binary call failed at: tims_oneoverk0_to_scannum;"); + self.bruker_lib + .inv_mob_to_tims_scan(frame_id, &dbl_inv_mob, &mut scan_values) + .expect("Bruker binary call failed at: tims_oneoverk0_to_scannum;"); scan_values.iter().map(|&x| x.round() as u32).collect() } @@ -239,40 +253,51 @@ impl IndexConverter for BrukerLibTimsDataConverter { pub enum TimsIndexConverter { Simple(SimpleIndexConverter), - BrukerLib(BrukerLibTimsDataConverter) + BrukerLib(BrukerLibTimsDataConverter), } impl IndexConverter for TimsIndexConverter { fn tof_to_mz(&self, frame_id: u32, tof_values: &Vec) -> Vec { match self { TimsIndexConverter::Simple(converter) => converter.tof_to_mz(frame_id, tof_values), - TimsIndexConverter::BrukerLib(converter) => converter.tof_to_mz(frame_id, tof_values) + TimsIndexConverter::BrukerLib(converter) => converter.tof_to_mz(frame_id, tof_values), } } fn mz_to_tof(&self, frame_id: u32, mz_values: &Vec) -> Vec { match self { TimsIndexConverter::Simple(converter) => converter.mz_to_tof(frame_id, mz_values), - TimsIndexConverter::BrukerLib(converter) => converter.mz_to_tof(frame_id, mz_values) + TimsIndexConverter::BrukerLib(converter) => converter.mz_to_tof(frame_id, mz_values), } } fn scan_to_inverse_mobility(&self, frame_id: u32, scan_values: &Vec) -> Vec { match self { - TimsIndexConverter::Simple(converter) => converter.scan_to_inverse_mobility(frame_id, scan_values), - TimsIndexConverter::BrukerLib(converter) => converter.scan_to_inverse_mobility(frame_id, scan_values) + TimsIndexConverter::Simple(converter) => { + converter.scan_to_inverse_mobility(frame_id, scan_values) + } + TimsIndexConverter::BrukerLib(converter) => { + converter.scan_to_inverse_mobility(frame_id, scan_values) + } } } - fn inverse_mobility_to_scan(&self, frame_id: u32, inverse_mobility_values: &Vec) -> Vec { + fn inverse_mobility_to_scan( + &self, + frame_id: u32, + inverse_mobility_values: &Vec, + ) -> Vec { match self { - TimsIndexConverter::Simple(converter) => converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values), - TimsIndexConverter::BrukerLib(converter) => converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values) + TimsIndexConverter::Simple(converter) => { + converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values) + } + TimsIndexConverter::BrukerLib(converter) => { + converter.inverse_mobility_to_scan(frame_id, inverse_mobility_values) + } } } } - pub struct TimsLazyLoder { pub raw_data_layout: TimsRawDataLayout, pub index_converter: TimsIndexConverter, @@ -291,7 +316,13 @@ impl TimsData for TimsLazyLoder { ms_type: MsType::Unknown, scan: Vec::new(), tof: Vec::new(), - ims_frame: ImsFrame { retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time, mobility: Vec::new(), mz: Vec::new(), intensity: Vec::new() } + ims_frame: ImsFrame { + retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize] + .time, + mobility: Vec::new(), + mz: Vec::new(), + intensity: Vec::new(), + }, }; } @@ -311,7 +342,8 @@ impl TimsData for TimsLazyLoder { match self.raw_data_layout.global_meta_data.tims_compression_type { 1 => { - let scan_count = self.raw_data_layout.frame_meta_data[frame_index].num_scans as usize; + let scan_count = + self.raw_data_layout.frame_meta_data[frame_index].num_scans as usize; let num_peaks = num_peaks as usize; let compression_offset = 8 + (scan_count + 1) * 4; @@ -349,8 +381,9 @@ impl TimsData for TimsLazyLoder { } let max_output_size = num_peaks * 8; - let decompressed_bytes = lzf_decompress(&compressed_data[start..end], max_output_size) - .expect("LZF decompression failed."); + let decompressed_bytes = + lzf_decompress(&compressed_data[start..end], max_output_size) + .expect("LZF decompression failed."); scan_start += parse_decompressed_bruker_binary_type1( &decompressed_bytes, @@ -358,7 +391,7 @@ impl TimsData for TimsLazyLoder { &mut tof_indices_, &mut intensities_, scan_start, - scan_index + scan_index, ); } @@ -379,7 +412,9 @@ impl TimsData for TimsLazyLoder { let tof_i32 = tof_indices_.iter().map(|&x| x as i32).collect::>(); let mz = self.index_converter.tof_to_mz(frame_id, &tof_indices_); - let inv_mobility = self.index_converter.scan_to_inverse_mobility(frame_id, &scan); + let inv_mobility = self + .index_converter + .scan_to_inverse_mobility(frame_id, &scan); let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type; let ms_type = match ms_type_raw { @@ -398,10 +433,10 @@ impl TimsData for TimsLazyLoder { retention_time: self.raw_data_layout.frame_meta_data[frame_index].time, mobility: inv_mobility, mz, - intensity: intensity_dbl - } + intensity: intensity_dbl, + }, } - }, + } // Existing handling of Type 2 2 => { @@ -410,13 +445,16 @@ impl TimsData for TimsLazyLoder { let decompressed_bytes = zstd_decompress(&compressed_data).unwrap(); - let (scan, tof, intensity) = parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap(); + let (scan, tof, intensity) = + parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap(); let intensity_dbl = intensity.iter().map(|&x| x as f64).collect(); let tof_i32 = tof.iter().map(|&x| x as i32).collect(); let scan = flatten_scan_values(&scan, true); let mz = self.index_converter.tof_to_mz(frame_id, &tof); - let inv_mobility = self.index_converter.scan_to_inverse_mobility(frame_id, &scan); + let inv_mobility = self + .index_converter + .scan_to_inverse_mobility(frame_id, &scan); let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type; @@ -436,10 +474,10 @@ impl TimsData for TimsLazyLoder { retention_time: self.raw_data_layout.frame_meta_data[frame_index].time, mobility: inv_mobility, mz, - intensity: intensity_dbl - } + intensity: intensity_dbl, + }, } - }, + } _ => { panic!("TimsCompressionType is not 1 or 2.") @@ -448,7 +486,6 @@ impl TimsData for TimsLazyLoder { } fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame { - let frame_index = (frame_id - 1) as usize; let offset = self.raw_data_layout.tims_offset_values[frame_index] as u64; @@ -466,7 +503,6 @@ impl TimsData for TimsLazyLoder { }; } - let mut file_path = PathBuf::from(&self.raw_data_layout.raw_data_path); file_path.push("analysis.tdf_bin"); let mut infile = File::open(&file_path).unwrap(); @@ -482,17 +518,17 @@ impl TimsData for TimsLazyLoder { match self.raw_data_layout.global_meta_data.tims_compression_type { _ if self.raw_data_layout.global_meta_data.tims_compression_type == 1 => { panic!("Decompression Type1 not implemented."); - }, + } // Extract from ZSTD compressed binary _ if self.raw_data_layout.global_meta_data.tims_compression_type == 2 => { - let mut compressed_data = vec![0u8; bin_size as usize - 8]; infile.read_exact(&mut compressed_data).unwrap(); let decompressed_bytes = zstd_decompress(&compressed_data).unwrap(); - let (scan, tof, intensity) = parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap(); + let (scan, tof, intensity) = + parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap(); let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type; @@ -505,7 +541,8 @@ impl TimsData for TimsLazyLoder { let frame = RawTimsFrame { frame_id: frame_id as i32, - retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize].time, + retention_time: self.raw_data_layout.frame_meta_data[(frame_id - 1) as usize] + .time, ms_type, scan, tof, @@ -513,7 +550,7 @@ impl TimsData for TimsLazyLoder { }; return frame; - }, + } // Error on unknown compression algorithm _ => { @@ -523,9 +560,7 @@ impl TimsData for TimsLazyLoder { } fn get_slice(&self, frame_ids: Vec, _num_threads: usize) -> TimsSlice { - let result: Vec = frame_ids - .into_iter() - .map(|f| { self.get_frame(f) }).collect(); + let result: Vec = frame_ids.into_iter().map(|f| self.get_frame(f)).collect(); TimsSlice { frames: result } } @@ -546,12 +581,11 @@ impl TimsData for TimsLazyLoder { pub struct TimsInMemoryLoader { pub raw_data_layout: TimsRawDataLayout, pub index_converter: TimsIndexConverter, - compressed_data: Vec + compressed_data: Vec, } impl TimsData for TimsInMemoryLoader { fn get_frame(&self, frame_id: u32) -> TimsFrame { - let raw_frame = self.get_raw_frame(frame_id); let raw_frame = match raw_frame.ms_type { @@ -568,7 +602,9 @@ impl TimsData for TimsInMemoryLoader { let scan = flatten_scan_values(&raw_frame.scan, true); let mz = self.index_converter.tof_to_mz(frame_id, &raw_frame.tof); - let inverse_mobility = self.index_converter.scan_to_inverse_mobility(frame_id, &scan); + let inverse_mobility = self + .index_converter + .scan_to_inverse_mobility(frame_id, &scan); let ims_frame = ImsFrame { retention_time: raw_frame.retention_time, @@ -591,14 +627,17 @@ impl TimsData for TimsInMemoryLoader { let offset = self.raw_data_layout.tims_offset_values[frame_index] as usize; let bin_size_offset = offset + 4; // Assuming the size is stored immediately before the frame data - let bin_size = Cursor::new(&self.compressed_data[offset..bin_size_offset]).read_i32::().unwrap(); + let bin_size = Cursor::new(&self.compressed_data[offset..bin_size_offset]) + .read_i32::() + .unwrap(); let data_offset = bin_size_offset + 4; // Adjust based on actual structure let frame_data = &self.compressed_data[data_offset..data_offset + bin_size as usize - 8]; let decompressed_bytes = zstd_decompress(&frame_data).unwrap(); - let (scan, tof, intensity) = parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap(); + let (scan, tof, intensity) = + parse_decompressed_bruker_binary_data(&decompressed_bytes).unwrap(); let ms_type_raw = self.raw_data_layout.frame_meta_data[frame_index].ms_ms_type; @@ -622,16 +661,18 @@ impl TimsData for TimsInMemoryLoader { } fn get_slice(&self, frame_ids: Vec, num_threads: usize) -> TimsSlice { - let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + let pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let frames = pool.install(|| { - frame_ids.par_iter().map(|&frame_id| { - self.get_frame(frame_id) - }).collect() + frame_ids + .par_iter() + .map(|&frame_id| self.get_frame(frame_id)) + .collect() }); - TimsSlice { - frames - } + TimsSlice { frames } } fn get_acquisition_mode(&self) -> AcquisitionMode { @@ -649,30 +690,70 @@ impl TimsData for TimsInMemoryLoader { pub enum TimsDataLoader { InMemory(TimsInMemoryLoader), - Lazy(TimsLazyLoder) + Lazy(TimsLazyLoder), } impl TimsDataLoader { - pub fn new_lazy(bruker_lib_path: &str, data_path: &str, use_bruker_sdk: bool, scan_max_index: u32, im_lower: f64, im_upper: f64, tof_max_index: u32, mz_lower: f64, mz_upper: f64) -> Self { + pub fn new_lazy( + bruker_lib_path: &str, + data_path: &str, + use_bruker_sdk: bool, + scan_max_index: u32, + im_lower: f64, + im_upper: f64, + tof_max_index: u32, + mz_lower: f64, + mz_upper: f64, + ) -> Self { let raw_data_layout = TimsRawDataLayout::new(data_path); - + let index_converter = match use_bruker_sdk { - true => TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new(bruker_lib_path, data_path)), - false => TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries(mz_lower, mz_upper, tof_max_index, im_lower, im_upper, scan_max_index)) + true => TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new( + bruker_lib_path, + data_path, + )), + false => TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries( + mz_lower, + mz_upper, + tof_max_index, + im_lower, + im_upper, + scan_max_index, + )), }; - + TimsDataLoader::Lazy(TimsLazyLoder { raw_data_layout, - index_converter + index_converter, }) } - pub fn new_in_memory(bruker_lib_path: &str, data_path: &str, use_bruker_sdk: bool, scan_max_index: u32, im_lower: f64, im_upper: f64, tof_max_index: u32, mz_lower: f64, mz_upper: f64) -> Self { + pub fn new_in_memory( + bruker_lib_path: &str, + data_path: &str, + use_bruker_sdk: bool, + scan_max_index: u32, + im_lower: f64, + im_upper: f64, + tof_max_index: u32, + mz_lower: f64, + mz_upper: f64, + ) -> Self { let raw_data_layout = TimsRawDataLayout::new(data_path); - + let index_converter = match use_bruker_sdk { - true => TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new(bruker_lib_path, data_path)), - false => TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries(mz_lower, mz_upper, tof_max_index, im_lower, im_upper, scan_max_index)) + true => TimsIndexConverter::BrukerLib(BrukerLibTimsDataConverter::new( + bruker_lib_path, + data_path, + )), + false => TimsIndexConverter::Simple(SimpleIndexConverter::from_boundaries( + mz_lower, + mz_upper, + tof_max_index, + im_lower, + im_upper, + scan_max_index, + )), }; let mut file_path = PathBuf::from(data_path); @@ -684,13 +765,13 @@ impl TimsDataLoader { TimsDataLoader::InMemory(TimsInMemoryLoader { raw_data_layout, index_converter, - compressed_data: data + compressed_data: data, }) } pub fn get_index_converter(&self) -> &dyn IndexConverter { match self { TimsDataLoader::InMemory(loader) => &loader.index_converter, - TimsDataLoader::Lazy(loader) => &loader.index_converter + TimsDataLoader::Lazy(loader) => &loader.index_converter, } } } @@ -699,41 +780,41 @@ impl TimsData for TimsDataLoader { fn get_frame(&self, frame_id: u32) -> TimsFrame { match self { TimsDataLoader::InMemory(loader) => loader.get_frame(frame_id), - TimsDataLoader::Lazy(loader) => loader.get_frame(frame_id) + TimsDataLoader::Lazy(loader) => loader.get_frame(frame_id), } } fn get_raw_frame(&self, frame_id: u32) -> RawTimsFrame { match self { TimsDataLoader::InMemory(loader) => loader.get_raw_frame(frame_id), - TimsDataLoader::Lazy(loader) => loader.get_raw_frame(frame_id) + TimsDataLoader::Lazy(loader) => loader.get_raw_frame(frame_id), } } fn get_slice(&self, frame_ids: Vec, num_threads: usize) -> TimsSlice { match self { TimsDataLoader::InMemory(loader) => loader.get_slice(frame_ids, num_threads), - TimsDataLoader::Lazy(loader) => loader.get_slice(frame_ids, num_threads) + TimsDataLoader::Lazy(loader) => loader.get_slice(frame_ids, num_threads), } } fn get_acquisition_mode(&self) -> AcquisitionMode { match self { TimsDataLoader::InMemory(loader) => loader.get_acquisition_mode(), - TimsDataLoader::Lazy(loader) => loader.get_acquisition_mode() + TimsDataLoader::Lazy(loader) => loader.get_acquisition_mode(), } } fn get_frame_count(&self) -> i32 { match self { TimsDataLoader::InMemory(loader) => loader.get_frame_count(), - TimsDataLoader::Lazy(loader) => loader.get_frame_count() + TimsDataLoader::Lazy(loader) => loader.get_frame_count(), } } fn get_data_path(&self) -> &str { match self { TimsDataLoader::InMemory(loader) => loader.get_data_path(), - TimsDataLoader::Lazy(loader) => loader.get_data_path() + TimsDataLoader::Lazy(loader) => loader.get_data_path(), } } } @@ -742,7 +823,7 @@ pub struct SimpleIndexConverter { pub tof_intercept: f64, pub tof_slope: f64, pub scan_intercept: f64, - pub scan_slope: f64, + pub scan_slope: f64, } impl SimpleIndexConverter { @@ -755,9 +836,8 @@ impl SimpleIndexConverter { scan_max_index: u32, ) -> Self { let tof_intercept: f64 = mz_min.sqrt(); - let tof_slope: f64 = - (mz_max.sqrt() - tof_intercept) / tof_max_index as f64; - + let tof_slope: f64 = (mz_max.sqrt() - tof_intercept) / tof_max_index as f64; + let scan_intercept: f64 = im_max; let scan_slope: f64 = (im_min - scan_intercept) / scan_max_index as f64; Self { @@ -773,44 +853,48 @@ impl IndexConverter for SimpleIndexConverter { fn tof_to_mz(&self, _frame_id: u32, _tof_values: &Vec) -> Vec { let mut mz_values: Vec = Vec::new(); mz_values.resize(_tof_values.len(), 0.0); - + for (i, &val) in _tof_values.iter().enumerate() { mz_values[i] = (self.tof_intercept + self.tof_slope * val as f64).powi(2); } - + mz_values } fn mz_to_tof(&self, _frame_id: u32, _mz_values: &Vec) -> Vec { let mut tof_values: Vec = Vec::new(); tof_values.resize(_mz_values.len(), 0); - + for (i, &val) in _mz_values.iter().enumerate() { tof_values[i] = ((val.sqrt() - self.tof_intercept) / self.tof_slope) as u32; } - + tof_values } fn scan_to_inverse_mobility(&self, _frame_id: u32, _scan_values: &Vec) -> Vec { let mut inv_mobility_values: Vec = Vec::new(); inv_mobility_values.resize(_scan_values.len(), 0.0); - + for (i, &val) in _scan_values.iter().enumerate() { inv_mobility_values[i] = self.scan_intercept + self.scan_slope * val as f64; } - + inv_mobility_values } - fn inverse_mobility_to_scan(&self, _frame_id: u32, _inverse_mobility_values: &Vec) -> Vec { + fn inverse_mobility_to_scan( + &self, + _frame_id: u32, + _inverse_mobility_values: &Vec, + ) -> Vec { let mut scan_values: Vec = Vec::new(); scan_values.resize(_inverse_mobility_values.len(), 0); - + for (i, &val) in _inverse_mobility_values.iter().enumerate() { scan_values[i] = ((val - self.scan_intercept) / self.scan_slope) as u32; } - + scan_values } } diff --git a/rustdf/src/data/meta.rs b/rustdf/src/data/meta.rs index 9e3e7c8c..951517a0 100644 --- a/rustdf/src/data/meta.rs +++ b/rustdf/src/data/meta.rs @@ -115,72 +115,104 @@ struct GlobalMetaInternal { value: String, } -pub fn read_dda_precursor_meta(bruker_d_folder_name: &str) -> Result, Box> { +pub fn read_dda_precursor_meta( + bruker_d_folder_name: &str, +) -> Result, Box> { // Connect to the database let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf"); let conn = Connection::open(db_path)?; // prepare the query - let rows: Vec<&str> = vec!["Id", "LargestPeakMz", "AverageMz", "MonoisotopicMz", "Charge", "ScanNumber", "Intensity", "Parent"]; + let rows: Vec<&str> = vec![ + "Id", + "LargestPeakMz", + "AverageMz", + "MonoisotopicMz", + "Charge", + "ScanNumber", + "Intensity", + "Parent", + ]; let query = format!("SELECT {} FROM Precursors", rows.join(", ")); // execute the query - let frames_rows: Result, _> = conn.prepare(&query)?.query_map([], |row| { - Ok(DDAPrecursorMeta { - precursor_id: row.get(0)?, - precursor_mz_highest_intensity: row.get(1)?, - precursor_mz_average: row.get(2)?, - precursor_mz_monoisotopic: row.get(3)?, // Now using Option - precursor_charge: row.get(4)?, // Now using Option - precursor_average_scan_number: row.get(5)?, - precursor_total_intensity: row.get(6)?, - precursor_frame_id: row.get(7)?, - }) - })?.collect(); + let frames_rows: Result, _> = conn + .prepare(&query)? + .query_map([], |row| { + Ok(DDAPrecursorMeta { + precursor_id: row.get(0)?, + precursor_mz_highest_intensity: row.get(1)?, + precursor_mz_average: row.get(2)?, + precursor_mz_monoisotopic: row.get(3)?, // Now using Option + precursor_charge: row.get(4)?, // Now using Option + precursor_average_scan_number: row.get(5)?, + precursor_total_intensity: row.get(6)?, + precursor_frame_id: row.get(7)?, + }) + })? + .collect(); // return the frames Ok(frames_rows?) } -pub fn read_pasef_frame_ms_ms_info(bruker_d_folder_name: &str) -> Result, Box> { +pub fn read_pasef_frame_ms_ms_info( + bruker_d_folder_name: &str, +) -> Result, Box> { // Connect to the database let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf"); let conn = Connection::open(db_path)?; // prepare the query - let rows: Vec<&str> = vec!["Frame", "ScanNumBegin", "ScanNumEnd", "IsolationMz", "IsolationWidth", "CollisionEnergy", "Precursor"]; + let rows: Vec<&str> = vec![ + "Frame", + "ScanNumBegin", + "ScanNumEnd", + "IsolationMz", + "IsolationWidth", + "CollisionEnergy", + "Precursor", + ]; let query = format!("SELECT {} FROM PasefFrameMsMsInfo", rows.join(", ")); // execute the query - let frames_rows: Result, _> = conn.prepare(&query)?.query_map([], |row| { - Ok(PasefMsMsMeta { - frame_id: row.get(0)?, - scan_num_begin: row.get(1)?, - scan_num_end: row.get(2)?, - isolation_mz: row.get(3)?, - isolation_width: row.get(4)?, - collision_energy: row.get(5)?, - precursor_id: row.get(6)?, }) - })?.collect(); + let frames_rows: Result, _> = conn + .prepare(&query)? + .query_map([], |row| { + Ok(PasefMsMsMeta { + frame_id: row.get(0)?, + scan_num_begin: row.get(1)?, + scan_num_end: row.get(2)?, + isolation_mz: row.get(3)?, + isolation_width: row.get(4)?, + collision_energy: row.get(5)?, + precursor_id: row.get(6)?, + }) + })? + .collect(); // return the frames Ok(frames_rows?) } // Read the global meta data from the analysis.tdf file -pub fn read_global_meta_sql(bruker_d_folder_name: &str) -> Result> { - +pub fn read_global_meta_sql( + bruker_d_folder_name: &str, +) -> Result> { // Connect to the database let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf"); let conn = Connection::open(db_path)?; // execute the query - let frames_rows: Result, _> = conn.prepare("SELECT * FROM GlobalMetadata")?.query_map([], |row| { - Ok(GlobalMetaInternal { - key: row.get(0)?, - value: row.get(1)?, - }) - })?.collect(); + let frames_rows: Result, _> = conn + .prepare("SELECT * FROM GlobalMetadata")? + .query_map([], |row| { + Ok(GlobalMetaInternal { + key: row.get(0)?, + value: row.get(1)?, + }) + })? + .collect(); let mut global_meta = GlobalMetaData { schema_type: String::new(), @@ -202,18 +234,36 @@ pub fn read_global_meta_sql(bruker_d_folder_name: &str) -> Result global_meta.schema_type = row.value, - "SchemaVersionMajor" => global_meta.schema_version_major = row.value.parse::().unwrap(), - "SchemaVersionMinor" => global_meta.schema_version_minor = row.value.parse::().unwrap(), + "SchemaVersionMajor" => { + global_meta.schema_version_major = row.value.parse::().unwrap() + } + "SchemaVersionMinor" => { + global_meta.schema_version_minor = row.value.parse::().unwrap() + } "AcquisitionSoftwareVendor" => global_meta.acquisition_software_vendor = row.value, "InstrumentVendor" => global_meta.instrument_vendor = row.value, "ClosedProperly" => global_meta.closed_property = row.value.parse::().unwrap(), - "TimsCompressionType" => global_meta.tims_compression_type = row.value.parse::().unwrap(), - "MaxNumPeaksPerScan" => global_meta.max_num_peaks_per_scan = row.value.parse::().unwrap(), - "MzAcqRangeLower" => global_meta.mz_acquisition_range_lower = row.value.parse::().unwrap(), - "MzAcqRangeUpper" => global_meta.mz_acquisition_range_upper = row.value.parse::().unwrap(), - "OneOverK0AcqRangeLower" => global_meta.one_over_k0_range_lower = row.value.parse::().unwrap(), - "OneOverK0AcqRangeUpper" => global_meta.one_over_k0_range_upper = row.value.parse::().unwrap(), - "DigitizerNumSamples" => global_meta.tof_max_index = (row.value.parse::().unwrap() + 1) as u32, + "TimsCompressionType" => { + global_meta.tims_compression_type = row.value.parse::().unwrap() + } + "MaxNumPeaksPerScan" => { + global_meta.max_num_peaks_per_scan = row.value.parse::().unwrap() + } + "MzAcqRangeLower" => { + global_meta.mz_acquisition_range_lower = row.value.parse::().unwrap() + } + "MzAcqRangeUpper" => { + global_meta.mz_acquisition_range_upper = row.value.parse::().unwrap() + } + "OneOverK0AcqRangeLower" => { + global_meta.one_over_k0_range_lower = row.value.parse::().unwrap() + } + "OneOverK0AcqRangeUpper" => { + global_meta.one_over_k0_range_upper = row.value.parse::().unwrap() + } + "DigitizerNumSamples" => { + global_meta.tof_max_index = (row.value.parse::().unwrap() + 1) as u32 + } _ => (), } } @@ -222,44 +272,68 @@ pub fn read_global_meta_sql(bruker_d_folder_name: &str) -> Result Result, Box> { +pub fn read_meta_data_sql( + bruker_d_folder_name: &str, +) -> Result, Box> { // Connect to the database let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf"); let conn = Connection::open(db_path)?; // prepare the query - let rows: Vec<&str> = vec!["Id", "Time", "ScanMode", "Polarity", "MsMsType", "TimsId", "MaxIntensity", "SummedIntensities", - "NumScans", "NumPeaks", "MzCalibration", "T1", "T2", "TimsCalibration", "PropertyGroup", "AccumulationTime", "RampTime"]; + let rows: Vec<&str> = vec![ + "Id", + "Time", + "ScanMode", + "Polarity", + "MsMsType", + "TimsId", + "MaxIntensity", + "SummedIntensities", + "NumScans", + "NumPeaks", + "MzCalibration", + "T1", + "T2", + "TimsCalibration", + "PropertyGroup", + "AccumulationTime", + "RampTime", + ]; let query = format!("SELECT {} FROM Frames", rows.join(", ")); // execute the query - let frames_rows: Result, _> = conn.prepare(&query)?.query_map([], |row| { - Ok(FrameMeta { - id: row.get(0)?, - time: row.get(1)?, - scan_mode: row.get(2)?, - polarity: row.get(3)?, - ms_ms_type: row.get(4)?, - tims_id: row.get(5)?, - max_intensity: row.get(6)?, - sum_intensity: row.get(7)?, - num_scans: row.get(8)?, - num_peaks: row.get(9)?, - mz_calibration: row.get(10)?, - t_1: row.get(11)?, - t_2: row.get(12)?, - tims_calibration: row.get(13)?, - property_group: row.get(14)?, - accumulation_time: row.get(15)?, - ramp_time: row.get(16)?, - }) - })?.collect(); + let frames_rows: Result, _> = conn + .prepare(&query)? + .query_map([], |row| { + Ok(FrameMeta { + id: row.get(0)?, + time: row.get(1)?, + scan_mode: row.get(2)?, + polarity: row.get(3)?, + ms_ms_type: row.get(4)?, + tims_id: row.get(5)?, + max_intensity: row.get(6)?, + sum_intensity: row.get(7)?, + num_scans: row.get(8)?, + num_peaks: row.get(9)?, + mz_calibration: row.get(10)?, + t_1: row.get(11)?, + t_2: row.get(12)?, + tims_calibration: row.get(13)?, + property_group: row.get(14)?, + accumulation_time: row.get(15)?, + ramp_time: row.get(16)?, + }) + })? + .collect(); // return the frames Ok(frames_rows?) } -pub fn read_dia_ms_ms_info(bruker_d_folder_name: &str) -> Result, Box> { +pub fn read_dia_ms_ms_info( + bruker_d_folder_name: &str, +) -> Result, Box> { // Connect to the database let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf"); let conn = Connection::open(db_path)?; @@ -269,38 +343,53 @@ pub fn read_dia_ms_ms_info(bruker_d_folder_name: &str) -> Result, _> = conn.prepare(&query)?.query_map([], |row| { - Ok(DiaMsMisInfo { - frame_id: row.get(0)?, - window_group: row.get(1)?, - }) - })?.collect(); + let frames_rows: Result, _> = conn + .prepare(&query)? + .query_map([], |row| { + Ok(DiaMsMisInfo { + frame_id: row.get(0)?, + window_group: row.get(1)?, + }) + })? + .collect(); // return the frames Ok(frames_rows?) } -pub fn read_dia_ms_ms_windows(bruker_d_folder_name: &str) -> Result, Box> { +pub fn read_dia_ms_ms_windows( + bruker_d_folder_name: &str, +) -> Result, Box> { // Connect to the database let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf"); let conn = Connection::open(db_path)?; // prepare the query - let rows: Vec<&str> = vec!["WindowGroup", "ScanNumBegin", "ScanNumEnd", "IsolationMz", "IsolationWidth", "CollisionEnergy"]; + let rows: Vec<&str> = vec![ + "WindowGroup", + "ScanNumBegin", + "ScanNumEnd", + "IsolationMz", + "IsolationWidth", + "CollisionEnergy", + ]; let query = format!("SELECT {} FROM DiaFrameMsMsWindows", rows.join(", ")); // execute the query - let frames_rows: Result, _> = conn.prepare(&query)?.query_map([], |row| { - Ok(DiaMsMsWindow { - window_group: row.get(0)?, - scan_num_begin: row.get(1)?, - scan_num_end: row.get(2)?, - isolation_mz: row.get(3)?, - isolation_width: row.get(4)?, - collision_energy: row.get(5)?, - }) - })?.collect(); + let frames_rows: Result, _> = conn + .prepare(&query)? + .query_map([], |row| { + Ok(DiaMsMsWindow { + window_group: row.get(0)?, + scan_num_begin: row.get(1)?, + scan_num_end: row.get(2)?, + isolation_mz: row.get(3)?, + isolation_width: row.get(4)?, + collision_energy: row.get(5)?, + }) + })? + .collect(); // return the frames Ok(frames_rows?) -} \ No newline at end of file +} diff --git a/rustdf/src/data/mod.rs b/rustdf/src/data/mod.rs index 8891b9f7..0e356db2 100644 --- a/rustdf/src/data/mod.rs +++ b/rustdf/src/data/mod.rs @@ -1,8 +1,8 @@ -pub mod raw; -pub mod meta; +pub mod acquisition; pub mod dataset; pub mod dda; pub mod dia; pub mod handle; +pub mod meta; +pub mod raw; pub mod utility; -pub mod acquisition; \ No newline at end of file diff --git a/rustdf/src/data/raw.rs b/rustdf/src/data/raw.rs index 08083a74..6e86df76 100644 --- a/rustdf/src/data/raw.rs +++ b/rustdf/src/data/raw.rs @@ -32,26 +32,24 @@ impl BrukerTimsDataLibrary { // let data_path = "path/to/data.d"; // let tims_data = BrukerTimsDataLibrary::new(bruker_lib_path, data_path); // ``` - pub fn new(bruker_lib_path: &str, data_path: &str) -> Result> { - + pub fn new( + bruker_lib_path: &str, + data_path: &str, + ) -> Result> { // Load the library - let lib = unsafe { - Library::new(bruker_lib_path)? - }; - + let lib = unsafe { Library::new(bruker_lib_path)? }; + // create a handle to the raw data let handle = unsafe { - let func: Symbol u64> = lib.get(b"tims_open")?; + let func: Symbol u64> = + lib.get(b"tims_open")?; let path = std::ffi::CString::new(data_path)?; let handle = func(path.as_ptr(), 0); handle }; // return the BrukerTimsDataLibrary struct - Ok(BrukerTimsDataLibrary { - lib, - handle, - }) + Ok(BrukerTimsDataLibrary { lib, handle }) } // @@ -68,7 +66,7 @@ impl BrukerTimsDataLibrary { // ``` pub fn tims_close(&self) -> Result<(), Box> { unsafe { - let func: Symbol ()> = self.lib.get(b"tims_close")?; + let func: Symbol ()> = self.lib.get(b"tims_close")?; func(self.handle); } Ok(()) @@ -87,10 +85,22 @@ impl BrukerTimsDataLibrary { // Err(e) => println!("error: {}", e), // }; // ``` - pub fn tims_index_to_mz(&self, frame_id: u32, dbl_tofs: &[c_double], mzs: &mut [c_double]) -> Result<(), Box> { + pub fn tims_index_to_mz( + &self, + frame_id: u32, + dbl_tofs: &[c_double], + mzs: &mut [c_double], + ) -> Result<(), Box> { unsafe { - let func: Symbol = self.lib.get(b"tims_index_to_mz")?; - func(self.handle, frame_id, dbl_tofs.as_ptr(), mzs.as_mut_ptr(), dbl_tofs.len() as u32); + let func: Symbol = + self.lib.get(b"tims_index_to_mz")?; + func( + self.handle, + frame_id, + dbl_tofs.as_ptr(), + mzs.as_mut_ptr(), + dbl_tofs.len() as u32, + ); } Ok(()) } @@ -108,10 +118,22 @@ impl BrukerTimsDataLibrary { // Err(e) => println!("error: {}", e), // }; // ``` - pub fn tims_mz_to_index(&self, frame_id: u32, mzs: &[c_double], indices: &mut [c_double]) -> Result<(), Box> { + pub fn tims_mz_to_index( + &self, + frame_id: u32, + mzs: &[c_double], + indices: &mut [c_double], + ) -> Result<(), Box> { unsafe { - let func: Symbol = self.lib.get(b"tims_mz_to_index")?; - func(self.handle, frame_id, mzs.as_ptr(), indices.as_mut_ptr(), mzs.len() as u32); + let func: Symbol = + self.lib.get(b"tims_mz_to_index")?; + func( + self.handle, + frame_id, + mzs.as_ptr(), + indices.as_mut_ptr(), + mzs.len() as u32, + ); } Ok(()) } @@ -129,10 +151,22 @@ impl BrukerTimsDataLibrary { // Err(e) => println!("error: {}", e), // }; // ``` - pub fn tims_scan_to_inv_mob(&self, frame_id: u32, dbl_scans: &[c_double], inv_mob: &mut [c_double]) -> Result<(), Box> { + pub fn tims_scan_to_inv_mob( + &self, + frame_id: u32, + dbl_scans: &[c_double], + inv_mob: &mut [c_double], + ) -> Result<(), Box> { unsafe { - let func: Symbol = self.lib.get(b"tims_scannum_to_oneoverk0")?; - func(self.handle, frame_id, dbl_scans.as_ptr(), inv_mob.as_mut_ptr(), dbl_scans.len() as u32); + let func: Symbol = + self.lib.get(b"tims_scannum_to_oneoverk0")?; + func( + self.handle, + frame_id, + dbl_scans.as_ptr(), + inv_mob.as_mut_ptr(), + dbl_scans.len() as u32, + ); } Ok(()) } @@ -150,10 +184,22 @@ impl BrukerTimsDataLibrary { // Err(e) => println!("error: {}", e), // }; // ``` - pub fn inv_mob_to_tims_scan(&self, frame_id: u32, inv_mob: &[c_double], scans: &mut [c_double]) -> Result<(), Box> { + pub fn inv_mob_to_tims_scan( + &self, + frame_id: u32, + inv_mob: &[c_double], + scans: &mut [c_double], + ) -> Result<(), Box> { unsafe { - let func: Symbol = self.lib.get(b"tims_oneoverk0_to_scannum")?; - func(self.handle, frame_id, inv_mob.as_ptr(), scans.as_mut_ptr(), inv_mob.len() as u32); + let func: Symbol = + self.lib.get(b"tims_oneoverk0_to_scannum")?; + func( + self.handle, + frame_id, + inv_mob.as_ptr(), + scans.as_mut_ptr(), + inv_mob.len() as u32, + ); } Ok(()) } @@ -167,4 +213,4 @@ impl Drop for BrukerTimsDataLibrary { Err(e) => println!("error: {}", e), }; } -} \ No newline at end of file +} diff --git a/rustdf/src/data/utility.rs b/rustdf/src/data/utility.rs index 4653338e..1792b81e 100644 --- a/rustdf/src/data/utility.rs +++ b/rustdf/src/data/utility.rs @@ -1,11 +1,10 @@ -use std::io; -use std::io::{Read, Write}; use byteorder::{ByteOrder, LittleEndian}; use mscore::timstof::frame::TimsFrame; +use rayon::iter::IntoParallelRefIterator; use rayon::prelude::*; use rayon::ThreadPoolBuilder; -use rayon::iter::IntoParallelRefIterator; - +use std::io; +use std::io::{Read, Write}; /// Decompresses a ZSTD compressed byte array /// @@ -86,21 +85,37 @@ pub fn reconstruct_compressed_data( Ok(final_data) } -pub fn compress_collection(frames: Vec, max_scan_count: u32, compression_level: i32, num_threads: usize) -> Vec> { - - let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); +pub fn compress_collection( + frames: Vec, + max_scan_count: u32, + compression_level: i32, + num_threads: usize, +) -> Vec> { + let pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let result = pool.install(|| { - frames.par_iter().map(|frame| { - let compressed_data = reconstruct_compressed_data( - frame.scan.iter().map(|&x| x as u32).collect(), - frame.tof.iter().map(|&x| x as u32).collect(), - frame.ims_frame.intensity.iter().map(|&x| x as u32).collect(), - max_scan_count, - compression_level, - ).unwrap(); - compressed_data - }).collect() + frames + .par_iter() + .map(|frame| { + let compressed_data = reconstruct_compressed_data( + frame.scan.iter().map(|&x| x as u32).collect(), + frame.tof.iter().map(|&x| x as u32).collect(), + frame + .ims_frame + .intensity + .iter() + .map(|&x| x as u32) + .collect(), + max_scan_count, + compression_level, + ) + .unwrap(); + compressed_data + }) + .collect() }); result } @@ -117,8 +132,9 @@ pub fn compress_collection(frames: Vec, max_scan_count: u32, compress /// * `tof_indices` - A vector of u32 that holds the tof indices /// * `intensities` - A vector of u32 that holds the intensities /// -pub fn parse_decompressed_bruker_binary_data(decompressed_bytes: &[u8]) -> Result<(Vec, Vec, Vec), Box> { - +pub fn parse_decompressed_bruker_binary_data( + decompressed_bytes: &[u8], +) -> Result<(Vec, Vec, Vec), Box> { let mut buffer_u32 = Vec::new(); for i in 0..(decompressed_bytes.len() / 4) { @@ -126,7 +142,7 @@ pub fn parse_decompressed_bruker_binary_data(decompressed_bytes: &[u8]) -> Resul decompressed_bytes[i], decompressed_bytes[i + (decompressed_bytes.len() / 4)], decompressed_bytes[i + (2 * decompressed_bytes.len() / 4)], - decompressed_bytes[i + (3 * decompressed_bytes.len() / 4)] + decompressed_bytes[i + (3 * decompressed_bytes.len() / 4)], ]); buffer_u32.push(value); } @@ -144,10 +160,20 @@ pub fn parse_decompressed_bruker_binary_data(decompressed_bytes: &[u8]) -> Resul scan_indices[0] = 0; // get the tof indices, which are the first half of the buffer after the scan indices - let mut tof_indices: Vec = buffer_u32.iter().skip(scan_count).step_by(2).cloned().collect(); + let mut tof_indices: Vec = buffer_u32 + .iter() + .skip(scan_count) + .step_by(2) + .cloned() + .collect(); // get the intensities, which are the second half of the buffer - let intensities: Vec = buffer_u32.iter().skip(scan_count + 1).step_by(2).cloned().collect(); + let intensities: Vec = buffer_u32 + .iter() + .skip(scan_count + 1) + .step_by(2) + .cloned() + .collect(); // calculate the last scan before moving scan indices let last_scan = intensities.len() as u32 - scan_indices[1..].iter().sum::(); @@ -237,23 +263,44 @@ pub fn get_realdata_loop(back_data: &[u8]) -> Vec { real_data } -pub fn get_data_for_compression(tofs: &Vec, scans: &Vec, intensities: &Vec, max_scans: u32) -> Vec { +pub fn get_data_for_compression( + tofs: &Vec, + scans: &Vec, + intensities: &Vec, + max_scans: u32, +) -> Vec { let mut tof_copy = tofs.clone(); modify_tofs(&mut tof_copy, &scans); let peak_cnts = get_peak_cnts(max_scans, &scans); - let interleaved: Vec = tofs.iter().zip(intensities.iter()).flat_map(|(tof, intensity)| vec![*tof, *intensity]).collect(); + let interleaved: Vec = tofs + .iter() + .zip(intensities.iter()) + .flat_map(|(tof, intensity)| vec![*tof, *intensity]) + .collect(); get_realdata(&peak_cnts, &interleaved) } - -pub fn get_data_for_compression_par(tofs: Vec>, scans: Vec>, intensities: Vec>, max_scans: u32, num_threads: usize) -> Vec> { - let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); +pub fn get_data_for_compression_par( + tofs: Vec>, + scans: Vec>, + intensities: Vec>, + max_scans: u32, + num_threads: usize, +) -> Vec> { + let pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let result = pool.install(|| { - tofs.par_iter().zip(scans.par_iter()).zip(intensities.par_iter()).map(|((tof, scan), intensity)| { - get_data_for_compression(tof, scan, intensity, max_scans) - }).collect() + tofs.par_iter() + .zip(scans.par_iter()) + .zip(intensities.par_iter()) + .map(|((tof, scan), intensity)| { + get_data_for_compression(tof, scan, intensity, max_scans) + }) + .collect() }); result @@ -261,7 +308,8 @@ pub fn get_data_for_compression_par(tofs: Vec>, scans: Vec>, i pub fn flatten_scan_values(scan: &Vec, zero_indexed: bool) -> Vec { let add = if zero_indexed { 0 } else { 1 }; - scan.iter().enumerate() - .flat_map(|(index, &count)| vec![(index + add) as u32; count as usize] - .into_iter()).collect() -} \ No newline at end of file + scan.iter() + .enumerate() + .flat_map(|(index, &count)| vec![(index + add) as u32; count as usize].into_iter()) + .collect() +} diff --git a/rustdf/src/lib.rs b/rustdf/src/lib.rs index 306b7ce6..fe424e22 100644 --- a/rustdf/src/lib.rs +++ b/rustdf/src/lib.rs @@ -1,2 +1,2 @@ pub mod data; -pub mod sim; \ No newline at end of file +pub mod sim; diff --git a/rustdf/src/main.rs b/rustdf/src/main.rs index 9e7d9714..faed5b80 100644 --- a/rustdf/src/main.rs +++ b/rustdf/src/main.rs @@ -1,6 +1,6 @@ +use clap::Parser; use rustdf::sim::dia::TimsTofSyntheticsFrameBuilderDIA; use std::path::Path; -use clap::Parser; /// Create synthetic DIA proteomics experiment data #[derive(Parser, Debug)] @@ -27,9 +27,7 @@ struct Args { num_frames: usize, } - fn main() { - let args = Args::parse(); let db_path_str = args.path; @@ -37,12 +35,29 @@ fn main() { let num_threads = args.num_threads; let fragment = args.fragment; - let experiment = TimsTofSyntheticsFrameBuilderDIA::new(path, false,4).unwrap(); - let first_frames = experiment.precursor_frame_builder.frames.iter().map(|x| x.frame_id.clone()).skip(100).take(args.num_frames).collect::>(); + let experiment = TimsTofSyntheticsFrameBuilderDIA::new(path, false, 4).unwrap(); + let first_frames = experiment + .precursor_frame_builder + .frames + .iter() + .map(|x| x.frame_id.clone()) + .skip(100) + .take(args.num_frames) + .collect::>(); // go over the frames in batches of 256 for frame_batch in first_frames.chunks(args.batch_size) { - let frames = experiment.build_frames(frame_batch.to_vec(), fragment, false, true,5.0, false, 5.0, false, num_threads); + let frames = experiment.build_frames( + frame_batch.to_vec(), + fragment, + false, + true, + 5.0, + false, + 5.0, + false, + num_threads, + ); for frame in frames { println!("frame_id: {}", frame.frame_id); diff --git a/rustdf/src/sim/containers.rs b/rustdf/src/sim/containers.rs index 97c9356f..ab58c1af 100644 --- a/rustdf/src/sim/containers.rs +++ b/rustdf/src/sim/containers.rs @@ -1,7 +1,7 @@ -use mscore::data::peptide::{PeptideSequence}; -use mscore::data::spectrum::{MzSpectrum, MsType}; -use serde::{Serialize, Deserialize}; +use mscore::data::peptide::PeptideSequence; +use mscore::data::spectrum::{MsType, MzSpectrum}; use rand::distributions::{Distribution, Uniform}; +use serde::{Deserialize, Serialize}; #[derive(Serialize, Deserialize, Debug, Clone)] pub struct SignalDistribution { @@ -13,26 +13,54 @@ pub struct SignalDistribution { } impl SignalDistribution { - pub fn new(mean: f32, variance: f32, error: f32, occurrence: Vec, abundance: Vec) -> Self { - SignalDistribution { mean, variance, error, occurrence, abundance, } + pub fn new( + mean: f32, + variance: f32, + error: f32, + occurrence: Vec, + abundance: Vec, + ) -> Self { + SignalDistribution { + mean, + variance, + error, + occurrence, + abundance, + } } pub fn add_noise(&self, noise_level: f32) -> Vec { let mut rng = rand::thread_rng(); let noise_dist = Uniform::new(0.0, noise_level); - let noise: Vec = self.abundance.iter().map(|_| noise_dist.sample(&mut rng)).collect(); - let noise_relative: Vec = self.abundance.iter().zip(noise.iter()).map(|(&abu, &noise)| abu * noise).collect(); - let noised_signal: Vec = self.abundance.iter().zip(noise_relative.iter()).map(|(&abu, &noise_rel)| abu + noise_rel).collect(); + let noise: Vec = self + .abundance + .iter() + .map(|_| noise_dist.sample(&mut rng)) + .collect(); + let noise_relative: Vec = self + .abundance + .iter() + .zip(noise.iter()) + .map(|(&abu, &noise)| abu * noise) + .collect(); + let noised_signal: Vec = self + .abundance + .iter() + .zip(noise_relative.iter()) + .map(|(&abu, &noise_rel)| abu + noise_rel) + .collect(); let sum_noised_signal: f32 = noised_signal.iter().sum(); let sum_rt_abu: f32 = self.abundance.iter().sum(); - noised_signal.iter().map(|&x| (x / sum_noised_signal) * sum_rt_abu).collect() + noised_signal + .iter() + .map(|&x| (x / sum_noised_signal) * sum_rt_abu) + .collect() } } - #[derive(Debug, Clone)] pub struct PeptidesSim { pub protein_id: u32, @@ -41,8 +69,8 @@ pub struct PeptidesSim { pub proteins: String, pub decoy: bool, pub missed_cleavages: i8, - pub n_term : Option, - pub c_term : Option, + pub n_term: Option, + pub c_term: Option, pub mono_isotopic_mass: f32, pub retention_time: f32, pub events: f32, @@ -84,7 +112,12 @@ impl PeptidesSim { frame_start, frame_end, frame_distribution: SignalDistribution::new( - 0.0, 0.0, 0.0, frame_occurrence, frame_abundance), + 0.0, + 0.0, + 0.0, + frame_occurrence, + frame_abundance, + ), } } } @@ -122,7 +155,7 @@ impl WindowGroupSettingsSim { #[derive(Debug, Clone)] pub struct FrameToWindowGroupSim { pub frame_id: u32, - pub window_group:u32, + pub window_group: u32, } impl FrameToWindowGroupSim { @@ -167,12 +200,16 @@ impl IonSim { mobility, simulated_spectrum, scan_distribution: SignalDistribution::new( - 0.0, 0.0, 0.0, scan_occurrence, scan_abundance), + 0.0, + 0.0, + 0.0, + scan_occurrence, + scan_abundance, + ), } } } - #[derive(Debug, Clone)] pub struct ScansSim { pub scan: u32, @@ -207,7 +244,6 @@ impl FramesSim { 9 => MsType::FragmentDia, _ => MsType::Unknown, } - } } @@ -246,4 +282,4 @@ impl FragmentIonSim { } dense } -} \ No newline at end of file +} diff --git a/rustdf/src/sim/dia.rs b/rustdf/src/sim/dia.rs index 02cb6ceb..4cf1dfd9 100644 --- a/rustdf/src/sim/dia.rs +++ b/rustdf/src/sim/dia.rs @@ -1,31 +1,35 @@ -use std::collections::{BTreeMap, HashSet}; -use std::path::Path; use mscore::data::peptide::{PeptideIon, PeptideProductIonSeriesCollection}; -use mscore::timstof::collision::{TimsTofCollisionEnergy, TimsTofCollisionEnergyDIA}; -use mscore::timstof::quadrupole::{IonTransmission, TimsTransmissionDIA}; use mscore::data::spectrum::{IndexedMzSpectrum, MsType, MzSpectrum}; -use mscore::simulation::annotation::{MzSpectrumAnnotated, TimsFrameAnnotated, TimsSpectrumAnnotated}; +use mscore::simulation::annotation::{ + MzSpectrumAnnotated, TimsFrameAnnotated, TimsSpectrumAnnotated, +}; +use mscore::timstof::collision::{TimsTofCollisionEnergy, TimsTofCollisionEnergyDIA}; use mscore::timstof::frame::TimsFrame; +use mscore::timstof::quadrupole::{IonTransmission, TimsTransmissionDIA}; use mscore::timstof::spectrum::TimsSpectrum; +use std::collections::{BTreeMap, HashSet}; +use std::path::Path; use rayon::prelude::*; use rayon::ThreadPoolBuilder; use crate::sim::handle::TimsTofSyntheticsDataHandle; -use crate::sim::precursor::{TimsTofSyntheticsPrecursorFrameBuilder}; +use crate::sim::precursor::TimsTofSyntheticsPrecursorFrameBuilder; pub struct TimsTofSyntheticsFrameBuilderDIA { pub path: String, pub precursor_frame_builder: TimsTofSyntheticsPrecursorFrameBuilder, pub transmission_settings: TimsTransmissionDIA, pub fragmentation_settings: TimsTofCollisionEnergyDIA, - pub fragment_ions: Option)>>, - pub fragment_ions_annotated: Option)>>, + pub fragment_ions: + Option)>>, + pub fragment_ions_annotated: Option< + BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec)>, + >, } impl TimsTofSyntheticsFrameBuilderDIA { pub fn new(path: &Path, with_annotations: bool, num_threads: usize) -> rusqlite::Result { - let synthetics = TimsTofSyntheticsPrecursorFrameBuilder::new(path)?; let handle = TimsTofSyntheticsDataHandle::new(path)?; @@ -38,7 +42,12 @@ impl TimsTofSyntheticsFrameBuilderDIA { match with_annotations { true => { - let fragment_ions = Some(TimsTofSyntheticsDataHandle::build_fragment_ions_annotated(&synthetics.peptides, &fragment_ions, num_threads)); + let fragment_ions = + Some(TimsTofSyntheticsDataHandle::build_fragment_ions_annotated( + &synthetics.peptides, + &fragment_ions, + num_threads, + )); Ok(Self { path: path.to_str().unwrap().to_string(), precursor_frame_builder: synthetics, @@ -50,13 +59,17 @@ impl TimsTofSyntheticsFrameBuilderDIA { } false => { - let fragment_ions = Some(TimsTofSyntheticsDataHandle::build_fragment_ions(&synthetics.peptides, &fragment_ions, num_threads)); + let fragment_ions = Some(TimsTofSyntheticsDataHandle::build_fragment_ions( + &synthetics.peptides, + &fragment_ions, + num_threads, + )); Ok(Self { path: path.to_str().unwrap().to_string(), precursor_frame_builder: synthetics, transmission_settings, fragmentation_settings, - fragment_ions, + fragment_ions, fragment_ions_annotated: None, }) } @@ -74,18 +87,72 @@ impl TimsTofSyntheticsFrameBuilderDIA { /// /// A TimsFrame /// - pub fn build_frame(&self, frame_id: u32, fragmentation: bool, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, mz_noise_fragment: bool, fragment_noise_ppm: f64, right_drag: bool) -> TimsFrame { + pub fn build_frame( + &self, + frame_id: u32, + fragmentation: bool, + mz_noise_precursor: bool, + uniform: bool, + precursor_noise_ppm: f64, + mz_noise_fragment: bool, + fragment_noise_ppm: f64, + right_drag: bool, + ) -> TimsFrame { // determine if the frame is a precursor frame - match self.precursor_frame_builder.precursor_frame_id_set.contains(&frame_id) { - true => self.build_ms1_frame(frame_id, mz_noise_precursor, uniform, precursor_noise_ppm, right_drag), - false => self.build_ms2_frame(frame_id, fragmentation, mz_noise_fragment, uniform, fragment_noise_ppm, right_drag), + match self + .precursor_frame_builder + .precursor_frame_id_set + .contains(&frame_id) + { + true => self.build_ms1_frame( + frame_id, + mz_noise_precursor, + uniform, + precursor_noise_ppm, + right_drag, + ), + false => self.build_ms2_frame( + frame_id, + fragmentation, + mz_noise_fragment, + uniform, + fragment_noise_ppm, + right_drag, + ), } } - pub fn build_frame_annotated(&self, frame_id: u32, fragmentation: bool, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, mz_noise_fragment: bool, fragment_noise_ppm: f64, right_drag: bool) -> TimsFrameAnnotated { - match self.precursor_frame_builder.precursor_frame_id_set.contains(&frame_id) { - true => self.build_ms1_frame_annotated(frame_id, mz_noise_precursor, uniform, precursor_noise_ppm, right_drag), - false => self.build_ms2_frame_annotated(frame_id, fragmentation, mz_noise_fragment, uniform, fragment_noise_ppm, right_drag), + pub fn build_frame_annotated( + &self, + frame_id: u32, + fragmentation: bool, + mz_noise_precursor: bool, + uniform: bool, + precursor_noise_ppm: f64, + mz_noise_fragment: bool, + fragment_noise_ppm: f64, + right_drag: bool, + ) -> TimsFrameAnnotated { + match self + .precursor_frame_builder + .precursor_frame_id_set + .contains(&frame_id) + { + true => self.build_ms1_frame_annotated( + frame_id, + mz_noise_precursor, + uniform, + precursor_noise_ppm, + right_drag, + ), + false => self.build_ms2_frame_annotated( + frame_id, + fragmentation, + mz_noise_fragment, + uniform, + fragment_noise_ppm, + right_drag, + ), } } @@ -110,13 +177,40 @@ impl TimsTofSyntheticsFrameBuilderDIA { result } - pub fn build_frames(&self, frame_ids: Vec, fragmentation: bool, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, mz_noise_fragment: bool, fragment_noise_ppm: f64, right_drag: bool, num_threads: usize) -> Vec { - - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + pub fn build_frames( + &self, + frame_ids: Vec, + fragmentation: bool, + mz_noise_precursor: bool, + uniform: bool, + precursor_noise_ppm: f64, + mz_noise_fragment: bool, + fragment_noise_ppm: f64, + right_drag: bool, + num_threads: usize, + ) -> Vec { + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let mut tims_frames: Vec = Vec::new(); thread_pool.install(|| { - tims_frames = frame_ids.par_iter().map(|frame_id| self.build_frame(*frame_id, fragmentation, mz_noise_precursor, uniform, precursor_noise_ppm, mz_noise_fragment, fragment_noise_ppm, right_drag)).collect(); + tims_frames = frame_ids + .par_iter() + .map(|frame_id| { + self.build_frame( + *frame_id, + fragmentation, + mz_noise_precursor, + uniform, + precursor_noise_ppm, + mz_noise_fragment, + fragment_noise_ppm, + right_drag, + ) + }) + .collect(); }); tims_frames.sort_by(|a, b| a.frame_id.cmp(&b.frame_id)); @@ -124,13 +218,40 @@ impl TimsTofSyntheticsFrameBuilderDIA { tims_frames } - pub fn build_frames_annotated(&self, frame_ids: Vec, fragmentation: bool, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, mz_noise_fragment: bool, fragment_noise_ppm: f64, right_drag: bool, num_threads: usize) -> Vec { - - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + pub fn build_frames_annotated( + &self, + frame_ids: Vec, + fragmentation: bool, + mz_noise_precursor: bool, + uniform: bool, + precursor_noise_ppm: f64, + mz_noise_fragment: bool, + fragment_noise_ppm: f64, + right_drag: bool, + num_threads: usize, + ) -> Vec { + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let mut tims_frames: Vec = Vec::new(); thread_pool.install(|| { - tims_frames = frame_ids.par_iter().map(|frame_id| self.build_frame_annotated(*frame_id, fragmentation, mz_noise_precursor, uniform, precursor_noise_ppm, mz_noise_fragment, fragment_noise_ppm, right_drag)).collect(); + tims_frames = frame_ids + .par_iter() + .map(|frame_id| { + self.build_frame_annotated( + *frame_id, + fragmentation, + mz_noise_precursor, + uniform, + precursor_noise_ppm, + mz_noise_fragment, + fragment_noise_ppm, + right_drag, + ) + }) + .collect(); }); tims_frames.sort_by(|a, b| a.frame_id.cmp(&b.frame_id)); @@ -138,53 +259,162 @@ impl TimsTofSyntheticsFrameBuilderDIA { tims_frames } - fn build_ms1_frame(&self, frame_id: u32, mz_noise_precursor: bool, uniform: bool, precursor_ppm: f64, right_drag: bool) -> TimsFrame { - let mut tims_frame = self.precursor_frame_builder.build_precursor_frame(frame_id, mz_noise_precursor, uniform, precursor_ppm, right_drag); - let intensities_rounded = tims_frame.ims_frame.intensity.iter().map(|x| x.round()).collect::>(); + fn build_ms1_frame( + &self, + frame_id: u32, + mz_noise_precursor: bool, + uniform: bool, + precursor_ppm: f64, + right_drag: bool, + ) -> TimsFrame { + let mut tims_frame = self.precursor_frame_builder.build_precursor_frame( + frame_id, + mz_noise_precursor, + uniform, + precursor_ppm, + right_drag, + ); + let intensities_rounded = tims_frame + .ims_frame + .intensity + .iter() + .map(|x| x.round()) + .collect::>(); tims_frame.ims_frame.intensity = intensities_rounded; tims_frame } - fn build_ms1_frame_annotated(&self, frame_id: u32, mz_noise_precursor: bool, uniform: bool, precursor_ppm: f64, right_drag: bool) -> TimsFrameAnnotated { - let mut tims_frame = self.precursor_frame_builder.build_precursor_frame_annotated(frame_id, mz_noise_precursor, uniform, precursor_ppm, right_drag); - let intensities_rounded = tims_frame.intensity.iter().map(|x| x.round()).collect::>(); + fn build_ms1_frame_annotated( + &self, + frame_id: u32, + mz_noise_precursor: bool, + uniform: bool, + precursor_ppm: f64, + right_drag: bool, + ) -> TimsFrameAnnotated { + let mut tims_frame = self + .precursor_frame_builder + .build_precursor_frame_annotated( + frame_id, + mz_noise_precursor, + uniform, + precursor_ppm, + right_drag, + ); + let intensities_rounded = tims_frame + .intensity + .iter() + .map(|x| x.round()) + .collect::>(); tims_frame.intensity = intensities_rounded; tims_frame } - fn build_ms2_frame(&self, frame_id: u32, fragmentation: bool, mz_noise_fragment: bool, uniform: bool, fragment_ppm: f64, right_drag: bool) -> TimsFrame { + fn build_ms2_frame( + &self, + frame_id: u32, + fragmentation: bool, + mz_noise_fragment: bool, + uniform: bool, + fragment_ppm: f64, + right_drag: bool, + ) -> TimsFrame { match fragmentation { false => { - let mut frame = self.transmission_settings.transmit_tims_frame(&self.build_ms1_frame(frame_id, mz_noise_fragment, uniform, fragment_ppm, right_drag), None); - let intensities_rounded = frame.ims_frame.intensity.iter().map(|x| x.round()).collect::>(); + let mut frame = self.transmission_settings.transmit_tims_frame( + &self.build_ms1_frame( + frame_id, + mz_noise_fragment, + uniform, + fragment_ppm, + right_drag, + ), + None, + ); + let intensities_rounded = frame + .ims_frame + .intensity + .iter() + .map(|x| x.round()) + .collect::>(); frame.ims_frame.intensity = intensities_rounded; frame.ms_type = MsType::FragmentDia; frame - }, + } true => { - let mut frame = self.build_fragment_frame(frame_id, &self.fragment_ions.as_ref().unwrap(), mz_noise_fragment, uniform, fragment_ppm, None, None, None, Some(right_drag)); - let intensities_rounded = frame.ims_frame.intensity.iter().map(|x| x.round()).collect::>(); + let mut frame = self.build_fragment_frame( + frame_id, + &self.fragment_ions.as_ref().unwrap(), + mz_noise_fragment, + uniform, + fragment_ppm, + None, + None, + None, + Some(right_drag), + ); + let intensities_rounded = frame + .ims_frame + .intensity + .iter() + .map(|x| x.round()) + .collect::>(); frame.ims_frame.intensity = intensities_rounded; frame - }, + } } } - fn build_ms2_frame_annotated(&self, frame_id: u32, fragmentation: bool, mz_noise_fragment: bool, uniform: bool, fragment_ppm: f64, right_drag: bool) -> TimsFrameAnnotated { + fn build_ms2_frame_annotated( + &self, + frame_id: u32, + fragmentation: bool, + mz_noise_fragment: bool, + uniform: bool, + fragment_ppm: f64, + right_drag: bool, + ) -> TimsFrameAnnotated { match fragmentation { false => { - let mut frame = self.transmission_settings.transmit_tims_frame_annotated(&self.build_ms1_frame_annotated(frame_id, mz_noise_fragment, uniform, fragment_ppm, right_drag), None); - let intensities_rounded = frame.intensity.iter().map(|x| x.round()).collect::>(); + let mut frame = self.transmission_settings.transmit_tims_frame_annotated( + &self.build_ms1_frame_annotated( + frame_id, + mz_noise_fragment, + uniform, + fragment_ppm, + right_drag, + ), + None, + ); + let intensities_rounded = frame + .intensity + .iter() + .map(|x| x.round()) + .collect::>(); frame.intensity = intensities_rounded; frame.ms_type = MsType::FragmentDia; frame - }, + } true => { - let mut frame = self.build_fragment_frame_annotated(frame_id, &self.fragment_ions_annotated.as_ref().unwrap(), mz_noise_fragment, uniform, fragment_ppm, None, None, None, Some(right_drag)); - let intensities_rounded = frame.intensity.iter().map(|x| x.round()).collect::>(); + let mut frame = self.build_fragment_frame_annotated( + frame_id, + &self.fragment_ions_annotated.as_ref().unwrap(), + mz_noise_fragment, + uniform, + fragment_ppm, + None, + None, + None, + Some(right_drag), + ); + let intensities_rounded = frame + .intensity + .iter() + .map(|x| x.round()) + .collect::>(); frame.intensity = intensities_rounded; frame - }, + } } } @@ -204,7 +434,10 @@ impl TimsTofSyntheticsFrameBuilderDIA { fn build_fragment_frame( &self, frame_id: u32, - fragment_ions: &BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec)>, + fragment_ions: &BTreeMap< + (u32, i8, i8), + (PeptideProductIonSeriesCollection, Vec), + >, mz_noise_fragment: bool, uniform: bool, fragment_ppm: f64, @@ -213,9 +446,12 @@ impl TimsTofSyntheticsFrameBuilderDIA { intensity_min: Option, right_drag: Option, ) -> TimsFrame { - // check frame id - let ms_type = match self.precursor_frame_builder.precursor_frame_id_set.contains(&frame_id) { + let ms_type = match self + .precursor_frame_builder + .precursor_frame_id_set + .contains(&frame_id) + { false => MsType::FragmentDia, true => MsType::Unknown, }; @@ -223,11 +459,19 @@ impl TimsTofSyntheticsFrameBuilderDIA { let mut tims_spectra: Vec = Vec::new(); // Frame might not have any peptides - if !self.precursor_frame_builder.frame_to_abundances.contains_key(&frame_id) { + if !self + .precursor_frame_builder + .frame_to_abundances + .contains_key(&frame_id) + { return TimsFrame::new( frame_id as i32, ms_type.clone(), - *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64, + *self + .precursor_frame_builder + .frame_to_rt + .get(&frame_id) + .unwrap() as f64, vec![], vec![], vec![], @@ -237,18 +481,29 @@ impl TimsTofSyntheticsFrameBuilderDIA { } // Get the peptide ids and abundances for the frame, should now save to unwrap since we checked if the frame is in the map - let (peptide_ids, frame_abundances) = self.precursor_frame_builder.frame_to_abundances.get(&frame_id).unwrap(); + let (peptide_ids, frame_abundances) = self + .precursor_frame_builder + .frame_to_abundances + .get(&frame_id) + .unwrap(); // Go over all peptides in the frame with their respective abundances for (peptide_id, frame_abundance) in peptide_ids.iter().zip(frame_abundances.iter()) { - // jump to next peptide if the peptide_id is not in the peptide_to_ions map - if !self.precursor_frame_builder.peptide_to_ions.contains_key(&peptide_id) { + if !self + .precursor_frame_builder + .peptide_to_ions + .contains_key(&peptide_id) + { continue; } // get all the ions for the peptide - let (ion_abundances, scan_occurrences, scan_abundances, charges, spectra) = self.precursor_frame_builder.peptide_to_ions.get(&peptide_id).unwrap(); + let (ion_abundances, scan_occurrences, scan_abundances, charges, spectra) = self + .precursor_frame_builder + .peptide_to_ions + .get(&peptide_id) + .unwrap(); for (index, ion_abundance) in ion_abundances.iter().enumerate() { // occurrence and abundance of the ion in the scan @@ -259,25 +514,42 @@ impl TimsTofSyntheticsFrameBuilderDIA { let spectrum = spectra.get(index).unwrap(); // go over occurrence and abundance of the ion in the scan - for (scan, scan_abundance) in all_scan_occurrence.iter().zip(all_scan_abundance.iter()) { - + for (scan, scan_abundance) in + all_scan_occurrence.iter().zip(all_scan_abundance.iter()) + { // first, check if precursor is transmitted - if !self.transmission_settings.any_transmitted(frame_id as i32, *scan as i32, &spectrum.mz, None) { + if !self.transmission_settings.any_transmitted( + frame_id as i32, + *scan as i32, + &spectrum.mz, + None, + ) { continue; } // calculate abundance factor - let total_events = self.precursor_frame_builder.peptide_to_events.get(&peptide_id).unwrap(); - let fraction_events = frame_abundance * scan_abundance * ion_abundance * total_events; + let total_events = self + .precursor_frame_builder + .peptide_to_events + .get(&peptide_id) + .unwrap(); + let fraction_events = + frame_abundance * scan_abundance * ion_abundance * total_events; // get collision energy for the ion - let collision_energy = self.fragmentation_settings.get_collision_energy(frame_id as i32, *scan as i32); + let collision_energy = self + .fragmentation_settings + .get_collision_energy(frame_id as i32, *scan as i32); let collision_energy_quantized = (collision_energy * 1e3).round() as i8; // get charge state for the ion let charge_state = charges.get(index).unwrap(); // extract fragment ions for the peptide, charge state and collision energy - let maybe_value = fragment_ions.get(&(*peptide_id, *charge_state, collision_energy_quantized)); + let maybe_value = fragment_ions.get(&( + *peptide_id, + *charge_state, + collision_energy_quantized, + )); // jump to next peptide if the fragment_ions is None (can this happen?) if maybe_value.is_none() { @@ -298,21 +570,27 @@ impl TimsTofSyntheticsFrameBuilderDIA { scaled_spec }; - tims_spectra.push( - TimsSpectrum::new( - frame_id as i32, - *scan as i32, - *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64, - *self.precursor_frame_builder.scan_to_mobility.get(&scan).unwrap() as f64, - ms_type.clone(), - IndexedMzSpectrum::new(vec![0; mz_spectrum.mz.len()], mz_spectrum.mz, mz_spectrum.intensity).filter_ranged( - 100.0, - 1700.0, - 1.0, - 1e9, - ), + tims_spectra.push(TimsSpectrum::new( + frame_id as i32, + *scan as i32, + *self + .precursor_frame_builder + .frame_to_rt + .get(&frame_id) + .unwrap() as f64, + *self + .precursor_frame_builder + .scan_to_mobility + .get(&scan) + .unwrap() as f64, + ms_type.clone(), + IndexedMzSpectrum::new( + vec![0; mz_spectrum.mz.len()], + mz_spectrum.mz, + mz_spectrum.intensity, ) - ); + .filter_ranged(100.0, 1700.0, 1.0, 1e9), + )); } } } @@ -322,7 +600,11 @@ impl TimsTofSyntheticsFrameBuilderDIA { return TimsFrame::new( frame_id as i32, ms_type.clone(), - *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64, + *self + .precursor_frame_builder + .frame_to_rt + .get(&frame_id) + .unwrap() as f64, vec![], vec![], vec![], @@ -347,7 +629,10 @@ impl TimsTofSyntheticsFrameBuilderDIA { pub fn build_fragment_frame_annotated( &self, frame_id: u32, - fragment_ions: &BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec)>, + fragment_ions: &BTreeMap< + (u32, i8, i8), + (PeptideProductIonSeriesCollection, Vec), + >, mz_noise_fragment: bool, uniform: bool, fragment_ppm: f64, @@ -356,17 +641,29 @@ impl TimsTofSyntheticsFrameBuilderDIA { intensity_min: Option, right_drag: Option, ) -> TimsFrameAnnotated { - let ms_type = match self.precursor_frame_builder.precursor_frame_id_set.contains(&frame_id) { + let ms_type = match self + .precursor_frame_builder + .precursor_frame_id_set + .contains(&frame_id) + { false => MsType::FragmentDia, true => MsType::Unknown, }; let mut tims_spectra: Vec = Vec::new(); - if !self.precursor_frame_builder.frame_to_abundances.contains_key(&frame_id) { + if !self + .precursor_frame_builder + .frame_to_abundances + .contains_key(&frame_id) + { return TimsFrameAnnotated::new( frame_id as i32, - *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64, + *self + .precursor_frame_builder + .frame_to_rt + .get(&frame_id) + .unwrap() as f64, ms_type.clone(), vec![], vec![], @@ -377,37 +674,76 @@ impl TimsTofSyntheticsFrameBuilderDIA { ); } - let (peptide_ids, frame_abundances) = self.precursor_frame_builder.frame_to_abundances.get(&frame_id).unwrap(); + let (peptide_ids, frame_abundances) = self + .precursor_frame_builder + .frame_to_abundances + .get(&frame_id) + .unwrap(); for (peptide_id, frame_abundance) in peptide_ids.iter().zip(frame_abundances.iter()) { - if !self.precursor_frame_builder.peptide_to_ions.contains_key(&peptide_id) { + if !self + .precursor_frame_builder + .peptide_to_ions + .contains_key(&peptide_id) + { continue; } - let (ion_abundances, scan_occurrences, scan_abundances, charges, _) = self.precursor_frame_builder.peptide_to_ions.get(&peptide_id).unwrap(); + let (ion_abundances, scan_occurrences, scan_abundances, charges, _) = self + .precursor_frame_builder + .peptide_to_ions + .get(&peptide_id) + .unwrap(); for (index, ion_abundance) in ion_abundances.iter().enumerate() { let all_scan_occurrence = scan_occurrences.get(index).unwrap(); let all_scan_abundance = scan_abundances.get(index).unwrap(); - let peptide = self.precursor_frame_builder.peptides.get(peptide_id).unwrap(); - let ion = PeptideIon::new(peptide.sequence.sequence.clone(), charges[index] as i32, *ion_abundance as f64, Some(*peptide_id as i32)); + let peptide = self + .precursor_frame_builder + .peptides + .get(peptide_id) + .unwrap(); + let ion = PeptideIon::new( + peptide.sequence.sequence.clone(), + charges[index] as i32, + *ion_abundance as f64, + Some(*peptide_id as i32), + ); // TODO: make this configurable let spectrum = ion.calculate_isotopic_spectrum_annotated(1e-3, 1e-8, 200, 1e-4); - for (scan, scan_abundance) in all_scan_occurrence.iter().zip(all_scan_abundance.iter()) { - if !self.transmission_settings.any_transmitted(frame_id as i32, *scan as i32, &spectrum.mz, None) { + for (scan, scan_abundance) in + all_scan_occurrence.iter().zip(all_scan_abundance.iter()) + { + if !self.transmission_settings.any_transmitted( + frame_id as i32, + *scan as i32, + &spectrum.mz, + None, + ) { continue; } - let total_events = self.precursor_frame_builder.peptide_to_events.get(&peptide_id).unwrap(); - let fraction_events = frame_abundance * scan_abundance * ion_abundance * total_events; - - let collision_energy = self.fragmentation_settings.get_collision_energy(frame_id as i32, *scan as i32); + let total_events = self + .precursor_frame_builder + .peptide_to_events + .get(&peptide_id) + .unwrap(); + let fraction_events = + frame_abundance * scan_abundance * ion_abundance * total_events; + + let collision_energy = self + .fragmentation_settings + .get_collision_energy(frame_id as i32, *scan as i32); let collision_energy_quantized = (collision_energy * 1e3).round() as i8; let charge_state = charges.get(index).unwrap(); - let maybe_value = fragment_ions.get(&(*peptide_id, *charge_state, collision_energy_quantized)); + let maybe_value = fragment_ions.get(&( + *peptide_id, + *charge_state, + collision_energy_quantized, + )); if maybe_value.is_none() { continue; @@ -426,16 +762,23 @@ impl TimsTofSyntheticsFrameBuilderDIA { scaled_spec }; - tims_spectra.push( - TimsSpectrumAnnotated::new( - frame_id as i32, - *scan, - *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64, - *self.precursor_frame_builder.scan_to_mobility.get(&scan).unwrap() as f64, - ms_type.clone(), - vec![0; mz_spectrum.mz.len()], - mz_spectrum) - ); + tims_spectra.push(TimsSpectrumAnnotated::new( + frame_id as i32, + *scan, + *self + .precursor_frame_builder + .frame_to_rt + .get(&frame_id) + .unwrap() as f64, + *self + .precursor_frame_builder + .scan_to_mobility + .get(&scan) + .unwrap() as f64, + ms_type.clone(), + vec![0; mz_spectrum.mz.len()], + mz_spectrum, + )); } } } @@ -444,7 +787,11 @@ impl TimsTofSyntheticsFrameBuilderDIA { if tims_spectra.is_empty() { return TimsFrameAnnotated::new( frame_id as i32, - *self.precursor_frame_builder.frame_to_rt.get(&frame_id).unwrap() as f64, + *self + .precursor_frame_builder + .frame_to_rt + .get(&frame_id) + .unwrap() as f64, ms_type.clone(), vec![], vec![], @@ -469,33 +816,69 @@ impl TimsTofSyntheticsFrameBuilderDIA { ) } - pub fn get_ion_transmission_matrix(&self, peptide_id: u32, charge: i8, include_precursor_frames: bool) -> Vec> { - + pub fn get_ion_transmission_matrix( + &self, + peptide_id: u32, + charge: i8, + include_precursor_frames: bool, + ) -> Vec> { let maybe_peptide_sim = self.precursor_frame_builder.peptides.get(&peptide_id); - + let mut frame_ids = match maybe_peptide_sim { Some(maybe_peptide_sim) => maybe_peptide_sim.frame_distribution.occurrence.clone(), - _ => vec![] + _ => vec![], }; if !include_precursor_frames { - frame_ids = frame_ids.iter().filter(|frame_id| !self.precursor_frame_builder.precursor_frame_id_set.contains(frame_id)).cloned().collect(); + frame_ids = frame_ids + .iter() + .filter(|frame_id| { + !self + .precursor_frame_builder + .precursor_frame_id_set + .contains(frame_id) + }) + .cloned() + .collect(); } - let ion = self.precursor_frame_builder.ions.get(&peptide_id).unwrap().iter().find(|ion| ion.charge == charge).unwrap(); + let ion = self + .precursor_frame_builder + .ions + .get(&peptide_id) + .unwrap() + .iter() + .find(|ion| ion.charge == charge) + .unwrap(); let spectrum = ion.simulated_spectrum.clone(); let scan_distribution = &ion.scan_distribution; - let mut transmission_matrix = vec![vec![0.0; frame_ids.len()]; scan_distribution.occurrence.len()]; + let mut transmission_matrix = + vec![vec![0.0; frame_ids.len()]; scan_distribution.occurrence.len()]; for (frame_index, frame) in frame_ids.iter().enumerate() { for (scan_index, scan) in scan_distribution.occurrence.iter().enumerate() { - if self.transmission_settings.all_transmitted(*frame as i32, *scan as i32, &spectrum.mz, None) { + if self.transmission_settings.all_transmitted( + *frame as i32, + *scan as i32, + &spectrum.mz, + None, + ) { transmission_matrix[scan_index][frame_index] = 1.0; - } - else if self.transmission_settings.any_transmitted(*frame as i32, *scan as i32, &spectrum.mz, None) { - let transmitted_spectrum = self.transmission_settings.transmit_spectrum(*frame as i32, *scan as i32, spectrum.clone(), None); - let percentage_transmitted = transmitted_spectrum.intensity.iter().sum::() / spectrum.intensity.iter().sum::(); + } else if self.transmission_settings.any_transmitted( + *frame as i32, + *scan as i32, + &spectrum.mz, + None, + ) { + let transmitted_spectrum = self.transmission_settings.transmit_spectrum( + *frame as i32, + *scan as i32, + spectrum.clone(), + None, + ); + let percentage_transmitted = transmitted_spectrum.intensity.iter().sum::() + / spectrum.intensity.iter().sum::(); transmission_matrix[scan_index][frame_index] = percentage_transmitted as f32; } } @@ -505,8 +888,31 @@ impl TimsTofSyntheticsFrameBuilderDIA { } pub fn count_number_transmissions(&self, peptide_id: u32, charge: i8) -> (usize, usize) { - let frame_ids: Vec<_> = self.precursor_frame_builder.peptides.get(&peptide_id).unwrap().frame_distribution.occurrence.clone().iter().filter(|frame_id| !self.precursor_frame_builder.precursor_frame_id_set.contains(frame_id)).cloned().collect(); - let ion = self.precursor_frame_builder.ions.get(&peptide_id).unwrap().iter().find(|ion| ion.charge == charge).unwrap(); + let frame_ids: Vec<_> = self + .precursor_frame_builder + .peptides + .get(&peptide_id) + .unwrap() + .frame_distribution + .occurrence + .clone() + .iter() + .filter(|frame_id| { + !self + .precursor_frame_builder + .precursor_frame_id_set + .contains(frame_id) + }) + .cloned() + .collect(); + let ion = self + .precursor_frame_builder + .ions + .get(&peptide_id) + .unwrap() + .iter() + .find(|ion| ion.charge == charge) + .unwrap(); let spectrum = ion.simulated_spectrum.clone(); let scan_distribution = &ion.scan_distribution; @@ -516,7 +922,12 @@ impl TimsTofSyntheticsFrameBuilderDIA { for frame in frame_ids.iter() { let mut frame_transmitted = false; for scan in scan_distribution.occurrence.iter() { - if self.transmission_settings.any_transmitted(*frame as i32, *scan as i32, &spectrum.mz, None) { + if self.transmission_settings.any_transmitted( + *frame as i32, + *scan as i32, + &spectrum.mz, + None, + ) { frame_transmitted = true; scan_count += 1; } @@ -529,11 +940,22 @@ impl TimsTofSyntheticsFrameBuilderDIA { (frame_count, scan_count) } - pub fn count_number_transmissions_parallel(&self, peptide_ids: Vec, charge: Vec, num_threads: usize) -> Vec<(usize, usize)> { - - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + pub fn count_number_transmissions_parallel( + &self, + peptide_ids: Vec, + charge: Vec, + num_threads: usize, + ) -> Vec<(usize, usize)> { + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let result: Vec<(usize, usize)> = thread_pool.install(|| { - peptide_ids.par_iter().zip(charge.par_iter()).map(|(peptide_id, charge)| self.count_number_transmissions(*peptide_id, *charge)).collect() + peptide_ids + .par_iter() + .zip(charge.par_iter()) + .map(|(peptide_id, charge)| self.count_number_transmissions(*peptide_id, *charge)) + .collect() }); result @@ -542,6 +964,7 @@ impl TimsTofSyntheticsFrameBuilderDIA { impl TimsTofCollisionEnergy for TimsTofSyntheticsFrameBuilderDIA { fn get_collision_energy(&self, frame_id: i32, scan_id: i32) -> f64 { - self.fragmentation_settings.get_collision_energy(frame_id, scan_id) + self.fragmentation_settings + .get_collision_energy(frame_id, scan_id) } -} \ No newline at end of file +} diff --git a/rustdf/src/sim/handle.rs b/rustdf/src/sim/handle.rs index 1d9e5e16..b1200628 100644 --- a/rustdf/src/sim/handle.rs +++ b/rustdf/src/sim/handle.rs @@ -1,14 +1,17 @@ -use std::collections::{BTreeMap, BTreeSet, HashSet}; -use std::path::Path; +use crate::sim::containers::{ + FragmentIonSim, FrameToWindowGroupSim, FramesSim, IonSim, PeptidesSim, ScansSim, + SignalDistribution, WindowGroupSettingsSim, +}; use mscore::data::peptide::{FragmentType, PeptideProductIonSeriesCollection, PeptideSequence}; -use mscore::timstof::collision::{TimsTofCollisionEnergy, TimsTofCollisionEnergyDIA}; -use mscore::timstof::quadrupole::{IonTransmission, TimsTransmissionDIA}; use mscore::data::spectrum::{MsType, MzSpectrum}; use mscore::simulation::annotation::MzSpectrumAnnotated; -use rusqlite::Connection; -use crate::sim::containers::{FragmentIonSim, FramesSim, FrameToWindowGroupSim, IonSim, PeptidesSim, ScansSim, SignalDistribution, WindowGroupSettingsSim}; +use mscore::timstof::collision::{TimsTofCollisionEnergy, TimsTofCollisionEnergyDIA}; +use mscore::timstof::quadrupole::{IonTransmission, TimsTransmissionDIA}; use rayon::prelude::*; use rayon::ThreadPoolBuilder; +use rusqlite::Connection; +use std::collections::{BTreeMap, BTreeSet, HashSet}; +use std::path::Path; #[derive(Debug)] pub struct TimsTofSyntheticsDataHandle { @@ -24,11 +27,7 @@ impl TimsTofSyntheticsDataHandle { pub fn read_frames(&self) -> rusqlite::Result> { let mut stmt = self.connection.prepare("SELECT * FROM frames")?; let frames_iter = stmt.query_map([], |row| { - Ok(FramesSim::new( - row.get(0)?, - row.get(1)?, - row.get(2)?, - )) + Ok(FramesSim::new(row.get(0)?, row.get(1)?, row.get(2)?)) })?; let mut frames = Vec::new(); for frame in frames_iter { @@ -39,12 +38,7 @@ impl TimsTofSyntheticsDataHandle { pub fn read_scans(&self) -> rusqlite::Result> { let mut stmt = self.connection.prepare("SELECT * FROM scans")?; - let scans_iter = stmt.query_map([], |row| { - Ok(ScansSim::new( - row.get(0)?, - row.get(1)?, - )) - })?; + let scans_iter = stmt.query_map([], |row| Ok(ScansSim::new(row.get(0)?, row.get(1)?)))?; let mut scans = Vec::new(); for scan in scans_iter { scans.push(scan?); @@ -59,21 +53,23 @@ impl TimsTofSyntheticsDataHandle { let frame_occurrence: Vec = match serde_json::from_str(&frame_occurrence_str) { Ok(value) => value, - Err(e) => return Err(rusqlite::Error::FromSqlConversionFailure( - 15, - rusqlite::types::Type::Text, - Box::new(e), - )), + Err(e) => { + return Err(rusqlite::Error::FromSqlConversionFailure( + 15, + rusqlite::types::Type::Text, + Box::new(e), + )) + } }; // if the frame abundance is not available, set it to 0 let frame_abundance: Vec = match serde_json::from_str(&frame_abundance_str) { Ok(value) => value, - Err(_e) => vec![0.0; frame_occurrence.len()], + Err(_e) => vec![0.0; frame_occurrence.len()], }; - let frame_distribution = SignalDistribution::new( - 0.0, 0.0, 0.0, frame_occurrence, frame_abundance); + let frame_distribution = + SignalDistribution::new(0.0, 0.0, 0.0, frame_occurrence, frame_abundance); Ok(PeptidesSim { protein_id: row.get(0)?, @@ -106,31 +102,38 @@ impl TimsTofSyntheticsDataHandle { let scan_occurrence_str: String = row.get(8)?; let scan_abundance_str: String = row.get(9)?; - let simulated_spectrum: MzSpectrum = match serde_json::from_str(&simulated_spectrum_str) { + let simulated_spectrum: MzSpectrum = match serde_json::from_str(&simulated_spectrum_str) + { Ok(value) => value, - Err(e) => return Err(rusqlite::Error::FromSqlConversionFailure( - 6, - rusqlite::types::Type::Text, - Box::new(e), - )), + Err(e) => { + return Err(rusqlite::Error::FromSqlConversionFailure( + 6, + rusqlite::types::Type::Text, + Box::new(e), + )) + } }; let scan_occurrence: Vec = match serde_json::from_str(&scan_occurrence_str) { Ok(value) => value, - Err(e) => return Err(rusqlite::Error::FromSqlConversionFailure( - 8, - rusqlite::types::Type::Text, - Box::new(e), - )), + Err(e) => { + return Err(rusqlite::Error::FromSqlConversionFailure( + 8, + rusqlite::types::Type::Text, + Box::new(e), + )) + } }; let scan_abundance: Vec = match serde_json::from_str(&scan_abundance_str) { Ok(value) => value, - Err(e) => return Err(rusqlite::Error::FromSqlConversionFailure( - 9, - rusqlite::types::Type::Text, - Box::new(e), - )), + Err(e) => { + return Err(rusqlite::Error::FromSqlConversionFailure( + 9, + rusqlite::types::Type::Text, + Box::new(e), + )) + } }; Ok(IonSim::new( @@ -174,10 +177,7 @@ impl TimsTofSyntheticsDataHandle { pub fn read_frame_to_window_group(&self) -> rusqlite::Result> { let mut stmt = self.connection.prepare("SELECT * FROM dia_ms_ms_info")?; let frame_to_window_group_iter = stmt.query_map([], |row| { - Ok(FrameToWindowGroupSim::new( - row.get(0)?, - row.get(1)?, - )) + Ok(FrameToWindowGroupSim::new(row.get(0)?, row.get(1)?)) })?; let mut frame_to_window_groups: Vec = Vec::new(); @@ -189,7 +189,6 @@ impl TimsTofSyntheticsDataHandle { } pub fn read_fragment_ions(&self) -> rusqlite::Result> { - let mut stmt = self.connection.prepare("SELECT * FROM fragment_ions")?; let fragment_ion_sim_iter = stmt.query_map([], |row| { @@ -198,20 +197,24 @@ impl TimsTofSyntheticsDataHandle { let indices: Vec = match serde_json::from_str(&indices_string) { Ok(value) => value, - Err(e) => return Err(rusqlite::Error::FromSqlConversionFailure( - 4, - rusqlite::types::Type::Text, - Box::new(e), - )), + Err(e) => { + return Err(rusqlite::Error::FromSqlConversionFailure( + 4, + rusqlite::types::Type::Text, + Box::new(e), + )) + } }; let values: Vec = match serde_json::from_str(&values_string) { Ok(value) => value, - Err(e) => return Err(rusqlite::Error::FromSqlConversionFailure( - 5, - rusqlite::types::Type::Text, - Box::new(e), - )), + Err(e) => { + return Err(rusqlite::Error::FromSqlConversionFailure( + 5, + rusqlite::types::Type::Text, + Box::new(e), + )) + } }; Ok(FragmentIonSim::new( @@ -237,13 +240,34 @@ impl TimsTofSyntheticsDataHandle { let window_group_settings = self.read_window_group_settings().unwrap(); TimsTransmissionDIA::new( - frame_to_window_group.iter().map(|x| x.frame_id as i32).collect(), - frame_to_window_group.iter().map(|x| x.window_group as i32).collect(), - window_group_settings.iter().map(|x| x.window_group as i32).collect(), - window_group_settings.iter().map(|x| x.scan_start as i32).collect(), - window_group_settings.iter().map(|x| x.scan_end as i32).collect(), - window_group_settings.iter().map(|x| x.isolation_mz as f64).collect(), - window_group_settings.iter().map(|x| x.isolation_width as f64).collect(), + frame_to_window_group + .iter() + .map(|x| x.frame_id as i32) + .collect(), + frame_to_window_group + .iter() + .map(|x| x.window_group as i32) + .collect(), + window_group_settings + .iter() + .map(|x| x.window_group as i32) + .collect(), + window_group_settings + .iter() + .map(|x| x.scan_start as i32) + .collect(), + window_group_settings + .iter() + .map(|x| x.scan_end as i32) + .collect(), + window_group_settings + .iter() + .map(|x| x.isolation_mz as f64) + .collect(), + window_group_settings + .iter() + .map(|x| x.isolation_width as f64) + .collect(), None, ) } @@ -253,12 +277,30 @@ impl TimsTofSyntheticsDataHandle { let window_group_settings = self.read_window_group_settings().unwrap(); TimsTofCollisionEnergyDIA::new( - frame_to_window_group.iter().map(|x| x.frame_id as i32).collect(), - frame_to_window_group.iter().map(|x| x.window_group as i32).collect(), - window_group_settings.iter().map(|x| x.window_group as i32).collect(), - window_group_settings.iter().map(|x| x.scan_start as i32).collect(), - window_group_settings.iter().map(|x| x.scan_end as i32).collect(), - window_group_settings.iter().map(|x| x.collision_energy as f64).collect(), + frame_to_window_group + .iter() + .map(|x| x.frame_id as i32) + .collect(), + frame_to_window_group + .iter() + .map(|x| x.window_group as i32) + .collect(), + window_group_settings + .iter() + .map(|x| x.window_group as i32) + .collect(), + window_group_settings + .iter() + .map(|x| x.scan_start as i32) + .collect(), + window_group_settings + .iter() + .map(|x| x.scan_end as i32) + .collect(), + window_group_settings + .iter() + .map(|x| x.collision_energy as f64) + .collect(), ) } @@ -267,29 +309,38 @@ impl TimsTofSyntheticsDataHandle { peptide_map: &BTreeMap, precursor_frames: &HashSet, transmission: &TimsTransmissionDIA, - collision_energy: &TimsTofCollisionEnergyDIA) -> BTreeSet<(u32, u32, String, i8, i32)> { - + collision_energy: &TimsTofCollisionEnergyDIA, + ) -> BTreeSet<(u32, u32, String, i8, i32)> { let peptide = peptide_map.get(&ion.peptide_id).unwrap(); let mut ret_tree: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new(); // go over all frames the ion occurs in for frame in peptide.frame_distribution.occurrence.iter() { - // only consider fragment frames if !precursor_frames.contains(frame) { - // go over all scans the ion occurs in for scan in &ion.scan_distribution.occurrence { // check transmission for all precursor ion peaks of the isotopic envelope let precursor_spec = &ion.simulated_spectrum; - if transmission.any_transmitted(*frame as i32, *scan as i32, &precursor_spec.mz, Some(0.5)) { - - let collision_energy = collision_energy.get_collision_energy(*frame as i32, *scan as i32); + if transmission.any_transmitted( + *frame as i32, + *scan as i32, + &precursor_spec.mz, + Some(0.5), + ) { + let collision_energy = + collision_energy.get_collision_energy(*frame as i32, *scan as i32); let quantized_energy = (collision_energy * 100.0).round() as i32; - ret_tree.insert((ion.peptide_id, ion.ion_id, peptide.sequence.sequence.clone(), ion.charge, quantized_energy)); + ret_tree.insert(( + ion.peptide_id, + ion.ion_id, + peptide.sequence.sequence.clone(), + ion.charge, + quantized_energy, + )); } } } @@ -298,20 +349,35 @@ impl TimsTofSyntheticsDataHandle { } // TODO: take isotopic envelope into account - pub fn get_transmitted_ions(&self, num_threads: usize) -> (Vec, Vec, Vec, Vec, Vec) { - - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // create a thread pool + pub fn get_transmitted_ions( + &self, + num_threads: usize, + ) -> (Vec, Vec, Vec, Vec, Vec) { + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); // create a thread pool let peptides = self.read_peptides().unwrap(); let peptide_map = TimsTofSyntheticsDataHandle::build_peptide_map(&peptides); - let precursor_frames = TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(&self.read_frames().unwrap()); + let precursor_frames = + TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(&self.read_frames().unwrap()); let transmission = self.get_transmission_dia(); let collision_energy = self.get_collision_energy_dia(); let ions = self.read_ions().unwrap(); - let trees = thread_pool.install(|| { ions.par_iter().map(|ion| { - TimsTofSyntheticsDataHandle::ion_map_fn(ion.clone(), &peptide_map, &precursor_frames, &transmission, &collision_energy) - }).collect::>() + let trees = thread_pool.install(|| { + ions.par_iter() + .map(|ion| { + TimsTofSyntheticsDataHandle::ion_map_fn( + ion.clone(), + &peptide_map, + &precursor_frames, + &transmission, + &collision_energy, + ) + }) + .collect::>() }); let mut ret_tree: BTreeSet<(u32, u32, String, i8, i32)> = BTreeSet::new(); @@ -333,7 +399,13 @@ impl TimsTofSyntheticsDataHandle { ret_energy.push(energy as f32 / 100.0); } - (ret_peptide_id, ret_ion_id, ret_sequence, ret_charge, ret_energy) + ( + ret_peptide_id, + ret_ion_id, + ret_sequence, + ret_charge, + ret_energy, + ) } /// Method to build a map from peptide id to ions @@ -357,7 +429,9 @@ impl TimsTofSyntheticsDataHandle { /// Method to build a set of precursor frame ids, can be used to check if a frame is a precursor frame pub fn build_precursor_frame_id_set(frames: &Vec) -> HashSet { - frames.iter().filter(|frame| frame.parse_ms_type() == MsType::Precursor) + frames + .iter() + .filter(|frame| frame.parse_ms_type() == MsType::Precursor) .map(|frame| frame.frame_id) .collect() } @@ -388,7 +462,9 @@ impl TimsTofSyntheticsDataHandle { } scan_to_mobility } - pub fn build_frame_to_abundances(peptides: &Vec) -> BTreeMap, Vec)> { + pub fn build_frame_to_abundances( + peptides: &Vec, + ) -> BTreeMap, Vec)> { let mut frame_to_abundances = BTreeMap::new(); for peptide in peptides.iter() { @@ -397,7 +473,9 @@ impl TimsTofSyntheticsDataHandle { let frame_abundance = peptide.frame_distribution.abundance.clone(); for (frame_id, abundance) in frame_occurrence.iter().zip(frame_abundance.iter()) { - let (occurrences, abundances) = frame_to_abundances.entry(*frame_id).or_insert((vec![], vec![])); + let (occurrences, abundances) = frame_to_abundances + .entry(*frame_id) + .or_insert((vec![], vec![])); occurrences.push(peptide_id); abundances.push(*abundance); } @@ -405,7 +483,18 @@ impl TimsTofSyntheticsDataHandle { frame_to_abundances } - pub fn build_peptide_to_ions(ions: &Vec) -> BTreeMap, Vec>, Vec>, Vec, Vec)> { + pub fn build_peptide_to_ions( + ions: &Vec, + ) -> BTreeMap< + u32, + ( + Vec, + Vec>, + Vec>, + Vec, + Vec, + ), + > { let mut peptide_to_ions = BTreeMap::new(); for ion in ions.iter() { @@ -416,7 +505,9 @@ impl TimsTofSyntheticsDataHandle { let charge = ion.charge; let spectrum = ion.simulated_spectrum.clone(); - let (abundances, scan_occurrences, scan_abundances, charges, spectra) = peptide_to_ions.entry(peptide_id).or_insert((vec![], vec![], vec![], vec![], vec![])); + let (abundances, scan_occurrences, scan_abundances, charges, spectra) = peptide_to_ions + .entry(peptide_id) + .or_insert((vec![], vec![], vec![], vec![], vec![])); abundances.push(abundance); scan_occurrences.push(scan_occurrence); scan_abundances.push(scan_abundance); @@ -426,30 +517,44 @@ impl TimsTofSyntheticsDataHandle { peptide_to_ions } - pub fn build_fragment_ions(peptides_sim: &BTreeMap, fragment_ions: &Vec, num_threads: usize) -> BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec)> { - - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + pub fn build_fragment_ions( + peptides_sim: &BTreeMap, + fragment_ions: &Vec, + num_threads: usize, + ) -> BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec)> { + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let fragment_ion_map = thread_pool.install(|| { - fragment_ions.par_iter() + fragment_ions + .par_iter() .map(|fragment_ion| { - let key = (fragment_ion.peptide_id, fragment_ion.charge, (fragment_ion.collision_energy * 1e3).round() as i8); - - let value = peptides_sim.get(&fragment_ion.peptide_id).unwrap().sequence.associate_with_predicted_intensities( - fragment_ion.charge as i32, - FragmentType::B, - fragment_ion.to_dense(174), - true, - true, + let key = ( + fragment_ion.peptide_id, + fragment_ion.charge, + (fragment_ion.collision_energy * 1e3).round() as i8, ); - let fragment_ions: Vec = value.peptide_ions.par_iter().map(|ion_series| { - ion_series.generate_isotopic_spectrum( - 1e-2, - 1e-3, - 100, - 1e-5, - ) - }).collect(); + let value = peptides_sim + .get(&fragment_ion.peptide_id) + .unwrap() + .sequence + .associate_with_predicted_intensities( + fragment_ion.charge as i32, + FragmentType::B, + fragment_ion.to_dense(174), + true, + true, + ); + + let fragment_ions: Vec = value + .peptide_ions + .par_iter() + .map(|ion_series| { + ion_series.generate_isotopic_spectrum(1e-2, 1e-3, 100, 1e-5) + }) + .collect(); (key, (value, fragment_ions)) }) .collect::>() // Collect the results into a BTreeMap @@ -458,15 +563,31 @@ impl TimsTofSyntheticsDataHandle { fragment_ion_map } - pub fn build_fragment_ions_annotated(peptides_sim: &BTreeMap, fragment_ions: &Vec, num_threads: usize) -> BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec)> { - - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); - let fragment_ion_map = thread_pool.install(|| { - fragment_ions.par_iter() - .map(|fragment_ion| { - let key = (fragment_ion.peptide_id, fragment_ion.charge, (fragment_ion.collision_energy * 1e3).round() as i8); + pub fn build_fragment_ions_annotated( + peptides_sim: &BTreeMap, + fragment_ions: &Vec, + num_threads: usize, + ) -> BTreeMap<(u32, i8, i8), (PeptideProductIonSeriesCollection, Vec)> + { + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); + let fragment_ion_map = thread_pool.install(|| { + fragment_ions + .par_iter() + .map(|fragment_ion| { + let key = ( + fragment_ion.peptide_id, + fragment_ion.charge, + (fragment_ion.collision_energy * 1e3).round() as i8, + ); - let value = peptides_sim.get(&fragment_ion.peptide_id).unwrap().sequence.associate_with_predicted_intensities( + let value = peptides_sim + .get(&fragment_ion.peptide_id) + .unwrap() + .sequence + .associate_with_predicted_intensities( fragment_ion.charge as i32, FragmentType::B, fragment_ion.to_dense(174), @@ -474,19 +595,18 @@ impl TimsTofSyntheticsDataHandle { true, ); - let fragment_ions: Vec = value.peptide_ions.par_iter().map(|ion_series| { - ion_series.generate_isotopic_spectrum_annotated( - 1e-2, - 1e-3, - 100, - 1e-5, - ) - }).collect(); - (key, (value, fragment_ions)) - }) - .collect::>() // Collect the results into a BTreeMap - }); - - fragment_ion_map + let fragment_ions: Vec = value + .peptide_ions + .par_iter() + .map(|ion_series| { + ion_series.generate_isotopic_spectrum_annotated(1e-2, 1e-3, 100, 1e-5) + }) + .collect(); + (key, (value, fragment_ions)) + }) + .collect::>() // Collect the results into a BTreeMap + }); + + fragment_ion_map } -} \ No newline at end of file +} diff --git a/rustdf/src/sim/mod.rs b/rustdf/src/sim/mod.rs index e4fc7f01..c9f285ad 100644 --- a/rustdf/src/sim/mod.rs +++ b/rustdf/src/sim/mod.rs @@ -1,5 +1,5 @@ -pub mod precursor; pub mod containers; -pub mod utility; +pub mod dia; pub mod handle; -pub mod dia; \ No newline at end of file +pub mod precursor; +pub mod utility; diff --git a/rustdf/src/sim/precursor.rs b/rustdf/src/sim/precursor.rs index bd1963d7..c6cd8cf8 100644 --- a/rustdf/src/sim/precursor.rs +++ b/rustdf/src/sim/precursor.rs @@ -1,16 +1,18 @@ -use std::collections::{BTreeMap, HashSet}; +use mscore::data::peptide::PeptideIon; +use mscore::data::spectrum::{IndexedMzSpectrum, MsType, MzSpectrum}; +use mscore::simulation::annotation::{ + MzSpectrumAnnotated, PeakAnnotation, TimsFrameAnnotated, TimsSpectrumAnnotated, +}; use mscore::timstof::frame::TimsFrame; use mscore::timstof::spectrum::TimsSpectrum; -use mscore::data::spectrum::{MzSpectrum, MsType, IndexedMzSpectrum}; -use rusqlite::{Result}; +use rusqlite::Result; +use std::collections::{BTreeMap, HashSet}; use std::path::Path; -use mscore::data::peptide::PeptideIon; -use mscore::simulation::annotation::{MzSpectrumAnnotated, PeakAnnotation, TimsFrameAnnotated, TimsSpectrumAnnotated}; -use rayon::prelude::*; -use rayon::ThreadPoolBuilder; use crate::sim::containers::{FramesSim, IonSim, PeptidesSim, ScansSim}; use crate::sim::handle::TimsTofSyntheticsDataHandle; +use rayon::prelude::*; +use rayon::ThreadPoolBuilder; pub struct TimsTofSyntheticsPrecursorFrameBuilder { pub ions: BTreeMap>, @@ -19,7 +21,16 @@ pub struct TimsTofSyntheticsPrecursorFrameBuilder { pub frames: Vec, pub precursor_frame_id_set: HashSet, pub frame_to_abundances: BTreeMap, Vec)>, - pub peptide_to_ions: BTreeMap, Vec>, Vec>, Vec, Vec)>, + pub peptide_to_ions: BTreeMap< + u32, + ( + Vec, + Vec>, + Vec>, + Vec, + Vec, + ), + >, pub frame_to_rt: BTreeMap, pub scan_to_mobility: BTreeMap, pub peptide_to_events: BTreeMap, @@ -47,7 +58,9 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { peptides: TimsTofSyntheticsDataHandle::build_peptide_map(&peptides), scans: scans.clone(), frames: frames.clone(), - precursor_frame_id_set: TimsTofSyntheticsDataHandle::build_precursor_frame_id_set(&frames), + precursor_frame_id_set: TimsTofSyntheticsDataHandle::build_precursor_frame_id_set( + &frames, + ), frame_to_abundances: TimsTofSyntheticsDataHandle::build_frame_to_abundances(&peptides), peptide_to_ions: TimsTofSyntheticsDataHandle::build_peptide_to_ions(&ions), frame_to_rt: TimsTofSyntheticsDataHandle::build_frame_to_rt(&frames), @@ -65,8 +78,14 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { /// # Returns /// /// * A TimsFrame instance - pub fn build_precursor_frame(&self, frame_id: u32, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, right_drag: bool) -> TimsFrame { - + pub fn build_precursor_frame( + &self, + frame_id: u32, + mz_noise_precursor: bool, + uniform: bool, + precursor_noise_ppm: f64, + right_drag: bool, + ) -> TimsFrame { let ms_type = match self.precursor_frame_id_set.contains(&frame_id) { true => MsType::Precursor, false => MsType::Unknown, @@ -98,7 +117,8 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { } // one peptide can have multiple ions, occurring in multiple scans - let (ion_abundances, scan_occurrences, scan_abundances, _, spectra) = self.peptide_to_ions.get(&peptide_id).unwrap(); + let (ion_abundances, scan_occurrences, scan_abundances, _, spectra) = + self.peptide_to_ions.get(&peptide_id).unwrap(); for (index, ion_abundance) in ion_abundances.iter().enumerate() { let scan_occurrence = scan_occurrences.get(index).unwrap(); @@ -106,13 +126,18 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { let spectrum = spectra.get(index).unwrap(); for (scan, scan_abu) in scan_occurrence.iter().zip(scan_abundance.iter()) { - let abundance_factor = abundance * ion_abundance * scan_abu * self.peptide_to_events.get(&peptide_id).unwrap(); + let abundance_factor = abundance + * ion_abundance + * scan_abu + * self.peptide_to_events.get(&peptide_id).unwrap(); let scan_id = *scan; let scaled_spec: MzSpectrum = spectrum.clone() * abundance_factor as f64; let mz_spectrum = if mz_noise_precursor { match uniform { - true => scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag), + true => { + scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag) + } false => scaled_spec.add_mz_noise_normal(precursor_noise_ppm), } } else { @@ -125,7 +150,11 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { *self.frame_to_rt.get(&frame_id).unwrap() as f64, *self.scan_to_mobility.get(&scan_id).unwrap() as f64, ms_type.clone(), - IndexedMzSpectrum::new(vec![0; mz_spectrum.mz.len()], mz_spectrum.mz, mz_spectrum.intensity), + IndexedMzSpectrum::new( + vec![0; mz_spectrum.mz.len()], + mz_spectrum.mz, + mz_spectrum.intensity, + ), ); tims_spectra.push(tims_spec); } @@ -134,16 +163,7 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { let tims_frame = TimsFrame::from_tims_spectra(tims_spectra); - tims_frame.filter_ranged( - 0.0, - 10000.0, - 0, - 2000, - 0.0, - 10.0, - 1.0, - 1e9, - ) + tims_frame.filter_ranged(0.0, 10000.0, 0, 2000, 0.0, 10.0, 1.0, 1e9) } /// Build a collection of precursor frames in parallel @@ -157,12 +177,34 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { /// /// * A vector of TimsFrame instances /// - pub fn build_precursor_frames(&self, frame_ids: Vec, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, right_drag: bool, num_threads: usize) -> Vec { - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + pub fn build_precursor_frames( + &self, + frame_ids: Vec, + mz_noise_precursor: bool, + uniform: bool, + precursor_noise_ppm: f64, + right_drag: bool, + num_threads: usize, + ) -> Vec { + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let mut tims_frames: Vec = Vec::new(); thread_pool.install(|| { - tims_frames = frame_ids.par_iter().map(|frame_id| self.build_precursor_frame(*frame_id, mz_noise_precursor, uniform, precursor_noise_ppm, right_drag)).collect(); + tims_frames = frame_ids + .par_iter() + .map(|frame_id| { + self.build_precursor_frame( + *frame_id, + mz_noise_precursor, + uniform, + precursor_noise_ppm, + right_drag, + ) + }) + .collect(); }); tims_frames.sort_by(|a, b| a.frame_id.cmp(&b.frame_id)); @@ -170,8 +212,14 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { tims_frames } - pub fn build_precursor_frame_annotated(&self, frame_id: u32, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, right_drag: bool) -> TimsFrameAnnotated { - + pub fn build_precursor_frame_annotated( + &self, + frame_id: u32, + mz_noise_precursor: bool, + uniform: bool, + precursor_noise_ppm: f64, + right_drag: bool, + ) -> TimsFrameAnnotated { let ms_type = match self.precursor_frame_id_set.contains(&frame_id) { true => MsType::Precursor, false => MsType::Unknown, @@ -189,7 +237,7 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { vec![], vec![], vec![], - ) + ); } let (peptide_ids, abundances) = self.frame_to_abundances.get(&frame_id).unwrap(); @@ -201,25 +249,37 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { continue; } - let (ion_abundances, scan_occurrences, scan_abundances, charges, _) = self.peptide_to_ions.get(&peptide_id).unwrap(); + let (ion_abundances, scan_occurrences, scan_abundances, charges, _) = + self.peptide_to_ions.get(&peptide_id).unwrap(); for (index, ion_abundance) in ion_abundances.iter().enumerate() { let scan_occurrence = scan_occurrences.get(index).unwrap(); let scan_abundance = scan_abundances.get(index).unwrap(); let charge = charges.get(index).unwrap(); let peptide = self.peptides.get(peptide_id).unwrap(); - let ion = PeptideIon::new(peptide.sequence.sequence.clone(), *charge as i32, *ion_abundance as f64, Some(*peptide_id as i32)); + let ion = PeptideIon::new( + peptide.sequence.sequence.clone(), + *charge as i32, + *ion_abundance as f64, + Some(*peptide_id as i32), + ); // TODO: make this configurable let spectrum = ion.calculate_isotopic_spectrum_annotated(1e-3, 1e-8, 200, 1e-4); for (scan, scan_abu) in scan_occurrence.iter().zip(scan_abundance.iter()) { - let abundance_factor = abundance * ion_abundance * scan_abu * self.peptide_to_events.get(&peptide_id).unwrap(); + let abundance_factor = abundance + * ion_abundance + * scan_abu + * self.peptide_to_events.get(&peptide_id).unwrap(); let scan_id = *scan; - let scaled_spec: MzSpectrumAnnotated = spectrum.clone() * abundance_factor as f64; + let scaled_spec: MzSpectrumAnnotated = + spectrum.clone() * abundance_factor as f64; let mz_spectrum = if mz_noise_precursor { match uniform { - true => scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag), + true => { + scaled_spec.add_mz_noise_uniform(precursor_noise_ppm, right_drag) + } false => scaled_spec.add_mz_noise_normal(precursor_noise_ppm), } } else { @@ -228,12 +288,13 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { let tims_spec = TimsSpectrumAnnotated::new( frame_id as i32, - *scan, - *self.frame_to_rt.get(&frame_id).unwrap() as f64, - *self.scan_to_mobility.get(&scan_id).unwrap() as f64, - ms_type.clone(), + *scan, + *self.frame_to_rt.get(&frame_id).unwrap() as f64, + *self.scan_to_mobility.get(&scan_id).unwrap() as f64, + ms_type.clone(), vec![0; mz_spectrum.mz.len()], - mz_spectrum); + mz_spectrum, + ); tims_spectra.push(tims_spec); } } @@ -241,16 +302,7 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { let tims_frame = TimsFrameAnnotated::from_tims_spectra_annotated(tims_spectra); - let filtered_frame = tims_frame.filter_ranged( - 0.0, - 2000.0, - 0.0, - 2.0, - 0, - 1000, - 1.0, - 1e9, - ); + let filtered_frame = tims_frame.filter_ranged(0.0, 2000.0, 0.0, 2.0, 0, 1000, 1.0, 1e9); TimsFrameAnnotated { frame_id: filtered_frame.frame_id, @@ -261,24 +313,57 @@ impl TimsTofSyntheticsPrecursorFrameBuilder { scan: filtered_frame.scan, inv_mobility: filtered_frame.inv_mobility, intensity: filtered_frame.intensity, - annotations: filtered_frame.annotations.iter().map(|x| { - let mut contributions = x.contributions.clone(); - contributions.sort_by(|a, b| a.intensity_contribution.partial_cmp(&b.intensity_contribution).unwrap()); - PeakAnnotation { contributions, ..*x } - }).collect::>(), + annotations: filtered_frame + .annotations + .iter() + .map(|x| { + let mut contributions = x.contributions.clone(); + contributions.sort_by(|a, b| { + a.intensity_contribution + .partial_cmp(&b.intensity_contribution) + .unwrap() + }); + PeakAnnotation { + contributions, + ..*x + } + }) + .collect::>(), } } - pub fn build_precursor_frames_annotated(&self, frame_ids: Vec, mz_noise_precursor: bool, uniform: bool, precursor_noise_ppm: f64, right_drag: bool, num_threads: usize) -> Vec { - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + pub fn build_precursor_frames_annotated( + &self, + frame_ids: Vec, + mz_noise_precursor: bool, + uniform: bool, + precursor_noise_ppm: f64, + right_drag: bool, + num_threads: usize, + ) -> Vec { + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let mut tims_frames: Vec = Vec::new(); thread_pool.install(|| { - tims_frames = frame_ids.par_iter().map(|frame_id| self.build_precursor_frame_annotated(*frame_id, mz_noise_precursor, uniform, precursor_noise_ppm, right_drag)).collect(); + tims_frames = frame_ids + .par_iter() + .map(|frame_id| { + self.build_precursor_frame_annotated( + *frame_id, + mz_noise_precursor, + uniform, + precursor_noise_ppm, + right_drag, + ) + }) + .collect(); }); tims_frames.sort_by(|a, b| a.frame_id.cmp(&b.frame_id)); tims_frames } -} \ No newline at end of file +} diff --git a/rustdf/src/sim/utility.rs b/rustdf/src/sim/utility.rs index 3a8b1ff0..15811ab9 100644 --- a/rustdf/src/sim/utility.rs +++ b/rustdf/src/sim/utility.rs @@ -59,14 +59,13 @@ pub fn sequence_to_all_ions( half_charge_one: bool, peptide_id: Option, ) -> String { - let peptide_sequence = PeptideSequence::new(sequence.to_string(), peptide_id); let fragments = peptide_sequence.associate_with_predicted_intensities( charge, FragmentType::B, intensity_pred_flat.clone(), normalize, - half_charge_one + half_charge_one, ); to_string(&fragments).unwrap() } @@ -78,13 +77,29 @@ pub fn sequence_to_all_ions_par( normalize: bool, half_charge_one: bool, num_threads: usize, - peptide_ids: Vec> + peptide_ids: Vec>, ) -> Vec { - let thread_pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + let thread_pool = ThreadPoolBuilder::new() + .num_threads(num_threads) + .build() + .unwrap(); let result = thread_pool.install(|| { - sequences.par_iter().zip(charges.par_iter()).zip(intensities_pred_flat.par_iter()).zip(peptide_ids.par_iter()) - .map(|(((seq, charge), intensities), peptide_id)| sequence_to_all_ions(seq, *charge, intensities, normalize, half_charge_one, *peptide_id)) + sequences + .par_iter() + .zip(charges.par_iter()) + .zip(intensities_pred_flat.par_iter()) + .zip(peptide_ids.par_iter()) + .map(|(((seq, charge), intensities), peptide_id)| { + sequence_to_all_ions( + seq, + *charge, + intensities, + normalize, + half_charge_one, + *peptide_id, + ) + }) .collect() }); From 08c48ffd81fd9c7e26a4fdeef0f3f7bab0370075 Mon Sep 17 00:00:00 2001 From: theGreatHerrLebert Date: Mon, 13 Jan 2025 10:09:58 +0100 Subject: [PATCH 29/29] removing wrong keyword --- rustdf/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rustdf/Cargo.toml b/rustdf/Cargo.toml index 127dc059..33a683a6 100644 --- a/rustdf/Cargo.toml +++ b/rustdf/Cargo.toml @@ -9,7 +9,7 @@ repository = "https://github.com/theGreatHerrLebert/rustims" documentation = "https://docs.rs/rustdf" readme = "README.md" keywords = ["dataframe", "sql", "compression", "parallel"] -categories = ["data-structures", "science", "utilities"] +categories = ["data-structures", "science"] rust-version = "1.84" [lib]