Merge pull request #313 from theGreatHerrLebert/feature/timsim-gui

Feature/timsim gui
theGreatHerrLebert · Jan 13, 2025 · 0cf03fb · 0cf03fb
2 parents 368114b + 08c48ff
commit 0cf03fb
Show file tree

Hide file tree

Showing 25 changed files with 2,482 additions and 791 deletions.
diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py
@@ -1,15 +1,105 @@
 import sqlite3
+from typing import List, Optional
+
+import numpy as np
 import pandas as pd
 
 from imspy.simulation.annotation import RustWrapperObject
 from imspy.timstof.data import TimsDataset
 from imspy.timstof.frame import TimsFrame
 
+from sagepy.core import Precursor, Tolerance, ProcessedSpectrum, RawSpectrum, Representation, SpectrumProcessor
+
 import imspy_connector
 ims = imspy_connector.py_dda
 import warnings
 
 
+
+class PrecursorDDA(RustWrapperObject):
+    def __init__(self, frame_id: int, precursor_id: int, highest_intensity_mz: float, average_mz: float,
+                 inverse_ion_mobility: float, collision_energy: float, precuror_total_intensity: float,
+                 isolation_mz: float, isolation_width: float, mono_mz: Optional[float] = None, charge: Optional[int] = None):
+        self._precursor_ptr = ims.PyDDAPrecursor(
+            frame_id, precursor_id, highest_intensity_mz, average_mz, inverse_ion_mobility, collision_energy,
+            precuror_total_intensity, isolation_mz, isolation_width, mono_mz, charge
+        )
+
+    @classmethod
+    def from_py_ptr(cls, precursor: ims.PyDDAPrecursor):
+        instance = cls.__new__(cls)
+        instance._precursor_ptr = precursor
+        return instance
+
+    @property
+    def frame_id(self) -> int:
+        return self._precursor_ptr.frame_id
+
+    @property
+    def precursor_id(self) -> int:
+        return self._precursor_ptr.precursor_id
+
+    @property
+    def mono_mz(self) -> Optional[float]:
+        return self._precursor_ptr.mono_mz
+
+    @property
+    def highest_intensity_mz(self) -> float:
+        return self._precursor_ptr.highest_intensity_mz
+
+    @property
+    def average_mz(self) -> float:
+        return self._precursor_ptr.average_mz
+
+    @property
+    def charge(self) -> Optional[int]:
+        return self._precursor_ptr.charge
+
+    @property
+    def inverse_ion_mobility(self) -> float:
+        return self._precursor_ptr.inverse_ion_mobility
+
+    @property
+    def collision_energy(self) -> float:
+        return self._precursor_ptr.collision_energy
+
+    @property
+    def precuror_total_intensity(self) -> float:
+        return self._precursor_ptr.precuror_total_intensity
+
+    @property
+    def isolation_mz(self) -> float:
+        return self._precursor_ptr.isolation_mz
+
+    @property
+    def isolation_width(self) -> float:
+        return self._precursor_ptr.isolation_width
+
+    def __repr__(self):
+        return (f"DDAPrecursor(frame_id={self.frame_id}, precursor_id={self.precursor_id}, "
+                f"highest_intensity_mz={self.highest_intensity_mz}, average_mz={self.average_mz}, "
+                f"inverse_ion_mobility={self.inverse_ion_mobility}, collision_energy={self.collision_energy}, "
+                f"precuror_total_intensity={self.precuror_total_intensity}, isolation_mz={self.isolation_mz}, "
+                f"isolation_width={self.isolation_width}, mono_mz={self.mono_mz}, charge={self.charge})")
+
+    def get_py_ptr(self):
+        return self._precursor_ptr
+
+    def to_sage_precursor(self) -> Precursor:
+
+        mz = self.mono_mz if self.mono_mz is not None else self.average_mz
+
+        return Precursor(
+            mz=mz,
+            intensity=self.precuror_total_intensity,
+            charge=self.charge,
+            spectrum_ref=str(self.frame_id),
+            isolation_window=Tolerance(da=(-self.isolation_width / 2, self.isolation_width / 2)),
+            inverse_ion_mobility=self.inverse_ion_mobility,
+            collision_energy=self.collision_energy,
+        )
+
+
 class TimsDatasetDDA(TimsDataset, RustWrapperObject):
 
     def __init__(self, data_path: str, in_memory: bool = False, use_bruker_sdk: bool = True):
@@ -100,6 +190,84 @@ def get_pasef_fragments(self, num_threads: int = 1) -> pd.DataFrame:
 
         return pd.merge(time, B, left_on=['frame_id'], right_on=['frame_id'], how='inner')
 
+    def get_precursor_frames(self, min_intensity: float = 75, max_peaks: int = 500, num_threads: int = 4) -> List[TimsFrame]:
+        """
+        Get precursor frames.
+        Args:
+            min_intensity: minimum intensity a peak must have to be considered
+            max_peaks: maximum number of peaks to consider, frames will be sorted by intensity and only the top max_peaks will be considered
+            num_threads: number of threads to use for processing
+
+        Returns:
+            List[TimsFrame]: List of all precursor frames
+        """
+        precursor_frames = [TimsFrame.from_py_ptr(frame) for frame in self.__dataset.get_precursor_frames(min_intensity, max_peaks, num_threads)]
+        return precursor_frames
+
+    def get_sage_processed_precursors(self, min_intensity: float = 75, max_peaks: int = 5000, file_id: int = 0, num_threads: int = 16) -> List[ProcessedSpectrum]:
+
+        if self.use_bruker_sdk:
+            warnings.warn("Using multiple threads is currently not supported when using Bruker SDK, "
+                            "setting num_threads to 1.")
+            num_threads = 1
+
+        # get all selected precursors
+        precursor_meta = self.get_selected_precursors()
+
+        # create a dictionary with frame_id as key and a list of precursors as value
+        precursor_dict = {}
+        for precursor in precursor_meta:
+            if precursor.frame_id not in precursor_dict:
+                precursor_dict[precursor.frame_id] = []
+            precursor_dict[precursor.frame_id].append(precursor)
+
+        # get all precursor frames
+        precursor_frames = self.get_precursor_frames(min_intensity, max_peaks, num_threads)
+
+        # process all precursor frames
+        processed_spectra = []
+
+        spectrum_processor = SpectrumProcessor(
+            take_top_n=max_peaks,
+            min_deisotope_mz=0.0,
+            deisotope=False,
+        )
+
+        # associate precursors with frames, precursor will be associated with the frame with the same frame_id
+        for frame in precursor_frames:
+            if frame.frame_id in precursor_dict:
+                precursors = [p.to_sage_precursor() for p in precursor_dict[frame.frame_id]]
+                raw_spectrum = RawSpectrum(
+                    file_id=file_id,
+                    spec_id=str(frame.frame_id),
+                    total_ion_current=np.sum(frame.intensity),
+                    precursors=precursors,
+                    mz=frame.mz.astype(np.float32),
+                    intensity=frame.intensity.astype(np.float32),
+                    representation=Representation(representation="centroid"),
+                    scan_start_time=frame.retention_time / 60,
+                    ion_injection_time=frame.retention_time / 60,
+                    ms_level=1,
+                )
+
+                # process the spectrum
+                processed_spectrum = spectrum_processor.process(raw_spectrum)
+                processed_spectra.append(processed_spectrum)
+
+        # delete precursor_frames to free memory
+        del precursor_frames
+
+        return processed_spectra
+
+
+    def get_selected_precursors(self) -> List[PrecursorDDA]:
+        """
+        Get meta data for all selected precursors
+        Returns:
+            List[PrecursorDDA]: List of all selected precursors
+        """
+        return [PrecursorDDA.from_py_ptr(precursor) for precursor in self.__dataset.get_selected_precursors()]
+
     def __repr__(self):
         """Return a one-line summary: data path, frame count and number of rows in ``fragmented_precursors``."""
         summary = (f"TimsDatasetDDA(data_path={self.data_path}, num_frames={self.frame_count}, "
                    f"fragmented_precursors={self.fragmented_precursors.shape[0]})")
         return summary

diff --git a/imspy_connector/pyproject.toml b/imspy_connector/pyproject.toml
@@ -7,6 +7,7 @@ name = "imspy_connector"
 dependencies = [
     "opentims-bruker-bridge>=1.1.0",
 ]
+version = "0.3.7"
 requires-python = ">=3.11"
 classifiers = [
     "Programming Language :: Rust",

diff --git a/imspy_connector/src/py_dda.rs b/imspy_connector/src/py_dda.rs
@@ -2,9 +2,82 @@ use pyo3::prelude::*;
 
 use rustdf::data::dda::{PASEFDDAFragment, TimsDatasetDDA};
 use rustdf::data::handle::TimsData;
+use rustdf::data::meta::{DDAPrecursor};
 use crate::py_tims_frame::PyTimsFrame;
 use crate::py_tims_slice::PyTimsSlice;
 
+/// Python-exposed wrapper that owns a [`DDAPrecursor`].
+#[pyclass]
+pub struct PyDDAPrecursor {
+    inner: DDAPrecursor,
+}
+
+#[pymethods]
+impl PyDDAPrecursor {
+    /// Builds a precursor from its individual fields.
+    ///
+    /// `mono_mz` and `charge` may be omitted on the Python side and then default to `None`.
+    #[new]
+    #[pyo3(signature = (frame_id, precursor_id, highest_intensity_mz, average_mz, inverse_ion_mobility, collision_energy, precuror_total_intensity, isolation_mz, isolation_width, mono_mz=None, charge=None))]
+    pub fn new(
+        frame_id: i64,
+        precursor_id: i64,
+        highest_intensity_mz: f64,
+        average_mz: f64,
+        inverse_ion_mobility: f64,
+        collision_energy: f64,
+        precuror_total_intensity: f64,
+        isolation_mz: f64,
+        isolation_width: f64,
+        mono_mz: Option<f64>,
+        charge: Option<i64>,
+    ) -> Self {
+        Self {
+            inner: DDAPrecursor {
+                frame_id,
+                precursor_id,
+                highest_intensity_mz,
+                average_mz,
+                inverse_ion_mobility,
+                collision_energy,
+                precuror_total_intensity,
+                isolation_mz,
+                isolation_width,
+                mono_mz,
+                charge,
+            },
+        }
+    }
+
+    /// `frame_id` of the wrapped precursor.
+    #[getter]
+    pub fn frame_id(&self) -> i64 {
+        self.inner.frame_id
+    }
+
+    /// `precursor_id` of the wrapped precursor.
+    #[getter]
+    pub fn precursor_id(&self) -> i64 {
+        self.inner.precursor_id
+    }
+
+    /// `mono_mz` of the wrapped precursor, if set.
+    #[getter]
+    pub fn mono_mz(&self) -> Option<f64> {
+        self.inner.mono_mz
+    }
+
+    /// `highest_intensity_mz` of the wrapped precursor.
+    #[getter]
+    pub fn highest_intensity_mz(&self) -> f64 {
+        self.inner.highest_intensity_mz
+    }
+
+    /// `average_mz` of the wrapped precursor.
+    #[getter]
+    pub fn average_mz(&self) -> f64 {
+        self.inner.average_mz
+    }
+
+    /// `charge` of the wrapped precursor, if set.
+    #[getter]
+    pub fn charge(&self) -> Option<i64> {
+        self.inner.charge
+    }
+
+    /// `inverse_ion_mobility` of the wrapped precursor.
+    #[getter]
+    pub fn inverse_ion_mobility(&self) -> f64 {
+        self.inner.inverse_ion_mobility
+    }
+
+    /// `collision_energy` of the wrapped precursor.
+    #[getter]
+    pub fn collision_energy(&self) -> f64 {
+        self.inner.collision_energy
+    }
+
+    /// `precuror_total_intensity` of the wrapped precursor (spelling follows the wrapped struct).
+    #[getter]
+    pub fn precuror_total_intensity(&self) -> f64 {
+        self.inner.precuror_total_intensity
+    }
+
+    /// `isolation_mz` of the wrapped precursor.
+    #[getter]
+    pub fn isolation_mz(&self) -> f64 {
+        self.inner.isolation_mz
+    }
+
+    /// `isolation_width` of the wrapped precursor.
+    #[getter]
+    pub fn isolation_width(&self) -> f64 {
+        self.inner.isolation_width
+    }
+}
+
 #[pyclass]
 pub struct PyTimsDatasetDDA {
     inner: TimsDatasetDDA,
@@ -41,6 +114,16 @@ impl PyTimsDatasetDDA {
         let pasef_fragments = self.inner.get_pasef_fragments(num_threads);
         pasef_fragments.iter().map(|pasef_fragment| PyTimsFragmentDDA { inner: pasef_fragment.clone() }).collect()
     }
+
+    /// Returns the selected precursors of the wrapped dataset, each cloned into a `PyDDAPrecursor`.
+    pub fn get_selected_precursors(&self) -> Vec<PyDDAPrecursor> {
+        self.inner
+            .get_selected_precursors()
+            .iter()
+            .cloned()
+            .map(|inner| PyDDAPrecursor { inner })
+            .collect()
+    }
+
+    /// Returns the precursor frames of the wrapped dataset, each cloned into a `PyTimsFrame`.
+    ///
+    /// All three arguments are forwarded unchanged to the wrapped dataset.
+    pub fn get_precursor_frames(&self, min_intensity: f64, max_peaks: usize, num_threads: usize) -> Vec<PyTimsFrame> {
+        self.inner
+            .get_precursor_frames(min_intensity, max_peaks, num_threads)
+            .iter()
+            .cloned()
+            .map(|inner| PyTimsFrame { inner })
+            .collect()
+    }
 }
 
 #[pyclass]
@@ -80,5 +163,6 @@ impl PyTimsFragmentDDA {
 pub fn py_dda(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<PyTimsDatasetDDA>()?;
     m.add_class::<PyTimsFragmentDDA>()?;
+    m.add_class::<PyDDAPrecursor>()?;
     Ok(())
 }
diff --git a/mscore/Cargo.toml b/mscore/Cargo.toml
@@ -2,18 +2,45 @@
 name = "mscore"
 version = "0.2.0"
 edition = "2021"
+authors = ["David Teschner <[email protected]>"]
+description = "A Rust library providing core operations for computational mass spectrometry proteomics."
+license = "MIT" 
+repository = "https://github.com/theGreatHerrLebert/rustims" 
+documentation = "https://docs.rs/mscore"
+readme = "README.md"
+keywords = ["statistics", "matrix", "scoring", "parallel"]
+categories = ["mathematics", "science", "data-structures"]
+rust-version = "1.84"
+
+[lib]
+name = "mscore"
+path = "src/lib.rs"
 
 [dependencies]
+# Statistical functions
 statrs = "0.18.0"
+# Iterator utilities
 itertools = "0.14.0"
+# Parallelism
 rayon = "1.10.0"
+# Matrix operations
 nalgebra = "0.33.2"
+# Serialization
 serde = { version = "1.0.217", features = ["derive"] }
+# Regular expressions
 regex = "1.11.1"
+# Random number generation
 rand = "0.8.5"
+# Ordered floats
 ordered-float = "4.6.0"
+# Binary serialization
 bincode = "2.0.0-rc.3"
 
-[lib]
-name = "mscore"
-path = "src/lib.rs"
+[profile.release]
+debug = true
+overflow-checks = true
+lto = "thin"
+panic = "abort"
+
+[package.metadata.docs.rs]
+# Build the documentation with every feature enabled. This manifest declares no
+# feature named "all", so requesting it through `features` would fail the docs.rs build.
+all-features = true