diff --git a/imspy/imspy/core.py b/imspy/imspy/core.py new file mode 100644 index 00000000..12828afd --- /dev/null +++ b/imspy/imspy/core.py @@ -0,0 +1,6 @@ +from imspy.frame import TimsFrame +from imspy.spectrum import TimsSpectrum, MzSpectrum +from imspy.data import TimsDataset +from imspy.dia import TimsDatasetDIA +from imspy.slice import TimsSlice, TimsSliceVectorized +from imspy.dda import TimsDatasetDDA, FragmentDDA diff --git a/imspy/imspy/data.py b/imspy/imspy/data.py index d2674bc8..2e5b1e96 100644 --- a/imspy/imspy/data.py +++ b/imspy/imspy/data.py @@ -1,4 +1,119 @@ -from .frame import TimsFrame -from .spectrum import TimsSpectrum, MzSpectrum -from .handle import TimsDataset, TimsDatasetDDA, TimsDatasetDIA -from .slice import TimsSlice, TimsSliceVectorized +import numpy as np +import pandas as pd +import sqlite3 +from numpy.typing import NDArray + +import imspy_connector as pims +import opentims_bruker_bridge as obb + +from abc import ABC + +from imspy.frame import TimsFrame +from imspy.slice import TimsSlice + + +class TimsDataset(ABC): + def __init__(self, data_path: str): + """TimsDataHandle class. + + Args: + data_path (str): Path to the data. + """ + self.__dataset = None + self.binary_path = None + + self.data_path = data_path + self.meta_data = self.__load_meta_data() + self.precursor_frames = self.meta_data[self.meta_data["MsMsType"] == 0].Id.values.astype(np.int32) + self.fragment_frames = self.meta_data[self.meta_data["MsMsType"] > 0].Id.values.astype(np.int32) + self.__current_index = 1 + + # Try to load the data with the first binary found + appropriate_found = False + for so_path in obb.get_so_paths(): + try: + self.__dataset = pims.PyTimsDataset(self.data_path, so_path) + self.binary_path = so_path + appropriate_found = True + break + except Exception: + continue + assert appropriate_found is True, ("No appropriate bruker binary could be found, please check if your " + "operating system is supported by open-tims-bruker-bridge.") + + @property + def acquisition_mode(self) -> str: + """Get the acquisition mode. + + Returns: + str: Acquisition mode. + """ + return self.__dataset.get_acquisition_mode_as_string() + + @property + def acquisition_mode_numerical(self) -> int: + """Get the acquisition mode as a numerical value. + + Returns: + int: Acquisition mode as a numerical value. + """ + return self.__dataset.get_acquisition_mode() + + @property + def frame_count(self) -> int: + """Get the number of frames. + + Returns: + int: Number of frames. + """ + return self.__dataset.frame_count + + def __load_meta_data(self) -> pd.DataFrame: + """Get the meta data. + + Returns: + pd.DataFrame: Meta data. + """ + return pd.read_sql_query("SELECT * from Frames", sqlite3.connect(self.data_path + "/analysis.tdf")) + + def get_tims_frame(self, frame_id: int) -> TimsFrame: + """Get a TimsFrame. + + Args: + frame_id (int): Frame ID. + + Returns: + TimsFrame: TimsFrame. + """ + return TimsFrame.from_py_tims_frame(self.__dataset.get_frame(frame_id)) + + def get_tims_slice(self, frame_ids: NDArray[np.int32]) -> TimsSlice: + """Get a TimsFrame. + + Args: + frame_ids (int): Frame ID. + + Returns: + TimsFrame: TimsFrame. + """ + return TimsSlice.from_py_tims_slice(self.__dataset.get_slice(frame_ids)) + + def __iter__(self): + return self + + def __next__(self): + if self.__current_index <= self.frame_count: + frame_ptr = self.__dataset.get_frame(self.__current_index) + self.__current_index += 1 + if frame_ptr is not None: + return TimsFrame.from_py_tims_frame(frame_ptr) + else: + raise ValueError(f"Frame pointer is None for valid index: {self.__current_index}") + else: + self.__current_index = 1 # Reset for next iteration + raise StopIteration + + def __getitem__(self, index): + if isinstance(index, slice): + return self.get_tims_slice(np.arange(index.start, index.stop, index.step).astype(np.int32)) + return self.get_tims_frame(index) diff --git a/imspy/imspy/dda.py b/imspy/imspy/dda.py index e420fbe5..09978e66 100644 --- a/imspy/imspy/dda.py +++ b/imspy/imspy/dda.py @@ -1,11 +1,96 @@ -import numpy as np +import sqlite3 +from imspy.data import TimsDataset import pandas as pd import imspy_connector as pims - from imspy.frame import TimsFrame +class TimsDatasetDDA(TimsDataset): + + def __init__(self, data_path: str): + super().__init__(data_path=data_path) + self.__dataset = pims.PyTimsDatasetDDA(self.data_path, self.binary_path) + self.meta_data = self.meta_data.rename(columns={"Id": "frame_id"}) + self.fragmented_precursors = self._load_selected_precursors().rename( + columns={ + 'Id': 'precursor_id', + 'LargestPeakMz': 'largest_peak_mz', + 'AverageMz': 'average_mz', + 'MonoisotopicMz': 'monoisotopic_mz', + 'Charge': 'charge', + 'ScanNumber': 'average_scan', + 'Intensity': 'intensity', + 'Parent': 'parent_id', + } + ) + self.pasef_meta_data = self._load_pasef_meta_data().rename( + columns={ + 'Frame': 'frame_id', + 'ScanNumBegin': 'scan_begin', + 'ScanNumEnd': 'scan_end', + 'IsolationMz': 'isolation_mz', + 'IsolationWidth': 'isolation_width', + 'CollisionEnergy': 'collision_energy', + 'Precursor': 'precursor_id' + } + ) + + def _load_selected_precursors(self): + """Get precursors selected for fragmentation. + + Returns: + pd.DataFrame: Precursors selected for fragmentation. + """ + return pd.read_sql_query("SELECT * from Precursors", sqlite3.connect(self.data_path + "/analysis.tdf")) + + def _load_pasef_meta_data(self): + """Get PASEF meta data for DDA. + + Returns: + pd.DataFrame: PASEF meta data. + """ + return pd.read_sql_query("SELECT * from PasefFrameMsMsInfo", + sqlite3.connect(self.data_path + "/analysis.tdf")) + + def get_pasef_fragments(self) -> pd.DataFrame: + """Get PASEF fragments. + + Args: + num_threads (int, optional): Number of threads. Defaults to 4. + + Returns: + List[FragmentDDA]: List of PASEF fragments. + """ + pasef_fragments = [FragmentDDA.from_py_tims_fragment_dda(fragment) + for fragment in self.__dataset.get_pasef_fragments(1)] + + pasef_fragments = pd.DataFrame({ + 'frame_id': [s.frame_id for s in pasef_fragments], + 'precursor_id': [s.precursor_id for s in pasef_fragments], + 'raw_data': [s.selected_fragment for s in pasef_fragments] + }) + + A = pd.merge( + pasef_fragments, self.pasef_meta_data, + left_on=['precursor_id', 'frame_id'], + right_on=['precursor_id', 'frame_id'], + how='inner', + ) + + B = pd.merge( + A, self.fragmented_precursors, + left_on=['precursor_id'], + right_on=['precursor_id'], + how='inner' + ) + + time = self.meta_data[['frame_id']] + time.insert(time.shape[1], "time", self.meta_data['Time'] / 60) + + return pd.merge(time, B, left_on=['frame_id'], right_on=['frame_id'], how='inner') + + class FragmentDDA: def __init__(self, frame_id: int, precursor_id: int, selected_fragment: TimsFrame): self._fragment_ptr = pims.PyTimsFragmentDDA(frame_id, precursor_id, selected_fragment.get_fragment_ptr()) diff --git a/imspy/imspy/dia.py b/imspy/imspy/dia.py new file mode 100644 index 00000000..2d376f84 --- /dev/null +++ b/imspy/imspy/dia.py @@ -0,0 +1,21 @@ +import sqlite3 +from imspy.data import TimsDataset +import pandas as pd + +import imspy_connector as pims + + +class TimsDatasetDIA(TimsDataset): + def __init__(self, data_path: str): + super().__init__(data_path=data_path) + self.__dataset = pims.PyTimsDatasetDIA(self.data_path, self.binary_path) + + @property + def pasef_meta_data(self): + """Get PASEF meta data for DIA. + + Returns: + pd.DataFrame: PASEF meta data. + """ + return pd.read_sql_query("SELECT * from DiaFrameMsMsWindows", + sqlite3.connect(self.data_path + "/analysis.tdf")) diff --git a/imspy/imspy/handle.py b/imspy/imspy/handle.py deleted file mode 100644 index d5ee7bd6..00000000 --- a/imspy/imspy/handle.py +++ /dev/null @@ -1,176 +0,0 @@ -from typing import List - -import numpy as np -import pandas as pd -import sqlite3 -from numpy.typing import NDArray - -import imspy_connector as pims -import opentims_bruker_bridge as obb - -from abc import ABC - -from imspy.dda import FragmentDDA -from imspy.frame import TimsFrame -from imspy.slice import TimsSlice - - -class TimsDataset(ABC): - def __init__(self, data_path: str): - """TimsDataHandle class. - - Args: - data_path (str): Path to the data. - """ - self.__dataset = None - self.binary_path = None - - self.data_path = data_path - self.meta_data = self.__load_meta_data() - self.precursor_frames = self.meta_data[self.meta_data["MsMsType"] == 0].Id.values.astype(np.int32) - self.fragment_frames = self.meta_data[self.meta_data["MsMsType"] > 0].Id.values.astype(np.int32) - self.__current_index = 1 - - # Try to load the data with the first binary found - appropriate_found = False - for so_path in obb.get_so_paths(): - try: - self.__dataset = pims.PyTimsDataset(self.data_path, so_path) - self.binary_path = so_path - appropriate_found = True - break - except Exception: - continue - assert appropriate_found is True, ("No appropriate bruker binary could be found, please check if your " - "operating system is supported by open-tims-bruker-bridge.") - - @property - def acquisition_mode(self) -> str: - """Get the acquisition mode. - - Returns: - str: Acquisition mode. - """ - return self.__dataset.get_acquisition_mode_as_string() - - @property - def acquisition_mode_numerical(self) -> int: - """Get the acquisition mode as a numerical value. - - Returns: - int: Acquisition mode as a numerical value. - """ - return self.__dataset.get_acquisition_mode() - - @property - def frame_count(self) -> int: - """Get the number of frames. - - Returns: - int: Number of frames. - """ - return self.__dataset.frame_count - - def __load_meta_data(self) -> pd.DataFrame: - """Get the meta data. - - Returns: - pd.DataFrame: Meta data. - """ - return pd.read_sql_query("SELECT * from Frames", sqlite3.connect(self.data_path + "/analysis.tdf")) - - def get_tims_frame(self, frame_id: int) -> TimsFrame: - """Get a TimsFrame. - - Args: - frame_id (int): Frame ID. - - Returns: - TimsFrame: TimsFrame. - """ - return TimsFrame.from_py_tims_frame(self.__dataset.get_frame(frame_id)) - - def get_tims_slice(self, frame_ids: NDArray[np.int32]) -> TimsSlice: - """Get a TimsFrame. - - Args: - frame_ids (int): Frame ID. - - Returns: - TimsFrame: TimsFrame. - """ - return TimsSlice.from_py_tims_slice(self.__dataset.get_slice(frame_ids)) - - def __iter__(self): - return self - - def __next__(self): - if self.__current_index <= self.frame_count: - frame_ptr = self.__dataset.get_frame(self.__current_index) - self.__current_index += 1 - if frame_ptr is not None: - return TimsFrame.from_py_tims_frame(frame_ptr) - else: - raise ValueError(f"Frame pointer is None for valid index: {self.__current_index}") - else: - self.__current_index = 1 # Reset for next iteration - raise StopIteration - - def __getitem__(self, index): - if isinstance(index, slice): - return self.get_tims_slice(np.arange(index.start, index.stop, index.step).astype(np.int32)) - return self.get_tims_frame(index) - - -class TimsDatasetDDA(TimsDataset): - - def __init__(self, data_path: str): - super().__init__(data_path=data_path) - self.__dataset = pims.PyTimsDatasetDDA(self.data_path, self.binary_path) - - @property - def selected_precursors(self): - """Get precursors selected for fragmentation. - - Returns: - pd.DataFrame: Precursors selected for fragmentation. - """ - return pd.read_sql_query("SELECT * from Precursors", sqlite3.connect(self.data_path + "/analysis.tdf")) - - @property - def pasef_meta_data(self): - """Get PASEF meta data for DDA. - - Returns: - pd.DataFrame: PASEF meta data. - """ - return pd.read_sql_query("SELECT * from PasefFrameMsMsInfo", - sqlite3.connect(self.data_path + "/analysis.tdf")) - - def get_pasef_fragments(self): - """Get PASEF fragments. - - Args: - num_threads (int, optional): Number of threads. Defaults to 4. - - Returns: - List[FragmentDDA]: List of PASEF fragments. - """ - pasef_fragments = self.__dataset.get_pasef_fragments(1) - return [FragmentDDA.from_py_tims_fragment_dda(fragment) for fragment in pasef_fragments] - - -class TimsDatasetDIA(TimsDataset): - def __init__(self, data_path: str): - super().__init__(data_path=data_path) - self.__dataset = pims.PyTimsDatasetDIA(self.data_path, self.binary_path) - - @property - def pasef_meta_data(self): - """Get PASEF meta data for DIA. - - Returns: - pd.DataFrame: PASEF meta data. - """ - return pd.read_sql_query("SELECT * from DiaFrameMsMsWindows", - sqlite3.connect(self.data_path + "/analysis.tdf")) diff --git a/imspy/imspy/mixture.py b/imspy/imspy/mixture.py index a2e82e68..7942a8f9 100644 --- a/imspy/imspy/mixture.py +++ b/imspy/imspy/mixture.py @@ -1,6 +1,3 @@ -import os -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - import tensorflow as tf import numpy as np import tensorflow_probability as tfp diff --git a/rustdf/src/data/dda.rs b/rustdf/src/data/dda.rs index 19a5f181..45519d40 100644 --- a/rustdf/src/data/dda.rs +++ b/rustdf/src/data/dda.rs @@ -51,7 +51,8 @@ impl TimsDatasetDDA { 0.0, 2000.0, pasef_info.scan_num_begin as i32, - pasef_info.scan_num_end as i32, + // TODO: check if this is correct + pasef_info.scan_num_end as i32 + 1, 0.0, 5.0, 0.0,