Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

David@mixture #47

Merged
merged 15 commits into from
Nov 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions imspy/imspy/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from imspy.frame import TimsFrame
from imspy.spectrum import TimsSpectrum, MzSpectrum
from imspy.data import TimsDataset
from imspy.dia import TimsDatasetDIA
from imspy.slice import TimsSlice, TimsSliceVectorized
from imspy.dda import TimsDatasetDDA, FragmentDDA
123 changes: 119 additions & 4 deletions imspy/imspy/data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,119 @@
from .frame import TimsFrame
from .spectrum import TimsSpectrum, MzSpectrum
from .handle import TimsDataset, TimsDatasetDDA, TimsDatasetDIA
from .slice import TimsSlice, TimsSliceVectorized
import numpy as np
import pandas as pd
import sqlite3
from numpy.typing import NDArray

import imspy_connector as pims
import opentims_bruker_bridge as obb

from abc import ABC

from imspy.frame import TimsFrame
from imspy.slice import TimsSlice


class TimsDataset(ABC):
    """Base handle for a Bruker TimsTOF dataset directory.

    On construction the ``Frames`` table of ``<data_path>/analysis.tdf`` is
    loaded as frame meta data, and the raw data is opened through
    ``imspy_connector`` using the first Bruker binary reported by
    ``opentims_bruker_bridge`` that loads without raising.

    Frames can be retrieved by id (``dataset[frame_id]``), by slice
    (``dataset[start:stop:step]``) or by iterating over the dataset.
    """

    def __init__(self, data_path: str):
        """Open the dataset located at ``data_path``.

        Args:
            data_path (str): Path to the data.

        Raises:
            AssertionError: If none of the available Bruker binaries could be
                used to open the dataset.
        """
        self.__dataset = None
        self.binary_path = None

        self.data_path = data_path
        self.meta_data = self.__load_meta_data()
        # Frames are partitioned by the MsMsType column: 0 -> precursor,
        # anything greater -> fragment.
        self.precursor_frames = self.meta_data[self.meta_data["MsMsType"] == 0].Id.values.astype(np.int32)
        self.fragment_frames = self.meta_data[self.meta_data["MsMsType"] > 0].Id.values.astype(np.int32)
        # Iteration cursor used by __next__; the first frame requested is 1.
        self.__current_index = 1

        # Try to load the data with the first binary that works.
        last_error = None
        for so_path in obb.get_so_paths():
            try:
                self.__dataset = pims.PyTimsDataset(self.data_path, so_path)
            except Exception as error:
                # Remember why this binary failed so the final error can
                # carry the cause instead of silently discarding it.
                last_error = error
                continue
            self.binary_path = so_path
            break

        if self.__dataset is None:
            # Raised explicitly (rather than via ``assert``) so the check is
            # not stripped under ``python -O``; AssertionError is kept so
            # existing callers that catch it keep working.
            raise AssertionError(
                "No appropriate bruker binary could be found, please check if your "
                "operating system is supported by open-tims-bruker-bridge."
            ) from last_error

    @property
    def acquisition_mode(self) -> str:
        """Get the acquisition mode.

        Returns:
            str: Acquisition mode.
        """
        return self.__dataset.get_acquisition_mode_as_string()

    @property
    def acquisition_mode_numerical(self) -> int:
        """Get the acquisition mode as a numerical value.

        Returns:
            int: Acquisition mode as a numerical value.
        """
        return self.__dataset.get_acquisition_mode()

    @property
    def frame_count(self) -> int:
        """Get the number of frames.

        Returns:
            int: Number of frames.
        """
        return self.__dataset.frame_count

    def __load_meta_data(self) -> pd.DataFrame:
        """Load the ``Frames`` table from ``analysis.tdf``.

        Returns:
            pd.DataFrame: Meta data, one row per entry of the Frames table.
        """
        connection = sqlite3.connect(self.data_path + "/analysis.tdf")
        try:
            return pd.read_sql_query("SELECT * from Frames", connection)
        finally:
            # sqlite3 connections are not closed automatically on scope exit.
            connection.close()

    def get_tims_frame(self, frame_id: int) -> TimsFrame:
        """Get a TimsFrame.

        Args:
            frame_id (int): Frame ID.

        Returns:
            TimsFrame: TimsFrame.
        """
        return TimsFrame.from_py_tims_frame(self.__dataset.get_frame(frame_id))

    def get_tims_slice(self, frame_ids: NDArray[np.int32]) -> TimsSlice:
        """Get a TimsSlice covering several frames.

        Args:
            frame_ids (NDArray[np.int32]): Frame IDs to include.

        Returns:
            TimsSlice: TimsSlice built from the requested frames.
        """
        return TimsSlice.from_py_tims_slice(self.__dataset.get_slice(frame_ids))

    def __iter__(self):
        """Return the dataset itself as its own iterator."""
        # NOTE(review): the cursor is only reset once iteration is exhausted,
        # so a loop that exits early resumes where it stopped next time.
        return self

    def __next__(self):
        """Return the next frame, advancing the internal cursor.

        Returns:
            TimsFrame: The frame at the current cursor position.

        Raises:
            StopIteration: When the cursor has passed ``frame_count``; the
                cursor is reset to 1 so the dataset can be iterated again.
            ValueError: If the backend returns ``None`` for an in-range index.
        """
        if self.__current_index > self.frame_count:
            self.__current_index = 1  # Reset for next iteration
            raise StopIteration

        requested_index = self.__current_index
        frame_ptr = self.__dataset.get_frame(requested_index)
        self.__current_index += 1
        if frame_ptr is None:
            # Report the index that was actually requested, not the already
            # advanced cursor.
            raise ValueError(f"Frame pointer is None for valid index: {requested_index}")
        return TimsFrame.from_py_tims_frame(frame_ptr)

    def _frame_ids_from_slice(self, index: slice) -> NDArray[np.int32]:
        """Translate a slice into the array of frame ids it selects.

        Open bounds are filled in for forward slices: a missing start becomes
        1 (the first id used by iteration), a missing stop becomes
        ``frame_count + 1`` (so the last frame is included) and a missing
        step becomes 1.

        Args:
            index (slice): Slice as received by ``__getitem__``.

        Returns:
            NDArray[np.int32]: Frame ids ``start, start + step, ... < stop``.
        """
        start = 1 if index.start is None else index.start
        stop = self.frame_count + 1 if index.stop is None else index.stop
        step = 1 if index.step is None else index.step
        return np.arange(start, stop, step).astype(np.int32)

    def __getitem__(self, index):
        """Get a single frame or a slice of frames.

        Args:
            index (int | slice): Frame id, or a slice of frame ids.

        Returns:
            TimsFrame | TimsSlice: A TimsFrame for an integer index, a
            TimsSlice for a slice.
        """
        if isinstance(index, slice):
            return self.get_tims_slice(self._frame_ids_from_slice(index))
        return self.get_tims_frame(index)
89 changes: 87 additions & 2 deletions imspy/imspy/dda.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,96 @@
import numpy as np
import sqlite3
from imspy.data import TimsDataset
import pandas as pd

import imspy_connector as pims

from imspy.frame import TimsFrame


class TimsDatasetDDA(TimsDataset):
    """TimsDataset for DDA acquisitions.

    In addition to the base-class frame meta data, the ``Precursors`` and
    ``PasefFrameMsMsInfo`` tables of ``analysis.tdf`` are loaded and their
    columns renamed to snake_case.
    """

    def __init__(self, data_path: str):
        """Open a DDA dataset.

        Args:
            data_path (str): Path to the data.
        """
        super().__init__(data_path=data_path)
        # Name mangling stores this as ``_TimsDatasetDDA__dataset``; it does
        # not replace the handle the base class created for its own methods.
        self.__dataset = pims.PyTimsDatasetDDA(self.data_path, self.binary_path)
        self.meta_data = self.meta_data.rename(columns={"Id": "frame_id"})
        self.fragmented_precursors = self._load_selected_precursors().rename(
            columns={
                'Id': 'precursor_id',
                'LargestPeakMz': 'largest_peak_mz',
                'AverageMz': 'average_mz',
                'MonoisotopicMz': 'monoisotopic_mz',
                'Charge': 'charge',
                'ScanNumber': 'average_scan',
                'Intensity': 'intensity',
                'Parent': 'parent_id',
            }
        )
        self.pasef_meta_data = self._load_pasef_meta_data().rename(
            columns={
                'Frame': 'frame_id',
                'ScanNumBegin': 'scan_begin',
                'ScanNumEnd': 'scan_end',
                'IsolationMz': 'isolation_mz',
                'IsolationWidth': 'isolation_width',
                'CollisionEnergy': 'collision_energy',
                'Precursor': 'precursor_id',
            }
        )

    def _query_analysis_tdf(self, query: str) -> pd.DataFrame:
        """Run ``query`` against ``analysis.tdf`` and close the connection.

        Args:
            query (str): SQL statement to execute.

        Returns:
            pd.DataFrame: Query result.
        """
        connection = sqlite3.connect(self.data_path + "/analysis.tdf")
        try:
            return pd.read_sql_query(query, connection)
        finally:
            # sqlite3 connections are not closed automatically on scope exit.
            connection.close()

    def _load_selected_precursors(self) -> pd.DataFrame:
        """Get precursors selected for fragmentation.

        Returns:
            pd.DataFrame: Precursors selected for fragmentation.
        """
        return self._query_analysis_tdf("SELECT * from Precursors")

    def _load_pasef_meta_data(self) -> pd.DataFrame:
        """Get PASEF meta data for DDA.

        Returns:
            pd.DataFrame: PASEF meta data.
        """
        return self._query_analysis_tdf("SELECT * from PasefFrameMsMsInfo")

    def get_pasef_fragments(self, num_threads: int = 1) -> pd.DataFrame:
        """Get PASEF fragments joined with their meta data.

        Args:
            num_threads (int, optional): Value forwarded to the backend's
                ``get_pasef_fragments`` call (documented upstream as the
                number of threads). Defaults to 1, the value previously
                hard-coded here.

        Returns:
            pd.DataFrame: One row per fragment that has matching entries in
            the PASEF meta data, the precursor table and the frame table
            (inner joins). Columns are ``frame_id``, ``time``,
            ``precursor_id``, ``raw_data`` followed by the PASEF and
            precursor columns.
        """
        fragments = [FragmentDDA.from_py_tims_fragment_dda(fragment)
                     for fragment in self.__dataset.get_pasef_fragments(num_threads)]

        fragment_table = pd.DataFrame({
            'frame_id': [s.frame_id for s in fragments],
            'precursor_id': [s.precursor_id for s in fragments],
            'raw_data': [s.selected_fragment for s in fragments],
        })

        with_pasef_info = pd.merge(
            fragment_table, self.pasef_meta_data,
            on=['precursor_id', 'frame_id'],
            how='inner',
        )

        with_precursors = pd.merge(
            with_pasef_info, self.fragmented_precursors,
            on=['precursor_id'],
            how='inner',
        )

        # Time is divided by 60 -- presumably seconds to minutes; confirm
        # against the TDF schema.
        frame_times = self.meta_data[['frame_id']].assign(time=self.meta_data['Time'] / 60)

        return pd.merge(frame_times, with_precursors, on=['frame_id'], how='inner')


class FragmentDDA:
def __init__(self, frame_id: int, precursor_id: int, selected_fragment: TimsFrame):
self._fragment_ptr = pims.PyTimsFragmentDDA(frame_id, precursor_id, selected_fragment.get_fragment_ptr())
Expand Down
21 changes: 21 additions & 0 deletions imspy/imspy/dia.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import sqlite3
from imspy.data import TimsDataset
import pandas as pd

import imspy_connector as pims


class TimsDatasetDIA(TimsDataset):
    """TimsDataset for DIA acquisitions."""

    def __init__(self, data_path: str):
        """Open a DIA dataset.

        Args:
            data_path (str): Path to the data.
        """
        super().__init__(data_path=data_path)
        # Name mangling stores this as ``_TimsDatasetDIA__dataset``; it does
        # not replace the handle the base class created for its own methods.
        self.__dataset = pims.PyTimsDatasetDIA(self.data_path, self.binary_path)

    @property
    def pasef_meta_data(self):
        """Get PASEF meta data for DIA.

        The ``DiaFrameMsMsWindows`` table is re-read from ``analysis.tdf`` on
        every access.

        Returns:
            pd.DataFrame: PASEF meta data.
        """
        connection = sqlite3.connect(self.data_path + "/analysis.tdf")
        try:
            return pd.read_sql_query("SELECT * from DiaFrameMsMsWindows", connection)
        finally:
            # sqlite3 connections are not closed automatically on scope exit.
            connection.close()
Loading