From c35056083516c2c522d91a5f7fc1e21d9d32faaf Mon Sep 17 00:00:00 2001
From: "Tim Maier (Ubuntu Desktop)" <tmaier01@students.uni-mainz.de>
Date: Wed, 25 Oct 2023 15:06:48 +0200
Subject: [PATCH 1/7] transfer simulation framework from proteolizard

---
 imspy/imspy/data.py                           |   4 -
 imspy/imspy/data/__init__.py                  |   4 +
 imspy/imspy/data/frame.py                     | 391 +++++++++
 imspy/imspy/data/handle.py                    | 157 ++++
 imspy/imspy/data/slice.py                     | 394 +++++++++
 imspy/imspy/data/spectrum.py                  | 377 +++++++++
 imspy/imspy/feature.py                        | 211 +++++
 imspy/imspy/handle.py                         |   5 +
 imspy/imspy/isotopes.py                       | 242 ++++++
 imspy/imspy/noise.py                          | 124 +++
 imspy/imspy/proteome.py                       | 322 ++++++++
 imspy/imspy/simulation/experiment.py          | 204 +++++
 imspy/imspy/simulation/hardware_models.py     | 766 ++++++++++++++++++
 imspy/imspy/slice.py                          |   6 +
 imspy/imspy/utility.py                        | 508 ++++++++++++
 .../simulation/run_example_simulation.py      | 104 +++
 pyims/pyims/chemistry.py                      | 213 +++++
 17 files changed, 4028 insertions(+), 4 deletions(-)
 delete mode 100644 imspy/imspy/data.py
 create mode 100644 imspy/imspy/data/__init__.py
 create mode 100644 imspy/imspy/data/frame.py
 create mode 100644 imspy/imspy/data/handle.py
 create mode 100644 imspy/imspy/data/slice.py
 create mode 100644 imspy/imspy/data/spectrum.py
 create mode 100644 imspy/imspy/feature.py
 create mode 100644 imspy/imspy/isotopes.py
 create mode 100644 imspy/imspy/noise.py
 create mode 100644 imspy/imspy/proteome.py
 create mode 100644 imspy/imspy/simulation/experiment.py
 create mode 100644 imspy/imspy/simulation/hardware_models.py
 create mode 100644 imspy/imspy/utility.py
 create mode 100644 pyims/examples/simulation/run_example_simulation.py
 create mode 100644 pyims/pyims/chemistry.py

diff --git a/imspy/imspy/data.py b/imspy/imspy/data.py
deleted file mode 100644
index d2674bc8..00000000
--- a/imspy/imspy/data.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .frame import TimsFrame
-from .spectrum import TimsSpectrum, MzSpectrum
-from .handle import TimsDataset, TimsDatasetDDA, TimsDatasetDIA
-from .slice import TimsSlice, TimsSliceVectorized
diff --git a/imspy/imspy/data/__init__.py b/imspy/imspy/data/__init__.py
new file mode 100644
index 00000000..1809a4f5
--- /dev/null
+++ b/imspy/imspy/data/__init__.py
@@ -0,0 +1,4 @@
+from pyims.data.frame import TimsFrame
+from pyims.data.spectrum import TimsSpectrum, MzSpectrum
+from pyims.data.handle import TimsDataset, TimsDatasetDDA, TimsDatasetDIA
+from pyims.data.slice import TimsSlice
diff --git a/imspy/imspy/data/frame.py b/imspy/imspy/data/frame.py
new file mode 100644
index 00000000..659209b0
--- /dev/null
+++ b/imspy/imspy/data/frame.py
@@ -0,0 +1,391 @@
+import pandas as pd
+
+from typing import List
+from numpy.typing import NDArray
+
+from tensorflow import sparse as sp
+
+import numpy as np
+import imspy_connector as pims
+from imspy.spectrum import MzSpectrum, TimsSpectrum, IndexedMzSpectrum
+
+from imspy.utilities import re_index_indices
+
+
+class TimsFrame:
+    def __init__(self, frame_id: int, ms_type: int, retention_time: float, scan: NDArray[np.int32],
+                 mobility: NDArray[np.float64], tof: NDArray[np.int32],
+                 mz: NDArray[np.float64], intensity: NDArray[np.float64]):
+        """TimsFrame class.
+
+        Args:
+            frame_id (int): Frame ID.
+            ms_type (int): MS type.
+            retention_time (float): Retention time.
+            scan (NDArray[np.int32]): Scan.
+            mobility (NDArray[np.float64]): Inverse mobility.
+            tof (NDArray[np.int32]): Time of flight.
+            mz (NDArray[np.float64]): m/z.
+            intensity (NDArray[np.float64]): Intensity.
+
+        Raises:
+            AssertionError: If the length of the scan, mobility, tof, mz and intensity arrays are not equal.
+        """
+
+        assert len(scan) == len(mobility) == len(tof) == len(mz) == len(intensity), \
+            "The length of the scan, mobility, tof, mz and intensity arrays must be equal."
+
+        self.__frame_ptr = pims.PyTimsFrame(frame_id, ms_type, retention_time, scan, mobility, tof, mz, intensity)
+
+    @classmethod
+    def from_py_tims_frame(cls, frame: pims.PyTimsFrame):
+        """Create a TimsFrame from a PyTimsFrame.
+
+        Args:
+            frame (pims.PyTimsFrame): PyTimsFrame to create the TimsFrame from.
+
+        Returns:
+            TimsFrame: TimsFrame created from the PyTimsFrame.
+        """
+        instance = cls.__new__(cls)
+        instance.__frame_ptr = frame
+        return instance
+
+    @property
+    def frame_id(self) -> int:
+        """Frame ID.
+
+        Returns:
+            int: Frame ID.
+        """
+        return self.__frame_ptr.frame_id
+
+    @property
+    def ms_type(self) -> str:
+        """MS type.
+
+        Returns:
+            int: MS type.
+        """
+        return self.__frame_ptr.ms_type_as_string
+
+    @property
+    def retention_time(self) -> float:
+        """Retention time.
+
+        Returns:
+            float: Retention time.
+        """
+        return self.__frame_ptr.retention_time
+
+    @property
+    def scan(self) -> NDArray[np.int32]:
+        """Scan.
+
+        Returns:
+            NDArray[np.int32]: Scan.
+        """
+        return self.__frame_ptr.scan
+
+    @property
+    def mobility(self) -> NDArray[np.float64]:
+        """Inverse mobility.
+
+        Returns:
+            NDArray[np.float64]: Inverse mobility.
+        """
+        return self.__frame_ptr.mobility
+
+    @property
+    def tof(self) -> NDArray[np.int32]:
+        """Time of flight.
+
+        Returns:
+            NDArray[np.int32]: Time of flight.
+        """
+        return self.__frame_ptr.tof
+
+    @property
+    def mz(self) -> NDArray[np.float64]:
+        """m/z.
+
+        Returns:
+            NDArray[np.float64]: m/z.
+        """
+        return self.__frame_ptr.mz
+
+    @property
+    def intensity(self) -> NDArray[np.float64]:
+        """Intensity.
+
+        Returns:
+            NDArray[np.float64]: Intensity.
+        """
+        return self.__frame_ptr.intensity
+
+    @property
+    def df(self) -> pd.DataFrame:
+        """ Data as a pandas DataFrame.
+
+        Returns:
+            pd.DataFrame: Data.
+        """
+
+        return pd.DataFrame({
+            'frame': np.repeat(self.frame_id, len(self.scan)),
+            'retention_time': np.repeat(self.retention_time, len(self.scan)),
+            'scan': self.scan,
+            'mobility': self.mobility,
+            'tof': self.tof,
+            'mz': self.mz,
+            'intensity': self.intensity})
+
+    def filter(self,
+               mz_min: float = 0.0,
+               mz_max: float = 2000.0,
+               scan_min: int = 0,
+               scan_max: int = 1000,
+               mobility_min: float = 0.0,
+               mobility_max: float = 2.0,
+               intensity_min: float = 0.0,
+               intensity_max: float = 1e9,
+               ) -> 'TimsFrame':
+        """Filter the frame for a given m/z range, scan range and intensity range.
+
+        Args:
+            mz_min (float): Minimum m/z value.
+            mz_max (float): Maximum m/z value.
+            scan_min (int, optional): Minimum scan value. Defaults to 0.
+            scan_max (int, optional): Maximum scan value. Defaults to 1000.
+            mobility_min (float, optional): Minimum inverse mobility value. Defaults to 0.0.
+            mobility_max (float, optional): Maximum inverse mobility value. Defaults to 2.0.
+            intensity_min (float, optional): Minimum intensity value. Defaults to 0.0.
+            intensity_max (float, optional): Maximum intensity value. Defaults to 1e9.
+
+        Returns:
+            TimsFrame: Filtered frame.
+        """
+
+        return TimsFrame.from_py_tims_frame(self.__frame_ptr.filter_ranged(mz_min, mz_max, scan_min, scan_max, mobility_min, mobility_max,
+                                                                           intensity_min, intensity_max))
+
+    def to_indexed_mz_spectrum(self) -> 'IndexedMzSpectrum':
+        """Convert the frame to an IndexedMzSpectrum.
+
+        Returns:
+            IndexedMzSpectrum: IndexedMzSpectrum.
+        """
+        return IndexedMzSpectrum.from_py_indexed_mz_spectrum(self.__frame_ptr.to_indexed_mz_spectrum())
+
+    def to_resolution(self, resolution: int) -> 'TimsFrame':
+        """Convert the frame to a given resolution.
+
+        Args:
+            resolution (int): Resolution.
+
+        Returns:
+            TimsFrame: Frame with the given resolution.
+        """
+        return TimsFrame.from_py_tims_frame(self.__frame_ptr.to_resolution(resolution))
+
+    def vectorized(self, resolution: int = 2) -> 'TimsFrameVectorized':
+        """Convert the frame to a vectorized frame.
+
+        Args:
+            resolution (int, optional): Resolution. Defaults to 2.
+
+        Returns:
+            TimsFrameVectorized: Vectorized frame.
+        """
+        return TimsFrameVectorized.from_py_tims_frame_vectorized(self.__frame_ptr.vectorized(resolution))
+
+    def to_tims_spectra(self) -> List['TimsSpectrum']:
+        """Convert the frame to a list of TimsSpectrum.
+
+        Returns:
+            List[TimsSpectrum]: List of TimsSpectrum.
+        """
+        return [TimsSpectrum.from_py_tims_spectrum(spec) for spec in self.__frame_ptr.to_tims_spectra()]
+
+    def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5,
+                   min_intensity: float = 1) -> List[MzSpectrum]:
+        """Convert the frame to a list of windows.
+
+        Args:
+            window_length (float, optional): Window length. Defaults to 10.
+            overlapping (bool, optional): Whether the windows should overlap. Defaults to True.
+            min_num_peaks (int, optional): Minimum number of peaks in a window. Defaults to 5.
+            min_intensity (float, optional): Minimum intensity of a peak in a window. Defaults to 1.
+
+        Returns:
+            List[MzSpectrum]: List of windows.
+        """
+        return [MzSpectrum.from_py_mz_spectrum(spec) for spec in self.__frame_ptr.to_windows(
+            window_length, overlapping, min_num_peaks, min_intensity)]
+
+    def __repr__(self):
+        return (f"TimsFrame(frame_id={self.__frame_ptr.frame_id}, ms_type={self.__frame_ptr.ms_type}, "
+                f"num_peaks={len(self.__frame_ptr.mz)})")
+
+
+class TimsFrameVectorized:
+    def __init__(self, frame_id: int, ms_type: int, retention_time: float, scan: NDArray[np.int32],
+                 mobility: NDArray[np.float64], tof: NDArray[np.int32],
+                 indices: NDArray[np.int32], intensity: NDArray[np.float64]):
+        """TimsFrameVectorized class.
+
+        Args:
+            frame_id (int): Frame ID.
+            ms_type (int): MS type.
+            retention_time (float): Retention time.
+            scan (NDArray[np.int32]): Scan.
+            mobility (NDArray[np.float64]): Inverse mobility.
+            tof (NDArray[np.int32]): Time of flight.
+            indices (NDArray[np.int32]): Indices.
+            intensity (NDArray[np.float64]): Intensity.
+
+        Raises:
+            AssertionError: If the length of the scan, mobility, tof, indices and intensity arrays are not equal.
+        """
+
+        assert len(scan) == len(mobility) == len(tof) == len(indices) == len(intensity), \
+            "The length of the scan, mobility, tof, indices and intensity arrays must be equal."
+
+        self.__frame_ptr = pims.PyTimsFrameVectorized(frame_id, ms_type, retention_time, scan, mobility, tof, indices,
+                                                      intensity)
+
+    @classmethod
+    def from_py_tims_frame_vectorized(cls, frame: pims.PyTimsFrameVectorized):
+        """Create a TimsFrameVectorized from a PyTimsFrameVectorized.
+
+        Args:
+            frame (pims.PyTimsFrameVectorized): PyTimsFrameVectorized to create the TimsFrameVectorized from.
+
+        Returns:
+            TimsFrameVectorized: TimsFrameVectorized created from the PyTimsFrameVectorized.
+        """
+        instance = cls.__new__(cls)
+        instance.__frame_ptr = frame
+        return instance
+
+    @property
+    def frame_id(self) -> int:
+        """Frame ID.
+
+        Returns:
+            int: Frame ID.
+        """
+        return self.__frame_ptr.frame_id
+
+    @property
+    def ms_type(self) -> str:
+        """MS type.
+
+        Returns:
+            int: MS type.
+        """
+        return self.__frame_ptr.ms_type_as_string
+
+    @property
+    def retention_time(self) -> float:
+        """Retention time.
+
+        Returns:
+            float: Retention time.
+        """
+        return self.__frame_ptr.retention_time
+
+    @property
+    def scan(self) -> NDArray[np.int32]:
+        """Scan.
+
+        Returns:
+            NDArray[np.int32]: Scan.
+        """
+        return self.__frame_ptr.scan
+
+    @property
+    def mobility(self) -> NDArray[np.float64]:
+        """Inverse mobility.
+
+        Returns:
+            NDArray[np.float64]: Inverse mobility.
+        """
+        return self.__frame_ptr.mobility
+
+    @property
+    def tof(self) -> NDArray[np.int32]:
+        """Time of flight.
+
+        Returns:
+            NDArray[np.int32]: Time of flight.
+        """
+        return self.__frame_ptr.tof
+
+    @property
+    def indices(self) -> NDArray[np.int32]:
+        """Indices.
+
+        Returns:
+            NDArray[np.int32]: Indices.
+        """
+        return self.__frame_ptr.indices
+
+    @property
+    def intensity(self) -> NDArray[np.float64]:
+        """Intensity.
+
+        Returns:
+            NDArray[np.float64]: Intensity.
+        """
+        return self.__frame_ptr.values
+
+    @property
+    def df(self) -> pd.DataFrame:
+        """ Data as a pandas DataFrame.
+
+        Returns:
+            pd.DataFrame: Data.
+        """
+
+        return pd.DataFrame({
+            'frame': np.repeat(self.frame_id, len(self.scan)),
+            'retention_time': np.repeat(self.retention_time, len(self.scan)),
+            'scan': self.scan,
+            'mobility': self.mobility,
+            'tof': self.tof,
+            'indices': self.indices,
+            'intensity': self.intensity})
+
+    def __repr__(self):
+        return (f"TimsFrameVectorized(frame_id={self.__frame_ptr.frame_id}, ms_type={self.__frame_ptr.ms_type}, "
+                f"num_peaks={len(self.__frame_ptr.indices)})")
+
+    def get_tensor_repr(self, dense=True, zero_indexed=True, re_index=True, scan_max=None, index_max=None):
+        s = self.scan
+        f = self.indices
+        i = self.intensity
+
+        if zero_indexed:
+            s = s - np.min(s)
+            f = f - np.min(f)
+
+        if re_index:
+            f = re_index_indices(f)
+
+        if scan_max is None:
+            m_s = np.max(s) + 1
+        else:
+            m_s = scan_max + 1
+
+        if index_max is None:
+            m_f = np.max(f) + 1
+        else:
+            m_f = index_max + 1
+
+        sv = sp.reorder(sp.SparseTensor(indices=np.c_[s, f], values=i, dense_shape=(m_s, m_f)))
+
+        if dense:
+            return sp.to_dense(sv)
+        else:
+            return sv
diff --git a/imspy/imspy/data/handle.py b/imspy/imspy/data/handle.py
new file mode 100644
index 00000000..4aec5145
--- /dev/null
+++ b/imspy/imspy/data/handle.py
@@ -0,0 +1,157 @@
+from typing import List
+
+import numpy as np
+import pandas as pd
+import sqlite3
+from numpy.typing import NDArray
+
+import imspy_connector as pims
+import opentims_bruker_bridge as obb
+
+from abc import ABC
+
+<<<<<<<< HEAD:imspy/imspy/handle.py
+from imspy.frame import TimsFrame
+from imspy.slice import TimsSlice
+========
+from pyims.data import TimsFrame, TimsSlice
+
+>>>>>>>> 1cd02c1 (transfer simulation framework from proteolizard):imspy/imspy/data/handle.py
+
+
+class TimsDataset(ABC):
+    def __init__(self, data_path: str):
+        """TimsDataHandle class.
+
+        Args:
+            data_path (str): Path to the data.
+        """
+        self.data_path = data_path
+        self.bp: List[str] = obb.get_so_paths()
+        self.meta_data = self.__load_meta_data()
+        self.precursor_frames = self.meta_data[self.meta_data["MsMsType"] == 0].Id.values.astype(np.int32)
+        self.fragment_frames = self.meta_data[self.meta_data["MsMsType"] > 0].Id.values.astype(np.int32)
+        self.__handle = None
+        self.__current_index = 1
+
+        # Try to load the data with the first binary found
+        appropriate_found = False
+        for so_path in self.bp:
+            try:
+                self.__handle = pims.PyTimsDataHandle(self.data_path, so_path)
+                appropriate_found = True
+                break
+            except Exception:
+                continue
+        assert appropriate_found is True, ("No appropriate bruker binary could be found, please check if your "
+                                           "operating system is supported by open-tims-bruker-bridge.")
+
+    @property
+    def acquisition_mode(self) -> str:
+        """Get the acquisition mode.
+
+        Returns:
+            str: Acquisition mode.
+        """
+        return self.__handle.get_acquisition_mode_as_string()
+
+    @property
+    def acquisition_mode_numerical(self) -> int:
+        """Get the acquisition mode as a numerical value.
+
+        Returns:
+            int: Acquisition mode as a numerical value.
+        """
+        return self.__handle.get_acquisition_mode()
+
+    @property
+    def frame_count(self) -> int:
+        """Get the number of frames.
+
+        Returns:
+            int: Number of frames.
+        """
+        return self.__handle.frame_count
+
+    def __load_meta_data(self) -> pd.DataFrame:
+        """Get the meta data.
+
+        Returns:
+            pd.DataFrame: Meta data.
+        """
+        return pd.read_sql_query("SELECT * from Frames", sqlite3.connect(self.data_path + "/analysis.tdf"))
+
+    def get_tims_frame(self, frame_id: int) -> TimsFrame:
+        """Get a TimsFrame.
+
+        Args:
+            frame_id (int): Frame ID.
+
+        Returns:
+            TimsFrame: TimsFrame.
+        """
+        return TimsFrame.from_py_tims_frame(self.__handle.get_tims_frame(frame_id))
+
+    def get_tims_slice(self, frame_ids: NDArray[np.int32]) -> TimsSlice:
+        """Get a TimsFrame.
+
+        Args:
+            frame_ids (int): Frame ID.
+
+        Returns:
+            TimsFrame: TimsFrame.
+        """
+        return TimsSlice.from_py_tims_slice(self.__handle.get_tims_slice(frame_ids))
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.__current_index <= self.frame_count:
+            frame_ptr = self.__handle.get_tims_frame(self.__current_index)
+            self.__current_index += 1
+            if frame_ptr is not None:
+                return TimsFrame.from_py_tims_frame(frame_ptr)
+            else:
+                raise ValueError(f"Frame pointer is None for valid index: {self.__current_index}")
+        else:
+            self.__current_index = 1  # Reset for next iteration
+            raise StopIteration
+
+    def __getitem__(self, index):
+        if isinstance(index, slice):
+            return self.get_tims_slice(np.arange(index.start, index.stop, index.step).astype(np.int32))
+        return self.get_tims_frame(index)
+
+
+class TimsDatasetDDA(TimsDataset):
+    @property
+    def selected_precursors(self):
+        """Get precursors selected for fragmentation.
+
+        Returns:
+            pd.DataFrame: Precursors selected for fragmentation.
+        """
+        return pd.read_sql_query("SELECT * from Precursors", sqlite3.connect(self.data_path + "/analysis.tdf"))
+
+    @property
+    def pasef_meta_data(self):
+        """Get PASEF meta data for DDA.
+
+        Returns:
+            pd.DataFrame: PASEF meta data.
+        """
+        return pd.read_sql_query("SELECT * from PasefFrameMsMsInfo",
+                                 sqlite3.connect(self.data_path + "/analysis.tdf"))
+
+
+class TimsDatasetDIA(TimsDataset):
+    @property
+    def pasef_meta_data(self):
+        """Get PASEF meta data for DIA.
+
+        Returns:
+            pd.DataFrame: PASEF meta data.
+        """
+        return pd.read_sql_query("SELECT * from DiaFrameMsMsWindows",
+                                 sqlite3.connect(self.data_path + "/analysis.tdf"))
diff --git a/imspy/imspy/data/slice.py b/imspy/imspy/data/slice.py
new file mode 100644
index 00000000..056febaa
--- /dev/null
+++ b/imspy/imspy/data/slice.py
@@ -0,0 +1,394 @@
+import numpy as np
+import pandas as pd
+from typing import List
+
+from numpy.typing import NDArray
+from tensorflow import sparse as sp
+
+from imspy.utilities import re_index_indices
+
+<<<<<<<< HEAD:imspy/imspy/slice.py
+import imspy_connector as pims
+from imspy.frame import TimsFrame, TimsFrameVectorized
+from imspy.spectrum import MzSpectrum
+========
+import pyims_connector as pims
+from pyims.data.frame import TimsFrame, TimsFrameVectorized
+from pyims.data.spectrum import MzSpectrum
+>>>>>>>> 1cd02c1 (transfer simulation framework from proteolizard):imspy/imspy/data/slice.py
+
+
+class TimsSlice:
+    def __init__(self,
+                 frame_id: NDArray[np.int32],
+                 scan: NDArray[np.int32],
+                 tof: NDArray[np.int32],
+                 retention_time: NDArray[np.float64],
+                 mobility: NDArray[np.float64],
+                 mz: NDArray[np.float64],
+                 intensity: NDArray[np.float64]):
+        """Create a TimsSlice.
+
+        Args:
+            frame_id (NDArray[np.int32]): Frame ID.
+            scan (NDArray[np.int32]): Scan.
+            tof (NDArray[np.int32]): TOF.
+            retention_time (NDArray[np.float64]): Retention time.
+            mobility (NDArray[np.float64]): Mobility.
+            mz (NDArray[np.float64]): m/z.
+            intensity (NDArray[np.float64]): Intensity.
+        """
+
+        assert len(frame_id) == len(scan) == len(tof) == len(retention_time) == len(mobility) == len(mz) == len(
+            intensity), "All arrays must have the same length."
+
+        self.__slice_ptr = pims.PyTimsSlice(
+            frame_id, scan, tof, retention_time, mobility, mz, intensity
+        )
+        self.__current_index = 0
+
+    @classmethod
+    def from_py_tims_slice(cls, tims_slice: pims.PyTimsSlice):
+        """Create a TimsSlice from a PyTimsSlice.
+
+        Args:
+            tims_slice (pims.PyTimsSlice): PyTimsSlice to create the TimsSlice from.
+
+        Returns:
+            TimsSlice: TimsSlice created from the PyTimsSlice.
+        """
+        instance = cls.__new__(cls)
+        instance.__slice_ptr = tims_slice
+        instance.__current_index = 0
+        return instance
+
+    @property
+    def first_frame_id(self) -> int:
+        """First frame ID.
+
+        Returns:
+            int: First frame ID.
+        """
+        return self.__slice_ptr.first_frame_id
+
+    @property
+    def last_frame_id(self) -> int:
+        """Last frame ID.
+
+        Returns:
+            int: Last frame ID.
+        """
+        return self.__slice_ptr.last_frame_id
+
+    def __repr__(self):
+        return f"TimsSlice({self.first_frame_id}, {self.last_frame_id})"
+
+    @property
+    def precursors(self):
+        return TimsSlice.from_py_tims_slice(self.__slice_ptr.get_precursors())
+
+    @property
+    def fragments(self):
+        return TimsSlice.from_py_tims_slice(self.__slice_ptr.get_fragments_dda())
+
+    def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, scan_min: int = 0, scan_max: int = 1000,
+               mobility_min: float = 0.0,
+               mobility_max: float = 2.0,
+               intensity_min: float = 0.0, intensity_max: float = 1e9, num_threads: int = 4) -> 'TimsSlice':
+        """Filter the slice by m/z, scan and intensity.
+
+        Args:
+            mz_min (float): Minimum m/z value.
+            mz_max (float): Maximum m/z value.
+            scan_min (int, optional): Minimum scan value. Defaults to 0.
+            scan_max (int, optional): Maximum scan value. Defaults to 1000.
+            mobility_min (float, optional): Minimum inverse mobility value. Defaults to 0.0.
+            mobility_max (float, optional): Maximum inverse mobility value. Defaults to 2.0.
+            intensity_min (float, optional): Minimum intensity value. Defaults to 0.0.
+            intensity_max (float, optional): Maximum intensity value. Defaults to 1e9.
+            num_threads (int, optional): Number of threads to use. Defaults to 4.
+
+        Returns:
+            TimsSlice: Filtered slice.
+        """
+        return TimsSlice.from_py_tims_slice(
+            self.__slice_ptr.filter_ranged(mz_min, mz_max, scan_min, scan_max, mobility_min, mobility_max,
+                                           intensity_min, intensity_max, num_threads))
+
+    @property
+    def frames(self) -> List[TimsFrame]:
+        """Get the frames.
+
+        Returns:
+            List[TimsFrame]: Frames.
+        """
+        return [TimsFrame.from_py_tims_frame(frame) for frame in self.__slice_ptr.get_frames()]
+
+    def to_resolution(self, resolution: int, num_threads: int = 4) -> 'TimsSlice':
+        """Convert the slice to a given resolution.
+
+        Args:
+            resolution (int): Resolution.
+            num_threads (int, optional): Number of threads to use. Defaults to 4.
+
+        Returns:
+            TimsSlice: Slice with given resolution.
+        """
+        return TimsSlice.from_py_tims_slice(self.__slice_ptr.to_resolution(resolution, num_threads))
+
+    def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5,
+                   min_intensity: float = 1, num_threads: int = 4) -> List[MzSpectrum]:
+        """Convert the slice to a list of windows.
+
+        Args:
+            window_length (float, optional): Window length. Defaults to 10.
+            overlapping (bool, optional): Whether the windows should overlap. Defaults to True.
+            min_num_peaks (int, optional): Minimum number of peaks in a window. Defaults to 5.
+            min_intensity (float, optional): Minimum intensity of a peak in a window. Defaults to 1.
+            num_threads (int, optional): Number of threads to use. Defaults to 1.
+
+        Returns:
+            List[MzSpectrum]: List of windows.
+        """
+        return [MzSpectrum.from_py_mz_spectrum(spec) for spec in self.__slice_ptr.to_windows(
+            window_length, overlapping, min_num_peaks, min_intensity, num_threads)]
+
+    @property
+    def df(self) -> pd.DataFrame:
+        """Get the data as a pandas DataFrame.
+
+        Returns:
+            pd.DataFrame: Data.
+        """
+        columns = ['frame', 'scan', 'tof', 'retention_time', 'mobility', 'mz', 'intensity']
+        return pd.DataFrame({c: v for c, v in zip(columns, self.__slice_ptr.to_arrays())})
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.__current_index < self.__slice_ptr.frame_count:
+            frame_ptr = self.__slice_ptr.get_frame_at_index(self.__current_index)
+            self.__current_index += 1
+            if frame_ptr is not None:
+                return TimsFrame.from_py_tims_frame(frame_ptr)
+            else:
+                raise ValueError("Frame pointer is None for valid index.")
+        else:
+            self.__current_index = 0  # Reset for next iteration
+            raise StopIteration
+
+    def vectorized(self, resolution: int = 2, num_threads: int = 4) -> 'TimsSliceVectorized':
+        """Get a vectorized version of the slice.
+
+        Args:
+            resolution (int, optional): Resolution. Defaults to 2.
+            num_threads (int, optional): Number of threads to use. Defaults to 4.
+
+        Returns:
+            TimsSliceVectorized: Vectorized version of the slice.
+        """
+        return TimsSliceVectorized.from_vectorized_py_tims_slice(self.__slice_ptr.vectorized(resolution, num_threads))
+
+    def get_tims_planes(self, tof_max_value: int = 400_000, num_chunks: int = 7, num_threads: int = 4) -> List[
+        'TimsPlane']:
+        return [TimsPlane.from_py_tims_plane(plane) for plane in
+                self.__slice_ptr.to_tims_planes(tof_max_value, num_chunks, num_threads)]
+
+
+class TimsSliceVectorized:
+    def __init__(self):
+        self.__slice_ptr = None
+        self.__current_index = 0
+
+    @classmethod
+    def from_vectorized_py_tims_slice(cls, tims_slice: pims.PyTimsSliceVectorized):
+        """Create a TimsSlice from a PyTimsSlice.
+
+        Args:
+            tims_slice (pims.PyTimsSlice): PyTimsSlice to create the TimsSlice from.
+
+        Returns:
+            TimsSlice: TimsSlice created from the PyTimsSlice.
+        """
+        instance = cls.__new__(cls)
+        instance.__slice_ptr = tims_slice
+        instance.__current_index = 0
+        return instance
+
+    @property
+    def first_frame_id(self) -> int:
+        """First frame ID.
+
+        Returns:
+            int: First frame ID.
+        """
+        return self.__slice_ptr.first_frame_id
+
+    @property
+    def last_frame_id(self) -> int:
+        """Last frame ID.
+
+        Returns:
+            int: Last frame ID.
+        """
+        return self.__slice_ptr.last_frame_id
+
+    @property
+    def precursors(self):
+        return TimsSlice.from_py_tims_slice(self.__slice_ptr.get_precursors())
+
+    @property
+    def fragments(self):
+        return TimsSlice.from_py_tims_slice(self.__slice_ptr.get_fragments_dda())
+
+    @property
+    def frames(self) -> List[TimsFrameVectorized]:
+        """Get the frames.
+
+        Returns:
+            List[TimsFrame]: Frames.
+        """
+        return [TimsFrameVectorized.from_py_tims_frame_vectorized(frame) for frame in
+                self.__slice_ptr.get_vectorized_frames()]
+
+    @property
+    def df(self) -> pd.DataFrame:
+        """Get the data as a pandas DataFrame.
+
+        Returns:
+            pd.DataFrame: Data.
+        """
+        columns = ['frame', 'scan', 'tof', 'retention_time', 'mobility', 'index', 'intensity']
+        return pd.DataFrame({c: v for c, v in zip(columns, self.__slice_ptr.to_arrays())})
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        if self.__current_index < self.__slice_ptr.frame_count:
+            frame_ptr = self.__slice_ptr.get_frame_at_index(self.__current_index)
+            self.__current_index += 1
+            if frame_ptr is not None:
+                return TimsFrameVectorized.from_py_tims_frame_vectorized(frame_ptr)
+            else:
+                raise ValueError("Frame pointer is None for valid index.")
+        else:
+            self.__current_index = 0
+            raise StopIteration
+
+    def __repr__(self):
+        return f"TimsSliceVectorized({self.first_frame_id}, {self.last_frame_id})"
+
+    def get_tensor_repr(self, dense=True, zero_index=True, re_index=True, frame_max=None, scan_max=None,
+                        index_max=None):
+
+        frames, scans, _, _, _, indices, intensities = self.__slice_ptr.to_arrays()
+
+        if zero_index:
+            scans = scans - np.min(scans)
+            frames = frames - np.min(frames)
+            indices = indices - np.min(indices)
+
+        if re_index:
+            frames = re_index_indices(frames)
+
+        if scan_max is None:
+            m_s = np.max(scans) + 1
+        else:
+            m_s = scan_max + 1
+
+        if index_max is None:
+            m_i = np.max(indices) + 1
+        else:
+            m_i = index_max + 1
+
+        if frame_max is None:
+            m_f = np.max(frames) + 1
+        else:
+            m_f = frame_max + 1
+
+        sv = sp.reorder(
+            sp.SparseTensor(indices=np.c_[frames, scans, indices], values=intensities, dense_shape=(m_f, m_s, m_i)))
+
+        if dense:
+            return sp.to_dense(sv)
+        else:
+            return sv
+
+
+class TimsPlane:
+    def __init__(self):
+        self.__plane_ptr = None
+
+    @classmethod
+    def from_py_tims_plane(cls, plane: pims.PyTimsPlane):
+        """Create a TimsPlane from a PyTimsPlane.
+
+        Args:
+            plane (pims.PyTimsPlane): PyTimsPlane to create the TimsPlane from.
+
+        Returns:
+            TimsPlane: TimsPlane created from the PyTimsPlane.
+        """
+        instance = cls.__new__(cls)
+        instance.__plane_ptr = plane
+        return instance
+
+    @property
+    def mz_mean(self):
+        return self.__plane_ptr.mz_mean
+
+    @property
+    def mz_std(self):
+        return self.__plane_ptr.mz_std
+
+    @property
+    def tof_mean(self):
+        return self.__plane_ptr.tof_mean
+
+    @property
+    def tof_std(self):
+        return self.__plane_ptr.tof_std
+
+    @property
+    def frame_ids(self):
+        return self.__plane_ptr.frame_ids
+
+    @property
+    def scans(self):
+        return self.__plane_ptr.scans
+
+    @property
+    def intensities(self):
+        return self.__plane_ptr.intensity
+
+    @property
+    def retention_times(self):
+        return self.__plane_ptr.retention_times
+
+    @property
+    def mobilities(self):
+        return self.__plane_ptr.mobilities
+
+    @property
+    def num_points(self):
+        return len(self.frame_ids)
+
+    @property
+    def df(self):
+        return pd.DataFrame({
+            'frame': self.frame_ids,
+            'scan': self.scans,
+            'retention_time': self.retention_times,
+            'mobility': self.mobilities,
+            'intensity': self.intensities
+        })
+
+    def __repr__(self):
+        return (f"TimsPlane(mz_mean: "
+                f"{np.round(self.mz_mean, 4)}, "
+                f"mz_std: {np.round(self.mz_std, 4)},"
+                f" tof_mean: {np.round(self.tof_mean, 4)}, "
+                f"tof_std: {np.round(self.tof_std, 4)}, "
+                f"num_points: {len(self.frame_ids)})")
diff --git a/imspy/imspy/data/spectrum.py b/imspy/imspy/data/spectrum.py
new file mode 100644
index 00000000..80baa757
--- /dev/null
+++ b/imspy/imspy/data/spectrum.py
@@ -0,0 +1,377 @@
+import numpy as np
+from typing import List, Tuple
+
+import pandas as pd
+from numpy.typing import NDArray
+
+import imspy_connector as pims
+
+
+class IndexedMzSpectrum:
+    def __init__(self, index: NDArray[np.int32], mz: NDArray[np.float64], intensity: NDArray[np.float64]):
+        """IndexedMzSpectrum class.
+
+        Args:
+            index (NDArray[np.int32]): Index.
+            mz (NDArray[np.float64]): m/z.
+            intensity (NDArray[np.float64]): Intensity.
+
+        Raises:
+            AssertionError: If the length of the index, mz and intensity arrays are not equal.
+        """
+        assert len(index) == len(mz) == len(intensity), ("The length of the index, mz and intensity arrays must be "
+                                                         "equal.")
+        self.__spec_ptr = pims.PyIndexedMzSpectrum(index, mz, intensity)
+
+    @classmethod
+    def from_py_indexed_mz_spectrum(cls, spec: pims.PyIndexedMzSpectrum):
+        """Create a IndexedMzSpectrum from a PyIndexedMzSpectrum.
+
+        Args:
+            spec (pims.PyIndexedMzSpectrum): PyIndexedMzSpectrum to create the IndexedMzSpectrum from.
+
+        Returns:
+            IndexedMzSpectrum: IndexedMzSpectrum created from the PyIndexedMzSpectrum.
+        """
+        instance = cls.__new__(cls)
+        instance.__spec_ptr = spec
+        return instance
+
+    @property
+    def index(self) -> NDArray[np.int32]:
+        """Index.
+
+        Returns:
+            NDArray[np.int32]: Index.
+        """
+        return self.__spec_ptr.index
+
+    @property
+    def mz(self) -> NDArray[np.float64]:
+        """m/z.
+
+        Returns:
+            NDArray[np.float64]: m/z.
+        """
+        return self.__spec_ptr.mz
+
+    @property
+    def intensity(self) -> NDArray[np.float64]:
+        """Intensity.
+
+        Returns:
+            NDArray[np.float64]: Intensity.
+        """
+        return self.__spec_ptr.intensity
+
+    @property
+    def df(self) -> pd.DataFrame:
+        """Data.
+
+        Returns:
+            pd.DataFrame: Data.
+        """
+
+        return pd.DataFrame({'index': self.index, 'mz': self.mz, 'intensity': self.intensity})
+
+    def __repr__(self):
+        return f"IndexedMzSpectrum(num_peaks={len(self.index)})"
+
+
+class MzSpectrum:
+    def __init__(self, mz: NDArray[np.float64], intensity: NDArray[np.float64]):
+        """MzSpectrum class.
+
+        Args:
+            mz (NDArray[np.float64]): m/z.
+            intensity (NDArray[np.float64]): Intensity.
+
+        Raises:
+            AssertionError: If the length of the mz and intensity arrays are not equal.
+        """
+        assert len(mz) == len(intensity), "The length of the mz and intensity arrays must be equal."
+        self.__spec_ptr = pims.PyMzSpectrum(mz, intensity)
+
+    @property
+    def mz(self) -> NDArray[np.float64]:
+        """m/z.
+
+        Returns:
+            NDArray[np.float64]: m/z.
+        """
+        return self.__spec_ptr.mz
+
+    @property
+    def intensity(self) -> NDArray[np.float64]:
+        """Intensity.
+
+        Returns:
+            NDArray[np.float64]: Intensity.
+        """
+        return self.__spec_ptr.intensity
+
+    @property
+    def df(self) -> pd.DataFrame:
+        """Data.
+
+        Returns:
+            pd.DataFrame: Data.
+        """
+
+        return pd.DataFrame({'mz': self.mz, 'intensity': self.intensity})
+
+    @classmethod
+    def from_py_mz_spectrum(cls, spec: pims.PyMzSpectrum):
+        """Create a MzSpectrum from a PyMzSpectrum.
+
+        Args:
+            spec (pims.PyMzSpectrum): PyMzSpectrum to create the MzSpectrum from.
+
+        Returns:
+            MzSpectrum: MzSpectrum created from the PyMzSpectrum.
+        """
+        instance = cls.__new__(cls)
+        instance.__spec_ptr = spec
+        return instance
+
+    def __repr__(self):
+        return f"MzSpectrum(num_peaks={len(self.mz)})"
+
+    def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5,
+                   min_intensity: float = 1) -> Tuple[NDArray, List['MzSpectrum']]:
+        """Convert the spectrum to a list of windows.
+
+        Args:
+            window_length (float, optional): Window length. Defaults to 10.
+            overlapping (bool, optional): Whether the windows should overlap. Defaults to True.
+            min_num_peaks (int, optional): Minimum number of peaks in a window. Defaults to 5.
+            min_intensity (float, optional): Minimum intensity of a peak in a window. Defaults to 1.
+
+        Returns:
+            Tuple[NDArray, List[MzSpectrum]]: List of windows.
+        """
+
+        indices, windows = self.__spec_ptr.to_windows(window_length, overlapping, min_num_peaks, min_intensity)
+        return indices, [MzSpectrum.from_py_mz_spectrum(window) for window in windows]
+
+    def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: float = 0.0,
+               intensity_max: float = 1e9) -> 'MzSpectrum':
+        """Filter the spectrum for a given m/z range and intensity range.
+
+        Args:
+            mz_min (float): Minimum m/z value.
+            mz_max (float): Maximum m/z value.
+            intensity_min (float, optional): Minimum intensity value. Defaults to 0.0.
+            intensity_max (float, optional): Maximum intensity value. Defaults to 1e9.
+
+        Returns:
+            MzSpectrum: Filtered spectrum.
+        """
+        return MzSpectrum.from_py_mz_spectrum(
+            self.__spec_ptr.filter_ranged(mz_min, mz_max, intensity_min, intensity_max))
+
+    def vectorized(self, resolution: int = 2) -> 'MzSpectrumVectorized':
+        """Convert the spectrum to a vectorized spectrum.
+
+        Args:
+            resolution (int, optional): Resolution. Defaults to 2.
+
+        Returns:
+            MzSpectrumVectorized: Vectorized spectrum.
+        """
+        return MzSpectrumVectorized.from_py_mz_spectrum_vectorized(self.__spec_ptr.vectorized(resolution))
+
+
+class MzSpectrumVectorized:
+    def __init__(self, indices: NDArray[np.int32], values: NDArray[np.float64], resolution: int):
+        """MzSpectrum class.
+
+        Args:
+            mz (NDArray[np.float64]): m/z.
+            values (NDArray[np.float64]): Intensity.
+
+        Raises:
+            AssertionError: If the length of the mz and intensity arrays are not equal.
+        """
+        assert len(indices) == len(values), "The length of the mz and intensity arrays must be equal."
+        self.__spec_ptr = pims.PyMzSpectrumVectorized(indices, values, resolution)
+
+    @classmethod
+    def from_py_mz_spectrum_vectorized(cls, spec: pims.PyMzSpectrumVectorized):
+        """Create a MzSpectrum from a PyMzSpectrum.
+
+        Args:
+            spec (pims.PyMzSpectrum): PyMzSpectrum to create the MzSpectrum from.
+
+        Returns:
+            MzSpectrum: MzSpectrum created from the PyMzSpectrum.
+        """
+        instance = cls.__new__(cls)
+        instance.__spec_ptr = spec
+        return instance
+
+    @property
+    def resolution(self) -> float:
+        """Resolution.
+
+        Returns:
+            float: Resolution.
+        """
+        return self.__spec_ptr.resolution
+
+    @property
+    def indices(self) -> NDArray[np.int32]:
+        """m/z.
+
+        Returns:
+            NDArray[np.float64]: m/z.
+        """
+        return self.__spec_ptr.indices
+
+    @property
+    def values(self) -> NDArray[np.float64]:
+        """Intensity.
+
+        Returns:
+            NDArray[np.float64]: Intensity.
+        """
+        return self.__spec_ptr.values
+
+    def __repr__(self):
+        return f"MzSpectrumVectorized(num_values={len(self.values)})"
+
+
+class TimsSpectrum:
+    def __init__(self, frame_id: int, scan: int, retention_time: float, mobility: float, ms_type: int,
+                 index: NDArray[np.int32], mz: NDArray[np.float64], intensity: NDArray[np.float64]):
+        """TimsSpectrum class.
+
+        Args:
+            index (NDArray[np.int32]): Index.
+            mz (NDArray[np.float64]): m/z.
+            intensity (NDArray[np.float64]): Intensity.
+
+        Raises:
+            AssertionError: If the length of the index, mz and intensity arrays are not equal.
+        """
+        assert len(index) == len(mz) == len(intensity), ("The length of the index, mz and intensity arrays must be "
+                                                         "equal.")
+        self.__spec_ptr = pims.PyTimsSpectrum(frame_id, scan, retention_time, mobility, ms_type, index, mz, intensity)
+
+    @classmethod
+    def from_py_tims_spectrum(cls, spec: pims.PyTimsSpectrum):
+        """Create a TimsSpectrum from a PyTimsSpectrum.
+
+        Args:
+            spec (pims.PyTimsSpectrum): PyTimsSpectrum to create the TimsSpectrum from.
+
+        Returns:
+            TimsSpectrum: TimsSpectrum created from the PyTimsSpectrum.
+        """
+        instance = cls.__new__(cls)
+        instance.__spec_ptr = spec
+        return instance
+
+    @property
+    def index(self) -> NDArray[np.int32]:
+        """Index.
+
+        Returns:
+            NDArray[np.int32]: Index.
+        """
+        return self.__spec_ptr.index
+
+    @property
+    def mz(self) -> NDArray[np.float64]:
+        """m/z.
+
+        Returns:
+            NDArray[np.float64]: m/z.
+        """
+        return self.__spec_ptr.mz
+
+    @property
+    def intensity(self) -> NDArray[np.float64]:
+        """Intensity.
+
+        Returns:
+            NDArray[np.float64]: Intensity.
+        """
+        return self.__spec_ptr.intensity
+
+    @property
+    def ms_type(self) -> str:
+        """MS type.
+
+        Returns:
+            str: MS type.
+        """
+        return self.__spec_ptr.ms_type
+
+    @property
+    def mobility(self) -> float:
+        """Inverse mobility.
+
+        Returns:
+            float: Inverse mobility.
+        """
+        return self.__spec_ptr.mobility
+
+    @property
+    def scan(self) -> int:
+        """Scan.
+
+        Returns:
+            int: Scan.
+        """
+        return self.__spec_ptr.scan
+
+    @property
+    def retention_time(self) -> float:
+        """Retention time.
+
+        Returns:
+            float: Retention time.
+        """
+        return self.__spec_ptr.retention_time
+
+    @property
+    def frame_id(self) -> int:
+        """Frame ID.
+
+        Returns:
+            int: Frame ID.
+        """
+        return self.__spec_ptr.frame_id
+
+    @property
+    def mz_spectrum(self) -> MzSpectrum:
+        """Get the MzSpectrum.
+
+        Returns:
+            MzSpectrum: Spectrum.
+        """
+        return MzSpectrum.from_py_mz_spectrum(self.__spec_ptr.mz_spectrum)
+
+    @property
+    def df(self) -> pd.DataFrame:
+        """Data.
+        
+        Returns:
+            pd.DataFrame: Data.
+        """
+
+        return pd.DataFrame({
+            'frame': np.repeat(self.frame_id, len(self.index)),
+            'retention_time': np.repeat(self.retention_time, len(self.index)),
+            'scan': np.repeat(self.scan, len(self.index)),
+            'mobility': np.repeat(self.mobility, len(self.index)),
+            'tof': self.index,
+            'mz': self.mz,
+            'intensity': self.intensity
+        })
+
+    def __repr__(self):
+        return (f"TimsSpectrum(id={self.frame_id}, retention_time={np.round(self.retention_time, 2)}, "
+                f"scan={self.scan}, mobility={np.round(self.mobility, 2)}, ms_type={self.ms_type}, "
+                f"num_peaks={len(self.index)})")
diff --git a/imspy/imspy/feature.py b/imspy/imspy/feature.py
new file mode 100644
index 00000000..9abf5c5c
--- /dev/null
+++ b/imspy/imspy/feature.py
@@ -0,0 +1,211 @@
+import pandas as pd
+import numpy as np
+from numpy.typing import ArrayLike
+
+import json
+
+from pyims.data import TimsSlice, TimsFrame, MzSpectrum
+from pyims.utility import gaussian, exp_gaussian
+from pyims.isotopes import IsotopePatternGenerator, create_initial_feature_distribution
+from abc import ABC, abstractmethod
+from typing import Optional, Dict
+class Profile:
+
+    def __init__(self,positions:Optional[ArrayLike] = None, rel_abundancies:Optional[ArrayLike] = None, model_params: Optional[Dict] = None, jsons:Optional[str] = None):
+        if jsons is not None:
+            self._jsons = jsons
+            self.positions, self.rel_abundancies, self.model_params, self.access_dictionary = self._from_jsons(jsons)
+        else:
+            self.positions = np.asarray(positions)
+            self.rel_abundancies = np.asarray(rel_abundancies)
+            self.model_params = model_params
+            self.access_dictionary = {p:i for p,i in zip(positions, rel_abundancies)}
+            self._jsons = self._to_jsons()
+
+    def __iter__(self):
+        self.__size = len(self.positions)
+        self.__iterator_pos = 0
+        return self
+
+    def __next__(self):
+        if self.__iterator_pos < self.__size:
+            p = self.positions[self.__iterator_pos]
+            ra = self.rel_abundancies[self.__iterator_pos]
+            self.__iterator_pos += 1
+            return p,ra
+        raise StopIteration
+
+    def __getitem__(self, position: int):
+        return self.access_dictionary[position]
+
+    def _to_jsons(self):
+        mp = {}
+        for key,val in self.model_params.items():
+            if isinstance(val,np.generic):
+                mp[key] = val.item()
+            else:
+                mp[key] = val
+
+        json_dict = {"positions": self.positions.tolist(),
+                     "rel_abundancies": self.rel_abundancies.tolist(),
+                     "model_params": mp}
+        return json.dumps(json_dict, allow_nan = False)
+
+    def _from_jsons(self, jsons:str):
+        json_dict = json.loads(jsons)
+        positions = json_dict["positions"]
+        rel_abundancies = json_dict["rel_abundancies"]
+        access_dictionary = {p:i for p,i in zip(positions, rel_abundancies)}
+        return positions,rel_abundancies,json_dict["model_params"], access_dictionary
+
+    @property
+    def jsons(self):
+        return self._jsons
+
+class RTProfile(Profile):
+
+    def __init__(self,frames:Optional[ArrayLike] = None, rel_abundancies:Optional[ArrayLike] = None, model_params: Optional[Dict] = None, jsons:Optional[str] = None):
+        super().__init__(frames, rel_abundancies, model_params, jsons)
+
+    @property
+    def frames(self):
+        return self.positions
+
+class ScanProfile(Profile):
+
+    def __init__(self,scans:Optional[ArrayLike] = None, rel_abundancies:Optional[ArrayLike] = None, model_params: Optional[Dict] = None, jsons:Optional[str] = None):
+        super().__init__(scans, rel_abundancies, model_params,  jsons)
+
+    @property
+    def scans(self):
+        return self.positions
+
+class ChargeProfile(Profile):
+    def __init__(self,charges:Optional[ArrayLike] = None, rel_abundancies:Optional[ArrayLike] = None, model_params: Optional[Dict] = None, jsons:Optional[str] = None):
+        abundant_charges = charges[rel_abundancies>0]
+        relative_abundancies_over_0 = rel_abundancies[rel_abundancies>0]
+        super().__init__(abundant_charges, relative_abundancies_over_0, model_params, jsons)
+
+    @property
+    def charges(self):
+        return self.positions
+
+class FeatureGenerator(ABC):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def generate_feature(self, mz: float, charge: int):
+        pass
+
+
+class PrecursorFeatureGenerator(FeatureGenerator):
+
+    def __init__(self):
+        super(PrecursorFeatureGenerator).__init__()
+
+    def generate_feature(self, mz: float, charge: int,
+                         pattern_generator: IsotopePatternGenerator,
+                         num_rt: int = 64,
+                         num_im: int = 32,
+                         distr_im=gaussian,
+                         distr_rt=exp_gaussian,
+                         rt_lower: float = -9,
+                         rt_upper: float = 18,
+                         im_lower: float = -4,
+                         im_upper: float = 4,
+                         intensity_amplitude: float = 1e3,
+                         min_intensity: int = 5) -> TimsSlice:
+
+        I = create_initial_feature_distribution(num_rt, num_im, rt_lower, rt_upper,
+                                                im_lower, im_upper, distr_im, distr_rt)
+
+        frame_list = []
+
+        spec = pattern_generator.generate_spectrum(mz, charge, min_intensity=5, centroided=True, k=12)
+        mz, intensity = spec.mz(), spec.intensity() / np.max(spec.intensity()) * 100
+
+        for i in range(num_rt):
+
+            scan_arr, mz_arr, intensity_arr, tof_arr, inv_mob_arr = [], [], [], [], []
+
+            for j in range(num_im):
+                intensity_scaled = intensity * I[i, j]
+                intensity_scaled = intensity_scaled * intensity_amplitude
+                int_intensity = np.array([int(x) for x in intensity_scaled])
+
+                bt = [(x, y) for x, y in zip(mz, int_intensity) if y >= min_intensity]
+
+                mz_tmp = np.array([x for x, y in bt])
+                intensity_tmp = np.array([y for x, y in bt]).astype(np.int32)
+
+                rts = np.repeat(i, intensity_tmp.shape[0]).astype(np.float32)
+                scans = np.repeat(j, intensity_tmp.shape[0]).astype(np.int32)
+                tof = np.ones_like(rts).astype(np.int32)
+                inv_mob = np.ones_like(scans)
+
+                scan_arr.append(scans)
+                mz_arr.append(mz_tmp)
+                intensity_arr.append(intensity_tmp)
+                tof_arr.append(tof)
+                inv_mob_arr.append(inv_mob)
+
+            frame = TimsFrame(None, i, float(i),
+                              np.hstack(scan_arr), np.hstack(mz_arr), np.hstack(intensity_arr),
+                              np.hstack(tof_arr), np.hstack(inv_mob_arr))
+
+            frame_list.append(frame)
+
+        return TimsSlice(None, frame_list, [])
+
+
+class FeatureBatch:
+    def __init__(self, feature_table: pd.DataFrame, raw_data: TimsSlice):
+        """
+
+        :param feature_table:
+        :param raw_data:
+        """
+        self.feature_table = feature_table.sort_values(['rt_length'], ascending=False)
+        self.raw_data = raw_data
+        self.__feature_counter = 0
+        self.current_row = self.feature_table.iloc[0]
+        r = self.current_row
+        self.current_feature = self.get_feature(r.mz - 0.1, r.mz + (r.num_peaks / r.charge) + 0.1,
+                                                r.im_start,
+                                                r.im_stop,
+                                                r.rt_start,
+                                                r.rt_stop)
+
+    def __repr__(self):
+        return f'FeatureBatch(features={self.feature_table.shape[0]}, slice={self.raw_data})'
+
+    def get_feature(self, mz_min, mz_max, scan_min, scan_max, rt_min, rt_max, intensity_min=20):
+        return self.raw_data.filter_ranged(mz_min=mz_min,
+                                           mz_max=mz_max,
+                                           scan_min=scan_min,
+                                           scan_max=scan_max,
+                                           intensity_min=intensity_min,
+                                           rt_min=rt_min,
+                                           rt_max=rt_max)
+
+    def __iter__(self):
+        return self.current_row, self.current_feature
+
+    def __next__(self):
+        feature = self.current_row
+        data = self.current_feature
+
+        if self.__feature_counter < self.feature_table.shape[0]:
+            self.__feature_counter += 1
+            self.current_row = self.feature_table.iloc[self.__feature_counter]
+            r = self.current_row
+            self.current_feature = self.get_feature(r.mz - 0.1,
+                                                    r.mz + (r.num_peaks / r.charge) + 0.1,
+                                                    r.im_start,
+                                                    r.im_stop,
+                                                    r.rt_start,
+                                                    r.rt_stop)
+            return feature, data
+
+        raise StopIteration
diff --git a/imspy/imspy/handle.py b/imspy/imspy/handle.py
index 8c8430b3..4aec5145 100644
--- a/imspy/imspy/handle.py
+++ b/imspy/imspy/handle.py
@@ -10,8 +10,13 @@
 
 from abc import ABC
 
+<<<<<<<< HEAD:imspy/imspy/handle.py
 from imspy.frame import TimsFrame
 from imspy.slice import TimsSlice
+========
+from pyims.data import TimsFrame, TimsSlice
+
+>>>>>>>> 1cd02c1 (transfer simulation framework from proteolizard):imspy/imspy/data/handle.py
 
 
 class TimsDataset(ABC):
diff --git a/imspy/imspy/isotopes.py b/imspy/imspy/isotopes.py
new file mode 100644
index 00000000..49b874df
--- /dev/null
+++ b/imspy/imspy/isotopes.py
@@ -0,0 +1,242 @@
+from __future__ import annotations
+from typing import Optional, Tuple
+import warnings
+import numpy as np
+from numpy.typing import ArrayLike
+from abc import ABC, abstractmethod
+
+from scipy.signal import argrelextrema
+from pyims.data import MzSpectrum
+from pyims.utility import gaussian, exp_gaussian, normal_pdf
+import numba
+import pyopenms
+
+from pyims.noise import detection_noise
+
+MASS_PROTON = 1.007276466621
+MASS_NEUTRON = 1.00866491595
+
+
+@numba.jit(nopython=True)
+def factorial(n: int):
+    if n <= 0:
+        return 1
+    return n * factorial(n - 1)
+
+
+# linear dependency of mass
+@numba.jit(nopython=True)
+def lam(mass: float, slope: float = 0.000594, intercept: float = -0.03091):
+    """
+    :param intercept:
+    :param slope:
+    :param mass:
+    :return:
+    """
+    return slope * mass + intercept
+
+
+@numba.jit(nopython=True)
+def weight(mass: float, peak_nums: ArrayLike, normalize: bool = True):
+    """
+    :param mass:
+    :param num_steps:
+    :param normalize:
+    :return:
+    """
+    factorials = np.zeros_like(peak_nums)
+    norm = 1
+    for i,k in enumerate(peak_nums):
+        factorials[i] = factorial(k)
+    weights = np.exp(-lam(mass)) * np.power(lam(mass), peak_nums) / factorials
+    if normalize:
+        norm = weights.sum()
+    return weights/norm
+
+def get_pyopenms_weights(sequence: str, peak_nums: ArrayLike, generator: pyopenms.CoarseIsotopePatternGenerator):
+    """
+    Gets hypothetical intensities of isotopic peaks.
+
+    :param sequence: Peptide sequence in proForma format.
+    :type sequence: str
+    :param generator: pyopenms generator for isotopic pattern calculation
+    :type generator: pyopenms.CoarseIsotopePatternGenerator
+    :return: List of isotopic peak intensities
+    :rtype: List
+    """
+
+    n = peak_nums.shape[0]
+    generator.setMaxIsotope(n)
+    aa_sequence = sequence.strip("_")
+    peptide= pyopenms.AASequence().fromString(aa_sequence.replace("[","(").replace("]",")"))
+    formula = peptide.getFormula()
+    distribution = generator.run(formula)
+    intensities = [i.getIntensity() for i in distribution.getContainer()]
+    return intensities
+
+@numba.jit(nopython=True)
+def iso(x: ArrayLike, mass: float, charge: float, sigma: float, amp: float, K: int, step_size:float, add_detection_noise: bool = False, mass_neutron: float = MASS_NEUTRON):
+    """
+    :param mass_neutron:
+    :param x:
+    :param mass:
+    :param charge:
+    :param sigma:
+    :param amp:
+    :param K:4
+    :param step_size:
+    :param add_detection_noise:
+    :return:
+    """
+    k = np.arange(K)
+    means = ((mass + mass_neutron * k) / charge).reshape((1,-1))
+    weights = weight(mass,k).reshape((1,-1))
+    intensities = np.sum(weights*normal_pdf(x.reshape((-1,1)), means, sigma), axis= 1)*step_size
+    if add_detection_noise:
+        return detection_noise(intensities*amp)
+    else:
+        return intensities * amp
+
+
+@numba.jit(nopython=True)
+def numba_generate_pattern(lower_bound: float,
+                     upper_bound: float,
+                     mass: float,
+                     charge: float,
+                     amp: float,
+                     k: int,
+                     sigma: float = 0.008492569002123142,
+                     resolution: int = 3):
+    """
+    :param lower_bound:
+    :param upper_bound:
+    :param mass:
+    :param charge:
+    :param amp:
+    :param k:
+    :param sigma:
+    :param resolution:
+    :return:
+    """
+    step_size = min(sigma/10,1/np.power(10,resolution))
+    size = int((upper_bound-lower_bound)//step_size+1)
+    mzs = np.linspace(lower_bound,upper_bound,size)
+    intensities = iso(mzs,mass,charge,sigma,amp,k,step_size)
+
+    return mzs + MASS_PROTON, intensities.astype(np.int64)
+
+#@numba.jit(nopython= True)
+def numba_ion_sampler(mass: float, charge: int, sigma: ArrayLike, k: int, ion_count: int, intensity_per_ion: int ):
+        sigma = np.atleast_1d(sigma)
+        if sigma.size == 1:
+            sigma = np.repeat(sigma,k)
+        if sigma.size != k:
+            raise ValueError("Sigma must be either length 1 or k")
+
+
+        us = np.zeros(ion_count)
+        devs_std = np.zeros(ion_count)
+        for ion in range(ion_count):
+            u = np.random.uniform()
+            dev_std = np.random.normal()
+            us[ion] = u
+            devs_std[ion] = dev_std
+
+        weights = weight(mass, np.arange(k)).cumsum()
+
+        def _get_component(u: float, weights_cum_sum: ArrayLike = weights) -> int:
+            for idx, weight_sum in enumerate(weights_cum_sum):
+                if u < weight_sum:
+                    return idx
+
+        comps = np.zeros_like(us)
+        devs = np.zeros_like(us)
+        for idx, u in enumerate(us):
+            comp = _get_component(u)
+            comps[idx] = comp
+            devs[idx] = sigma[comp] * devs_std[idx]
+
+        mz = ((comps*MASS_NEUTRON) + mass) / charge + MASS_PROTON + devs
+        i = np.ones_like(mz) * intensity_per_ion
+        return (mz, i.astype(np.int64))
+
+@numba.jit
+def create_initial_feature_distribution(num_rt: int, num_im: int,
+                                        rt_lower: float = -9,
+                                        rt_upper: float = 18,
+                                        im_lower: float = -4,
+                                        im_upper: float = 4,
+                                        distr_im=gaussian,
+                                        distr_rt=exp_gaussian) -> np.array:
+
+    I = np.ones((num_rt, num_im)).astype(np.float32)
+
+    for i, x in enumerate(np.linspace(im_lower, im_upper, num_im)):
+        for j, y in enumerate(np.linspace(rt_lower, rt_upper, num_rt)):
+            I[j, i] *= (distr_im(x) * distr_rt(y))
+
+    return I
+
+
+class IsotopePatternGenerator(ABC):
+    def __init__(self):
+        pass
+    @abstractmethod
+    def generate_pattern(self, mass: float, charge: int) -> Tuple[ArrayLike, ArrayLike]:
+        pass
+
+    @abstractmethod
+    def generate_spectrum(self, mass: int, charge: int, min_intensity: int) -> MzSpectrum:
+        pass
+
+
+class AveragineGenerator(IsotopePatternGenerator):
+    def __init__(self):
+        super().__init__()
+        self.default_abundancy = 1e4
+
+    def generate_pattern(self, mass: float, charge: int, k: int = 7,
+                         amp: Optional[float] = None, resolution: float = 3,
+                         min_intensity: int = 5) -> Tuple[ArrayLike, ArrayLike]:
+        pass
+
+    def generate_spectrum(self, mass: int, charge: int, frame_id: int, scan_id: int, k: int = 7,
+                          amp :Optional[float] = None, resolution:float =3, min_intensity: int = 5, centroided: bool = True) -> MzSpectrum:
+        if amp is None:
+            amp = self.default_abundancy
+
+        lb = mass / charge - .2
+        ub = mass / charge + k + .2
+
+        mz, i = numba_generate_pattern(lower_bound=lb, upper_bound=ub,
+                                 mass=mass, charge=charge, amp=amp, k=k, resolution=resolution)
+
+        if centroided:
+            return MzSpectrum(None, frame_id, scan_id, mz, i)\
+                    .to_resolution(resolution).filter(lb,ub,min_intensity)\
+                    .to_centroided(baseline_noise_level=max(min_intensity,1), sigma=1/np.power(10,resolution-1))
+
+        return MzSpectrum(None, frame_id, scan_id, mz, i).to_resolution(resolution).filter(lb,ub,min_intensity)
+
+    def generate_spectrum_by_sampling(self,
+                                      mass: float,
+                                      charge: int,
+                                      frame_id:int,
+                                      scan_id: int,
+                                      k:int =7,
+                                      sigma: ArrayLike = 0.008492569002123142,
+                                      ion_count:int = 1000000,
+                                      resolution:float = 3,
+                                      intensity_per_ion:int = 1,
+                                      centroided = True,
+                                      min_intensity = 5) -> MzSpectrum:
+
+        assert 100 <= mass / charge <= 2000, f"m/z should be between 100 and 2000, was: {mass / charge}"
+        mz, i = numba_ion_sampler(mass, charge, sigma, k, ion_count, intensity_per_ion)
+
+        if centroided:
+            return MzSpectrum(None,frame_id,scan_id,mz,i)\
+                    .to_resolution(resolution).filter(-1,-1,min_intensity)\
+                    .to_centroided(baseline_noise_level=max(min_intensity,1), sigma=1/np.power(10,resolution-1))
+        return MzSpectrum(None,frame_id,scan_id,mz,i)\
+                .to_resolution(resolution).filter(-1,-1,min_intensity)
\ No newline at end of file
diff --git a/imspy/imspy/noise.py b/imspy/imspy/noise.py
new file mode 100644
index 00000000..1702faef
--- /dev/null
+++ b/imspy/imspy/noise.py
@@ -0,0 +1,124 @@
+""" This module contains several noise models,
+    such as detection noise, shot noise and baseline noise.
+"""
+import numpy as np
+import numba
+from typing import Callable, Optional, Tuple
+from numpy.typing import ArrayLike
+from pyims.data import MzSpectrum
+from pyims.utility import normal_pdf
+
+@numba.jit(nopython=True)
+def mu_function_normal_default(intensity: ArrayLike) -> ArrayLike:
+    return intensity
+
+@numba.jit(nopython=True)
+def sigma_function_normal_default(intensity:ArrayLike) -> ArrayLike:
+    return np.sqrt(intensity)
+
+@numba.jit(nopython=True)
+def mu_function_poisson_default(intensity:ArrayLike) -> ArrayLike:
+    offset = 0
+    return intensity+ offset
+
+@numba.jit(nopython=True)
+def detection_noise(signal: ArrayLike,
+                    method: str = "poisson",
+                    custom_mu_function: Optional[Callable] = None,
+                    custom_sigma_function: Optional[Callable] = None) -> ArrayLike:
+    """
+
+    :param signal:
+    :param method:
+    :param custom_mu_function:
+    :param custom_sigma_function:
+    """
+    if method == "normal":
+        if custom_sigma_function is None:
+            sigma_function:Callable = sigma_function_normal_default
+        else:
+            sigma_function:Callable = custom_sigma_function
+        if custom_mu_function is None:
+            mu_function:Callable = mu_function_normal_default
+        else:
+            mu_function:Callable = custom_mu_function
+
+        sigmas = sigma_function(signal)
+        mus = mu_function(signal)
+        noised_signal = np.zeros_like(mus)
+
+        for i in range(noised_signal.size):
+            s = sigmas[i]
+            m = mus[i]
+            n = np.random.normal()*s+m
+            noised_signal[i] = n
+
+    elif method == "poisson":
+        if custom_sigma_function is not None:
+            raise ValueError("Sigma function is not used if method is 'poisson'")
+        if custom_mu_function is None:
+            mu_function:Callable = mu_function_poisson_default
+        else:
+            mu_function:Callable = custom_mu_function
+        mus = mu_function(signal)
+        noised_signal = np.zeros_like(mus)
+
+        for i in range(noised_signal.size):
+            mu = mus[i]
+            n = np.random.poisson(lam = mu)
+            noised_signal[i] = n
+
+    else:
+        raise NotImplementedError("This method is not implemented, choose 'normal' or 'poisson'.")
+
+    return noised_signal
+
+@numba.jit(nopython=True)
+def generate_noise_peak(pos:float, sigma: float, intensity: float, min_intensity:int = 0, resolution:float = 3):
+    step_size = min(sigma/10,1/np.power(10,resolution))
+    lower = int((pos-4*sigma)//step_size)
+    upper = int((pos+4*sigma)//step_size)
+    mzs = np.arange(lower,upper)*step_size
+    intensities = normal_pdf(mzs,pos,sigma,normalize=True)*intensity*step_size
+    return (mzs[intensities>=min_intensity], intensities[intensities>=min_intensity].astype(np.int64))
+
+
+def baseline_shot_noise_window(window:MzSpectrum,
+                               window_theoretical_mz_min:float,
+                               window_theoretical_mz_max:float,
+                               expected_noise_peaks: int = 5,
+                               expected_noise_intensity: float = 10,
+                               expected_noise_sigma:float = 0.001,
+                               resolution:float = 3) -> MzSpectrum:
+    """
+
+    """
+    num_noise_peaks = np.random.poisson(lam=expected_noise_peaks)
+    noised_window = MzSpectrum(None,-1,-1,[],[])
+    for i in range(num_noise_peaks):
+        location_i = np.random.uniform(window_theoretical_mz_min,window_theoretical_mz_max)
+        intensity_i = np.random.exponential(expected_noise_intensity)
+        sigma_i = np.random.exponential(expected_noise_sigma)
+        noise_mz, noise_intensity = generate_noise_peak(location_i,sigma_i,intensity_i,resolution=resolution)
+        noise_peak = MzSpectrum(None,-1,-1, noise_mz, noise_intensity)
+        noised_window = noised_window+noise_peak
+    return noised_window
+
+
+def baseline_shot_noise(spectrum:MzSpectrum,window_size:float=50,expected_noise_peaks_per_Th:int=10,min_intensity:int = 5, resolution = 3):
+    """
+
+
+    """
+    min_mz = spectrum.mz().min()-0.1
+    max_mz = spectrum.mz().max()+0.1
+    bins,windows = spectrum.windows(window_size,overlapping=False,min_peaks=0,min_intensity=0)
+    noised_spectrum = spectrum
+    for b,w in zip(bins,windows):
+        lower = b*window_size
+        upper = (b+1)*window_size
+        noised_spectrum = noised_spectrum+baseline_shot_noise_window(w,lower,upper,window_size*expected_noise_peaks_per_Th)
+    return noised_spectrum.to_resolution(resolution).filter(min_mz,max_mz,min_intensity)
+
+def baseline_noise():
+    pass
\ No newline at end of file
diff --git a/imspy/imspy/proteome.py b/imspy/imspy/proteome.py
new file mode 100644
index 00000000..e4490467
--- /dev/null
+++ b/imspy/imspy/proteome.py
@@ -0,0 +1,322 @@
+from __future__ import annotations
+import os
+import warnings
+import numpy as np
+from numpy.typing import ArrayLike
+import pandas as pd
+import sqlite3
+from pyims.data import MzSpectrum
+from pyims.chemistry import get_mono_isotopic_weight, MASS_PROTON
+
+from pyims.feature import RTProfile, ScanProfile, ChargeProfile
+from pyims.utility import tokenize_proforma_sequence, TokenSequence, get_aa_num_proforma_sequence
+from enum import Enum
+from abc import ABC, abstractmethod
+
+from typing import Optional, List, Union
+
+class ENZYME(Enum):
+    TRYPSIN = 1
+
+
+class ORGANISM(Enum):
+    HOMO_SAPIENS = 1
+
+
+class Enzyme(ABC):
+    def __init__(self, name: ENZYME):
+        self.name = name
+
+    @abstractmethod
+    def calculate_cleavages(self, sequence: str) -> np.array:
+        pass
+
+    def digest(self, sequence: str, missed_cleavages: int, min_length) -> list[str]:
+        pass
+
+
+class Trypsin(Enzyme):
+    def __init__(self, name: ENZYME = ENZYME.TRYPSIN):
+        super().__init__(name)
+        self.name = name
+
+    def calculate_cleavages(self, sequence: str):
+        """
+        Scans sequence for cleavage motifs
+        and returns list of tuples of peptide intervals.
+
+        :param sequence: protein sequence to digest
+        :type sequence: str
+        :return: beginning with
+        :rtype: List[Tuple[int,int]]
+        """
+        cut_sites = []
+        start = 0
+        end = 0
+        in_unimod_bracket = False
+        for i,aa in enumerate(sequence[:-1]):
+            # skip unimod brackets
+            if aa == "(":
+                in_unimod_bracket = True
+            if in_unimod_bracket:
+                if aa == ")":
+                    in_unimod_bracket = False
+                continue
+
+            # cut after every 'K' or 'R' that is not followed by 'P'
+
+            next_aa = sequence[i+1]
+            if ((aa == 'K') or (aa == 'R')) and next_aa != 'P':
+                end = i+1
+                cut_sites.append((start, end))
+                start = end
+
+        cut_sites.append((start, len(sequence)))
+
+        return np.array(cut_sites)
+
+    def __repr__(self):
+        return f'Enzyme(name: {self.name.name})'
+
+    def digest(self, sequence:str, abundancy:float, missed_cleavages:int =0, min_length:int =7):
+        """
+        Digests a protein into peptides.
+
+        :param sequence: Sequence of protein in ProForma standard.
+        :type sequence: str
+        :param abundancy: Abundance of protein
+        :type abundancy: float
+        :param missed_cleavages: Number of allowed missed cleavages, defaults to 0
+        :type missed_cleavages: int, optional
+        :param min_length: Min length of recorded peptides, defaults to 7
+        :type min_length: int, optional
+        :return: List of dictionaries with `sequence`,`start`, `end` and `abundance` as keys.
+        :rtype: List[Dict]
+        """
+        assert 0 <= missed_cleavages <= 2, f'Number of missed cleavages might be between 0 and 2, was: {missed_cleavages}'
+
+        peptide_intervals = self.calculate_cleavages(sequence)
+
+        # TODO: implement
+        if missed_cleavages == 1:
+            pass
+        if missed_cleavages == 2:
+            pass
+
+        dig_list = []
+
+        for s, e in peptide_intervals:
+            dig_list.append(sequence[s: e])
+
+        # pair sequence digests with indices
+        wi = zip(dig_list, peptide_intervals)
+        # filter out short sequences and clean index display
+        wi = map(lambda p: (p[0], p[1][0], p[1][1]), filter(lambda s: get_aa_num_proforma_sequence(s[0]) >= min_length, wi))
+
+        return list(map(lambda e: {'sequence': e[0], 'start': e[1], 'end': e[2], 'abundancy': abundancy}, wi))
+
+
+class PeptideDigest:
+    def __init__(self, data: pd.DataFrame, organism: ORGANISM, enzyme: ENZYME):
+        self.data = data
+        self.organism = organism
+        self.enzyme = enzyme
+
+    def __repr__(self):
+        return f'PeptideMix(Sample: {self.organism.name}, Enzyme: {self.enzyme.name}, number peptides: {self.data.shape[0]})'
+
+
+class ProteinSample:
+
+    def __init__(self, data: pd.DataFrame, name: ORGANISM):
+        self.data = data
+        self.name = name
+
+    def digest(self, enzyme: Enzyme, missed_cleavages: int = 0, min_length: int = 7) -> PeptideDigest:
+        """
+        Digest all proteins in the sample with `enzyme`
+
+        :param enzyme: Enzyme for digestion e.g. instance of `Trypsin`
+        :type enzyme: Enzyme
+        :param missed_cleavages: Number of allowed missed cleavages, defaults to 0
+        :type missed_cleavages: int, optional
+        :param min_length: Minimum length of peptide to be recorded, defaults to 7
+        :type min_length: int, optional
+        :return: Digested sample
+        :rtype: PeptideDigest
+        """
+        # digest with enzyme row-wise (one protein at a time)
+        digest = self.data.apply(lambda r: enzyme.digest(r['sequence'], r['abundancy'], missed_cleavages, min_length), axis=1)
+
+        V = zip(self.data['id'].values, digest.values)
+
+        r_list = []
+
+        for (gene, peptides) in V:
+            for (pep_idx, pep) in enumerate(peptides):
+                # discard sequences with unknown amino acids
+                if pep['sequence'].find('X') == -1:
+                    pep['gene_id'] = gene
+                    pep['pep_id'] = f"{gene}_{pep_idx}"
+                    pep['sequence_tokenized'] = tokenize_proforma_sequence(pep['sequence'])
+                    pep['mass_theoretical'] = get_mono_isotopic_weight(pep['sequence_tokenized'])
+                    pep['sequence_tokenized'] = TokenSequence(pep['sequence_tokenized'])
+                    pep['sequence'] = '_' + pep['sequence'] + '_'
+                    r_list.append(pep)
+
+        return PeptideDigest(pd.DataFrame(r_list), self.name, enzyme.name)
+
+    def __repr__(self):
+        return f'ProteinSample(Organism: {self.name.name})'
+
+class ProteomicsExperimentDatabaseHandle:
+    def __init__(self,path:str):
+        if not os.path.exists(path):
+            print("Connect to Database")
+
+        self.con = sqlite3.connect(path)
+        self._chunk_size = None
+
+    def push(self, table_name:str, data:PeptideDigest):
+        if table_name == "PeptideDigest":
+            assert isinstance(data, PeptideDigest), "For pushing to table 'PeptideDigest' data type must be `PeptideDigest`"
+            df = data.data.apply(self.make_sql_compatible)
+        else:
+            raise ValueError("This Table does not exist and is not supported")
+
+        df.to_sql(table_name, self.con, if_exists="replace")
+
+    def update(self, data_slice: ProteomicsExperimentSampleSlice):
+        assert isinstance(data_slice, ProteomicsExperimentSampleSlice)
+        df_separated_peptides = data_slice.peptides.apply(self.make_sql_compatible)
+        df_ions = data_slice.ions.apply(self.make_sql_compatible)
+        df_separated_peptides.to_sql("SeparatedPeptides", self.con, if_exists="append")
+        df_ions.to_sql("Ions", self.con , if_exists="append")
+
+    def load(self, table_name:str, query:Optional[str] = None):
+        if query is None:
+            query = f"SELECT * FROM {table_name}"
+        return pd.read_sql(query,self.con, index_col="index")
+
+    def load_chunks(self, chunk_size: int, query:Optional[str] = None):
+        if query is None:
+            query = "SELECT * FROM PeptideDigest"
+        self.__chunk_generator =  pd.read_sql(query,self.con, chunksize=chunk_size, index_col="index")
+        for chunk in self.__chunk_generator:
+            chunk.loc[:,"sequence_tokenized"] = chunk.loc[:,"sequence_tokenized"].transform(lambda st: TokenSequence(None, jsons=st))
+            yield(ProteomicsExperimentSampleSlice(peptides = chunk))
+
+    def load_frames(self, frame_range:Tuple[int,int], spectra_as_jsons = False):
+        query = (
+                "SELECT SeparatedPeptides.pep_id, "
+                "SeparatedPeptides.sequence, "
+                "SeparatedPeptides.simulated_frame_profile, "
+                "SeparatedPeptides.mass_theoretical, "
+                "SeparatedPeptides.abundancy, "
+                "SeparatedPeptides.frame_min, "
+                "SeparatedPeptides.frame_max, "
+                "Ions.mz, "
+                "Ions.charge, "
+                "Ions.relative_abundancy, "
+                "Ions.scan_min, "
+                "Ions.scan_max, "
+                "Ions.simulated_scan_profile, "
+                "Ions.simulated_mz_spectrum "
+                "FROM SeparatedPeptides "
+                "INNER JOIN Ions "
+                "ON SeparatedPeptides.pep_id = Ions.pep_id "
+                f"AND SeparatedPeptides.frame_min < {frame_range[1]} "
+                f"AND SeparatedPeptides.frame_max >= {frame_range[0]} "
+                )
+        df = pd.read_sql(query, self.con)
+
+        # unzip jsons
+        df.loc[:,"simulated_scan_profile"] = df["simulated_scan_profile"].transform(lambda sp: ScanProfile(jsons=sp))
+        df.loc[:,"simulated_frame_profile"] = df["simulated_frame_profile"].transform(lambda rp: RTProfile(jsons=rp))
+        if spectra_as_jsons:
+            return df
+        else:
+            df.loc[:,"simulated_mz_spectrum"] = df["simulated_mz_spectrum"].transform(lambda s: MzSpectrum.from_jsons(jsons=s))
+
+        return df
+
+    @staticmethod
+    def make_sql_compatible(column):
+        if column.size < 1:
+            return
+        if isinstance(column.iloc[0], (RTProfile, ScanProfile, ChargeProfile, TokenSequence)):
+            return column.apply(lambda x: x.jsons)
+        elif isinstance(column.iloc[0], MzSpectrum):
+            return column.apply(lambda x: x.to_jsons(only_spectrum = True))
+        else:
+            return column
+
+class ProteomicsExperimentSampleSlice:
+    """
+    exposed dataframe of database
+    """
+    def __init__(self, peptides:pd.DataFrame, ions:Optional[pd.DataFrame]=None):
+        self.peptides = peptides
+        self.ions = ions
+
+    def add_simulation(self, simulation_name:str, simulation_data: ArrayLike):
+        accepted_peptide_simulations = [
+                                "simulated_irt_apex",
+                                "simulated_frame_apex",
+                                "simulated_frame_profile",
+                                "simulated_charge_profile",
+                                ]
+
+        accepted_ion_simulations = [
+                                "simulated_scan_apex",
+                                "simulated_k0",
+                                "simulated_scan_profile",
+                                "simulated_mz_spectrum",
+                                ]
+
+        # for profiles store min and max values
+        get_min_position = np.vectorize(lambda p:p.positions.min(), otypes=[int])
+        get_max_position = np.vectorize(lambda p:p.positions.max(), otypes=[int])
+
+        if simulation_name == "simulated_frame_profile":
+
+            self.peptides["frame_min"] = get_min_position(simulation_data)
+            self.peptides["frame_max"] = get_max_position(simulation_data)
+
+        elif simulation_name == "simulated_charge_profile":
+            ions_dict = {
+                "sequence":[],
+                "pep_id":[],
+                "mz":[],
+                "charge":[],
+                "relative_abundancy":[]
+                }
+            sequences = self.peptides["sequence"].values
+            pep_ids = self.peptides["pep_id"].values
+            masses = self.peptides["mass_theoretical"].values
+
+            for (s, pi, m, charge_profile) in zip(sequences, pep_ids, masses, simulation_data):
+                for (c, r) in charge_profile:
+                    ions_dict["sequence"].append(s)
+                    ions_dict["pep_id"].append(pi)
+                    ions_dict["charge"].append(c)
+                    ions_dict["relative_abundancy"].append(r)
+                    ions_dict["mz"].append(m/c + MASS_PROTON)
+            self.ions = pd.DataFrame(ions_dict)
+
+            self.peptides["charge_min"] = get_min_position(simulation_data)
+            self.peptides["charge_max"] = get_max_position(simulation_data)
+
+        elif simulation_name == "simulated_scan_profile":
+
+            self.ions["scan_min"] = get_min_position(simulation_data)
+            self.ions["scan_max"] = get_max_position(simulation_data)
+
+        if simulation_name in accepted_peptide_simulations:
+            self.peptides[simulation_name] = simulation_data
+
+        elif simulation_name in accepted_ion_simulations:
+            self.ions[simulation_name] = simulation_data
+
+        else:
+            raise ValueError(f"Simulation name '{simulation_name}' is not defined")
\ No newline at end of file
diff --git a/imspy/imspy/simulation/experiment.py b/imspy/imspy/simulation/experiment.py
new file mode 100644
index 00000000..c6896f31
--- /dev/null
+++ b/imspy/imspy/simulation/experiment.py
@@ -0,0 +1,204 @@
+import os
+import json
+from multiprocessing import Pool
+import functools
+from abc import ABC, abstractmethod
+import pandas as pd
+import numpy as np
+import pyarrow as pa
+import pyarrow.parquet as pq
+from tqdm import tqdm
+from pyims.data import MzSpectrum, TimsFrame
+from pyims.proteome import PeptideDigest, ProteomicsExperimentSampleSlice, ProteomicsExperimentDatabaseHandle
+from pyims.isotopes import AveragineGenerator
+import pyims.simulation.hardware_models as hardware
+
+class ProteomicsExperiment(ABC):
+    def __init__(self, path: str):
+
+        # path strings to experiment folder, database and output subfolder
+        self.path = path
+        self.output_path = f"{os.path.dirname(path)}/output"
+        self.database_path = f"{self.path}/experiment_database.db"
+
+        if not os.path.exists(self.path):
+            os.makedirs(self.path)
+
+        if os.path.exists(self.database_path):
+            raise FileExistsError("Experiment found in the given path.")
+
+        if not os.path.exists(self.output_path):
+            os.mkdir(self.output_path)
+
+        # output folder must be empty, otherwise it is
+        # assumend that it contains old experiments
+        if len(os.listdir(self.output_path)) > 0:
+            raise FileExistsError("Experiment found in the given path.")
+
+        # init database and loaded sample
+        self.database = ProteomicsExperimentDatabaseHandle(self.database_path)
+        self.loaded_sample = None
+
+        # hardware methods
+        self._lc_method = None
+        self._ionization_method = None
+        self._ion_mobility_separation_method = None
+        self._mz_separation_method = None
+
+    @property
+    def lc_method(self):
+        return self._lc_method
+
+    @lc_method.setter
+    def lc_method(self, method: hardware.LiquidChromatography):
+        self._lc_method = method
+
+    @property
+    def ionization_method(self):
+        return self._ionization_method
+
+    @ionization_method.setter
+    def ionization_method(self, method: hardware.IonSource):
+        self._ionization_method = method
+
+    @property
+    def ion_mobility_separation_method(self):
+        return self._ion_mobility_separation_method
+
+    @ion_mobility_separation_method.setter
+    def ion_mobility_separation_method(self, method: hardware.IonMobilitySeparation):
+        self._ion_mobility_separation_method = method
+
+    @property
+    def mz_separation_method(self):
+        return self._mz_separation_method
+
+    @mz_separation_method.setter
+    def mz_separation_method(self, method: hardware.MzSeparation):
+        self._mz_separation_method = method
+
+    @abstractmethod
+    def load_sample(self, sample: PeptideDigest):
+        self.database.push("PeptideDigest",sample)
+
+    @abstractmethod
+    def run(self):
+        pass
+
+
+class LcImsMsMs(ProteomicsExperiment):
+    def __init__(self, path:str):
+        super().__init__(path)
+
+    def load_sample(self, sample: PeptideDigest):
+        return super().load_sample(sample)
+
+    def run(self, chunk_size: int = 1000, assemble_processes: int = 8, frames_per_assemble_process:int = 20):
+        self._simulate_features(chunk_size)
+        self._assemble(frames_per_assemble_process, assemble_processes)
+
+
+    def _simulate_features(self, chunk_size):
+        # load bulks of data here as dataframe if necessary
+        for data_chunk in self.database.load_chunks(chunk_size):
+            self.lc_method.run(data_chunk)
+            self.ionization_method.run(data_chunk)
+            self.ion_mobility_separation_method.run(data_chunk)
+            self.mz_separation_method.run(data_chunk)
+            self.database.update(data_chunk)
+    @staticmethod
+    def _assemble_frame_range(frame_range, scan_id_min, scan_id_max, default_abundance, resolution, output_path, database_path):
+
+        frame_range_start = frame_range[0]
+        frame_range_end = frame_range[1]
+        # generate file_name
+        file_name = f"frames_{frame_range_start}_{frame_range_end}.parquet"
+        output_file_path = f"{output_path}/{file_name}"
+
+        frame_range = range(frame_range_start,frame_range_end)
+        scan_range  = range(scan_id_min, scan_id_max+1)
+
+        thread_db_handle = ProteomicsExperimentDatabaseHandle(database_path)
+        ions_in_split = thread_db_handle.load_frames((frame_range_start, frame_range_end), spectra_as_jsons = True)
+
+
+        # skip if no peptides in split
+        if ions_in_split.shape[0] == 0:
+            return {}
+
+        # spectra are currently stored in json format (from SQL db)
+        ions_in_split.loc[:,"simulated_mz_spectrum"] = ions_in_split["simulated_mz_spectrum"].transform(lambda s: MzSpectrum.from_jsons(jsons=s))
+
+        # construct signal data set
+        signal = {f_id:{s_id:[] for s_id in scan_range} for f_id in frame_range }
+
+        for _,row in ions_in_split.iterrows():
+
+            ion_frame_start = max(frame_range_start, row["frame_min"])
+            ion_frame_end = min(frame_range_end-1, row["frame_max"]) # -1 here because frame_range_end is covered by next frame range
+
+            ion_scan_start = max(scan_id_min, row["scan_min"])
+            ion_scan_end = min(scan_id_max, row["scan_max"])
+
+            ion_frame_profile = row["simulated_frame_profile"]
+            ion_scan_profile = row["simulated_scan_profile"]
+
+            ion_charge_abundance = row["abundancy"]*row["relative_abundancy"]
+
+            ion_spectrum = row["simulated_mz_spectrum"]
+
+            # frame start and end inclusive
+            for f_id in range(ion_frame_start, ion_frame_end+1):
+                # scan start and end inclusive
+                for s_id in range(ion_scan_start, ion_scan_end+1):
+
+                    abundance = ion_charge_abundance*ion_frame_profile[f_id]*ion_scan_profile[s_id]
+                    rel_to_default_abundance = abundance/default_abundance
+
+                    signal[f_id][s_id].append(ion_spectrum*rel_to_default_abundance)
+
+        output_dict = {"frame_id" : [],
+                       "scan_id" : [],
+                       "mz" : [],
+                       "intensity" : [],
+        }
+        for (f_id,frame_dict) in signal.items():
+            for (s_id,scan_spectra_list) in frame_dict.items():
+
+                if len(scan_spectra_list) > 0:
+                    scan_spectrum = MzSpectrum.from_mzSpectra_list(scan_spectra_list,resolution = resolution, sigma=1/np.power(10,(resolution-1)))
+                    output_dict["mz"].append(scan_spectrum.mz().tolist())
+                    output_dict["intensity"].append(scan_spectrum.intensity().tolist())
+                    output_dict["scan_id"].append(s_id)
+                    output_dict["frame_id"].append(f_id)
+                    signal[f_id][s_id] = None
+
+        for key, value in output_dict.items():
+            output_dict[key] = pa.array(value)
+
+        pa_table = pa.Table.from_pydict(output_dict)
+
+        pq.write_table(pa_table, output_file_path, compression=None)
+
+    def _assemble(self, frames_per_process:int, num_processes:int):
+
+        scan_id_min = self.ion_mobility_separation_method.scan_id_min
+        scan_id_max = self.ion_mobility_separation_method.scan_id_max
+        default_abundance = self.mz_separation_method.model.default_abundance
+        resolution = self.mz_separation_method.resolution
+
+        split_positions = np.arange(0, self.lc_method.num_frames , step=frames_per_process ).astype(int)
+        split_start = split_positions[:-1]
+        split_end = split_positions[1:]
+
+        assemble_frame_range = functools.partial(self._assemble_frame_range, scan_id_min = scan_id_min, scan_id_max = scan_id_max, default_abundance = default_abundance, resolution = resolution, output_path = self.output_path, database_path= self.database_path)
+
+        if num_processes > 1:
+            with Pool(num_processes) as pool:
+                list(tqdm(pool.imap(assemble_frame_range, zip(split_start, split_end)),total=len(split_start)))
+        else:
+            for start,end in tqdm(zip(split_start,split_end), total = len(split_start)):
+                assemble_frame_range((start, end))
+
+
+
diff --git a/imspy/imspy/simulation/hardware_models.py b/imspy/imspy/simulation/hardware_models.py
new file mode 100644
index 00000000..817d9797
--- /dev/null
+++ b/imspy/imspy/simulation/hardware_models.py
@@ -0,0 +1,766 @@
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from typing import List, Tuple
+from multiprocessing import Pool
+
+import tensorflow as tf
+import numpy as np
+from numpy.typing import ArrayLike, NDArray
+import pandas as pd
+from scipy.stats import exponnorm, norm, binom, gamma
+
+from pyims.chemistry import  STANDARD_TEMPERATURE, STANDARD_PRESSURE, CCS_K0_CONVERSION_CONSTANT, BufferGas, get_num_protonizable_sites
+from pyims.proteome import ProteomicsExperimentSampleSlice
+from pyims.feature import RTProfile, ScanProfile, ChargeProfile
+from pyims.isotopes import AveragineGenerator
+from pyims.utility import tokenizer_from_json
+
+class Device(ABC):
+    def __init__(self, name:str):
+        self.name = name
+        self._temperature = STANDARD_TEMPERATURE
+        self._pressure = STANDARD_PRESSURE
+
+    @property
+    def temperature(self):
+        """
+        Get device temperature
+
+        :return: Temperature of device in Kelvin.
+        :rtype: float
+        """
+        return self._temperature
+
+    @temperature.setter
+    def temperature(self, T:float):
+        """
+        Set device temperature
+
+        :param T: Temperature in Kelvin.
+        :type T: float
+        """
+        self._temperature = T
+
+    @property
+    def pressure(self):
+        """
+        Get device pressure
+
+        :return: Pressure of device in Pa.
+        :rtype: float
+        """
+        return self._pressure
+
+    @pressure.setter
+    def pressure(self, p:float):
+        """
+        Set device pressure
+        :param p: Pressure in Pa.
+        :type p: float
+        """
+        self._pressure = p
+
+    @abstractmethod
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        pass
+
+class Model(ABC):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Device):
+        pass
+
+class Chromatography(Device):
+    def __init__(self, name:str="ChromatographyDevice"):
+        super().__init__(name)
+        self._apex_model = None
+        self._profile_model = None
+        self._irt_to_rt_converter = None
+        self._frame_length = 1200
+        self._gradient_length = 120*60*1000 # 120 minutes in miliseconds
+
+    @property
+    def frame_length(self):
+        return self._frame_length
+
+    @frame_length.setter
+    def frame_length(self, milliseconds: int):
+        self._frame_length = milliseconds
+
+    @property
+    def gradient_length(self):
+        return self._gradient_length/(60*1000)
+
+    @gradient_length.setter
+    def gradient_length(self, minutes: int):
+        self._gradient_length = minutes*60*1000
+
+    @property
+    def num_frames(self):
+        return self._gradient_length//self._frame_length
+
+    @property
+    def apex_model(self):
+        return self._apex_model
+
+    @apex_model.setter
+    def apex_model(self, model: ChromatographyApexModel):
+        self._apex_model = model
+
+    @property
+    def profile_model(self):
+        return self._profile_model
+
+    @property
+    def irt_to_rt_converter(self):
+        return self._irt_to_rt_converter
+
+    @irt_to_rt_converter.setter
+    def irt_to_rt_converter(self, converter:callable):
+        self._irt_to_rt_converter = converter
+
+    @profile_model.setter
+    def profile_model(self, model:ChromatographyProfileModel):
+        self._profile_model = model
+
+    def irt_to_frame_id(self, irt: float):
+        return self.rt_to_frame_id(self.irt_to_rt(irt))
+
+    @abstractmethod
+    def rt_to_frame_id(self, rt: float):
+        pass
+
+    def irt_to_rt(self, irt):
+        return self.irt_to_rt_converter(irt)
+
+    def frame_time_interval(self, frame_id:ArrayLike):
+        frame_id = np.atleast_1d(frame_id)
+        frame_length_minutes = self.frame_length/(60*1000)
+        s = (frame_id-1)*frame_length_minutes
+        e = frame_id*frame_length_minutes
+        return np.stack([s, e], axis = 1)
+
+    def frame_time_middle(self, frame_id: ArrayLike):
+        return np.mean(self.frame_time_interval(frame_id),axis=1)
+
+    def frame_time_end(self, frame_id: ArrayLike):
+        return self.frame_time_interval(frame_id)[:,1]
+
+    def frame_time_start(self, frame_id: ArrayLike):
+        return self.frame_time_interval(frame_id)[:,0]
+
+class LiquidChromatography(Chromatography):
+    def __init__(self, name: str = "LiquidChromatographyDevice"):
+        super().__init__(name)
+
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        # retention time apex simulation
+        retention_time_apex = self._apex_model.simulate(sample, self)
+        # in irt and rt
+        sample.add_simulation("simulated_irt_apex", retention_time_apex)
+        # in frame id
+        sample.add_simulation("simulated_frame_apex", self.irt_to_frame_id(retention_time_apex))
+
+        # profile simulation
+
+        retention_profile = self._profile_model.simulate(sample, self)
+        sample.add_simulation("simulated_frame_profile", retention_profile)
+
+    def rt_to_frame_id(self,  rt_minutes: ArrayLike):
+        rt_minutes = np.asarray(rt_minutes)
+        # first frame is completed not at 0 but at frame_length
+        frame_id = (rt_minutes/self.frame_length*1000*60).astype(np.int64)+1
+        return frame_id
+
+class ChromatographyApexModel(Model):
+    def __init__(self):
+        self._device = None
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> NDArray[np.float64]:
+        pass
+
+class ChromatographyProfileModel(Model):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> List[RTProfile]:
+        pass
+
+class EMGChromatographyProfileModel(ChromatographyProfileModel):
+
+    def __init__(self):
+        super().__init__()
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> List[RTProfile]:
+        mus = sample.peptides["simulated_irt_apex"].values
+        profile_list = []
+
+        for mu in mus:
+            σ = 0.1 # must be sampled
+            λ = 10 # must be sampled
+            K = 1/(σ*λ)
+            μ = device.irt_to_rt(mu)
+            model_params = { "sigma":σ,
+                             "lambda":λ,
+                             "mu":μ,
+                             "name":"EMG"
+                            }
+
+            emg = exponnorm(loc=μ, scale=σ, K = K)
+            # start and end value (in retention times)
+            s_rt, e_rt = emg.ppf([0.01,0.9])
+            # as frames
+            s_frame, e_frame = device.rt_to_frame_id(s_rt), device.rt_to_frame_id(e_rt)
+
+            profile_frames = np.arange(s_frame-1,e_frame+1) # starting with s_frame-1 for cdf interval calculation
+            profile_rt_ends = device.frame_time_end(profile_frames)
+            profile_rt_cdfs = emg.cdf(profile_rt_ends)
+            profile_rel_intensities = np.diff(profile_rt_cdfs)
+
+            profile_list.append(RTProfile(profile_frames[1:],profile_rel_intensities,model_params))
+        return profile_list
+
+
+class NormalChromatographyProfileModel(ChromatographyProfileModel):
+
+    def __init__(self):
+        super().__init__()
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> List[RTProfile]:
+        mus = sample.peptides["simulated_irt_apex"].values
+        sigmas = gamma(a=4.929,scale=1/18.784).rvs(mus.size)/6
+        profile_list = []
+
+        for mu,σ in zip(mus,sigmas):
+
+            μ = device.irt_to_rt(mu)
+            model_params = { "sigma":σ,
+                             "mu":μ,
+                             "name":"Normal"
+                            }
+
+            normal = norm(loc=μ, scale=σ)
+            # start and end value (in retention times)
+            s_rt, e_rt = normal.ppf([0.02,0.98])
+            # as frames
+            s_frame, e_frame = device.rt_to_frame_id(s_rt), device.rt_to_frame_id(e_rt)
+
+            profile_frames = np.arange(s_frame-1,e_frame+1) # starting with s_frame-1 for cdf interval calculation
+            profile_rt_ends = device.frame_time_end(profile_frames)
+            profile_rt_cdfs = normal.cdf(profile_rt_ends)
+            profile_rel_intensities = np.diff(profile_rt_cdfs)
+
+            profile_list.append(RTProfile(profile_frames[1:],profile_rel_intensities,model_params))
+        return profile_list
+
+
+class NeuralChromatographyApex(ChromatographyApexModel):
+
+    def __init__(self, model_path: str, tokenizer_path: str):
+        super().__init__()
+        self.model_path = model_path
+        self.tokenizer = tokenizer_from_json(tokenizer_path)
+
+    def sequences_to_tokens(self, sequences_tokenized: np.array) -> np.array:
+        print('tokenizing sequences...')
+        tokens = np.apply_along_axis(self.tokenizer.texts_to_sequences, 0, sequences_tokenized)
+        tokens_padded = tf.keras.preprocessing.sequence.pad_sequences(tokens, 50, padding='post')
+        return tokens_padded
+
+    @staticmethod
+    def _worker(model_path: str, tokens_padded: np.array, batched: bool = True, bs: int = 2048):
+        pseudo_target = np.expand_dims(np.zeros_like(tokens_padded[:, 0]), axis=1)
+        if batched:
+            dataset = tf.data.Dataset.from_tensor_slices((tokens_padded, pseudo_target)).batch(bs)
+        else:
+            dataset = tf.data.Dataset.from_tensor_slices((tokens_padded, pseudo_target))
+        model = tf.keras.models.load_model(model_path)
+        print('predicting irts...')
+        return_val = model.predict(dataset)
+        return return_val
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) ->  NDArray[np.float64]:
+
+        data = sample.peptides
+        tokens = data["sequence_tokenized"].apply(lambda st: st.sequence_tokenized)
+        print('generating tf dataset...')
+        tokens_padded = self.sequences_to_tokens(tokens)
+
+        with Pool(1) as pool:
+            r = pool.starmap(self._worker, [(self.model_path, tokens_padded)])
+        return r[0]
+
+
+class IonSource(Device):
+    def __init__(self, name:str ="IonizationDevice"):
+        super().__init__(name)
+        self._ionization_model = None
+
+    @property
+    def ionization_model(self):
+        return self._ionization_model
+
+    @ionization_model.setter
+    def ionization_model(self, model: IonizationModel):
+        self._ionization_model = model
+
+    @abstractmethod
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        pass
+
+class ElectroSpray(IonSource):
+    def __init__(self, name:str ="ElectrosprayDevice"):
+        super().__init__(name)
+
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        charge_profiles = self.ionization_model.simulate(sample, self)
+        sample.add_simulation("simulated_charge_profile", charge_profiles)
+
+class IonizationModel(Model):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def simulate(self, sample:ProteomicsExperimentSampleSlice, device: IonSource) -> NDArray:
+        pass
+
+class RandomIonSource(IonizationModel):
+    def __init__(self):
+        super().__init__()
+        self._charge_probabilities =  np.array([0.1, 0.5, 0.3, 0.1])
+        self._allowed_charges =  np.array([1, 2, 3, 4], dtype=np.int8)
+
+    @property
+    def allowed_charges(self):
+        return self._allowed_charges
+
+    @allowed_charges.setter
+    def allowed_charges(self, charges: ArrayLike):
+        self._allowed_charges = np.asarray(charges, dtype=np.int8)
+
+    @property
+    def charge_probabilities(self):
+        return self._charge_probabilities
+
+    @charge_probabilities.setter
+    def charge_probabilities(self, probabilities: ArrayLike):
+        self._charge_probabilities = np.asarray(probabilities)
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonSource) -> List[ChargeProfile]:
+        if self.charge_probabilities.shape != self.allowed_charges.shape:
+            raise ValueError("Number of allowed charges must fit to number of probabilities")
+
+        data = sample.peptides
+        charge = np.random.choice(self.allowed_charges, data.shape[0], p=self.charge_probabilities)
+        rel_intensity = np.ones_like(charge)
+        charge_profiles = []
+        for c,i in zip(charge, rel_intensity):
+            charge_profiles.append(ChargeProfile([c],[i],model_params={"name":"RandomIonSource"}))
+        return charge_profiles
+
+class BinomialIonSource():
+    def __init__(self):
+        super().__init__()
+        self._charged_probability =  0.5
+        self._allowed_charges =  np.array([1, 2, 3, 4], dtype=np.int8)
+
+    @property
+    def allowed_charges(self):
+        return self._allowed_charges
+
+    @allowed_charges.setter
+    def allowed_charges(self, charges: ArrayLike):
+        self._allowed_charges = np.asarray(charges, dtype=np.int8)
+
+    @property
+    def charged_probability(self):
+        return self._charged_probability
+
+    @charged_probability.setter
+    def charged_probability(self, probability: float):
+        self._charged_probability = probability
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonSource) -> List[ChargeProfile]:
+        sequences = sample.peptides.sequence
+        vec_get_num_protonizable_sites = np.vectorize(get_num_protonizable_sites)
+        basic_aa_nums = vec_get_num_protonizable_sites(sequences)
+
+        charge_profiles = []
+        for baa_num in basic_aa_nums:
+            rel_intensities = binom(baa_num,self.charged_probability).pmf(self.allowed_charges)
+            charge_profiles.append(ChargeProfile(self.allowed_charges,rel_intensities,model_params={"name":"BinomialIonSource","basic_aa_num":baa_num}))
+        return charge_profiles
+
+class IonMobilitySeparation(Device):
+    def __init__(self, name:str = "IonMobilityDevice"):
+        super().__init__(name)
+        # models
+        self._apex_model = None
+        self._profile_model = None
+
+        # hardware parameter
+        self._scan_intervall = 1
+        self._scan_time = None
+        self._scan_id_min = None
+        self._scan_id_max = None
+        self._buffer_gas = None
+
+        # converters
+        self._reduced_im_to_scan_converter = None
+        self._scan_to_reduced_im_interval_converter = None
+
+    @property
+    def reduced_im_to_scan_converter(self):
+        return self._reduced_im_to_scan_converter
+
+    @reduced_im_to_scan_converter.setter
+    def reduced_im_to_scan_converter(self, converter:callable):
+        self._reduced_im_to_scan_converter = converter
+
+    @property
+    def scan_to_reduced_im_interval_converter(self):
+        return self._scan_to_reduced_im_interval_converter
+
+    @scan_to_reduced_im_interval_converter.setter
+    def scan_to_reduced_im_interval_converter(self, converter:callable):
+        self._scan_to_reduced_im_interval_converter = converter
+
+    @property
+    def buffer_gas(self):
+        return self._buffer_gas
+
+    @buffer_gas.setter
+    def buffer_gas(self, gas: BufferGas):
+        self._buffer_gas = gas
+
+    @property
+    def scan_intervall(self):
+        return self._scan_intervall
+
+    @scan_intervall.setter
+    def scan_intervall(self, number:int):
+        self._scan_intervall = number
+
+    @property
+    def scan_id_min(self):
+        return self._scan_id_min
+
+    @scan_id_min.setter
+    def scan_id_min(self, number:int):
+        self._scan_id_min = number
+
+    @property
+    def scan_id_max(self):
+        return self._scan_id_max
+
+    @scan_id_max.setter
+    def scan_id_max(self, number:int):
+        self._scan_id_max = number
+
+    @property
+    def scan_time(self):
+        return self._scan_time
+
+    @scan_time.setter
+    def scan_time(self, microseconds:int):
+        self._scan_time = microseconds
+
+    @property
+    def apex_model(self):
+        return self._apex_model
+
+    @apex_model.setter
+    def apex_model(self, model: IonMobilityApexModel):
+        self._apex_model = model
+
+    @property
+    def profile_model(self):
+        return self._profile_model
+
+    @profile_model.setter
+    def profile_model(self, model: IonMobilityProfileModel):
+        self._profile_model = model
+
+    def scan_to_reduced_im_interval(self, scan_id: ArrayLike):
+        return self.scan_to_reduced_im_interval_converter(scan_id)
+
+    def reduced_im_to_scan(self, ion_mobility):
+        return self.reduced_im_to_scan_converter(ion_mobility)
+
+    def scan_im_middle(self, scan_id: ArrayLike):
+        return np.mean(self.scan_to_reduced_im_interval(scan_id), axis = 1)
+
+    def scan_im_lower(self, scan_id: ArrayLike):
+        return self.scan_to_reduced_im_interval(scan_id)[:,0]
+
+    def scan_im_upper(self, scan_id:ArrayLike):
+        return self.scan_to_reduced_im_interval(scan_id)[:,1]
+
+    def im_to_reduced_im(self, ion_mobility: float, p_0: float = STANDARD_PRESSURE, T_0: float = STANDARD_TEMPERATURE):
+        """
+        Calculate reduced ion mobility K_0
+        (normalized to standard pressure p_0
+        and standard temperature T_0), from
+        ion mobility K.
+
+        K_0 = K * p/p_0 * T_0/T
+
+        [1] J. N. Dodds and E. S. Baker,
+        “Ion mobility spectrometry: fundamental concepts,
+        instrumentation, applications, and the road ahead,”
+        Journal of the American Society for Mass Spectrometry,
+        vol. 30, no. 11, pp. 2185–2195, 2019,
+        doi: 10.1007/s13361-019-02288-2.
+
+        :param ion_mobility: Ion mobility K to
+            normalize to standard conditions
+        :param p_0: Standard pressure (Pa).
+        :param T_0: Standard temperature (K).
+        """
+        T = self.temperature
+        p = self.pressure
+        return ion_mobility*p/p_0*T_0/T
+
+    def reduced_im_to_im(self, reduced_ion_mobility: float, p_0: float = STANDARD_PRESSURE, T_0: float = STANDARD_TEMPERATURE):
+        """
+        Inverse of `.im_to_reduced_im()`
+        """
+        T = self.temperature
+        p = self.pressure
+        return reduced_ion_mobility*p_0/p*T/T_0
+
+    def ccs_to_reduced_im(self, ccs:float, mz:float, charge:int):
+        # TODO Citation
+        """
+        Conversion of collision cross-section values (ccs)
+        to reduced ion mobility according to
+        Mason-Schamp equation.
+
+        :param ccs: collision cross-section (ccs)
+        :type ccs: float
+        :param mz: Mass (Da) to charge ratio of peptide
+        :type mz: float
+        :param charge: Charge of peptide
+        :type charge: int
+        :return: Reduced ion mobility
+        :rtype: float
+        """
+
+        T = self.temperature
+        mass = mz*charge
+        μ = self.buffer_gas.mass*mass/(self.buffer_gas.mass+mass)
+        z = charge
+
+        K0 = CCS_K0_CONVERSION_CONSTANT*z*1/(np.sqrt(μ*T)*ccs)
+        return K0
+
+    def reduced_im_to_ccs(self, reduced_ion_mobility:float, mz:float, charge:int):
+        """
+        Conversion of reduced ion mobility
+        to collision cross-section values (ccs)
+        according to Mason-Schamp equation.
+
+        :param reduced_ion_mobility: reduced ion mobility K0
+        :type reduced_ion_mobility: float
+        :param mz: Mass (Da) to charge ratio of peptide
+        :type mz: float
+        :param charge: Charge of peptide
+        :type charge: int
+        :return: Collision cross-section (ccs)
+        :rtype: float
+        """
+
+        T = self.temperature
+        mass = mz*charge
+        μ = self.buffer_gas.mass*mass/(self.buffer_gas.mass+mass)
+        z = charge
+        K0 = reduced_ion_mobility
+
+        ccs = CCS_K0_CONVERSION_CONSTANT*z*1/(np.sqrt(μ*T)*K0)
+        return ccs
+
+    @abstractmethod
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        pass
+
+
+
+class TrappedIon(IonMobilitySeparation):
+
+    def __init__(self, name:str = "TrappedIonMobilitySeparation"):
+        super().__init__()
+        self._scan_id_min = 1
+        self._scan_id_max = 918
+
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        # scan apex simulation
+        one_over_k0, scan_apex = self._apex_model.simulate(sample, self)
+        # in irt and rt
+        sample.add_simulation("simulated_scan_apex", scan_apex)
+        sample.add_simulation("simulated_k0", one_over_k0)
+        # scan profile simulation
+        scan_profile = self._profile_model.simulate(sample, self)
+        sample.add_simulation("simulated_scan_profile", scan_profile)
+
+
+class IonMobilityApexModel(Model):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonMobilitySeparation) -> NDArray[np.float64]:
+        return super().simulate(sample, device)
+
+class IonMobilityProfileModel(Model):
+    def __init__(self):
+        pass
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonMobilitySeparation) -> NDArray:
+        return super().simulate(sample, device)
+
+class NeuralIonMobilityApex(IonMobilityApexModel):
+
+    def __init__(self, model_path: str, tokenizer_path: str):
+        super().__init__()
+        self.model_path = model_path
+        self.tokenizer = tokenizer_from_json(tokenizer_path)
+
+    def sequences_to_tokens(self, sequences_tokenized: np.array) -> np.array:
+        print('tokenizing sequences...')
+        tokens = np.apply_along_axis(self.tokenizer.texts_to_sequences, 0, sequences_tokenized)
+        tokens_padded = tf.keras.preprocessing.sequence.pad_sequences(tokens, 50, padding='post')
+        return tokens_padded
+
+    @staticmethod
+    def _worker(model_path: str, tokens_padded: np.array, mz: np.array, charges: np.array, batched: bool = True, bs: int = 2048):
+
+        mz = np.expand_dims(mz, 1)
+        c = tf.one_hot(charges - 1, depth=4)
+        pseudo_target = np.expand_dims(np.zeros_like(tokens_padded[:, 0]), axis=1)
+        if batched:
+            dataset = tf.data.Dataset.from_tensor_slices(((mz, c, tokens_padded), pseudo_target)).batch(bs)
+        else:
+            dataset = tf.data.Dataset.from_tensor_slices(((mz, c, tokens_padded), pseudo_target))
+
+        print('predicting mobilities...')
+        model = tf.keras.models.load_model(model_path)
+        ccs, _ = model.predict(dataset)
+        return ccs
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonMobilitySeparation) -> Tuple[NDArray]:
+        data = sample.ions.merge(sample.peptides.loc[:,["pep_id","sequence_tokenized"]],on="pep_id",validate="many_to_one")
+        tokens = data.sequence_tokenized.apply(lambda st: st.sequence_tokenized)
+        tokens_padded = self.sequences_to_tokens(tokens)
+        mz = data['mz'].values
+        charges = data["charge"].values
+        with Pool(1) as pool:
+            r = pool.starmap(self._worker, [(self.model_path, tokens_padded, mz, charges)])
+            ccs = r[0]
+        K0s = device.ccs_to_reduced_im(np.squeeze(ccs), mz, data['charge'].values)
+        scans = device.reduced_im_to_scan(K0s)
+        return K0s,scans
+
+class NormalIonMobilityProfileModel(IonMobilityProfileModel):
+    def __init__(self):
+        super().__init__()
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonMobilitySeparation) -> List[ScanProfile]:
+        mus = 1/sample.ions["simulated_k0"].values #1 over k0
+        sigmas = gamma(a=3.703,scale=1/79.614).rvs(mus.size)/6
+        profile_list = []
+        for μ,σ in zip(mus,sigmas):
+
+            model_params = { "sigma":σ,
+                             "mu":μ,
+                             "name":"NORMAL"
+                            }
+
+            normal = norm(loc=μ, scale=σ)
+            # start and end value (k0)
+            s_one_over_k0, e_one_over_k0 = normal.ppf([0.01,0.99])
+            # as scan ids, remember first scans elutes largest ions
+            e_scan, s_scan = device.reduced_im_to_scan(1/s_one_over_k0), device.reduced_im_to_scan(1/e_one_over_k0)
+
+            profile_scans = np.arange(s_scan-1,e_scan+1) # starting s_scan-1 is necessary here to include its end value for cdf interval
+            profile_end_im = 1/device.scan_im_upper(profile_scans)
+            profile_end_cdfs = 1-normal.cdf(profile_end_im)
+            profile_rel_intensities = np.diff(profile_end_cdfs)
+
+            profile_list.append(ScanProfile(profile_scans[1:],profile_rel_intensities,model_params))
+        return profile_list
+
+
+class MzSeparation(Device):
+    def __init__(self, name:str = "MassSpectrometer"):
+        super().__init__(name)
+        self._model = None
+        self._resolution = 3
+
+    @property
+    def resolution(self):
+        return self._resolution
+
+    @resolution.setter
+    def resolution(self, res: int):
+        self._resolution = res
+
+    @property
+    def model(self):
+        return self._model
+
+    @model.setter
+    def model(self, model: MzSeparationModel):
+        self._model = model
+
+    @abstractmethod
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        pass
+
+class TOF(MzSeparation):
+    def __init__(self, name:str = "TimeOfFlightMassSpectrometer"):
+        super().__init__(name)
+
+    def run(self, sample: ProteomicsExperimentSampleSlice):
+        spectra = self.model.simulate(sample, self)
+        sample.add_simulation("simulated_mz_spectrum", spectra)
+
+class MzSeparationModel(Model):
+    def __init__(self):
+        self._default_abundance = 1e4
+
+    @property
+    def default_abundance(self):
+        return self._default_abundance
+
+    @default_abundance.setter
+    def default_abundance(self, abundance:int):
+        self._default_abundance = abundance
+
+    @abstractmethod
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: MzSeparation):
+        return super().simulate(sample, device)
+
+class AveragineModel(MzSeparationModel):
+    def __init__(self):
+        super().__init__()
+
+    def simulate(self, sample: ProteomicsExperimentSampleSlice, device: MzSeparation):
+
+        avg = AveragineGenerator()
+        # right join here to keep order of keys in right df
+        joined_df = pd.merge(sample.peptides[["pep_id","mass_theoretical"]],
+                        sample.ions[["pep_id","charge"]],
+                        how = "right",
+                        on = "pep_id")
+        masses = joined_df["mass_theoretical"].values
+        charges = joined_df["charge"].values
+        spectra = []
+        for (m, c) in zip(masses, charges):
+            spectrum = avg.generate_spectrum(m,c,-1,-1,amp = self.default_abundance, centroided=False)
+            spectra.append(spectrum)
+        return spectra
\ No newline at end of file
diff --git a/imspy/imspy/slice.py b/imspy/imspy/slice.py
index da161409..056febaa 100644
--- a/imspy/imspy/slice.py
+++ b/imspy/imspy/slice.py
@@ -7,9 +7,15 @@
 
 from imspy.utilities import re_index_indices
 
+<<<<<<<< HEAD:imspy/imspy/slice.py
 import imspy_connector as pims
 from imspy.frame import TimsFrame, TimsFrameVectorized
 from imspy.spectrum import MzSpectrum
+========
+import pyims_connector as pims
+from pyims.data.frame import TimsFrame, TimsFrameVectorized
+from pyims.data.spectrum import MzSpectrum
+>>>>>>>> 1cd02c1 (transfer simulation framework from proteolizard):imspy/imspy/data/slice.py
 
 
 class TimsSlice:
diff --git a/imspy/imspy/utility.py b/imspy/imspy/utility.py
new file mode 100644
index 00000000..001c0246
--- /dev/null
+++ b/imspy/imspy/utility.py
@@ -0,0 +1,508 @@
+import io
+import json
+from typing import List, Optional
+import tensorflow as tf
+import numpy as np
+from numpy.typing import ArrayLike
+
+import numba
+import math
+import pandas as pd
+
+from tqdm import tqdm
+from pyims.data import MzSpectrum
+# TODO bring to pyims
+#from proteolizardalgo.hashing import ReferencePattern
+
+
+class TokenSequence:
+
+    def __init__(self, sequence_tokenized: Optional[List[str]] = None, jsons:Optional[str] = None):
+        if jsons is not None:
+            self.sequence_tokenized = self._from_jsons(jsons)
+            self._jsons = jsons
+        else :
+            self.sequence_tokenized = sequence_tokenized
+            self._jsons = self._to_jsons()
+
+    def _to_jsons(self):
+        json_dict = self.sequence_tokenized
+        return json.dumps(json_dict)
+
+    def _from_jsons(self, jsons:str):
+        return json.loads(jsons)
+
+    @property
+    def jsons(self):
+        return self._jsons
+
+def proteome_from_fasta(path: str) -> pd.DataFrame:
+    """
+    Read a fasta file and return a dataframe with the protein name and sequence.
+    :param path: path to the fasta file
+    :return: a dataframe with the protein name and sequence
+    """
+    d = {}
+    with open(path) as infile:
+        gene = ''
+        header = ''
+        for i, line in enumerate(infile):
+            if line.find('>') == -1:
+                gene += line
+            elif line.find('>') != -1 and i > 0:
+                header = line
+                d[header.replace('\n', '')[4:]] = gene.replace('\n', '')
+                gene = ''
+            elif i == 0:
+                header = line
+
+    row_list = []
+
+    for key, value in d.items():
+        split_index = key.find(' ')
+        gene_id = key[:split_index].split('|')[0]
+        rest = key[split_index:]
+        row = {'id': gene_id, 'meta_data': rest, 'sequence': value}
+        row_list.append(row)
+
+    return pd.DataFrame(row_list)
+
+
+def peak_width_preserving_mz_transform(
+        mz: np.array,
+        M0: float = 500,
+        resolution: float = 50_000):
+    """
+    Transform values into an index that fixes the width of the peak at half full width.
+    Arguments:
+        mz (np.array): An array of mass to charge ratios to transform.
+        M0 (float): The m/z value at which TOFs resolution is reported.
+        resolution (float): The resolution of the TOF instrument.
+    """
+    return (np.log(mz) - np.log(M0)) / np.log1p(1 / resolution)
+
+
+def bins_to_mz(mz_bin, win_len):
+    return np.abs(mz_bin) * win_len + (int(mz_bin < 0) * (0.5 * win_len))
+
+
+def get_ref_pattern_as_spectra(ref_patterns):
+    """
+    Get the reference patterns as a list of spectra.
+    :param ref_patterns: the reference patterns
+    :return: a list of spectra
+    """
+    spec_list = []
+
+    for index, row in ref_patterns.iterrows():
+
+        if 150 <= (row['m'] + 1) / row['z'] <= 1700:
+            last_contrib = row['last_contrib']
+
+            pattern = row['pattern'][:1000]
+
+            mz_bin = np.arange(1000)
+
+            both = list(zip(mz_bin, pattern))
+
+            both = list(filter(lambda x: x[1] >= 0.001, both))
+
+            mz = [x[0] for x in both]
+            i = [x[1] for x in both]
+
+            first_peak = np.where(np.diff(mz) > 1)[0][0]
+            max_i = np.argmax(i[:first_peak])
+            mono_mz_max = mz[max_i]
+
+            mz = (mz - mono_mz_max) / 100 + row['m'] / row['z']
+
+            mz = np.round(np.array(mz), 2) + 1.0
+            i = np.array([int(x) for x in i])
+
+            spectum = MzSpectrum(None, -1, -1, mz, i)
+            spec_list.append((row['m'], row['z'], spectum, last_contrib))
+
+    return spec_list
+
+
+def get_refspec_list(ref_patterns, win_len=5):
+    """
+
+    """
+
+    d_list = []
+
+    for spec in tqdm(ref_patterns, desc='creating reference patterns', ncols=100):
+        m = spec[0]
+        z = spec[1]
+        sp = spec[2]
+        lc = spec[3]
+        peak_width = 0.04
+        mz_mono = np.round((m + 1.001) / z, 2) + peak_width
+        mz_bin = np.floor(mz_mono / win_len)
+        ref_spec = ReferencePattern(mz_bin=mz_bin, mz_mono=mz_mono, charge=z, mz_last_contrib=lc,
+                                    spectrum=sp, window_length=win_len)
+
+        d_list.append((mz_bin, ref_spec))
+
+    return d_list
+
+
+def create_reference_dict(D):
+    # create members
+    keys = set([t[0] for t in D])
+    ref_d = dict([(k, []) for k in keys])
+
+    for b, p in D:
+        ref_d[b].append(p)
+
+    tmp_dict = {}
+    # go over all keys
+    for key, values in ref_d.items():
+
+        # create charges
+        r = np.arange(5) + 1
+
+        # go over all charge states
+        for c in r:
+
+            # go over all reference pattern
+            for ca in values:
+
+                # append exactly one charge state per reference bin
+                if c == ca.charge:
+                    # if key already exists, append
+                    if key in tmp_dict:
+                        tmp_dict[key].append(ca)
+                        break
+
+                    # if key does not exist, create new list
+                    else:
+                        tmp_dict[key] = [ca]
+                        break
+
+    return tmp_dict
+
+@numba.jit(nopython=True)
+def normal_pdf(x: ArrayLike, mass: float, s: float = 0.001, inv_sqrt_2pi: float = 0.3989422804014327, normalize: bool = False):
+    """
+    :param inv_sqrt_2pi:
+    :param x:
+    :param mass:
+    :param s:
+    :return:
+    """
+    a = (x - mass) / s
+    if normalize:
+        return np.exp(-0.5 * np.power(a,2))
+    else:
+        return inv_sqrt_2pi / s * np.exp(-0.5 * np.power(a,2))
+
+
+@numba.jit
+def gaussian(x, μ=0, σ=1):
+    """
+    Gaussian function
+    :param x:
+    :param μ:
+    :param σ:
+    :return:
+    """
+    A = 1 / np.sqrt(2 * np.pi * np.power(σ, 2))
+    B = np.exp(- (np.power(x - μ, 2) / 2 * np.power(σ, 2)))
+
+    return A * B
+
+
+@numba.jit
+def exp_distribution(x, λ=1):
+    """
+    Exponential function
+    :param x:
+    :param λ:
+    :return:
+    """
+    if x > 0:
+        return λ * np.exp(-λ * x)
+    return 0
+
+
+@numba.jit
+def exp_gaussian(x, μ=-3, σ=1, λ=.25):
+    """
+    laplacian distribution with exponential decay
+    :param x:
+    :param μ:
+    :param σ:
+    :param λ:
+    :return:
+    """
+    A = λ / 2 * np.exp(λ / 2 * (2 * μ + λ * np.power(σ, 2) - 2 * x))
+    B = math.erfc((μ + λ * np.power(σ, 2) - x) / (np.sqrt(2) * σ))
+    return A * B
+
+
+class NormalDistribution:
+    def __init__(self, μ: float, σ: float):
+        self.μ = μ
+        self.σ = σ
+
+    def __call__(self, x):
+        return gaussian(x, self.μ, self.σ)
+
+
+class ExponentialGaussianDistribution:
+    def __init__(self, μ: float = -3, σ: float = 1, λ: float = .25):
+        self.μ = μ
+        self.σ = σ
+        self.λ = λ
+
+    def __call__(self, x):
+        return exp_gaussian(x, self.μ, self.σ, self.λ)
+
+
+def preprocess_max_quant_evidence(exp: pd.DataFrame) -> pd.DataFrame:
+    """
+    select columns from evidence txt, rename to ionmob naming convention and transform to raw data rt in seconds
+    Args:
+        exp: a MaxQuant evidence dataframe from evidence.txt table
+    Returns: cleaned evidence dataframe, columns renamed to ionmob naming convention
+    """
+
+    # select columns
+    exp = exp[['m/z', 'Mass', 'Charge', 'Modified sequence', 'Retention time',
+               'Retention length', 'Ion mobility index', 'Ion mobility length', '1/K0', '1/K0 length',
+               'Number of isotopic peaks', 'Max intensity m/z 0', 'Intensity', 'Raw file', 'CCS']].rename(
+        # rename columns to ionmob naming convention
+        columns={'m/z': 'mz', 'Mass': 'mass',
+                 'Charge': 'charge', 'Modified sequence': 'sequence', 'Retention time': 'rt',
+                 'Retention length': 'rt_length', 'Ion mobility index': 'im', 'Ion mobility length': 'im_length',
+                 '1/K0': 'inv_ion_mob', '1/K0 length': 'inv_ion_mob_length', 'CCS': 'ccs',
+                 'Number of isotopic peaks': 'num_peaks', 'Max intensity m/z 0': 'mz_max_intensity',
+                 'Intensity': 'intensity', 'Raw file': 'raw'}).dropna()
+
+    # transform retention time from minutes to seconds as stored in tdf raw data
+    exp['rt'] = exp.apply(lambda r: r['rt'] * 60, axis=1)
+    exp['rt_length'] = exp.apply(lambda r: r['rt_length'] * 60, axis=1)
+    exp['rt_start'] = exp.apply(lambda r: r['rt'] - r['rt_length'] / 2, axis=1)
+    exp['rt_stop'] = exp.apply(lambda r: r['rt'] + r['rt_length'] / 2, axis=1)
+
+    exp['im_start'] = exp.apply(lambda r: int(np.round(r['im'] - r['im_length'] / 2)), axis=1)
+    exp['im_stop'] = exp.apply(lambda r: int(np.round(r['im'] + r['im_length'] / 2)), axis=1)
+
+    exp['inv_ion_mob_start'] = exp.apply(lambda r: r['inv_ion_mob'] - r['inv_ion_mob_length'] / 2, axis=1)
+    exp['inv_ion_mob_stop'] = exp.apply(lambda r: r['inv_ion_mob'] + r['inv_ion_mob_length'] / 2, axis=1)
+
+    # remove duplicates
+    exp = exp.drop_duplicates(['sequence', 'charge', 'rt', 'ccs'])
+
+    return exp
+
+def max_quant_to_proforma(s:str, old_annotation:bool=False):
+    """
+    Convert MaxQuant amino acid sequences (with PTM) to
+    proForma format.
+
+    :param s: amino acid sequence
+    :type s: str
+    :param old_annotation: Wether old annotation is used in `s`, defaults to False
+    :type old_annotation: bool, optional
+    """
+    seq = s.strip("_")
+
+    proforma_seq = ""
+    if old_annotation:
+        proforma_seq = (seq
+                        .replace("(ox)", "[UNIMOD:35]")
+                        .replace("(ac)", "[UNIMOD:1]")
+        )
+    else:
+        proforma_seq = (seq
+                        .replace("(Oxidation (M))", "[UNIMOD:35]")
+                        .replace("(Phospho (STY))", "[UNIMOD:21]")
+                        .replace("(Acetyl (Protein N-term))", "[UNIMOD:1]")
+                        )
+    return f"_{proforma_seq}_"
+
+def preprocess_max_quant_sequence(s, old_annotation=False):
+    """
+    :param s:
+    :param old_annotation:
+    """
+
+    seq = s[1:-1]
+
+    is_acc = False
+
+    if old_annotation:
+        seq = seq.replace('(ox)', '$')
+
+        if seq.find('(ac)') != -1:
+            is_acc = True
+            seq = seq.replace('(ac)', '')
+
+    else:
+        seq = seq.replace('(Oxidation (M))', '$')
+        seq = seq.replace('(Phospho (STY))', '&')
+
+        if seq.find('(Acetyl (Protein N-term))') != -1:
+            is_acc = True
+            seq = seq.replace('(Acetyl (Protein N-term))', '')
+
+    # form list from string
+    slist = list(seq)
+
+    tmp_list = []
+
+    for item in slist:
+        if item == '$':
+            tmp_list.append('<OX>')
+
+        elif item == '&':
+            tmp_list.append('<PH>')
+
+        else:
+            tmp_list.append(item)
+
+    slist = tmp_list
+
+    r_list = []
+
+    for i, char in enumerate(slist):
+
+        if char == '<OX>':
+            C = slist[i - 1]
+            C = C + '-<OX>'
+            r_list = r_list[:-1]
+            r_list.append(C)
+
+        elif char == 'C':
+            r_list.append('C-<CM>')
+
+        elif char == '<PH>':
+            C = slist[i - 1]
+            C = C + '-<PH>'
+            r_list = r_list[:-1]
+            r_list.append(C)
+
+        else:
+            r_list.append(char)
+
+    if is_acc:
+        return ['<START>-<AC>'] + r_list + ['<END>']
+
+    return ['<START>'] + r_list + ['<END>']
+
+def is_unimod_start(char:str):
+    """
+    Tests if char is start of unimod
+    bracket
+
+    :param char: Character of a proForma formatted aa sequence
+    :type char: str
+    :return: Wether char is start of unimod bracket
+    :rtype: bool
+    """
+    if char in ["(","[","{"]:
+        return True
+    else:
+        return False
+
+def is_unimod_end(char:str):
+    """
+    Tests if char is end of unimod
+    bracket
+
+    :param char: Character of a proForma formatted aa sequence
+    :type char: str
+    :return: Wether char is end of unimod bracket
+    :rtype: bool
+    """
+    if char in [")","]","}"]:
+        return True
+    else:
+        return False
+
+def tokenize_proforma_sequence(sequence: str):
+    """
+    Tokenize a ProForma formatted sequence string.
+
+    :param sequence: Sequence string (ProForma formatted)
+    :type sequence: str
+    :return: List of tokens
+    :rtype: List
+    """
+    sequence = sequence.upper().replace("(","[").replace(")","]")
+    token_list = ["<START>"]
+    in_unimod_bracket = False
+    tmp_token = ""
+
+    for aa in sequence:
+        if is_unimod_start(aa):
+            in_unimod_bracket = True
+        if in_unimod_bracket:
+            if is_unimod_end(aa):
+                in_unimod_bracket = False
+            tmp_token += aa
+            continue
+        if tmp_token != "":
+            token_list.append(tmp_token)
+            tmp_token = ""
+        tmp_token += aa
+
+    if tmp_token != "":
+        token_list.append(tmp_token)
+
+    if len(token_list) > 1:
+        if token_list[1].find("UNIMOD:1") != -1:
+            token_list[1] = "<START>"+token_list[1]
+            token_list = token_list[1:]
+    token_list.append("<END>")
+
+    return token_list
+
+def get_aa_num_proforma_sequence(sequence:str):
+    """
+    get number of amino acids in sequence
+
+    :param sequence: proforma formatted aa sequence
+    :type sequence: str
+    :return: Number of amino acids
+    :rtype: int
+    """
+    num_aa = 0
+    inside_bracket = False
+
+    for aa in sequence:
+        if is_unimod_start(aa):
+            inside_bracket = True
+        if inside_bracket:
+            if is_unimod_end(aa):
+                inside_bracket = False
+            continue
+        num_aa += 1
+    return num_aa
+
+
+
+def tokenizer_to_json(tokenizer: tf.keras.preprocessing.text.Tokenizer, path: str):
+    """
+    save a fit keras tokenizer to json for later use
+    :param tokenizer: fit keras tokenizer to save
+    :param path: path to save json to
+    """
+    tokenizer_json = tokenizer.to_json()
+    with io.open(path, 'w', encoding='utf-8') as f:
+        f.write(json.dumps(tokenizer_json, ensure_ascii=False))
+
+
+def tokenizer_from_json(path: str):
+    """
+    load a pre-fit tokenizer from a json file
+    :param path: path to tokenizer as json file
+    :return: a keras tokenizer loaded from json
+    """
+    with open(path) as f:
+        data = json.load(f)
+    return tf.keras.preprocessing.text.tokenizer_from_json(data)
+
diff --git a/pyims/examples/simulation/run_example_simulation.py b/pyims/examples/simulation/run_example_simulation.py
new file mode 100644
index 00000000..03f01c71
--- /dev/null
+++ b/pyims/examples/simulation/run_example_simulation.py
@@ -0,0 +1,104 @@
+from pyims.simulation.experiment import LcImsMsMs
+from pyims.simulation.hardware_models import (NeuralChromatographyApex, 
+                                              NormalChromatographyProfileModel, 
+                                              LiquidChromatography, 
+                                              ElectroSpray,
+                                              TrappedIon,
+                                              TOF,
+                                              NeuralIonMobilityApex, 
+                                              NormalIonMobilityProfileModel,
+                                              AveragineModel,
+                                              BinomialIonSource
+                                                )
+from pyims.proteome import ProteinSample, Trypsin, ORGANISM
+from pyims.chemistry import BufferGas
+
+import pandas as pd
+import numpy as np
+
+def irt_to_rt(irt):
+    return irt
+
+def scan_im_interval(scan_id):
+    intercept = 1451.357
+    slope = -877.361
+    scan_id = np.atleast_1d(scan_id)
+    lower = ( scan_id    - intercept ) / slope
+    upper = ((scan_id+1) - intercept ) / slope
+    return np.stack([1/lower, 1/upper], axis=1)
+
+def im_to_scan(reduced_ion_mobility):
+    intercept = 1451.357
+    slope = -877.361
+    # TODO more appropriate function here ?
+    one_over_k0 = 1/reduced_ion_mobility
+    return np.round(one_over_k0 * slope + intercept).astype(np.int16)
+
+def build_experiment():
+    t = LcImsMsMs("./timstofexp1_binomial_ion_source_21_7/") # maybe rather call this class LCIMSMSExperiment
+
+    lc = LiquidChromatography()
+    lc.frame_length = 1200 #ms
+    lc.gradient_length = 120 # min
+    esi = ElectroSpray()
+    tims = TrappedIon()
+    tims.scan_time = 110 # us
+    tof_mz = TOF()
+
+    t.lc_method = lc
+    t.ionization_method = esi
+    t.ion_mobility_separation_method = tims
+    t.mz_separation_method = tof_mz
+
+    N2 = BufferGas("N2")
+
+    tokenizer_path = '/home/tim/Workspaces/ionmob/pretrained-models/tokenizers/tokenizer.json'
+
+    rt_model_weights = "/home/tim/Workspaces/Resources/models/DeepChromatograpy/"
+    t.lc_method.apex_model = NeuralChromatographyApex(rt_model_weights,tokenizer_path = tokenizer_path)
+
+    t.lc_method.profile_model = NormalChromatographyProfileModel()
+    t.lc_method.irt_to_rt_converter = irt_to_rt
+
+
+
+
+
+    im_model_weights = "/home/tim/Workspaces/ionmob/pretrained-models/GRUPredictor"
+    t.ion_mobility_separation_method.apex_model = NeuralIonMobilityApex(im_model_weights, tokenizer_path = tokenizer_path)
+
+    t.ion_mobility_separation_method.profile_model = NormalIonMobilityProfileModel()
+    t.ion_mobility_separation_method.buffer_gas = N2
+    t.ion_mobility_separation_method.scan_to_reduced_im_interval_converter = scan_im_interval
+    t.ion_mobility_separation_method.reduced_im_to_scan_converter = im_to_scan
+
+    t.ionization_method.ionization_model = BinomialIonSource()
+
+
+    t.mz_separation_method.model = AveragineModel()
+
+
+    rng = np.random.default_rng(2023)
+    # read proteome
+    proteome = pd.read_feather('/home/tim/Workspaces/Resources/Homo-sapiens-proteome.feather')
+    random_abundances = rng.integers(1e3,1e7,size=proteome.shape[0])
+    proteome = proteome.assign(abundancy= random_abundances)
+    # create sample and sample digest; TODO: add missed cleavages to ENZYMEs
+    sample = ProteinSample(proteome, ORGANISM.HOMO_SAPIENS)
+    sample_digest = sample.digest(Trypsin())
+
+
+    # to reduce computational load in example
+    sample_digest.data = sample_digest.data.sample(100000, random_state= rng)
+
+
+    t.load_sample(sample_digest)
+    return t
+
+if __name__ == "__main__":
+
+
+    t = build_experiment()
+
+    #cProfile.run("t.run(10000)", filename="profiler_10000_8_process",sort="cumtime")
+    t.run(100,frames_per_assemble_process=10)
\ No newline at end of file
diff --git a/pyims/pyims/chemistry.py b/pyims/pyims/chemistry.py
new file mode 100644
index 00000000..b5b7d6b3
--- /dev/null
+++ b/pyims/pyims/chemistry.py
@@ -0,0 +1,213 @@
+import numpy as np
+import mendeleev as me
+
+AMINO_ACIDS = {'Lysine': 'K', 'Alanine': 'A', 'Glycine': 'G', 'Valine': 'V', 'Tyrosine': 'Y',
+               'Arginine': 'R', 'Glutamic Acid': 'E', 'Phenylalanine': 'F', 'Tryptophan': 'W',
+               'Leucine': 'L', 'Threonine': 'T', 'Cysteine': 'C', 'Serine': 'S', 'Glutamine': 'Q',
+               'Methionine': 'M', 'Isoleucine': 'I', 'Asparagine': 'N', 'Proline': 'P', 'Histidine': 'H',
+               'Aspartic Acid': 'D'}
+
+AA_MASSES = {'A': 71.03711, 'C': 103.00919, 'D': 115.02694, 'E': 129.04259, 'F': 147.06841, 'G': 57.02146,
+             'H': 137.05891, 'I': 113.08406, 'K': 128.09496, 'L': 113.08406, 'M': 131.04049, 'N': 114.04293,
+             'P': 97.05276, 'Q': 128.05858, 'R': 156.10111, 'S': 87.03203, 'T': 101.04768, 'V': 99.06841,
+             'W': 186.07931, 'Y': 163.06333, '[UNIMOD:1]': 42.010565, '[UNIMOD:35]': 15.994915, 'U': 168.964203,
+             '[UNIMOD:4]': 57.021464, '[UNIMOD:21]': 79.966331, '[UNIMOD:312]': 119.004099, '<START>': 0.0, '<END>': 0.0}
+
+VARIANT_DICT = {'L': ['L'], 'E': ['E'], 'S': ['S', 'S[UNIMOD:21]'], 'A': ['A'], 'V': ['V'], 'D': ['D'], 'G': ['G'],
+                '<END>': ['<END>'], 'P': ['P'], '<START>': ['<START>', '<START>[UNIMOD:1]'], 'T': ['T', 'T[UNIMOD:21]'],
+                'I': ['I'], 'Q': ['Q'], 'K': ['K', 'K[UNIMOD:1]'], 'N': ['N'], 'R': ['R'], 'F': ['F'], 'H': ['H'],
+                'Y': ['Y', 'Y[UNIMOD:21]'], 'M': ['M', 'M[UNIMOD:35]'],
+                'W': ['W'], 'C': ['C', 'C[UNIMOD:312]', 'C[UNIMOD:4]'], 'C[UNIMOD:4]': ['C', 'C[UNIMOD:312]', 'C[UNIMOD:4]']}
+
+MASS_PROTON = 1.007276466583
+
+MASS_WATER = 18.010564684
+# IUPAC standard in Kelvin
+STANDARD_TEMPERATURE = 273.15
+# IUPAC standard in Pa
+STANDARD_PRESSURE = 1e5
+# IUPAC elementary charge
+ELEMENTARY_CHARGE = 1.602176634e-19
+# IUPAC BOLTZMANN'S CONSTANT
+K_BOLTZMANN = 1.380649e-23
+# constant part of Mason-Schamp equation
+# 3/16*sqrt(2π/kb)*e/N0 *
+# 1e20 (correction for using A² instead of m²) *
+# 1/sqrt(1.660 5402(10)×10−27 kg) (correction for using Da instead of kg) *
+# 10000 * (to get cm²/Vs from m²/Vs)
+# TODO CITATION
+CCS_K0_CONVERSION_CONSTANT = 18509.8632163405
+
+def get_monoisotopic_token_weight(token:str):
+    """
+    Gets monoisotopic weight of token
+
+    :param token: Token of aa sequence e.g. "<START>[UNIMOD:1]"
+    :type token: str
+    :return: Weight in Dalton.
+    :rtype: float
+    """
+    splits = token.split("[")
+    for i in range(1, len(splits)):
+        splits[i] = "["+splits[i]
+
+    mass = 0
+    for split in splits:
+        mass += AA_MASSES[split]
+    return mass
+
+
+def get_mono_isotopic_weight(sequence_tokenized: list[str]) -> float:
+    mass = 0
+    for token in sequence_tokenized:
+        mass += get_monoisotopic_token_weight(token)
+    return mass + MASS_WATER
+
+
+def get_mass_over_charge(mass: float, charge: int) -> float:
+    return (mass / charge) + MASS_PROTON
+
+def get_num_protonizable_sites(sequence: str) -> int:
+    """
+    Gets number of sites that can be protonized.
+    This function does not yet account for PTMs.
+
+    :param sequence: Amino acid sequence
+    :type sequence: str
+    :return: Number of protonizable sites
+    :rtype: int
+    """
+    sites = 1 # n-terminus
+    for s in sequence:
+        if s in ["H","R","K"]:
+            sites += 1
+    return sites
+
+
+def reduced_mobility_to_ccs(one_over_k0, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15):
+    """
+    convert reduced ion mobility (1/k0) to CCS
+    :param one_over_k0: reduced ion mobility
+    :param charge: charge state of the ion
+    :param mz: mass-over-charge of the ion
+    :param mass_gas: mass of drift gas
+    :param temp: temperature of the drift gas in C°
+    :param t_diff: factor to translate from C° to K
+    """
+    reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas)
+    return (CCS_K0_CONVERSION_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / one_over_k0)
+
+
+def ccs_to_one_over_reduced_mobility(ccs, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15):
+    """
+    convert CCS to 1 over reduced ion mobility (1/k0)
+    :param ccs: collision cross-section
+    :param charge: charge state of the ion
+    :param mz: mass-over-charge of the ion
+    :param mass_gas: mass of drift gas (N2)
+    :param temp: temperature of the drift gas in C°
+    :param t_diff: factor to translate from C° to K
+    """
+    reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas)
+    return  ((np.sqrt(reduced_mass * (temp + t_diff))) * ccs) / (CCS_K0_CONVERSION_CONSTANT * charge)
+
+
+class ChemicalCompound:
+
+    def _calculate_molecular_mass(self):
+        mass = 0
+        for (atom, abundance) in self.element_composition.items():
+            mass += me.element(atom).atomic_weight * abundance
+        return mass
+
+    def __init__(self, formula):
+        self.element_composition = self.get_composition(formula)
+        self.mass = self._calculate_molecular_mass()
+
+    def get_composition(self, formula:str):
+        """
+        Parse chemical formula into Dict[str:int] with
+        atoms as keys and the respective counts as values.
+
+        :param formula: Chemical formula of compound e.g. 'C6H12O6'
+        :type formula: str
+        :return: Dictionary Atom: Count
+        :rtype: Dict[str:int]
+        """
+        if formula.startswith("("):
+            assert formula.endswith(")")
+            formula = formula[1:-1]
+
+        tmp_group = ""
+        tmp_group_count = ""
+        depth = 0
+        comp_list = []
+        comp_counts = []
+
+        # extract components: everything in brackets and atoms
+        # extract component counts: number behind component or 1
+        for (i,e) in enumerate(formula):
+            if e == "(":
+                depth += 1
+                if depth == 1:
+                    if tmp_group != "":
+                        comp_list.append(tmp_group)
+                        tmp_group = ""
+                        if tmp_group_count == "":
+                            comp_counts.append(1)
+                        else:
+                            comp_counts.append(int(tmp_group_count))
+                            tmp_group_count = ""
+                tmp_group += e
+                continue
+            if e == ")":
+                depth -= 1
+                tmp_group += e
+                continue
+            if depth > 0:
+                tmp_group += e
+                continue
+            if e.isupper():
+                if tmp_group != "":
+                    comp_list.append(tmp_group)
+                    tmp_group = ""
+                    if tmp_group_count == "":
+                        comp_counts.append(1)
+                    else:
+                        comp_counts.append(int(tmp_group_count))
+                        tmp_group_count = ""
+                tmp_group += e
+                continue
+            if e.islower():
+                tmp_group += e
+                continue
+            if e.isnumeric():
+                tmp_group_count += e
+        if tmp_group != "":
+            comp_list.append(tmp_group)
+            if tmp_group_count == "":
+                comp_counts.append(1)
+            else:
+                comp_counts.append(int(tmp_group_count))
+
+        # assemble dictionary from component lists
+        atom_dict = {}
+        for (comp,count) in zip(comp_list,comp_counts):
+            if not comp.startswith("("):
+                atom_dict[comp] = count
+            else:
+                atom_dicts_depth = self.get_composition(comp)
+                for atom in atom_dicts_depth:
+                    atom_dicts_depth[atom] *= count
+                    if atom in atom_dict:
+                        atom_dict[atom] += atom_dicts_depth[atom]
+                    else:
+                        atom_dict[atom] = atom_dicts_depth[atom]
+                atom_dicts_depth = {}
+        return atom_dict
+
+class BufferGas(ChemicalCompound):
+
+    def __init__(self, formula: str):
+        super().__init__(formula)
+

From 6d2f431e9a3999a0bda1259330d5316d08e6acc6 Mon Sep 17 00:00:00 2001
From: "Tim Maier (Ubuntu Desktop)" <tmaier01@students.uni-mainz.de>
Date: Fri, 27 Oct 2023 11:39:43 +0200
Subject: [PATCH 2/7] prevent circular import and fix tokenizer error

---
 imspy/imspy/data/frame.py                     |  2 +-
 imspy/imspy/data/handle.py                    | 10 ++-------
 imspy/imspy/data/spectrum.py                  | 22 ++++++++++++++-----
 imspy/imspy/feature.py                        |  2 +-
 imspy/imspy/simulation/hardware_models.py     |  8 +++----
 .../simulation/run_example_simulation.py      |  2 +-
 6 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/imspy/imspy/data/frame.py b/imspy/imspy/data/frame.py
index 659209b0..6cd4ba17 100644
--- a/imspy/imspy/data/frame.py
+++ b/imspy/imspy/data/frame.py
@@ -7,7 +7,7 @@
 
 import numpy as np
 import imspy_connector as pims
-from imspy.spectrum import MzSpectrum, TimsSpectrum, IndexedMzSpectrum
+from imspy.data.spectrum import MzSpectrum, TimsSpectrum, IndexedMzSpectrum
 
 from imspy.utilities import re_index_indices
 
diff --git a/imspy/imspy/data/handle.py b/imspy/imspy/data/handle.py
index 4aec5145..2a5f052c 100644
--- a/imspy/imspy/data/handle.py
+++ b/imspy/imspy/data/handle.py
@@ -10,14 +10,8 @@
 
 from abc import ABC
 
-<<<<<<<< HEAD:imspy/imspy/handle.py
-from imspy.frame import TimsFrame
-from imspy.slice import TimsSlice
-========
-from pyims.data import TimsFrame, TimsSlice
-
->>>>>>>> 1cd02c1 (transfer simulation framework from proteolizard):imspy/imspy/data/handle.py
-
+from imspy.data.frame import TimsFrame
+from imspy.data.slice import TimsSlice
 
 class TimsDataset(ABC):
     def __init__(self, data_path: str):
diff --git a/imspy/imspy/data/spectrum.py b/imspy/imspy/data/spectrum.py
index 80baa757..c561b1bb 100644
--- a/imspy/imspy/data/spectrum.py
+++ b/imspy/imspy/data/spectrum.py
@@ -1,6 +1,6 @@
 import numpy as np
 from typing import List, Tuple
-
+from __future__ import annotations
 import pandas as pd
 from numpy.typing import NDArray
 
@@ -138,7 +138,7 @@ def __repr__(self):
         return f"MzSpectrum(num_peaks={len(self.mz)})"
 
     def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5,
-                   min_intensity: float = 1) -> Tuple[NDArray, List['MzSpectrum']]:
+                   min_intensity: float = 1) -> Tuple[NDArray, List[MzSpectrum]]:
         """Convert the spectrum to a list of windows.
 
         Args:
@@ -154,8 +154,20 @@ def to_windows(self, window_length: float = 10, overlapping: bool = True, min_nu
         indices, windows = self.__spec_ptr.to_windows(window_length, overlapping, min_num_peaks, min_intensity)
         return indices, [MzSpectrum.from_py_mz_spectrum(window) for window in windows]
 
+    def to_resolution(self, resolution: int) -> MzSpectrum:
+        """Bins the spectrum's m/z values to a 
+        given resolution and sums the intensities.
+
+        Args:
+            resolution (int): Negative decadic logarithm of bin size.
+
+        Returns:
+            MzSpectrum: A new `MzSpectrum` where m/z values are binned according to the given resolution.
+        """
+        return self.__spec_ptr.to_resolution(resolution)
+    
     def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: float = 0.0,
-               intensity_max: float = 1e9) -> 'MzSpectrum':
+               intensity_max: float = 1e9) -> MzSpectrum:
         """Filter the spectrum for a given m/z range and intensity range.
 
         Args:
@@ -170,7 +182,7 @@ def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: flo
         return MzSpectrum.from_py_mz_spectrum(
             self.__spec_ptr.filter_ranged(mz_min, mz_max, intensity_min, intensity_max))
 
-    def vectorized(self, resolution: int = 2) -> 'MzSpectrumVectorized':
+    def vectorized(self, resolution: int = 2) -> MzSpectrumVectorized:
         """Convert the spectrum to a vectorized spectrum.
 
         Args:
@@ -213,7 +225,7 @@ def from_py_mz_spectrum_vectorized(cls, spec: pims.PyMzSpectrumVectorized):
     @property
     def resolution(self) -> float:
         """Resolution.
-
+        
         Returns:
             float: Resolution.
         """
diff --git a/imspy/imspy/feature.py b/imspy/imspy/feature.py
index 9abf5c5c..2124b5f8 100644
--- a/imspy/imspy/feature.py
+++ b/imspy/imspy/feature.py
@@ -4,7 +4,7 @@
 
 import json
 
-from pyims.data import TimsSlice, TimsFrame, MzSpectrum
+from pyims.data import TimsSlice, TimsFrame
 from pyims.utility import gaussian, exp_gaussian
 from pyims.isotopes import IsotopePatternGenerator, create_initial_feature_distribution
 from abc import ABC, abstractmethod
diff --git a/imspy/imspy/simulation/hardware_models.py b/imspy/imspy/simulation/hardware_models.py
index 817d9797..2a0b2811 100644
--- a/imspy/imspy/simulation/hardware_models.py
+++ b/imspy/imspy/simulation/hardware_models.py
@@ -267,7 +267,7 @@ def __init__(self, model_path: str, tokenizer_path: str):
 
     def sequences_to_tokens(self, sequences_tokenized: np.array) -> np.array:
         print('tokenizing sequences...')
-        tokens = np.apply_along_axis(self.tokenizer.texts_to_sequences, 0, sequences_tokenized)
+        tokens = self.tokenizer.texts_to_sequences(sequences_tokenized)
         tokens_padded = tf.keras.preprocessing.sequence.pad_sequences(tokens, 50, padding='post')
         return tokens_padded
 
@@ -286,7 +286,7 @@ def _worker(model_path: str, tokens_padded: np.array, batched: bool = True, bs:
     def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) ->  NDArray[np.float64]:
 
         data = sample.peptides
-        tokens = data["sequence_tokenized"].apply(lambda st: st.sequence_tokenized)
+        tokens = data["sequence_tokenized"].apply(lambda st: st.sequence_tokenized).to_numpy()
         print('generating tf dataset...')
         tokens_padded = self.sequences_to_tokens(tokens)
 
@@ -632,7 +632,7 @@ def __init__(self, model_path: str, tokenizer_path: str):
 
     def sequences_to_tokens(self, sequences_tokenized: np.array) -> np.array:
         print('tokenizing sequences...')
-        tokens = np.apply_along_axis(self.tokenizer.texts_to_sequences, 0, sequences_tokenized)
+        tokens = self.tokenizer.texts_to_sequences(sequences_tokenized)
         tokens_padded = tf.keras.preprocessing.sequence.pad_sequences(tokens, 50, padding='post')
         return tokens_padded
 
@@ -654,7 +654,7 @@ def _worker(model_path: str, tokens_padded: np.array, mz: np.array, charges: np.
 
     def simulate(self, sample: ProteomicsExperimentSampleSlice, device: IonMobilitySeparation) -> Tuple[NDArray]:
         data = sample.ions.merge(sample.peptides.loc[:,["pep_id","sequence_tokenized"]],on="pep_id",validate="many_to_one")
-        tokens = data.sequence_tokenized.apply(lambda st: st.sequence_tokenized)
+        tokens = data.sequence_tokenized.apply(lambda st: st.sequence_tokenized).to_numpy()
         tokens_padded = self.sequences_to_tokens(tokens)
         mz = data['mz'].values
         charges = data["charge"].values
diff --git a/pyims/examples/simulation/run_example_simulation.py b/pyims/examples/simulation/run_example_simulation.py
index 03f01c71..1df9cef0 100644
--- a/pyims/examples/simulation/run_example_simulation.py
+++ b/pyims/examples/simulation/run_example_simulation.py
@@ -89,7 +89,7 @@ def build_experiment():
 
 
     # to reduce computational load in example
-    sample_digest.data = sample_digest.data.sample(100000, random_state= rng)
+    sample_digest.data = sample_digest.data.sample(100, random_state= rng)
 
 
     t.load_sample(sample_digest)

From 0f93ebf18ab104349d5fd1e56c668da5576564bb Mon Sep 17 00:00:00 2001
From: "Tim Maier (Ubuntu Desktop)" <tmaier01@students.uni-mainz.de>
Date: Fri, 3 Nov 2023 10:29:47 +0100
Subject: [PATCH 3/7] Changed calling of MzSpectrum

---
 imspy/imspy/data/spectrum.py          | 30 +++++++++++++++++++++++++--
 imspy/imspy/isotopes.py               | 16 +++++++-------
 imspy/imspy/noise.py                  |  4 ++--
 imspy/imspy/simulation/experiment.py  |  1 +
 imspy/imspy/utility.py                |  4 ++--
 imspy/pyproject.toml                  |  7 +++++++
 imspy_connector/src/py_mz_spectrum.rs |  4 ++++
 7 files changed, 51 insertions(+), 15 deletions(-)

diff --git a/imspy/imspy/data/spectrum.py b/imspy/imspy/data/spectrum.py
index c561b1bb..cb6c7559 100644
--- a/imspy/imspy/data/spectrum.py
+++ b/imspy/imspy/data/spectrum.py
@@ -1,6 +1,7 @@
+from __future__ import annotations
+import json
 import numpy as np
 from typing import List, Tuple
-from __future__ import annotations
 import pandas as pd
 from numpy.typing import NDArray
 
@@ -79,6 +80,18 @@ def __repr__(self):
 
 
 class MzSpectrum:
+    
+    @classmethod
+    def from_jsons(cls, jsons:str) -> MzSpectrum:
+        json_dict:dict = json.loads(jsons)
+        mz = json_dict["mz"]
+        intensity = json_dict["intensity"]
+        return cls(mz, intensity)
+    
+    @classmethod
+    def from_spectra_list(cls, spectra_list:List[MzSpectrum], resolution: int, centroided: bool)->MzSpectrum:
+        pass
+    
     def __init__(self, mz: NDArray[np.float64], intensity: NDArray[np.float64]):
         """MzSpectrum class.
 
@@ -192,8 +205,18 @@ def vectorized(self, resolution: int = 2) -> MzSpectrumVectorized:
             MzSpectrumVectorized: Vectorized spectrum.
         """
         return MzSpectrumVectorized.from_py_mz_spectrum_vectorized(self.__spec_ptr.vectorized(resolution))
+    
+    def to_jsons(self) -> str:
+        """
+        generates json string representation of MzSpectrum
+        """
+        json_dict = {}
+        json_dict["mz"] = self.mz().tolist()
+        json_dict["intensity"] = self.intensity().tolist()
 
+        return json.dumps(json_dict)
 
+    
 class MzSpectrumVectorized:
     def __init__(self, indices: NDArray[np.int32], values: NDArray[np.float64], resolution: int):
         """MzSpectrum class.
@@ -253,7 +276,7 @@ def __repr__(self):
         return f"MzSpectrumVectorized(num_values={len(self.values)})"
 
 
-class TimsSpectrum:
+class TimsSpectrum(AbstractSpectrum):
     def __init__(self, frame_id: int, scan: int, retention_time: float, mobility: float, ms_type: int,
                  index: NDArray[np.int32], mz: NDArray[np.float64], intensity: NDArray[np.float64]):
         """TimsSpectrum class.
@@ -387,3 +410,6 @@ def __repr__(self):
         return (f"TimsSpectrum(id={self.frame_id}, retention_time={np.round(self.retention_time, 2)}, "
                 f"scan={self.scan}, mobility={np.round(self.mobility, 2)}, ms_type={self.ms_type}, "
                 f"num_peaks={len(self.index)})")
+
+    def to_jsons(self) -> str:
+        return super().to_jsons()
\ No newline at end of file
diff --git a/imspy/imspy/isotopes.py b/imspy/imspy/isotopes.py
index 49b874df..114ae17e 100644
--- a/imspy/imspy/isotopes.py
+++ b/imspy/imspy/isotopes.py
@@ -200,7 +200,7 @@ def generate_pattern(self, mass: float, charge: int, k: int = 7,
                          min_intensity: int = 5) -> Tuple[ArrayLike, ArrayLike]:
         pass
 
-    def generate_spectrum(self, mass: int, charge: int, frame_id: int, scan_id: int, k: int = 7,
+    def generate_spectrum(self, mass: int, charge: int, k: int = 7,
                           amp :Optional[float] = None, resolution:float =3, min_intensity: int = 5, centroided: bool = True) -> MzSpectrum:
         if amp is None:
             amp = self.default_abundancy
@@ -212,17 +212,15 @@ def generate_spectrum(self, mass: int, charge: int, frame_id: int, scan_id: int,
                                  mass=mass, charge=charge, amp=amp, k=k, resolution=resolution)
 
         if centroided:
-            return MzSpectrum(None, frame_id, scan_id, mz, i)\
+            return MzSpectrum(mz, i)\
                     .to_resolution(resolution).filter(lb,ub,min_intensity)\
                     .to_centroided(baseline_noise_level=max(min_intensity,1), sigma=1/np.power(10,resolution-1))
 
-        return MzSpectrum(None, frame_id, scan_id, mz, i).to_resolution(resolution).filter(lb,ub,min_intensity)
+        return MzSpectrum(mz, i).to_resolution(resolution).filter(lb,ub,min_intensity)
 
     def generate_spectrum_by_sampling(self,
                                       mass: float,
                                       charge: int,
-                                      frame_id:int,
-                                      scan_id: int,
                                       k:int =7,
                                       sigma: ArrayLike = 0.008492569002123142,
                                       ion_count:int = 1000000,
@@ -235,8 +233,8 @@ def generate_spectrum_by_sampling(self,
         mz, i = numba_ion_sampler(mass, charge, sigma, k, ion_count, intensity_per_ion)
 
         if centroided:
-            return MzSpectrum(None,frame_id,scan_id,mz,i)\
-                    .to_resolution(resolution).filter(-1,-1,min_intensity)\
-                    .to_centroided(baseline_noise_level=max(min_intensity,1), sigma=1/np.power(10,resolution-1))
-        return MzSpectrum(None,frame_id,scan_id,mz,i)\
+            return MzSpectrum(mz,i)\
+                    .to_resolution(resolution).filter(-1,-1,min_intensity)\ 
+                    .to_centroided(baseline_noise_level=max(min_intensity,1), sigma=1/np.power(10,resolution-1)) #Todo implement centroid method
+        return MzSpectrum(mz,i)\
                 .to_resolution(resolution).filter(-1,-1,min_intensity)
\ No newline at end of file
diff --git a/imspy/imspy/noise.py b/imspy/imspy/noise.py
index 1702faef..009aded0 100644
--- a/imspy/imspy/noise.py
+++ b/imspy/imspy/noise.py
@@ -94,13 +94,13 @@ def baseline_shot_noise_window(window:MzSpectrum,
 
     """
     num_noise_peaks = np.random.poisson(lam=expected_noise_peaks)
-    noised_window = MzSpectrum(None,-1,-1,[],[])
+    noised_window = MzSpectrum([],[])
     for i in range(num_noise_peaks):
         location_i = np.random.uniform(window_theoretical_mz_min,window_theoretical_mz_max)
         intensity_i = np.random.exponential(expected_noise_intensity)
         sigma_i = np.random.exponential(expected_noise_sigma)
         noise_mz, noise_intensity = generate_noise_peak(location_i,sigma_i,intensity_i,resolution=resolution)
-        noise_peak = MzSpectrum(None,-1,-1, noise_mz, noise_intensity)
+        noise_peak = MzSpectrum(noise_mz, noise_intensity)
         noised_window = noised_window+noise_peak
     return noised_window
 
diff --git a/imspy/imspy/simulation/experiment.py b/imspy/imspy/simulation/experiment.py
index c6896f31..3dc4bd4d 100644
--- a/imspy/imspy/simulation/experiment.py
+++ b/imspy/imspy/simulation/experiment.py
@@ -106,6 +106,7 @@ def _simulate_features(self, chunk_size):
             self.ion_mobility_separation_method.run(data_chunk)
             self.mz_separation_method.run(data_chunk)
             self.database.update(data_chunk)
+    
     @staticmethod
     def _assemble_frame_range(frame_range, scan_id_min, scan_id_max, default_abundance, resolution, output_path, database_path):
 
diff --git a/imspy/imspy/utility.py b/imspy/imspy/utility.py
index 001c0246..ea47e70b 100644
--- a/imspy/imspy/utility.py
+++ b/imspy/imspy/utility.py
@@ -119,8 +119,8 @@ def get_ref_pattern_as_spectra(ref_patterns):
             mz = np.round(np.array(mz), 2) + 1.0
             i = np.array([int(x) for x in i])
 
-            spectum = MzSpectrum(None, -1, -1, mz, i)
-            spec_list.append((row['m'], row['z'], spectum, last_contrib))
+            spectrum = MzSpectrum(mz, i)
+            spec_list.append((row['m'], row['z'], spectrum, last_contrib))
 
     return spec_list
 
diff --git a/imspy/pyproject.toml b/imspy/pyproject.toml
index e33a348a..f18cf6d7 100644
--- a/imspy/pyproject.toml
+++ b/imspy/pyproject.toml
@@ -12,6 +12,13 @@ numpy = ">=1.21"
 tensorflow = ">=2.14"
 tensorflow-probability = ">=0.22.1"
 imspy-connector = ">=0.2.0"
+mendeleev = ">=0.14"
+pyopenms = ">=3.1"
+scipy = ">=1.11.2"
+tqdm = ">=4.66"
+pyarrow =">=13.0"
+numba = ">=0.57"
+
 
 [build-system]
 requires = ["poetry-core"]
diff --git a/imspy_connector/src/py_mz_spectrum.rs b/imspy_connector/src/py_mz_spectrum.rs
index 8a53c75e..70f09c35 100644
--- a/imspy_connector/src/py_mz_spectrum.rs
+++ b/imspy_connector/src/py_mz_spectrum.rs
@@ -234,4 +234,8 @@ impl PyTimsSpectrum {
     pub fn mz_spectrum(&self) -> PyMzSpectrum {
         PyMzSpectrum { inner: self.inner.spectrum.mz_spectrum.clone() }
     }
+
+    pub fn to_resolution(&self, resolution: i32) -> Self {
+        PyTimsSpectrum{ inner: self.inner.to_resolution(resolution)}
+    }
 }

From 338abe9f70b5753458d9cac715c2f7dcd7b65761 Mon Sep 17 00:00:00 2001
From: "Tim Maier (Ubuntu Desktop)" <tmaier01@students.uni-mainz.de>
Date: Sat, 4 Nov 2023 01:44:55 +0100
Subject: [PATCH 4/7] first syn run without runtime error

---
 imspy/imspy/data/spectrum.py              | 38 +++++++++++++++++------
 imspy/imspy/feature.py                    |  2 +-
 imspy/imspy/isotopes.py                   | 16 +++++-----
 imspy/imspy/noise.py                      |  4 +--
 imspy/imspy/proteome.py                   |  2 +-
 imspy/imspy/simulation/experiment.py      |  6 ++--
 imspy/imspy/simulation/hardware_models.py |  4 +--
 imspy_connector/src/py_mz_spectrum.rs     | 28 ++++++++++++++++-
 mscore/src/mz_spectrum.rs                 | 11 +++++++
 9 files changed, 84 insertions(+), 27 deletions(-)

diff --git a/imspy/imspy/data/spectrum.py b/imspy/imspy/data/spectrum.py
index cb6c7559..2bb3a4c8 100644
--- a/imspy/imspy/data/spectrum.py
+++ b/imspy/imspy/data/spectrum.py
@@ -86,11 +86,20 @@ def from_jsons(cls, jsons:str) -> MzSpectrum:
         json_dict:dict = json.loads(jsons)
         mz = json_dict["mz"]
         intensity = json_dict["intensity"]
-        return cls(mz, intensity)
+        return cls(np.array(mz, dtype=np.float64), np.array(intensity, dtype=np.float64))
     
     @classmethod
-    def from_spectra_list(cls, spectra_list:List[MzSpectrum], resolution: int, centroided: bool)->MzSpectrum:
-        pass
+    def from_mz_spectra_list(cls, spectra_list:List[MzSpectrum], resolution: int)->MzSpectrum:
+        """Generates a convoluted mass spectrum by adding all spectra in the given list.
+
+        Args:
+            spectra_list (List[MzSpectrum]): List of mass spectra.
+            resolution (int): Desired resolution of returned spectrum.
+
+        Returns:
+            MzSpectrum: Convoluted spectrum.
+        """
+        return cls.from_py_mz_spectrum(pims.PyMzSpectrum.from_mzspectra_list([spectrum.__spec_ptr for spectrum in spectra_list], resolution))
     
     def __init__(self, mz: NDArray[np.float64], intensity: NDArray[np.float64]):
         """MzSpectrum class.
@@ -149,6 +158,18 @@ def from_py_mz_spectrum(cls, spec: pims.PyMzSpectrum):
 
     def __repr__(self):
         return f"MzSpectrum(num_peaks={len(self.mz)})"
+    
+    def __mul__(self, scale) -> MzSpectrum:
+        """Overwrite * operator for scaling of spectrum
+
+        Args:
+            scale (float): Scale.
+
+        Returns:
+            MzSpectrum: Scaled spectrum
+        """
+        tmp: pims.PyMzSpectrum =  self.__spec_ptr * scale
+        return self.from_py_mz_spectrum(tmp)
 
     def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5,
                    min_intensity: float = 1) -> Tuple[NDArray, List[MzSpectrum]]:
@@ -177,7 +198,7 @@ def to_resolution(self, resolution: int) -> MzSpectrum:
         Returns:
             MzSpectrum: A new `MzSpectrum` where m/z values are binned according to the given resolution.
         """
-        return self.__spec_ptr.to_resolution(resolution)
+        return MzSpectrum.from_py_mz_spectrum(self.__spec_ptr.to_resolution(resolution))
     
     def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: float = 0.0,
                intensity_max: float = 1e9) -> MzSpectrum:
@@ -211,8 +232,8 @@ def to_jsons(self) -> str:
         generates json string representation of MzSpectrum
         """
         json_dict = {}
-        json_dict["mz"] = self.mz().tolist()
-        json_dict["intensity"] = self.intensity().tolist()
+        json_dict["mz"] = self.mz.tolist()
+        json_dict["intensity"] = self.intensity.tolist()
 
         return json.dumps(json_dict)
 
@@ -276,7 +297,7 @@ def __repr__(self):
         return f"MzSpectrumVectorized(num_values={len(self.values)})"
 
 
-class TimsSpectrum(AbstractSpectrum):
+class TimsSpectrum:
     def __init__(self, frame_id: int, scan: int, retention_time: float, mobility: float, ms_type: int,
                  index: NDArray[np.int32], mz: NDArray[np.float64], intensity: NDArray[np.float64]):
         """TimsSpectrum class.
@@ -410,6 +431,3 @@ def __repr__(self):
         return (f"TimsSpectrum(id={self.frame_id}, retention_time={np.round(self.retention_time, 2)}, "
                 f"scan={self.scan}, mobility={np.round(self.mobility, 2)}, ms_type={self.ms_type}, "
                 f"num_peaks={len(self.index)})")
-
-    def to_jsons(self) -> str:
-        return super().to_jsons()
\ No newline at end of file
diff --git a/imspy/imspy/feature.py b/imspy/imspy/feature.py
index 2124b5f8..eb30dddd 100644
--- a/imspy/imspy/feature.py
+++ b/imspy/imspy/feature.py
@@ -123,7 +123,7 @@ def generate_feature(self, mz: float, charge: int,
         frame_list = []
 
         spec = pattern_generator.generate_spectrum(mz, charge, min_intensity=5, centroided=True, k=12)
-        mz, intensity = spec.mz(), spec.intensity() / np.max(spec.intensity()) * 100
+        mz, intensity = spec.mz, spec.intensity / np.max(spec.intensity) * 100
 
         for i in range(num_rt):
 
diff --git a/imspy/imspy/isotopes.py b/imspy/imspy/isotopes.py
index 114ae17e..458e94fb 100644
--- a/imspy/imspy/isotopes.py
+++ b/imspy/imspy/isotopes.py
@@ -123,7 +123,7 @@ def numba_generate_pattern(lower_bound: float,
     mzs = np.linspace(lower_bound,upper_bound,size)
     intensities = iso(mzs,mass,charge,sigma,amp,k,step_size)
 
-    return mzs + MASS_PROTON, intensities.astype(np.int64)
+    return mzs + MASS_PROTON, intensities
 
 #@numba.jit(nopython= True)
 def numba_ion_sampler(mass: float, charge: int, sigma: ArrayLike, k: int, ion_count: int, intensity_per_ion: int ):
@@ -158,7 +158,7 @@ def _get_component(u: float, weights_cum_sum: ArrayLike = weights) -> int:
 
         mz = ((comps*MASS_NEUTRON) + mass) / charge + MASS_PROTON + devs
         i = np.ones_like(mz) * intensity_per_ion
-        return (mz, i.astype(np.int64))
+        return (mz, i)
 
 @numba.jit
 def create_initial_feature_distribution(num_rt: int, num_im: int,
@@ -200,8 +200,8 @@ def generate_pattern(self, mass: float, charge: int, k: int = 7,
                          min_intensity: int = 5) -> Tuple[ArrayLike, ArrayLike]:
         pass
 
-    def generate_spectrum(self, mass: int, charge: int, k: int = 7,
-                          amp :Optional[float] = None, resolution:float =3, min_intensity: int = 5, centroided: bool = True) -> MzSpectrum:
+    def generate_spectrum(self, mass: int, charge: int, min_intensity: int = 5, k: int = 7,
+                          amp :Optional[float] = None, resolution:float =3,  centroided: bool = True) -> MzSpectrum:
         if amp is None:
             amp = self.default_abundancy
 
@@ -210,6 +210,7 @@ def generate_spectrum(self, mass: int, charge: int, k: int = 7,
 
         mz, i = numba_generate_pattern(lower_bound=lb, upper_bound=ub,
                                  mass=mass, charge=charge, amp=amp, k=k, resolution=resolution)
+        #Todo implement centroid method
 
         if centroided:
             return MzSpectrum(mz, i)\
@@ -233,8 +234,9 @@ def generate_spectrum_by_sampling(self,
         mz, i = numba_ion_sampler(mass, charge, sigma, k, ion_count, intensity_per_ion)
 
         if centroided:
-            return MzSpectrum(mz,i)\
-                    .to_resolution(resolution).filter(-1,-1,min_intensity)\ 
-                    .to_centroided(baseline_noise_level=max(min_intensity,1), sigma=1/np.power(10,resolution-1)) #Todo implement centroid method
+            return ( MzSpectrum(mz,i)
+                    .to_resolution(resolution).filter(-1,-1,min_intensity) 
+                    .to_centroided(baseline_noise_level=max(min_intensity,1), sigma=1/np.power(10,resolution-1))
+            ) 
         return MzSpectrum(mz,i)\
                 .to_resolution(resolution).filter(-1,-1,min_intensity)
\ No newline at end of file
diff --git a/imspy/imspy/noise.py b/imspy/imspy/noise.py
index 009aded0..daef5073 100644
--- a/imspy/imspy/noise.py
+++ b/imspy/imspy/noise.py
@@ -110,8 +110,8 @@ def baseline_shot_noise(spectrum:MzSpectrum,window_size:float=50,expected_noise_
 
 
     """
-    min_mz = spectrum.mz().min()-0.1
-    max_mz = spectrum.mz().max()+0.1
+    min_mz = spectrum.mz.min()-0.1
+    max_mz = spectrum.mz.max()+0.1
     bins,windows = spectrum.windows(window_size,overlapping=False,min_peaks=0,min_intensity=0)
     noised_spectrum = spectrum
     for b,w in zip(bins,windows):
diff --git a/imspy/imspy/proteome.py b/imspy/imspy/proteome.py
index e4490467..5a601e64 100644
--- a/imspy/imspy/proteome.py
+++ b/imspy/imspy/proteome.py
@@ -247,7 +247,7 @@ def make_sql_compatible(column):
         if isinstance(column.iloc[0], (RTProfile, ScanProfile, ChargeProfile, TokenSequence)):
             return column.apply(lambda x: x.jsons)
         elif isinstance(column.iloc[0], MzSpectrum):
-            return column.apply(lambda x: x.to_jsons(only_spectrum = True))
+            return column.apply(lambda x: x.to_jsons())
         else:
             return column
 
diff --git a/imspy/imspy/simulation/experiment.py b/imspy/imspy/simulation/experiment.py
index 3dc4bd4d..2b4846d4 100644
--- a/imspy/imspy/simulation/experiment.py
+++ b/imspy/imspy/simulation/experiment.py
@@ -167,9 +167,9 @@ def _assemble_frame_range(frame_range, scan_id_min, scan_id_max, default_abundan
             for (s_id,scan_spectra_list) in frame_dict.items():
 
                 if len(scan_spectra_list) > 0:
-                    scan_spectrum = MzSpectrum.from_mzSpectra_list(scan_spectra_list,resolution = resolution, sigma=1/np.power(10,(resolution-1)))
-                    output_dict["mz"].append(scan_spectrum.mz().tolist())
-                    output_dict["intensity"].append(scan_spectrum.intensity().tolist())
+                    scan_spectrum = MzSpectrum.from_mz_spectra_list(scan_spectra_list,resolution = resolution)
+                    output_dict["mz"].append(scan_spectrum.mz.tolist())
+                    output_dict["intensity"].append(scan_spectrum.intensity.tolist())
                     output_dict["scan_id"].append(s_id)
                     output_dict["frame_id"].append(f_id)
                     signal[f_id][s_id] = None
diff --git a/imspy/imspy/simulation/hardware_models.py b/imspy/imspy/simulation/hardware_models.py
index 2a0b2811..84a82c3f 100644
--- a/imspy/imspy/simulation/hardware_models.py
+++ b/imspy/imspy/simulation/hardware_models.py
@@ -761,6 +761,6 @@ def simulate(self, sample: ProteomicsExperimentSampleSlice, device: MzSeparation
         charges = joined_df["charge"].values
         spectra = []
         for (m, c) in zip(masses, charges):
-            spectrum = avg.generate_spectrum(m,c,-1,-1,amp = self.default_abundance, centroided=False)
+            spectrum = avg.generate_spectrum(m,c,amp = self.default_abundance, centroided=False)
             spectra.append(spectrum)
-        return spectra
\ No newline at end of file
+        return spectra
diff --git a/imspy_connector/src/py_mz_spectrum.rs b/imspy_connector/src/py_mz_spectrum.rs
index 70f09c35..0bfb1020 100644
--- a/imspy_connector/src/py_mz_spectrum.rs
+++ b/imspy_connector/src/py_mz_spectrum.rs
@@ -25,11 +25,13 @@ impl PyMsType {
 }
 
 #[pyclass]
+#[derive(Clone)]
 pub struct PyMzSpectrum {
     pub inner: MzSpectrum,
 }
 
 #[pymethods]
+
 impl PyMzSpectrum {
     #[new]
     pub unsafe fn new(mz: &PyArray1<f64>, intensity: &PyArray1<f64>) -> PyResult<Self> {
@@ -37,9 +39,27 @@ impl PyMzSpectrum {
             inner: MzSpectrum {
                 mz: mz.as_slice()?.to_vec(),
                 intensity: intensity.as_slice()?.to_vec(),
-            },
+            }
         })
     }
+    #[staticmethod]
+    pub unsafe fn from_mzspectra_list(list: Vec<PyMzSpectrum>, resolution: i32) -> PyResult<Self> {
+        if list.is_empty(){
+            Ok(PyMzSpectrum { 
+                inner: MzSpectrum {
+                    mz: Vec::new(),
+                    intensity: Vec::new(),
+                }
+            })
+        }
+        else {
+            let mut convoluted: MzSpectrum = MzSpectrum { mz: vec![], intensity: vec![] };
+            for spectrum in list {
+                convoluted = convoluted + spectrum.inner;
+            }
+            Ok(PyMzSpectrum { inner: convoluted.to_resolution(resolution) })
+        }
+    }
 
     #[getter]
     pub fn mz(&self, py: Python) -> Py<PyArray1<f64>> {
@@ -86,6 +106,12 @@ impl PyMzSpectrum {
         };
         Ok(py_filtered)
     }
+    pub fn __add__(&self, other: PyMzSpectrum) -> PyResult<PyMzSpectrum> {
+        Ok(PyMzSpectrum { inner: (self.inner.clone() + other.inner) })
+    }
+    pub fn __mul__(&self, scale: f64) -> PyResult<PyMzSpectrum> {
+        Ok(PyMzSpectrum { inner: (self.inner.clone() * scale) })
+    }
 }
 
 #[pyclass]
diff --git a/mscore/src/mz_spectrum.rs b/mscore/src/mz_spectrum.rs
index 8d0aa497..ef40e72a 100644
--- a/mscore/src/mz_spectrum.rs
+++ b/mscore/src/mz_spectrum.rs
@@ -293,6 +293,17 @@ impl std::ops::Add for MzSpectrum {
     }
 }
 
+impl std::ops::Mul<f64> for MzSpectrum {
+    type Output = Self;
+    fn mul(self, scale: f64) -> Self::Output{
+        let mut scaled_intensities: Vec<f64> = vec![0.0; self.intensity.len()];
+        for (idx,intensity) in self.intensity.iter().enumerate(){
+            scaled_intensities[idx] = scale*intensity;
+        }
+        Self{ mz: self.mz.clone(), intensity: scaled_intensities}
+
+    }
+}
 /// Represents a mass spectrum with associated m/z indices, m/z values, and intensities
 #[derive(Clone)]
 pub struct IndexedMzSpectrum {

From 654ddd1f9f5b54ff4587a1e7f3defbd7a1c88566 Mon Sep 17 00:00:00 2001
From: "Tim Maier (Ubuntu Desktop)" <tmaier01@students.uni-mainz.de>
Date: Thu, 9 Nov 2023 10:11:46 +0100
Subject: [PATCH 5/7] renamed module to simplify rebase

---
 .../simulation/run_example_simulation.py      |   0
 imspy/imspy/chemistry.py                      | 213 ++++++++++++++++++
 imspy/imspy/data/__init__.py                  |   8 +-
 imspy/imspy/data/slice.py                     |  10 +-
 imspy/imspy/feature.py                        |   6 +-
 imspy/imspy/isotopes.py                       |   6 +-
 imspy/imspy/noise.py                          |   4 +-
 imspy/imspy/proteome.py                       |   8 +-
 imspy/imspy/simulation/experiment.py          |   8 +-
 imspy/imspy/simulation/hardware_models.py     |  10 +-
 imspy/imspy/utility.py                        |   4 +-
 pyims/pyims/chemistry.py                      | 213 ------------------
 12 files changed, 242 insertions(+), 248 deletions(-)
 rename {pyims => imspy}/examples/simulation/run_example_simulation.py (100%)
 delete mode 100644 pyims/pyims/chemistry.py

diff --git a/pyims/examples/simulation/run_example_simulation.py b/imspy/examples/simulation/run_example_simulation.py
similarity index 100%
rename from pyims/examples/simulation/run_example_simulation.py
rename to imspy/examples/simulation/run_example_simulation.py
diff --git a/imspy/imspy/chemistry.py b/imspy/imspy/chemistry.py
index e69de29b..b5b7d6b3 100644
--- a/imspy/imspy/chemistry.py
+++ b/imspy/imspy/chemistry.py
@@ -0,0 +1,213 @@
+import numpy as np
+import mendeleev as me
+
+AMINO_ACIDS = {'Lysine': 'K', 'Alanine': 'A', 'Glycine': 'G', 'Valine': 'V', 'Tyrosine': 'Y',
+               'Arginine': 'R', 'Glutamic Acid': 'E', 'Phenylalanine': 'F', 'Tryptophan': 'W',
+               'Leucine': 'L', 'Threonine': 'T', 'Cysteine': 'C', 'Serine': 'S', 'Glutamine': 'Q',
+               'Methionine': 'M', 'Isoleucine': 'I', 'Asparagine': 'N', 'Proline': 'P', 'Histidine': 'H',
+               'Aspartic Acid': 'D'}
+
+AA_MASSES = {'A': 71.03711, 'C': 103.00919, 'D': 115.02694, 'E': 129.04259, 'F': 147.06841, 'G': 57.02146,
+             'H': 137.05891, 'I': 113.08406, 'K': 128.09496, 'L': 113.08406, 'M': 131.04049, 'N': 114.04293,
+             'P': 97.05276, 'Q': 128.05858, 'R': 156.10111, 'S': 87.03203, 'T': 101.04768, 'V': 99.06841,
+             'W': 186.07931, 'Y': 163.06333, '[UNIMOD:1]': 42.010565, '[UNIMOD:35]': 15.994915, 'U': 168.964203,
+             '[UNIMOD:4]': 57.021464, '[UNIMOD:21]': 79.966331, '[UNIMOD:312]': 119.004099, '<START>': 0.0, '<END>': 0.0}
+
+VARIANT_DICT = {'L': ['L'], 'E': ['E'], 'S': ['S', 'S[UNIMOD:21]'], 'A': ['A'], 'V': ['V'], 'D': ['D'], 'G': ['G'],
+                '<END>': ['<END>'], 'P': ['P'], '<START>': ['<START>', '<START>[UNIMOD:1]'], 'T': ['T', 'T[UNIMOD:21]'],
+                'I': ['I'], 'Q': ['Q'], 'K': ['K', 'K[UNIMOD:1]'], 'N': ['N'], 'R': ['R'], 'F': ['F'], 'H': ['H'],
+                'Y': ['Y', 'Y[UNIMOD:21]'], 'M': ['M', 'M[UNIMOD:35]'],
+                'W': ['W'], 'C': ['C', 'C[UNIMOD:312]', 'C[UNIMOD:4]'], 'C[UNIMOD:4]': ['C', 'C[UNIMOD:312]', 'C[UNIMOD:4]']}
+
+MASS_PROTON = 1.007276466583
+
+MASS_WATER = 18.010564684
+# IUPAC standard in Kelvin
+STANDARD_TEMPERATURE = 273.15
+# IUPAC standard in Pa
+STANDARD_PRESSURE = 1e5
+# IUPAC elementary charge
+ELEMENTARY_CHARGE = 1.602176634e-19
+# IUPAC BOLTZMANN'S CONSTANT
+K_BOLTZMANN = 1.380649e-23
+# constant part of Mason-Schamp equation
+# 3/16*sqrt(2π/kb)*e/N0 *
+# 1e20 (correction for using A² instead of m²) *
+# 1/sqrt(1.660 5402(10)×10−27 kg) (correction for using Da instead of kg) *
+# 10000 * (to get cm²/Vs from m²/Vs)
+# TODO CITATION
+CCS_K0_CONVERSION_CONSTANT = 18509.8632163405
+
+def get_monoisotopic_token_weight(token:str):
+    """
+    Gets monoisotopic weight of token
+
+    :param token: Token of aa sequence e.g. "<START>[UNIMOD:1]"
+    :type token: str
+    :return: Weight in Dalton.
+    :rtype: float
+    """
+    splits = token.split("[")
+    for i in range(1, len(splits)):
+        splits[i] = "["+splits[i]
+
+    mass = 0
+    for split in splits:
+        mass += AA_MASSES[split]
+    return mass
+
+
+def get_mono_isotopic_weight(sequence_tokenized: list[str]) -> float:
+    mass = 0
+    for token in sequence_tokenized:
+        mass += get_monoisotopic_token_weight(token)
+    return mass + MASS_WATER
+
+
+def get_mass_over_charge(mass: float, charge: int) -> float:
+    return (mass / charge) + MASS_PROTON
+
+def get_num_protonizable_sites(sequence: str) -> int:
+    """
+    Gets number of sites that can be protonized.
+    This function does not yet account for PTMs.
+
+    :param sequence: Amino acid sequence
+    :type sequence: str
+    :return: Number of protonizable sites
+    :rtype: int
+    """
+    sites = 1 # n-terminus
+    for s in sequence:
+        if s in ["H","R","K"]:
+            sites += 1
+    return sites
+
+
+def reduced_mobility_to_ccs(one_over_k0, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15):
+    """
+    convert reduced ion mobility (1/k0) to CCS
+    :param one_over_k0: reduced ion mobility
+    :param charge: charge state of the ion
+    :param mz: mass-over-charge of the ion
+    :param mass_gas: mass of drift gas
+    :param temp: temperature of the drift gas in C°
+    :param t_diff: factor to translate from C° to K
+    """
+    reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas)
+    return (CCS_K0_CONVERSION_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / one_over_k0)
+
+
+def ccs_to_one_over_reduced_mobility(ccs, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15):
+    """
+    convert CCS to 1 over reduced ion mobility (1/k0)
+    :param ccs: collision cross-section
+    :param charge: charge state of the ion
+    :param mz: mass-over-charge of the ion
+    :param mass_gas: mass of drift gas (N2)
+    :param temp: temperature of the drift gas in C°
+    :param t_diff: factor to translate from C° to K
+    """
+    reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas)
+    return  ((np.sqrt(reduced_mass * (temp + t_diff))) * ccs) / (CCS_K0_CONVERSION_CONSTANT * charge)
+
+
+class ChemicalCompound:
+
+    def _calculate_molecular_mass(self):
+        mass = 0
+        for (atom, abundance) in self.element_composition.items():
+            mass += me.element(atom).atomic_weight * abundance
+        return mass
+
+    def __init__(self, formula):
+        self.element_composition = self.get_composition(formula)
+        self.mass = self._calculate_molecular_mass()
+
+    def get_composition(self, formula:str):
+        """
+        Parse chemical formula into Dict[str:int] with
+        atoms as keys and the respective counts as values.
+
+        :param formula: Chemical formula of compound e.g. 'C6H12O6'
+        :type formula: str
+        :return: Dictionary Atom: Count
+        :rtype: Dict[str:int]
+        """
+        if formula.startswith("("):
+            assert formula.endswith(")")
+            formula = formula[1:-1]
+
+        tmp_group = ""
+        tmp_group_count = ""
+        depth = 0
+        comp_list = []
+        comp_counts = []
+
+        # extract components: everything in brackets and atoms
+        # extract component counts: number behind component or 1
+        for (i,e) in enumerate(formula):
+            if e == "(":
+                depth += 1
+                if depth == 1:
+                    if tmp_group != "":
+                        comp_list.append(tmp_group)
+                        tmp_group = ""
+                        if tmp_group_count == "":
+                            comp_counts.append(1)
+                        else:
+                            comp_counts.append(int(tmp_group_count))
+                            tmp_group_count = ""
+                tmp_group += e
+                continue
+            if e == ")":
+                depth -= 1
+                tmp_group += e
+                continue
+            if depth > 0:
+                tmp_group += e
+                continue
+            if e.isupper():
+                if tmp_group != "":
+                    comp_list.append(tmp_group)
+                    tmp_group = ""
+                    if tmp_group_count == "":
+                        comp_counts.append(1)
+                    else:
+                        comp_counts.append(int(tmp_group_count))
+                        tmp_group_count = ""
+                tmp_group += e
+                continue
+            if e.islower():
+                tmp_group += e
+                continue
+            if e.isnumeric():
+                tmp_group_count += e
+        if tmp_group != "":
+            comp_list.append(tmp_group)
+            if tmp_group_count == "":
+                comp_counts.append(1)
+            else:
+                comp_counts.append(int(tmp_group_count))
+
+        # assemble dictionary from component lists
+        atom_dict = {}
+        for (comp,count) in zip(comp_list,comp_counts):
+            if not comp.startswith("("):
+                atom_dict[comp] = count
+            else:
+                atom_dicts_depth = self.get_composition(comp)
+                for atom in atom_dicts_depth:
+                    atom_dicts_depth[atom] *= count
+                    if atom in atom_dict:
+                        atom_dict[atom] += atom_dicts_depth[atom]
+                    else:
+                        atom_dict[atom] = atom_dicts_depth[atom]
+                atom_dicts_depth = {}
+        return atom_dict
+
+class BufferGas(ChemicalCompound):
+
+    def __init__(self, formula: str):
+        super().__init__(formula)
+
diff --git a/imspy/imspy/data/__init__.py b/imspy/imspy/data/__init__.py
index 1809a4f5..75c253f3 100644
--- a/imspy/imspy/data/__init__.py
+++ b/imspy/imspy/data/__init__.py
@@ -1,4 +1,4 @@
-from pyims.data.frame import TimsFrame
-from pyims.data.spectrum import TimsSpectrum, MzSpectrum
-from pyims.data.handle import TimsDataset, TimsDatasetDDA, TimsDatasetDIA
-from pyims.data.slice import TimsSlice
+from imspy.data.frame import TimsFrame
+from imspy.data.spectrum import TimsSpectrum, MzSpectrum
+from imspy.data.handle import TimsDataset, TimsDatasetDDA, TimsDatasetDIA
+from imspy.data.slice import TimsSlice
diff --git a/imspy/imspy/data/slice.py b/imspy/imspy/data/slice.py
index 056febaa..9e43bd5c 100644
--- a/imspy/imspy/data/slice.py
+++ b/imspy/imspy/data/slice.py
@@ -7,15 +7,9 @@
 
 from imspy.utilities import re_index_indices
 
-<<<<<<<< HEAD:imspy/imspy/slice.py
 import imspy_connector as pims
-from imspy.frame import TimsFrame, TimsFrameVectorized
-from imspy.spectrum import MzSpectrum
-========
-import pyims_connector as pims
-from pyims.data.frame import TimsFrame, TimsFrameVectorized
-from pyims.data.spectrum import MzSpectrum
->>>>>>>> 1cd02c1 (transfer simulation framework from proteolizard):imspy/imspy/data/slice.py
+from imspy.data.frame import TimsFrame, TimsFrameVectorized
+from imspy.data.spectrum import MzSpectrum
 
 
 class TimsSlice:
diff --git a/imspy/imspy/feature.py b/imspy/imspy/feature.py
index eb30dddd..53ab596f 100644
--- a/imspy/imspy/feature.py
+++ b/imspy/imspy/feature.py
@@ -4,9 +4,9 @@
 
 import json
 
-from pyims.data import TimsSlice, TimsFrame
-from pyims.utility import gaussian, exp_gaussian
-from pyims.isotopes import IsotopePatternGenerator, create_initial_feature_distribution
+from imspy.data import TimsSlice, TimsFrame
+from imspy.utility import gaussian, exp_gaussian
+from imspy.isotopes import IsotopePatternGenerator, create_initial_feature_distribution
 from abc import ABC, abstractmethod
 from typing import Optional, Dict
 class Profile:
diff --git a/imspy/imspy/isotopes.py b/imspy/imspy/isotopes.py
index 458e94fb..da75dae2 100644
--- a/imspy/imspy/isotopes.py
+++ b/imspy/imspy/isotopes.py
@@ -6,12 +6,12 @@
 from abc import ABC, abstractmethod
 
 from scipy.signal import argrelextrema
-from pyims.data import MzSpectrum
-from pyims.utility import gaussian, exp_gaussian, normal_pdf
+from imspy.data import MzSpectrum
+from imspy.utility import gaussian, exp_gaussian, normal_pdf
 import numba
 import pyopenms
 
-from pyims.noise import detection_noise
+from imspy.noise import detection_noise
 
 MASS_PROTON = 1.007276466621
 MASS_NEUTRON = 1.00866491595
diff --git a/imspy/imspy/noise.py b/imspy/imspy/noise.py
index daef5073..c2cf163b 100644
--- a/imspy/imspy/noise.py
+++ b/imspy/imspy/noise.py
@@ -5,8 +5,8 @@
 import numba
 from typing import Callable, Optional, Tuple
 from numpy.typing import ArrayLike
-from pyims.data import MzSpectrum
-from pyims.utility import normal_pdf
+from imspy.data import MzSpectrum
+from imspy.utility import normal_pdf
 
 @numba.jit(nopython=True)
 def mu_function_normal_default(intensity: ArrayLike) -> ArrayLike:
diff --git a/imspy/imspy/proteome.py b/imspy/imspy/proteome.py
index 5a601e64..85103430 100644
--- a/imspy/imspy/proteome.py
+++ b/imspy/imspy/proteome.py
@@ -5,11 +5,11 @@
 from numpy.typing import ArrayLike
 import pandas as pd
 import sqlite3
-from pyims.data import MzSpectrum
-from pyims.chemistry import get_mono_isotopic_weight, MASS_PROTON
+from imspy.data import MzSpectrum
+from imspy.chemistry import get_mono_isotopic_weight, MASS_PROTON
 
-from pyims.feature import RTProfile, ScanProfile, ChargeProfile
-from pyims.utility import tokenize_proforma_sequence, TokenSequence, get_aa_num_proforma_sequence
+from imspy.feature import RTProfile, ScanProfile, ChargeProfile
+from imspy.utility import tokenize_proforma_sequence, TokenSequence, get_aa_num_proforma_sequence
 from enum import Enum
 from abc import ABC, abstractmethod
 
diff --git a/imspy/imspy/simulation/experiment.py b/imspy/imspy/simulation/experiment.py
index 2b4846d4..0f53cd20 100644
--- a/imspy/imspy/simulation/experiment.py
+++ b/imspy/imspy/simulation/experiment.py
@@ -8,10 +8,10 @@
 import pyarrow as pa
 import pyarrow.parquet as pq
 from tqdm import tqdm
-from pyims.data import MzSpectrum, TimsFrame
-from pyims.proteome import PeptideDigest, ProteomicsExperimentSampleSlice, ProteomicsExperimentDatabaseHandle
-from pyims.isotopes import AveragineGenerator
-import pyims.simulation.hardware_models as hardware
+from imspy.data import MzSpectrum, TimsFrame
+from imspy.proteome import PeptideDigest, ProteomicsExperimentSampleSlice, ProteomicsExperimentDatabaseHandle
+from imspy.isotopes import AveragineGenerator
+import imspy.simulation.hardware_models as hardware
 
 class ProteomicsExperiment(ABC):
     def __init__(self, path: str):
diff --git a/imspy/imspy/simulation/hardware_models.py b/imspy/imspy/simulation/hardware_models.py
index 84a82c3f..a4d24e3e 100644
--- a/imspy/imspy/simulation/hardware_models.py
+++ b/imspy/imspy/simulation/hardware_models.py
@@ -9,11 +9,11 @@
 import pandas as pd
 from scipy.stats import exponnorm, norm, binom, gamma
 
-from pyims.chemistry import  STANDARD_TEMPERATURE, STANDARD_PRESSURE, CCS_K0_CONVERSION_CONSTANT, BufferGas, get_num_protonizable_sites
-from pyims.proteome import ProteomicsExperimentSampleSlice
-from pyims.feature import RTProfile, ScanProfile, ChargeProfile
-from pyims.isotopes import AveragineGenerator
-from pyims.utility import tokenizer_from_json
+from imspy.chemistry import  STANDARD_TEMPERATURE, STANDARD_PRESSURE, CCS_K0_CONVERSION_CONSTANT, BufferGas, get_num_protonizable_sites
+from imspy.proteome import ProteomicsExperimentSampleSlice
+from imspy.feature import RTProfile, ScanProfile, ChargeProfile
+from imspy.isotopes import AveragineGenerator
+from imspy.utility import tokenizer_from_json
 
 class Device(ABC):
     def __init__(self, name:str):
diff --git a/imspy/imspy/utility.py b/imspy/imspy/utility.py
index ea47e70b..dd504b39 100644
--- a/imspy/imspy/utility.py
+++ b/imspy/imspy/utility.py
@@ -10,8 +10,8 @@
 import pandas as pd
 
 from tqdm import tqdm
-from pyims.data import MzSpectrum
-# TODO bring to pyims
+from imspy.data import MzSpectrum
+# TODO bring to imspy
 #from proteolizardalgo.hashing import ReferencePattern
 
 
diff --git a/pyims/pyims/chemistry.py b/pyims/pyims/chemistry.py
deleted file mode 100644
index b5b7d6b3..00000000
--- a/pyims/pyims/chemistry.py
+++ /dev/null
@@ -1,213 +0,0 @@
-import numpy as np
-import mendeleev as me
-
-AMINO_ACIDS = {'Lysine': 'K', 'Alanine': 'A', 'Glycine': 'G', 'Valine': 'V', 'Tyrosine': 'Y',
-               'Arginine': 'R', 'Glutamic Acid': 'E', 'Phenylalanine': 'F', 'Tryptophan': 'W',
-               'Leucine': 'L', 'Threonine': 'T', 'Cysteine': 'C', 'Serine': 'S', 'Glutamine': 'Q',
-               'Methionine': 'M', 'Isoleucine': 'I', 'Asparagine': 'N', 'Proline': 'P', 'Histidine': 'H',
-               'Aspartic Acid': 'D'}
-
-AA_MASSES = {'A': 71.03711, 'C': 103.00919, 'D': 115.02694, 'E': 129.04259, 'F': 147.06841, 'G': 57.02146,
-             'H': 137.05891, 'I': 113.08406, 'K': 128.09496, 'L': 113.08406, 'M': 131.04049, 'N': 114.04293,
-             'P': 97.05276, 'Q': 128.05858, 'R': 156.10111, 'S': 87.03203, 'T': 101.04768, 'V': 99.06841,
-             'W': 186.07931, 'Y': 163.06333, '[UNIMOD:1]': 42.010565, '[UNIMOD:35]': 15.994915, 'U': 168.964203,
-             '[UNIMOD:4]': 57.021464, '[UNIMOD:21]': 79.966331, '[UNIMOD:312]': 119.004099, '<START>': 0.0, '<END>': 0.0}
-
-VARIANT_DICT = {'L': ['L'], 'E': ['E'], 'S': ['S', 'S[UNIMOD:21]'], 'A': ['A'], 'V': ['V'], 'D': ['D'], 'G': ['G'],
-                '<END>': ['<END>'], 'P': ['P'], '<START>': ['<START>', '<START>[UNIMOD:1]'], 'T': ['T', 'T[UNIMOD:21]'],
-                'I': ['I'], 'Q': ['Q'], 'K': ['K', 'K[UNIMOD:1]'], 'N': ['N'], 'R': ['R'], 'F': ['F'], 'H': ['H'],
-                'Y': ['Y', 'Y[UNIMOD:21]'], 'M': ['M', 'M[UNIMOD:35]'],
-                'W': ['W'], 'C': ['C', 'C[UNIMOD:312]', 'C[UNIMOD:4]'], 'C[UNIMOD:4]': ['C', 'C[UNIMOD:312]', 'C[UNIMOD:4]']}
-
-MASS_PROTON = 1.007276466583
-
-MASS_WATER = 18.010564684
-# IUPAC standard in Kelvin
-STANDARD_TEMPERATURE = 273.15
-# IUPAC standard in Pa
-STANDARD_PRESSURE = 1e5
-# IUPAC elementary charge
-ELEMENTARY_CHARGE = 1.602176634e-19
-# IUPAC BOLTZMANN'S CONSTANT
-K_BOLTZMANN = 1.380649e-23
-# constant part of Mason-Schamp equation
-# 3/16*sqrt(2π/kb)*e/N0 *
-# 1e20 (correction for using A² instead of m²) *
-# 1/sqrt(1.660 5402(10)×10−27 kg) (correction for using Da instead of kg) *
-# 10000 * (to get cm²/Vs from m²/Vs)
-# TODO CITATION
-CCS_K0_CONVERSION_CONSTANT = 18509.8632163405
-
-def get_monoisotopic_token_weight(token:str):
-    """
-    Gets monoisotopic weight of token
-
-    :param token: Token of aa sequence e.g. "<START>[UNIMOD:1]"
-    :type token: str
-    :return: Weight in Dalton.
-    :rtype: float
-    """
-    splits = token.split("[")
-    for i in range(1, len(splits)):
-        splits[i] = "["+splits[i]
-
-    mass = 0
-    for split in splits:
-        mass += AA_MASSES[split]
-    return mass
-
-
-def get_mono_isotopic_weight(sequence_tokenized: list[str]) -> float:
-    mass = 0
-    for token in sequence_tokenized:
-        mass += get_monoisotopic_token_weight(token)
-    return mass + MASS_WATER
-
-
-def get_mass_over_charge(mass: float, charge: int) -> float:
-    return (mass / charge) + MASS_PROTON
-
-def get_num_protonizable_sites(sequence: str) -> int:
-    """
-    Gets number of sites that can be protonized.
-    This function does not yet account for PTMs.
-
-    :param sequence: Amino acid sequence
-    :type sequence: str
-    :return: Number of protonizable sites
-    :rtype: int
-    """
-    sites = 1 # n-terminus
-    for s in sequence:
-        if s in ["H","R","K"]:
-            sites += 1
-    return sites
-
-
-def reduced_mobility_to_ccs(one_over_k0, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15):
-    """
-    convert reduced ion mobility (1/k0) to CCS
-    :param one_over_k0: reduced ion mobility
-    :param charge: charge state of the ion
-    :param mz: mass-over-charge of the ion
-    :param mass_gas: mass of drift gas
-    :param temp: temperature of the drift gas in C°
-    :param t_diff: factor to translate from C° to K
-    """
-    reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas)
-    return (CCS_K0_CONVERSION_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / one_over_k0)
-
-
-def ccs_to_one_over_reduced_mobility(ccs, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15):
-    """
-    convert CCS to 1 over reduced ion mobility (1/k0)
-    :param ccs: collision cross-section
-    :param charge: charge state of the ion
-    :param mz: mass-over-charge of the ion
-    :param mass_gas: mass of drift gas (N2)
-    :param temp: temperature of the drift gas in C°
-    :param t_diff: factor to translate from C° to K
-    """
-    reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas)
-    return  ((np.sqrt(reduced_mass * (temp + t_diff))) * ccs) / (CCS_K0_CONVERSION_CONSTANT * charge)
-
-
-class ChemicalCompound:
-
-    def _calculate_molecular_mass(self):
-        mass = 0
-        for (atom, abundance) in self.element_composition.items():
-            mass += me.element(atom).atomic_weight * abundance
-        return mass
-
-    def __init__(self, formula):
-        self.element_composition = self.get_composition(formula)
-        self.mass = self._calculate_molecular_mass()
-
-    def get_composition(self, formula:str):
-        """
-        Parse chemical formula into Dict[str:int] with
-        atoms as keys and the respective counts as values.
-
-        :param formula: Chemical formula of compound e.g. 'C6H12O6'
-        :type formula: str
-        :return: Dictionary Atom: Count
-        :rtype: Dict[str:int]
-        """
-        if formula.startswith("("):
-            assert formula.endswith(")")
-            formula = formula[1:-1]
-
-        tmp_group = ""
-        tmp_group_count = ""
-        depth = 0
-        comp_list = []
-        comp_counts = []
-
-        # extract components: everything in brackets and atoms
-        # extract component counts: number behind component or 1
-        for (i,e) in enumerate(formula):
-            if e == "(":
-                depth += 1
-                if depth == 1:
-                    if tmp_group != "":
-                        comp_list.append(tmp_group)
-                        tmp_group = ""
-                        if tmp_group_count == "":
-                            comp_counts.append(1)
-                        else:
-                            comp_counts.append(int(tmp_group_count))
-                            tmp_group_count = ""
-                tmp_group += e
-                continue
-            if e == ")":
-                depth -= 1
-                tmp_group += e
-                continue
-            if depth > 0:
-                tmp_group += e
-                continue
-            if e.isupper():
-                if tmp_group != "":
-                    comp_list.append(tmp_group)
-                    tmp_group = ""
-                    if tmp_group_count == "":
-                        comp_counts.append(1)
-                    else:
-                        comp_counts.append(int(tmp_group_count))
-                        tmp_group_count = ""
-                tmp_group += e
-                continue
-            if e.islower():
-                tmp_group += e
-                continue
-            if e.isnumeric():
-                tmp_group_count += e
-        if tmp_group != "":
-            comp_list.append(tmp_group)
-            if tmp_group_count == "":
-                comp_counts.append(1)
-            else:
-                comp_counts.append(int(tmp_group_count))
-
-        # assemble dictionary from component lists
-        atom_dict = {}
-        for (comp,count) in zip(comp_list,comp_counts):
-            if not comp.startswith("("):
-                atom_dict[comp] = count
-            else:
-                atom_dicts_depth = self.get_composition(comp)
-                for atom in atom_dicts_depth:
-                    atom_dicts_depth[atom] *= count
-                    if atom in atom_dict:
-                        atom_dict[atom] += atom_dicts_depth[atom]
-                    else:
-                        atom_dict[atom] = atom_dicts_depth[atom]
-                atom_dicts_depth = {}
-        return atom_dict
-
-class BufferGas(ChemicalCompound):
-
-    def __init__(self, formula: str):
-        super().__init__(formula)
-

From 939b3292ee2b85b42943805ecaea0c7cd0f46126 Mon Sep 17 00:00:00 2001
From: "Tim Maier (Ubuntu Desktop)" <tmaier01@students.uni-mainz.de>
Date: Thu, 9 Nov 2023 11:58:26 +0100
Subject: [PATCH 6/7] removed duplicated files

---
 imspy/imspy/frame.py    | 391 ---------------------------------------
 imspy/imspy/handle.py   | 157 ----------------
 imspy/imspy/slice.py    | 394 ----------------------------------------
 imspy/imspy/spectrum.py | 377 --------------------------------------
 4 files changed, 1319 deletions(-)
 delete mode 100644 imspy/imspy/frame.py
 delete mode 100644 imspy/imspy/handle.py
 delete mode 100644 imspy/imspy/slice.py
 delete mode 100644 imspy/imspy/spectrum.py

diff --git a/imspy/imspy/frame.py b/imspy/imspy/frame.py
deleted file mode 100644
index 659209b0..00000000
--- a/imspy/imspy/frame.py
+++ /dev/null
@@ -1,391 +0,0 @@
-import pandas as pd
-
-from typing import List
-from numpy.typing import NDArray
-
-from tensorflow import sparse as sp
-
-import numpy as np
-import imspy_connector as pims
-from imspy.spectrum import MzSpectrum, TimsSpectrum, IndexedMzSpectrum
-
-from imspy.utilities import re_index_indices
-
-
-class TimsFrame:
-    def __init__(self, frame_id: int, ms_type: int, retention_time: float, scan: NDArray[np.int32],
-                 mobility: NDArray[np.float64], tof: NDArray[np.int32],
-                 mz: NDArray[np.float64], intensity: NDArray[np.float64]):
-        """TimsFrame class.
-
-        Args:
-            frame_id (int): Frame ID.
-            ms_type (int): MS type.
-            retention_time (float): Retention time.
-            scan (NDArray[np.int32]): Scan.
-            mobility (NDArray[np.float64]): Inverse mobility.
-            tof (NDArray[np.int32]): Time of flight.
-            mz (NDArray[np.float64]): m/z.
-            intensity (NDArray[np.float64]): Intensity.
-
-        Raises:
-            AssertionError: If the length of the scan, mobility, tof, mz and intensity arrays are not equal.
-        """
-
-        assert len(scan) == len(mobility) == len(tof) == len(mz) == len(intensity), \
-            "The length of the scan, mobility, tof, mz and intensity arrays must be equal."
-
-        self.__frame_ptr = pims.PyTimsFrame(frame_id, ms_type, retention_time, scan, mobility, tof, mz, intensity)
-
-    @classmethod
-    def from_py_tims_frame(cls, frame: pims.PyTimsFrame):
-        """Create a TimsFrame from a PyTimsFrame.
-
-        Args:
-            frame (pims.PyTimsFrame): PyTimsFrame to create the TimsFrame from.
-
-        Returns:
-            TimsFrame: TimsFrame created from the PyTimsFrame.
-        """
-        instance = cls.__new__(cls)
-        instance.__frame_ptr = frame
-        return instance
-
-    @property
-    def frame_id(self) -> int:
-        """Frame ID.
-
-        Returns:
-            int: Frame ID.
-        """
-        return self.__frame_ptr.frame_id
-
-    @property
-    def ms_type(self) -> str:
-        """MS type.
-
-        Returns:
-            int: MS type.
-        """
-        return self.__frame_ptr.ms_type_as_string
-
-    @property
-    def retention_time(self) -> float:
-        """Retention time.
-
-        Returns:
-            float: Retention time.
-        """
-        return self.__frame_ptr.retention_time
-
-    @property
-    def scan(self) -> NDArray[np.int32]:
-        """Scan.
-
-        Returns:
-            NDArray[np.int32]: Scan.
-        """
-        return self.__frame_ptr.scan
-
-    @property
-    def mobility(self) -> NDArray[np.float64]:
-        """Inverse mobility.
-
-        Returns:
-            NDArray[np.float64]: Inverse mobility.
-        """
-        return self.__frame_ptr.mobility
-
-    @property
-    def tof(self) -> NDArray[np.int32]:
-        """Time of flight.
-
-        Returns:
-            NDArray[np.int32]: Time of flight.
-        """
-        return self.__frame_ptr.tof
-
-    @property
-    def mz(self) -> NDArray[np.float64]:
-        """m/z.
-
-        Returns:
-            NDArray[np.float64]: m/z.
-        """
-        return self.__frame_ptr.mz
-
-    @property
-    def intensity(self) -> NDArray[np.float64]:
-        """Intensity.
-
-        Returns:
-            NDArray[np.float64]: Intensity.
-        """
-        return self.__frame_ptr.intensity
-
-    @property
-    def df(self) -> pd.DataFrame:
-        """ Data as a pandas DataFrame.
-
-        Returns:
-            pd.DataFrame: Data.
-        """
-
-        return pd.DataFrame({
-            'frame': np.repeat(self.frame_id, len(self.scan)),
-            'retention_time': np.repeat(self.retention_time, len(self.scan)),
-            'scan': self.scan,
-            'mobility': self.mobility,
-            'tof': self.tof,
-            'mz': self.mz,
-            'intensity': self.intensity})
-
-    def filter(self,
-               mz_min: float = 0.0,
-               mz_max: float = 2000.0,
-               scan_min: int = 0,
-               scan_max: int = 1000,
-               mobility_min: float = 0.0,
-               mobility_max: float = 2.0,
-               intensity_min: float = 0.0,
-               intensity_max: float = 1e9,
-               ) -> 'TimsFrame':
-        """Filter the frame for a given m/z range, scan range and intensity range.
-
-        Args:
-            mz_min (float): Minimum m/z value.
-            mz_max (float): Maximum m/z value.
-            scan_min (int, optional): Minimum scan value. Defaults to 0.
-            scan_max (int, optional): Maximum scan value. Defaults to 1000.
-            mobility_min (float, optional): Minimum inverse mobility value. Defaults to 0.0.
-            mobility_max (float, optional): Maximum inverse mobility value. Defaults to 2.0.
-            intensity_min (float, optional): Minimum intensity value. Defaults to 0.0.
-            intensity_max (float, optional): Maximum intensity value. Defaults to 1e9.
-
-        Returns:
-            TimsFrame: Filtered frame.
-        """
-
-        return TimsFrame.from_py_tims_frame(self.__frame_ptr.filter_ranged(mz_min, mz_max, scan_min, scan_max, mobility_min, mobility_max,
-                                                                           intensity_min, intensity_max))
-
-    def to_indexed_mz_spectrum(self) -> 'IndexedMzSpectrum':
-        """Convert the frame to an IndexedMzSpectrum.
-
-        Returns:
-            IndexedMzSpectrum: IndexedMzSpectrum.
-        """
-        return IndexedMzSpectrum.from_py_indexed_mz_spectrum(self.__frame_ptr.to_indexed_mz_spectrum())
-
-    def to_resolution(self, resolution: int) -> 'TimsFrame':
-        """Convert the frame to a given resolution.
-
-        Args:
-            resolution (int): Resolution.
-
-        Returns:
-            TimsFrame: Frame with the given resolution.
-        """
-        return TimsFrame.from_py_tims_frame(self.__frame_ptr.to_resolution(resolution))
-
-    def vectorized(self, resolution: int = 2) -> 'TimsFrameVectorized':
-        """Convert the frame to a vectorized frame.
-
-        Args:
-            resolution (int, optional): Resolution. Defaults to 2.
-
-        Returns:
-            TimsFrameVectorized: Vectorized frame.
-        """
-        return TimsFrameVectorized.from_py_tims_frame_vectorized(self.__frame_ptr.vectorized(resolution))
-
-    def to_tims_spectra(self) -> List['TimsSpectrum']:
-        """Convert the frame to a list of TimsSpectrum.
-
-        Returns:
-            List[TimsSpectrum]: List of TimsSpectrum.
-        """
-        return [TimsSpectrum.from_py_tims_spectrum(spec) for spec in self.__frame_ptr.to_tims_spectra()]
-
-    def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5,
-                   min_intensity: float = 1) -> List[MzSpectrum]:
-        """Convert the frame to a list of windows.
-
-        Args:
-            window_length (float, optional): Window length. Defaults to 10.
-            overlapping (bool, optional): Whether the windows should overlap. Defaults to True.
-            min_num_peaks (int, optional): Minimum number of peaks in a window. Defaults to 5.
-            min_intensity (float, optional): Minimum intensity of a peak in a window. Defaults to 1.
-
-        Returns:
-            List[MzSpectrum]: List of windows.
-        """
-        return [MzSpectrum.from_py_mz_spectrum(spec) for spec in self.__frame_ptr.to_windows(
-            window_length, overlapping, min_num_peaks, min_intensity)]
-
-    def __repr__(self):
-        return (f"TimsFrame(frame_id={self.__frame_ptr.frame_id}, ms_type={self.__frame_ptr.ms_type}, "
-                f"num_peaks={len(self.__frame_ptr.mz)})")
-
-
-class TimsFrameVectorized:
-    def __init__(self, frame_id: int, ms_type: int, retention_time: float, scan: NDArray[np.int32],
-                 mobility: NDArray[np.float64], tof: NDArray[np.int32],
-                 indices: NDArray[np.int32], intensity: NDArray[np.float64]):
-        """TimsFrameVectorized class.
-
-        Args:
-            frame_id (int): Frame ID.
-            ms_type (int): MS type.
-            retention_time (float): Retention time.
-            scan (NDArray[np.int32]): Scan.
-            mobility (NDArray[np.float64]): Inverse mobility.
-            tof (NDArray[np.int32]): Time of flight.
-            indices (NDArray[np.int32]): Indices.
-            intensity (NDArray[np.float64]): Intensity.
-
-        Raises:
-            AssertionError: If the length of the scan, mobility, tof, indices and intensity arrays are not equal.
-        """
-
-        assert len(scan) == len(mobility) == len(tof) == len(indices) == len(intensity), \
-            "The length of the scan, mobility, tof, indices and intensity arrays must be equal."
-
-        self.__frame_ptr = pims.PyTimsFrameVectorized(frame_id, ms_type, retention_time, scan, mobility, tof, indices,
-                                                      intensity)
-
-    @classmethod
-    def from_py_tims_frame_vectorized(cls, frame: pims.PyTimsFrameVectorized):
-        """Create a TimsFrameVectorized from a PyTimsFrameVectorized.
-
-        Args:
-            frame (pims.PyTimsFrameVectorized): PyTimsFrameVectorized to create the TimsFrameVectorized from.
-
-        Returns:
-            TimsFrameVectorized: TimsFrameVectorized created from the PyTimsFrameVectorized.
-        """
-        instance = cls.__new__(cls)
-        instance.__frame_ptr = frame
-        return instance
-
-    @property
-    def frame_id(self) -> int:
-        """Frame ID.
-
-        Returns:
-            int: Frame ID.
-        """
-        return self.__frame_ptr.frame_id
-
-    @property
-    def ms_type(self) -> str:
-        """MS type.
-
-        Returns:
-            int: MS type.
-        """
-        return self.__frame_ptr.ms_type_as_string
-
-    @property
-    def retention_time(self) -> float:
-        """Retention time.
-
-        Returns:
-            float: Retention time.
-        """
-        return self.__frame_ptr.retention_time
-
-    @property
-    def scan(self) -> NDArray[np.int32]:
-        """Scan.
-
-        Returns:
-            NDArray[np.int32]: Scan.
-        """
-        return self.__frame_ptr.scan
-
-    @property
-    def mobility(self) -> NDArray[np.float64]:
-        """Inverse mobility.
-
-        Returns:
-            NDArray[np.float64]: Inverse mobility.
-        """
-        return self.__frame_ptr.mobility
-
-    @property
-    def tof(self) -> NDArray[np.int32]:
-        """Time of flight.
-
-        Returns:
-            NDArray[np.int32]: Time of flight.
-        """
-        return self.__frame_ptr.tof
-
-    @property
-    def indices(self) -> NDArray[np.int32]:
-        """Indices.
-
-        Returns:
-            NDArray[np.int32]: Indices.
-        """
-        return self.__frame_ptr.indices
-
-    @property
-    def intensity(self) -> NDArray[np.float64]:
-        """Intensity.
-
-        Returns:
-            NDArray[np.float64]: Intensity.
-        """
-        return self.__frame_ptr.values
-
-    @property
-    def df(self) -> pd.DataFrame:
-        """ Data as a pandas DataFrame.
-
-        Returns:
-            pd.DataFrame: Data.
-        """
-
-        return pd.DataFrame({
-            'frame': np.repeat(self.frame_id, len(self.scan)),
-            'retention_time': np.repeat(self.retention_time, len(self.scan)),
-            'scan': self.scan,
-            'mobility': self.mobility,
-            'tof': self.tof,
-            'indices': self.indices,
-            'intensity': self.intensity})
-
-    def __repr__(self):
-        return (f"TimsFrameVectorized(frame_id={self.__frame_ptr.frame_id}, ms_type={self.__frame_ptr.ms_type}, "
-                f"num_peaks={len(self.__frame_ptr.indices)})")
-
-    def get_tensor_repr(self, dense=True, zero_indexed=True, re_index=True, scan_max=None, index_max=None):
-        s = self.scan
-        f = self.indices
-        i = self.intensity
-
-        if zero_indexed:
-            s = s - np.min(s)
-            f = f - np.min(f)
-
-        if re_index:
-            f = re_index_indices(f)
-
-        if scan_max is None:
-            m_s = np.max(s) + 1
-        else:
-            m_s = scan_max + 1
-
-        if index_max is None:
-            m_f = np.max(f) + 1
-        else:
-            m_f = index_max + 1
-
-        sv = sp.reorder(sp.SparseTensor(indices=np.c_[s, f], values=i, dense_shape=(m_s, m_f)))
-
-        if dense:
-            return sp.to_dense(sv)
-        else:
-            return sv
diff --git a/imspy/imspy/handle.py b/imspy/imspy/handle.py
deleted file mode 100644
index 4aec5145..00000000
--- a/imspy/imspy/handle.py
+++ /dev/null
@@ -1,157 +0,0 @@
-from typing import List
-
-import numpy as np
-import pandas as pd
-import sqlite3
-from numpy.typing import NDArray
-
-import imspy_connector as pims
-import opentims_bruker_bridge as obb
-
-from abc import ABC
-
-<<<<<<<< HEAD:imspy/imspy/handle.py
-from imspy.frame import TimsFrame
-from imspy.slice import TimsSlice
-========
-from pyims.data import TimsFrame, TimsSlice
-
->>>>>>>> 1cd02c1 (transfer simulation framework from proteolizard):imspy/imspy/data/handle.py
-
-
-class TimsDataset(ABC):
-    def __init__(self, data_path: str):
-        """TimsDataHandle class.
-
-        Args:
-            data_path (str): Path to the data.
-        """
-        self.data_path = data_path
-        self.bp: List[str] = obb.get_so_paths()
-        self.meta_data = self.__load_meta_data()
-        self.precursor_frames = self.meta_data[self.meta_data["MsMsType"] == 0].Id.values.astype(np.int32)
-        self.fragment_frames = self.meta_data[self.meta_data["MsMsType"] > 0].Id.values.astype(np.int32)
-        self.__handle = None
-        self.__current_index = 1
-
-        # Try to load the data with the first binary found
-        appropriate_found = False
-        for so_path in self.bp:
-            try:
-                self.__handle = pims.PyTimsDataHandle(self.data_path, so_path)
-                appropriate_found = True
-                break
-            except Exception:
-                continue
-        assert appropriate_found is True, ("No appropriate bruker binary could be found, please check if your "
-                                           "operating system is supported by open-tims-bruker-bridge.")
-
-    @property
-    def acquisition_mode(self) -> str:
-        """Get the acquisition mode.
-
-        Returns:
-            str: Acquisition mode.
-        """
-        return self.__handle.get_acquisition_mode_as_string()
-
-    @property
-    def acquisition_mode_numerical(self) -> int:
-        """Get the acquisition mode as a numerical value.
-
-        Returns:
-            int: Acquisition mode as a numerical value.
-        """
-        return self.__handle.get_acquisition_mode()
-
-    @property
-    def frame_count(self) -> int:
-        """Get the number of frames.
-
-        Returns:
-            int: Number of frames.
-        """
-        return self.__handle.frame_count
-
-    def __load_meta_data(self) -> pd.DataFrame:
-        """Get the meta data.
-
-        Returns:
-            pd.DataFrame: Meta data.
-        """
-        return pd.read_sql_query("SELECT * from Frames", sqlite3.connect(self.data_path + "/analysis.tdf"))
-
-    def get_tims_frame(self, frame_id: int) -> TimsFrame:
-        """Get a TimsFrame.
-
-        Args:
-            frame_id (int): Frame ID.
-
-        Returns:
-            TimsFrame: TimsFrame.
-        """
-        return TimsFrame.from_py_tims_frame(self.__handle.get_tims_frame(frame_id))
-
-    def get_tims_slice(self, frame_ids: NDArray[np.int32]) -> TimsSlice:
-        """Get a TimsFrame.
-
-        Args:
-            frame_ids (int): Frame ID.
-
-        Returns:
-            TimsFrame: TimsFrame.
-        """
-        return TimsSlice.from_py_tims_slice(self.__handle.get_tims_slice(frame_ids))
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        if self.__current_index <= self.frame_count:
-            frame_ptr = self.__handle.get_tims_frame(self.__current_index)
-            self.__current_index += 1
-            if frame_ptr is not None:
-                return TimsFrame.from_py_tims_frame(frame_ptr)
-            else:
-                raise ValueError(f"Frame pointer is None for valid index: {self.__current_index}")
-        else:
-            self.__current_index = 1  # Reset for next iteration
-            raise StopIteration
-
-    def __getitem__(self, index):
-        if isinstance(index, slice):
-            return self.get_tims_slice(np.arange(index.start, index.stop, index.step).astype(np.int32))
-        return self.get_tims_frame(index)
-
-
-class TimsDatasetDDA(TimsDataset):
-    @property
-    def selected_precursors(self):
-        """Get precursors selected for fragmentation.
-
-        Returns:
-            pd.DataFrame: Precursors selected for fragmentation.
-        """
-        return pd.read_sql_query("SELECT * from Precursors", sqlite3.connect(self.data_path + "/analysis.tdf"))
-
-    @property
-    def pasef_meta_data(self):
-        """Get PASEF meta data for DDA.
-
-        Returns:
-            pd.DataFrame: PASEF meta data.
-        """
-        return pd.read_sql_query("SELECT * from PasefFrameMsMsInfo",
-                                 sqlite3.connect(self.data_path + "/analysis.tdf"))
-
-
-class TimsDatasetDIA(TimsDataset):
-    @property
-    def pasef_meta_data(self):
-        """Get PASEF meta data for DIA.
-
-        Returns:
-            pd.DataFrame: PASEF meta data.
-        """
-        return pd.read_sql_query("SELECT * from DiaFrameMsMsWindows",
-                                 sqlite3.connect(self.data_path + "/analysis.tdf"))
diff --git a/imspy/imspy/slice.py b/imspy/imspy/slice.py
deleted file mode 100644
index 056febaa..00000000
--- a/imspy/imspy/slice.py
+++ /dev/null
@@ -1,394 +0,0 @@
-import numpy as np
-import pandas as pd
-from typing import List
-
-from numpy.typing import NDArray
-from tensorflow import sparse as sp
-
-from imspy.utilities import re_index_indices
-
-<<<<<<<< HEAD:imspy/imspy/slice.py
-import imspy_connector as pims
-from imspy.frame import TimsFrame, TimsFrameVectorized
-from imspy.spectrum import MzSpectrum
-========
-import pyims_connector as pims
-from pyims.data.frame import TimsFrame, TimsFrameVectorized
-from pyims.data.spectrum import MzSpectrum
->>>>>>>> 1cd02c1 (transfer simulation framework from proteolizard):imspy/imspy/data/slice.py
-
-
-class TimsSlice:
-    def __init__(self,
-                 frame_id: NDArray[np.int32],
-                 scan: NDArray[np.int32],
-                 tof: NDArray[np.int32],
-                 retention_time: NDArray[np.float64],
-                 mobility: NDArray[np.float64],
-                 mz: NDArray[np.float64],
-                 intensity: NDArray[np.float64]):
-        """Create a TimsSlice.
-
-        Args:
-            frame_id (NDArray[np.int32]): Frame ID.
-            scan (NDArray[np.int32]): Scan.
-            tof (NDArray[np.int32]): TOF.
-            retention_time (NDArray[np.float64]): Retention time.
-            mobility (NDArray[np.float64]): Mobility.
-            mz (NDArray[np.float64]): m/z.
-            intensity (NDArray[np.float64]): Intensity.
-        """
-
-        assert len(frame_id) == len(scan) == len(tof) == len(retention_time) == len(mobility) == len(mz) == len(
-            intensity), "All arrays must have the same length."
-
-        self.__slice_ptr = pims.PyTimsSlice(
-            frame_id, scan, tof, retention_time, mobility, mz, intensity
-        )
-        self.__current_index = 0
-
-    @classmethod
-    def from_py_tims_slice(cls, tims_slice: pims.PyTimsSlice):
-        """Create a TimsSlice from a PyTimsSlice.
-
-        Args:
-            tims_slice (pims.PyTimsSlice): PyTimsSlice to create the TimsSlice from.
-
-        Returns:
-            TimsSlice: TimsSlice created from the PyTimsSlice.
-        """
-        instance = cls.__new__(cls)
-        instance.__slice_ptr = tims_slice
-        instance.__current_index = 0
-        return instance
-
-    @property
-    def first_frame_id(self) -> int:
-        """First frame ID.
-
-        Returns:
-            int: First frame ID.
-        """
-        return self.__slice_ptr.first_frame_id
-
-    @property
-    def last_frame_id(self) -> int:
-        """Last frame ID.
-
-        Returns:
-            int: Last frame ID.
-        """
-        return self.__slice_ptr.last_frame_id
-
-    def __repr__(self):
-        return f"TimsSlice({self.first_frame_id}, {self.last_frame_id})"
-
-    @property
-    def precursors(self):
-        return TimsSlice.from_py_tims_slice(self.__slice_ptr.get_precursors())
-
-    @property
-    def fragments(self):
-        return TimsSlice.from_py_tims_slice(self.__slice_ptr.get_fragments_dda())
-
-    def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, scan_min: int = 0, scan_max: int = 1000,
-               mobility_min: float = 0.0,
-               mobility_max: float = 2.0,
-               intensity_min: float = 0.0, intensity_max: float = 1e9, num_threads: int = 4) -> 'TimsSlice':
-        """Filter the slice by m/z, scan and intensity.
-
-        Args:
-            mz_min (float): Minimum m/z value.
-            mz_max (float): Maximum m/z value.
-            scan_min (int, optional): Minimum scan value. Defaults to 0.
-            scan_max (int, optional): Maximum scan value. Defaults to 1000.
-            mobility_min (float, optional): Minimum inverse mobility value. Defaults to 0.0.
-            mobility_max (float, optional): Maximum inverse mobility value. Defaults to 2.0.
-            intensity_min (float, optional): Minimum intensity value. Defaults to 0.0.
-            intensity_max (float, optional): Maximum intensity value. Defaults to 1e9.
-            num_threads (int, optional): Number of threads to use. Defaults to 4.
-
-        Returns:
-            TimsSlice: Filtered slice.
-        """
-        return TimsSlice.from_py_tims_slice(
-            self.__slice_ptr.filter_ranged(mz_min, mz_max, scan_min, scan_max, mobility_min, mobility_max,
-                                           intensity_min, intensity_max, num_threads))
-
-    @property
-    def frames(self) -> List[TimsFrame]:
-        """Get the frames.
-
-        Returns:
-            List[TimsFrame]: Frames.
-        """
-        return [TimsFrame.from_py_tims_frame(frame) for frame in self.__slice_ptr.get_frames()]
-
-    def to_resolution(self, resolution: int, num_threads: int = 4) -> 'TimsSlice':
-        """Convert the slice to a given resolution.
-
-        Args:
-            resolution (int): Resolution.
-            num_threads (int, optional): Number of threads to use. Defaults to 4.
-
-        Returns:
-            TimsSlice: Slice with given resolution.
-        """
-        return TimsSlice.from_py_tims_slice(self.__slice_ptr.to_resolution(resolution, num_threads))
-
-    def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5,
-                   min_intensity: float = 1, num_threads: int = 4) -> List[MzSpectrum]:
-        """Convert the slice to a list of windows.
-
-        Args:
-            window_length (float, optional): Window length. Defaults to 10.
-            overlapping (bool, optional): Whether the windows should overlap. Defaults to True.
-            min_num_peaks (int, optional): Minimum number of peaks in a window. Defaults to 5.
-            min_intensity (float, optional): Minimum intensity of a peak in a window. Defaults to 1.
-            num_threads (int, optional): Number of threads to use. Defaults to 1.
-
-        Returns:
-            List[MzSpectrum]: List of windows.
-        """
-        return [MzSpectrum.from_py_mz_spectrum(spec) for spec in self.__slice_ptr.to_windows(
-            window_length, overlapping, min_num_peaks, min_intensity, num_threads)]
-
-    @property
-    def df(self) -> pd.DataFrame:
-        """Get the data as a pandas DataFrame.
-
-        Returns:
-            pd.DataFrame: Data.
-        """
-        columns = ['frame', 'scan', 'tof', 'retention_time', 'mobility', 'mz', 'intensity']
-        return pd.DataFrame({c: v for c, v in zip(columns, self.__slice_ptr.to_arrays())})
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        if self.__current_index < self.__slice_ptr.frame_count:
-            frame_ptr = self.__slice_ptr.get_frame_at_index(self.__current_index)
-            self.__current_index += 1
-            if frame_ptr is not None:
-                return TimsFrame.from_py_tims_frame(frame_ptr)
-            else:
-                raise ValueError("Frame pointer is None for valid index.")
-        else:
-            self.__current_index = 0  # Reset for next iteration
-            raise StopIteration
-
-    def vectorized(self, resolution: int = 2, num_threads: int = 4) -> 'TimsSliceVectorized':
-        """Get a vectorized version of the slice.
-
-        Args:
-            resolution (int, optional): Resolution. Defaults to 2.
-            num_threads (int, optional): Number of threads to use. Defaults to 4.
-
-        Returns:
-            TimsSliceVectorized: Vectorized version of the slice.
-        """
-        return TimsSliceVectorized.from_vectorized_py_tims_slice(self.__slice_ptr.vectorized(resolution, num_threads))
-
-    def get_tims_planes(self, tof_max_value: int = 400_000, num_chunks: int = 7, num_threads: int = 4) -> List[
-        'TimsPlane']:
-        return [TimsPlane.from_py_tims_plane(plane) for plane in
-                self.__slice_ptr.to_tims_planes(tof_max_value, num_chunks, num_threads)]
-
-
-class TimsSliceVectorized:
-    def __init__(self):
-        self.__slice_ptr = None
-        self.__current_index = 0
-
-    @classmethod
-    def from_vectorized_py_tims_slice(cls, tims_slice: pims.PyTimsSliceVectorized):
-        """Create a TimsSlice from a PyTimsSlice.
-
-        Args:
-            tims_slice (pims.PyTimsSlice): PyTimsSlice to create the TimsSlice from.
-
-        Returns:
-            TimsSlice: TimsSlice created from the PyTimsSlice.
-        """
-        instance = cls.__new__(cls)
-        instance.__slice_ptr = tims_slice
-        instance.__current_index = 0
-        return instance
-
-    @property
-    def first_frame_id(self) -> int:
-        """First frame ID.
-
-        Returns:
-            int: First frame ID.
-        """
-        return self.__slice_ptr.first_frame_id
-
-    @property
-    def last_frame_id(self) -> int:
-        """Last frame ID.
-
-        Returns:
-            int: Last frame ID.
-        """
-        return self.__slice_ptr.last_frame_id
-
-    @property
-    def precursors(self):
-        return TimsSlice.from_py_tims_slice(self.__slice_ptr.get_precursors())
-
-    @property
-    def fragments(self):
-        return TimsSlice.from_py_tims_slice(self.__slice_ptr.get_fragments_dda())
-
-    @property
-    def frames(self) -> List[TimsFrameVectorized]:
-        """Get the frames.
-
-        Returns:
-            List[TimsFrame]: Frames.
-        """
-        return [TimsFrameVectorized.from_py_tims_frame_vectorized(frame) for frame in
-                self.__slice_ptr.get_vectorized_frames()]
-
-    @property
-    def df(self) -> pd.DataFrame:
-        """Get the data as a pandas DataFrame.
-
-        Returns:
-            pd.DataFrame: Data.
-        """
-        columns = ['frame', 'scan', 'tof', 'retention_time', 'mobility', 'index', 'intensity']
-        return pd.DataFrame({c: v for c, v in zip(columns, self.__slice_ptr.to_arrays())})
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        if self.__current_index < self.__slice_ptr.frame_count:
-            frame_ptr = self.__slice_ptr.get_frame_at_index(self.__current_index)
-            self.__current_index += 1
-            if frame_ptr is not None:
-                return TimsFrameVectorized.from_py_tims_frame_vectorized(frame_ptr)
-            else:
-                raise ValueError("Frame pointer is None for valid index.")
-        else:
-            self.__current_index = 0
-            raise StopIteration
-
-    def __repr__(self):
-        return f"TimsSliceVectorized({self.first_frame_id}, {self.last_frame_id})"
-
-    def get_tensor_repr(self, dense=True, zero_index=True, re_index=True, frame_max=None, scan_max=None,
-                        index_max=None):
-
-        frames, scans, _, _, _, indices, intensities = self.__slice_ptr.to_arrays()
-
-        if zero_index:
-            scans = scans - np.min(scans)
-            frames = frames - np.min(frames)
-            indices = indices - np.min(indices)
-
-        if re_index:
-            frames = re_index_indices(frames)
-
-        if scan_max is None:
-            m_s = np.max(scans) + 1
-        else:
-            m_s = scan_max + 1
-
-        if index_max is None:
-            m_i = np.max(indices) + 1
-        else:
-            m_i = index_max + 1
-
-        if frame_max is None:
-            m_f = np.max(frames) + 1
-        else:
-            m_f = frame_max + 1
-
-        sv = sp.reorder(
-            sp.SparseTensor(indices=np.c_[frames, scans, indices], values=intensities, dense_shape=(m_f, m_s, m_i)))
-
-        if dense:
-            return sp.to_dense(sv)
-        else:
-            return sv
-
-
-class TimsPlane:
-    def __init__(self):
-        self.__plane_ptr = None
-
-    @classmethod
-    def from_py_tims_plane(cls, plane: pims.PyTimsPlane):
-        """Create a TimsPlane from a PyTimsPlane.
-
-        Args:
-            plane (pims.PyTimsPlane): PyTimsPlane to create the TimsPlane from.
-
-        Returns:
-            TimsPlane: TimsPlane created from the PyTimsPlane.
-        """
-        instance = cls.__new__(cls)
-        instance.__plane_ptr = plane
-        return instance
-
-    @property
-    def mz_mean(self):
-        return self.__plane_ptr.mz_mean
-
-    @property
-    def mz_std(self):
-        return self.__plane_ptr.mz_std
-
-    @property
-    def tof_mean(self):
-        return self.__plane_ptr.tof_mean
-
-    @property
-    def tof_std(self):
-        return self.__plane_ptr.tof_std
-
-    @property
-    def frame_ids(self):
-        return self.__plane_ptr.frame_ids
-
-    @property
-    def scans(self):
-        return self.__plane_ptr.scans
-
-    @property
-    def intensities(self):
-        return self.__plane_ptr.intensity
-
-    @property
-    def retention_times(self):
-        return self.__plane_ptr.retention_times
-
-    @property
-    def mobilities(self):
-        return self.__plane_ptr.mobilities
-
-    @property
-    def num_points(self):
-        return len(self.frame_ids)
-
-    @property
-    def df(self):
-        return pd.DataFrame({
-            'frame': self.frame_ids,
-            'scan': self.scans,
-            'retention_time': self.retention_times,
-            'mobility': self.mobilities,
-            'intensity': self.intensities
-        })
-
-    def __repr__(self):
-        return (f"TimsPlane(mz_mean: "
-                f"{np.round(self.mz_mean, 4)}, "
-                f"mz_std: {np.round(self.mz_std, 4)},"
-                f" tof_mean: {np.round(self.tof_mean, 4)}, "
-                f"tof_std: {np.round(self.tof_std, 4)}, "
-                f"num_points: {len(self.frame_ids)})")
diff --git a/imspy/imspy/spectrum.py b/imspy/imspy/spectrum.py
deleted file mode 100644
index 80baa757..00000000
--- a/imspy/imspy/spectrum.py
+++ /dev/null
@@ -1,377 +0,0 @@
-import numpy as np
-from typing import List, Tuple
-
-import pandas as pd
-from numpy.typing import NDArray
-
-import imspy_connector as pims
-
-
-class IndexedMzSpectrum:
-    def __init__(self, index: NDArray[np.int32], mz: NDArray[np.float64], intensity: NDArray[np.float64]):
-        """IndexedMzSpectrum class.
-
-        Args:
-            index (NDArray[np.int32]): Index.
-            mz (NDArray[np.float64]): m/z.
-            intensity (NDArray[np.float64]): Intensity.
-
-        Raises:
-            AssertionError: If the length of the index, mz and intensity arrays are not equal.
-        """
-        assert len(index) == len(mz) == len(intensity), ("The length of the index, mz and intensity arrays must be "
-                                                         "equal.")
-        self.__spec_ptr = pims.PyIndexedMzSpectrum(index, mz, intensity)
-
-    @classmethod
-    def from_py_indexed_mz_spectrum(cls, spec: pims.PyIndexedMzSpectrum):
-        """Create a IndexedMzSpectrum from a PyIndexedMzSpectrum.
-
-        Args:
-            spec (pims.PyIndexedMzSpectrum): PyIndexedMzSpectrum to create the IndexedMzSpectrum from.
-
-        Returns:
-            IndexedMzSpectrum: IndexedMzSpectrum created from the PyIndexedMzSpectrum.
-        """
-        instance = cls.__new__(cls)
-        instance.__spec_ptr = spec
-        return instance
-
-    @property
-    def index(self) -> NDArray[np.int32]:
-        """Index.
-
-        Returns:
-            NDArray[np.int32]: Index.
-        """
-        return self.__spec_ptr.index
-
-    @property
-    def mz(self) -> NDArray[np.float64]:
-        """m/z.
-
-        Returns:
-            NDArray[np.float64]: m/z.
-        """
-        return self.__spec_ptr.mz
-
-    @property
-    def intensity(self) -> NDArray[np.float64]:
-        """Intensity.
-
-        Returns:
-            NDArray[np.float64]: Intensity.
-        """
-        return self.__spec_ptr.intensity
-
-    @property
-    def df(self) -> pd.DataFrame:
-        """Data.
-
-        Returns:
-            pd.DataFrame: Data.
-        """
-
-        return pd.DataFrame({'index': self.index, 'mz': self.mz, 'intensity': self.intensity})
-
-    def __repr__(self):
-        return f"IndexedMzSpectrum(num_peaks={len(self.index)})"
-
-
-class MzSpectrum:
-    def __init__(self, mz: NDArray[np.float64], intensity: NDArray[np.float64]):
-        """MzSpectrum class.
-
-        Args:
-            mz (NDArray[np.float64]): m/z.
-            intensity (NDArray[np.float64]): Intensity.
-
-        Raises:
-            AssertionError: If the length of the mz and intensity arrays are not equal.
-        """
-        assert len(mz) == len(intensity), "The length of the mz and intensity arrays must be equal."
-        self.__spec_ptr = pims.PyMzSpectrum(mz, intensity)
-
-    @property
-    def mz(self) -> NDArray[np.float64]:
-        """m/z.
-
-        Returns:
-            NDArray[np.float64]: m/z.
-        """
-        return self.__spec_ptr.mz
-
-    @property
-    def intensity(self) -> NDArray[np.float64]:
-        """Intensity.
-
-        Returns:
-            NDArray[np.float64]: Intensity.
-        """
-        return self.__spec_ptr.intensity
-
-    @property
-    def df(self) -> pd.DataFrame:
-        """Data.
-
-        Returns:
-            pd.DataFrame: Data.
-        """
-
-        return pd.DataFrame({'mz': self.mz, 'intensity': self.intensity})
-
-    @classmethod
-    def from_py_mz_spectrum(cls, spec: pims.PyMzSpectrum):
-        """Create a MzSpectrum from a PyMzSpectrum.
-
-        Args:
-            spec (pims.PyMzSpectrum): PyMzSpectrum to create the MzSpectrum from.
-
-        Returns:
-            MzSpectrum: MzSpectrum created from the PyMzSpectrum.
-        """
-        instance = cls.__new__(cls)
-        instance.__spec_ptr = spec
-        return instance
-
-    def __repr__(self):
-        return f"MzSpectrum(num_peaks={len(self.mz)})"
-
-    def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5,
-                   min_intensity: float = 1) -> Tuple[NDArray, List['MzSpectrum']]:
-        """Convert the spectrum to a list of windows.
-
-        Args:
-            window_length (float, optional): Window length. Defaults to 10.
-            overlapping (bool, optional): Whether the windows should overlap. Defaults to True.
-            min_num_peaks (int, optional): Minimum number of peaks in a window. Defaults to 5.
-            min_intensity (float, optional): Minimum intensity of a peak in a window. Defaults to 1.
-
-        Returns:
-            Tuple[NDArray, List[MzSpectrum]]: List of windows.
-        """
-
-        indices, windows = self.__spec_ptr.to_windows(window_length, overlapping, min_num_peaks, min_intensity)
-        return indices, [MzSpectrum.from_py_mz_spectrum(window) for window in windows]
-
-    def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: float = 0.0,
-               intensity_max: float = 1e9) -> 'MzSpectrum':
-        """Filter the spectrum for a given m/z range and intensity range.
-
-        Args:
-            mz_min (float): Minimum m/z value.
-            mz_max (float): Maximum m/z value.
-            intensity_min (float, optional): Minimum intensity value. Defaults to 0.0.
-            intensity_max (float, optional): Maximum intensity value. Defaults to 1e9.
-
-        Returns:
-            MzSpectrum: Filtered spectrum.
-        """
-        return MzSpectrum.from_py_mz_spectrum(
-            self.__spec_ptr.filter_ranged(mz_min, mz_max, intensity_min, intensity_max))
-
-    def vectorized(self, resolution: int = 2) -> 'MzSpectrumVectorized':
-        """Convert the spectrum to a vectorized spectrum.
-
-        Args:
-            resolution (int, optional): Resolution. Defaults to 2.
-
-        Returns:
-            MzSpectrumVectorized: Vectorized spectrum.
-        """
-        return MzSpectrumVectorized.from_py_mz_spectrum_vectorized(self.__spec_ptr.vectorized(resolution))
-
-
-class MzSpectrumVectorized:
-    def __init__(self, indices: NDArray[np.int32], values: NDArray[np.float64], resolution: int):
-        """MzSpectrum class.
-
-        Args:
-            mz (NDArray[np.float64]): m/z.
-            values (NDArray[np.float64]): Intensity.
-
-        Raises:
-            AssertionError: If the length of the mz and intensity arrays are not equal.
-        """
-        assert len(indices) == len(values), "The length of the mz and intensity arrays must be equal."
-        self.__spec_ptr = pims.PyMzSpectrumVectorized(indices, values, resolution)
-
-    @classmethod
-    def from_py_mz_spectrum_vectorized(cls, spec: pims.PyMzSpectrumVectorized):
-        """Create a MzSpectrum from a PyMzSpectrum.
-
-        Args:
-            spec (pims.PyMzSpectrum): PyMzSpectrum to create the MzSpectrum from.
-
-        Returns:
-            MzSpectrum: MzSpectrum created from the PyMzSpectrum.
-        """
-        instance = cls.__new__(cls)
-        instance.__spec_ptr = spec
-        return instance
-
-    @property
-    def resolution(self) -> float:
-        """Resolution.
-
-        Returns:
-            float: Resolution.
-        """
-        return self.__spec_ptr.resolution
-
-    @property
-    def indices(self) -> NDArray[np.int32]:
-        """m/z.
-
-        Returns:
-            NDArray[np.float64]: m/z.
-        """
-        return self.__spec_ptr.indices
-
-    @property
-    def values(self) -> NDArray[np.float64]:
-        """Intensity.
-
-        Returns:
-            NDArray[np.float64]: Intensity.
-        """
-        return self.__spec_ptr.values
-
-    def __repr__(self):
-        return f"MzSpectrumVectorized(num_values={len(self.values)})"
-
-
-class TimsSpectrum:
-    def __init__(self, frame_id: int, scan: int, retention_time: float, mobility: float, ms_type: int,
-                 index: NDArray[np.int32], mz: NDArray[np.float64], intensity: NDArray[np.float64]):
-        """TimsSpectrum class.
-
-        Args:
-            index (NDArray[np.int32]): Index.
-            mz (NDArray[np.float64]): m/z.
-            intensity (NDArray[np.float64]): Intensity.
-
-        Raises:
-            AssertionError: If the length of the index, mz and intensity arrays are not equal.
-        """
-        assert len(index) == len(mz) == len(intensity), ("The length of the index, mz and intensity arrays must be "
-                                                         "equal.")
-        self.__spec_ptr = pims.PyTimsSpectrum(frame_id, scan, retention_time, mobility, ms_type, index, mz, intensity)
-
-    @classmethod
-    def from_py_tims_spectrum(cls, spec: pims.PyTimsSpectrum):
-        """Create a TimsSpectrum from a PyTimsSpectrum.
-
-        Args:
-            spec (pims.PyTimsSpectrum): PyTimsSpectrum to create the TimsSpectrum from.
-
-        Returns:
-            TimsSpectrum: TimsSpectrum created from the PyTimsSpectrum.
-        """
-        instance = cls.__new__(cls)
-        instance.__spec_ptr = spec
-        return instance
-
-    @property
-    def index(self) -> NDArray[np.int32]:
-        """Index.
-
-        Returns:
-            NDArray[np.int32]: Index.
-        """
-        return self.__spec_ptr.index
-
-    @property
-    def mz(self) -> NDArray[np.float64]:
-        """m/z.
-
-        Returns:
-            NDArray[np.float64]: m/z.
-        """
-        return self.__spec_ptr.mz
-
-    @property
-    def intensity(self) -> NDArray[np.float64]:
-        """Intensity.
-
-        Returns:
-            NDArray[np.float64]: Intensity.
-        """
-        return self.__spec_ptr.intensity
-
-    @property
-    def ms_type(self) -> str:
-        """MS type.
-
-        Returns:
-            str: MS type.
-        """
-        return self.__spec_ptr.ms_type
-
-    @property
-    def mobility(self) -> float:
-        """Inverse mobility.
-
-        Returns:
-            float: Inverse mobility.
-        """
-        return self.__spec_ptr.mobility
-
-    @property
-    def scan(self) -> int:
-        """Scan.
-
-        Returns:
-            int: Scan.
-        """
-        return self.__spec_ptr.scan
-
-    @property
-    def retention_time(self) -> float:
-        """Retention time.
-
-        Returns:
-            float: Retention time.
-        """
-        return self.__spec_ptr.retention_time
-
-    @property
-    def frame_id(self) -> int:
-        """Frame ID.
-
-        Returns:
-            int: Frame ID.
-        """
-        return self.__spec_ptr.frame_id
-
-    @property
-    def mz_spectrum(self) -> MzSpectrum:
-        """Get the MzSpectrum.
-
-        Returns:
-            MzSpectrum: Spectrum.
-        """
-        return MzSpectrum.from_py_mz_spectrum(self.__spec_ptr.mz_spectrum)
-
-    @property
-    def df(self) -> pd.DataFrame:
-        """Data.
-        
-        Returns:
-            pd.DataFrame: Data.
-        """
-
-        return pd.DataFrame({
-            'frame': np.repeat(self.frame_id, len(self.index)),
-            'retention_time': np.repeat(self.retention_time, len(self.index)),
-            'scan': np.repeat(self.scan, len(self.index)),
-            'mobility': np.repeat(self.mobility, len(self.index)),
-            'tof': self.index,
-            'mz': self.mz,
-            'intensity': self.intensity
-        })
-
-    def __repr__(self):
-        return (f"TimsSpectrum(id={self.frame_id}, retention_time={np.round(self.retention_time, 2)}, "
-                f"scan={self.scan}, mobility={np.round(self.mobility, 2)}, ms_type={self.ms_type}, "
-                f"num_peaks={len(self.index)})")

From 389396e49b4ca378c7c584f9a2ae1289baac0769 Mon Sep 17 00:00:00 2001
From: "Tim Maier (Ubuntu Desktop)" <tmaier01@students.uni-mainz.de>
Date: Mon, 13 Nov 2023 16:08:58 +0100
Subject: [PATCH 7/7] super slow centroiding simulation

---
 .../simulation/run_example_simulation.py      |  8 ++---
 imspy/imspy/data/spectrum.py                  | 36 +++++++++++++++++--
 imspy/imspy/simulation/experiment.py          |  2 +-
 imspy_connector/src/py_mz_spectrum.rs         |  4 +++
 mscore/src/mz_spectrum.rs                     | 29 ++++++++++++---
 5 files changed, 68 insertions(+), 11 deletions(-)

diff --git a/imspy/examples/simulation/run_example_simulation.py b/imspy/examples/simulation/run_example_simulation.py
index 1df9cef0..a77a2d7c 100644
--- a/imspy/examples/simulation/run_example_simulation.py
+++ b/imspy/examples/simulation/run_example_simulation.py
@@ -1,5 +1,5 @@
-from pyims.simulation.experiment import LcImsMsMs
-from pyims.simulation.hardware_models import (NeuralChromatographyApex, 
+from imspy.simulation.experiment import LcImsMsMs
+from imspy.simulation.hardware_models import (NeuralChromatographyApex, 
                                               NormalChromatographyProfileModel, 
                                               LiquidChromatography, 
                                               ElectroSpray,
@@ -10,8 +10,8 @@
                                               AveragineModel,
                                               BinomialIonSource
                                                 )
-from pyims.proteome import ProteinSample, Trypsin, ORGANISM
-from pyims.chemistry import BufferGas
+from imspy.proteome import ProteinSample, Trypsin, ORGANISM
+from imspy.chemistry import BufferGas
 
 import pandas as pd
 import numpy as np
diff --git a/imspy/imspy/data/spectrum.py b/imspy/imspy/data/spectrum.py
index 2bb3a4c8..dcd29bd5 100644
--- a/imspy/imspy/data/spectrum.py
+++ b/imspy/imspy/data/spectrum.py
@@ -1,13 +1,29 @@
 from __future__ import annotations
 import json
 import numpy as np
-from typing import List, Tuple
+from typing import List, Tuple, Callable
 import pandas as pd
 from numpy.typing import NDArray
-
+from scipy.signal import find_peaks
 import imspy_connector as pims
 
 
+def get_peak_integral(peaks: NDArray[np.int32], peak_info: dict) -> NDArray[np.float64]:
+    """Calculates the integral of the peaks in a spectrum.
+
+    Args:
+        peaks (NDArray[np.int32]): Peak indices.
+        peak_info (dict): Peak info.
+
+    Returns:
+        NDArray[np.float64]: Peak integrals.
+    """
+    integrals = np.zeros(len(peaks), dtype=np.float64)
+    FWHM = peak_info['widths']
+    h = peak_info['prominences']
+    integrals = np.sqrt(2*np.pi) * h * FWHM / (2*np.sqrt(2*np.log(2)))
+    return integrals
+
 class IndexedMzSpectrum:
     def __init__(self, index: NDArray[np.int32], mz: NDArray[np.float64], intensity: NDArray[np.float64]):
         """IndexedMzSpectrum class.
@@ -293,6 +309,22 @@ def values(self) -> NDArray[np.float64]:
         """
         return self.__spec_ptr.values
 
+    def to_centroided(self, integrate_method: Callable = get_peak_integral) -> MzSpectrum:
+        """Convert the spectrum to a centroided spectrum.
+
+        Returns:
+            MzSpectrum: Centroided spectrum.
+        """
+        # first generate dense spectrum
+        dense_spectrum = MzSpectrumVectorized.from_py_mz_spectrum_vectorized(self.__spec_ptr.to_dense_spectrum(None))
+        # find peaks in the dense spectrum and widths with scipy
+        peaks, peak_info = find_peaks(dense_spectrum.values, height=0, width=(0,0.5))
+        # then get the peak integrals
+        integrals = integrate_method(peaks, peak_info)
+        # then create a new spectrum with the peak indices and the integrals
+        return MzSpectrum.from_py_mz_spectrum(pims.PyMzSpectrum(dense_spectrum.indices[peaks]/np.power(10,dense_spectrum.resolution), integrals))
+        
+        
     def __repr__(self):
         return f"MzSpectrumVectorized(num_values={len(self.values)})"
 
diff --git a/imspy/imspy/simulation/experiment.py b/imspy/imspy/simulation/experiment.py
index 0f53cd20..663f7efe 100644
--- a/imspy/imspy/simulation/experiment.py
+++ b/imspy/imspy/simulation/experiment.py
@@ -167,7 +167,7 @@ def _assemble_frame_range(frame_range, scan_id_min, scan_id_max, default_abundan
             for (s_id,scan_spectra_list) in frame_dict.items():
 
                 if len(scan_spectra_list) > 0:
-                    scan_spectrum = MzSpectrum.from_mz_spectra_list(scan_spectra_list,resolution = resolution)
+                    scan_spectrum = MzSpectrum.from_mz_spectra_list(scan_spectra_list,resolution = resolution).vectorized(resolution=resolution).to_centroided()
                     output_dict["mz"].append(scan_spectrum.mz.tolist())
                     output_dict["intensity"].append(scan_spectrum.intensity.tolist())
                     output_dict["scan_id"].append(s_id)
diff --git a/imspy_connector/src/py_mz_spectrum.rs b/imspy_connector/src/py_mz_spectrum.rs
index 0bfb1020..5c90f626 100644
--- a/imspy_connector/src/py_mz_spectrum.rs
+++ b/imspy_connector/src/py_mz_spectrum.rs
@@ -133,6 +133,10 @@ impl PyMzSpectrumVectorized {
         })
     }
 
+    pub fn to_dense_spectrum(&self, max_index: Option<usize>) -> PyMzSpectrumVectorized {
+        PyMzSpectrumVectorized { inner: self.inner.to_dense_spectrum(max_index) }
+    }
+    
     #[getter]
     pub fn resolution(&self) -> i32 {
         self.inner.resolution
diff --git a/mscore/src/mz_spectrum.rs b/mscore/src/mz_spectrum.rs
index ef40e72a..9307a097 100644
--- a/mscore/src/mz_spectrum.rs
+++ b/mscore/src/mz_spectrum.rs
@@ -487,15 +487,36 @@ impl MzSpectrumVectorized {
     /// # Arguments
     ///
     /// * `max_index` - The maximum index for the dense vector.
-    pub fn to_dense(&self, max_index: usize) -> DVector<f64> {
-        let mut dense = DVector::zeros(max_index + 1);
+    
+    fn get_max_index(&self) -> usize {
+        let base: i32 = 10;
+        let max_mz: i32 = 2000;
+        let max_index: usize = (max_mz*base.pow(self.resolution as u32)) as usize;
+        max_index
+    }
 
+    pub fn to_dense(&self, max_index: Option<usize>) -> DVector<f64> {
+        let max_index = match max_index {
+            Some(max_index) => max_index,
+            None => self.get_max_index(),
+        };
+        let mut dense_intensities: DVector<f64> = DVector::<f64>::zeros(max_index + 1);
         for (&index, &intensity) in self.indices.iter().zip(self.values.iter()) {
             if (index as usize) <= max_index {
-                dense[index as usize] = intensity;
+                dense_intensities[index as usize] = intensity;
             }
         }
-        dense
+        dense_intensities
+    }
+    pub fn to_dense_spectrum(&self, max_index: Option<usize>) -> MzSpectrumVectorized{
+        let max_index = match max_index {
+            Some(max_index) => max_index,
+            None => self.get_max_index(),
+        };
+        let dense_intensities: Vec<f64> = self.to_dense(Some(max_index)).data.into();
+        let dense_indices: Vec<i32> = (0..=max_index).map(|i| i as i32).collect();
+        let dense_spectrum: MzSpectrumVectorized = MzSpectrumVectorized { resolution: (self.resolution), indices: (dense_indices), values: (dense_intensities) };
+        dense_spectrum
     }
 }