fixing MOAMC
theGreatHerrLebert committed Nov 27, 2023
2 parents 389396e + 98d170d commit 7389121
Showing 77 changed files with 2,432 additions and 422 deletions.
62 changes: 62 additions & 0 deletions .github/workflows/imspy-connector-publish.yml
@@ -0,0 +1,62 @@
name: Build and Publish Rust Binding

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  build-and-publish:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest, macos-13]
        python-version: ['3.11']
        include:
          - os: ubuntu-latest
            python-version: '3.11'
            publish: true
          - os: windows-latest
            python-version: '3.11'
            publish: true
          - os: macos-13
            python-version: '3.11'
            publish: true

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Maturin
        run: |
          python -m pip install --upgrade pip
          pip install maturin

      - name: Set up Rust
        uses: actions-rs/toolchain@v1
        with:
          profile: minimal
          toolchain: stable
          override: true

      - name: Change to imspy-connector directory
        run: cd imspy_connector

      - name: Build with Maturin
        run: |
          cd imspy_connector
          maturin build --release

      - name: Publish
        if: matrix.publish
        env:
          MATURIN_PYPI_TOKEN: ${{ secrets.IMSPY_CONNECTOR_PYPI_API_TOKEN }}
        run: |
          cd imspy_connector
          maturin publish --no-sdist
48 changes: 48 additions & 0 deletions .github/workflows/imspy-publish.yml
@@ -0,0 +1,48 @@
name: Build and Publish Python Package

on:
  release:
    types: [published]

permissions:
  contents: read

jobs:
  deploy:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ['3.11']
        include:
          - python-version: '3.11'
            publish: true

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v3
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install Poetry
        run: |
          python -m pip install --upgrade pip
          pip install poetry

      - name: Change to imspy directory
        run: cd imspy

      - name: Build package
        run: |
          cd imspy
          poetry build

      - name: Publish package
        if: matrix.publish
        env:
          POETRY_PYPI_TOKEN_PYPI: ${{ secrets.IMSPY_PYPI_API_TOKEN }}
        run: |
          cd imspy
          poetry config http-basic.pypi __token__ $POETRY_PYPI_TOKEN_PYPI
          poetry publish
19 changes: 8 additions & 11 deletions imspy/examples/simulation/run_example_simulation.py
@@ -9,16 +9,18 @@
    NormalIonMobilityProfileModel,
    AveragineModel,
    BinomialIonSource
)
)
from imspy.proteome import ProteinSample, Trypsin, ORGANISM
from imspy.chemistry import BufferGas
from imspy.chemistry.mass import BufferGas

import pandas as pd
import numpy as np


def irt_to_rt(irt):
    return irt


def scan_im_interval(scan_id):
    intercept = 1451.357
    slope = -877.361
@@ -27,13 +29,15 @@ def scan_im_interval(scan_id):
    upper = ((scan_id+1) - intercept ) / slope
    return np.stack([1/lower, 1/upper], axis=1)


def im_to_scan(reduced_ion_mobility):
    intercept = 1451.357
    slope = -877.361
    # TODO more appropriate function here ?
    one_over_k0 = 1/reduced_ion_mobility
    return np.round(one_over_k0 * slope + intercept).astype(np.int16)


def build_experiment():
    t = LcImsMsMs("./timstofexp1_binomial_ion_source_21_7/") # maybe rather call this class LCIMSMSExperiment

@@ -60,10 +64,6 @@ def build_experiment():
    t.lc_method.profile_model = NormalChromatographyProfileModel()
    t.lc_method.irt_to_rt_converter = irt_to_rt




    im_model_weights = "/home/tim/Workspaces/ionmob/pretrained-models/GRUPredictor"
    t.ion_mobility_separation_method.apex_model = NeuralIonMobilityApex(im_model_weights, tokenizer_path = tokenizer_path)

@@ -74,10 +74,8 @@

    t.ionization_method.ionization_model = BinomialIonSource()


    t.mz_separation_method.model = AveragineModel()


    rng = np.random.default_rng(2023)
    # read proteome
    proteome = pd.read_feather('/home/tim/Workspaces/Resources/Homo-sapiens-proteome.feather')
@@ -87,18 +85,17 @@ def build_experiment():
    sample = ProteinSample(proteome, ORGANISM.HOMO_SAPIENS)
    sample_digest = sample.digest(Trypsin())


    # to reduce computational load in example
    sample_digest.data = sample_digest.data.sample(100, random_state= rng)


    t.load_sample(sample_digest)
    return t

if __name__ == "__main__":

if __name__ == "__main__":

    t = build_experiment()

    #cProfile.run("t.run(10000)", filename="profiler_10000_8_process",sort="cumtime")
    t.run(100,frames_per_assemble_process=10)
    t.run(100, frames_per_assemble_process=10)
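
Note on the scan helpers above: the example maps scan index and reduced ion mobility linearly in 1/K0 with the hard-coded intercept 1451.357 and slope -877.361. Below is a minimal, self-contained round-trip sketch of that conversion; the constants are copied from the diff, while the input mobility values are made up for illustration and are not part of the commit.

import numpy as np

INTERCEPT = 1451.357
SLOPE = -877.361

def im_to_scan(reduced_ion_mobility):
    # scan index is linear in 1/K0: scan = slope * (1/K0) + intercept
    one_over_k0 = 1 / np.asarray(reduced_ion_mobility)
    return np.round(one_over_k0 * SLOPE + INTERCEPT).astype(np.int16)

def scan_im_interval(scan_id):
    # invert the mapping over [scan_id, scan_id + 1) and convert back to K0
    lower = (np.asarray(scan_id) - INTERCEPT) / SLOPE
    upper = ((np.asarray(scan_id) + 1) - INTERCEPT) / SLOPE
    return np.stack([1 / lower, 1 / upper], axis=1)

mobilities = np.array([0.8, 1.0, 1.2])
scans = im_to_scan(mobilities)      # roughly [355 574 720]
print(scan_im_interval(scans))      # K0 intervals bracketing the input mobilities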
2 changes: 2 additions & 0 deletions imspy/imspy/__init__.py
@@ -0,0 +1,2 @@
from imspy.core.spectrum import TimsSpectrum, MzSpectrum
from imspy.timstof.data import TimsDataset
1 change: 1 addition & 0 deletions imspy/imspy/algorithm/__init__.py
@@ -0,0 +1 @@
from .mixture import GaussianMixtureModel
Empty file.
170 changes: 170 additions & 0 deletions imspy/imspy/algorithm/ccs/predictors.py
@@ -0,0 +1,170 @@
import numpy as np
import tensorflow as tf
from abc import ABC, abstractmethod
from numpy.typing import NDArray
from imspy.chemistry import ccs_to_one_over_k0
from scipy.optimize import curve_fit
from imspy.utility import tokenize_unimod_sequence
from imspy.algorithm.utilities import get_model_path


def load_deep_ccs_predictor() -> tf.keras.models.Model:
    """ Get a pretrained deep predictor model
    Returns:
        The pretrained deep predictor model
    """
    return tf.keras.models.load_model(get_model_path('DeepCCSPredictor'))


class PeptideIonMobilityApex(ABC):
    """
    ABSTRACT INTERFACE for simulation of ion-mobility apex value
    """

    def __init__(self):
        pass

    @abstractmethod
    def simulate_ion_mobilities(self, sequences: list[str], charges: list[int]) -> NDArray:
        pass


def get_sqrt_slopes_and_intercepts(mz: np.ndarray, charge: np.ndarray,
                                   ccs: np.ndarray, fit_charge_state_one: bool = False) -> (np.ndarray, np.ndarray):
    """
    Args:
        mz:
        charge:
        ccs:
        fit_charge_state_one:
    Returns:
    """

    if fit_charge_state_one:
        slopes, intercepts = [], []
    else:
        slopes, intercepts = [0.0], [0.0]

    if fit_charge_state_one:
        c_begin = 1
    else:
        c_begin = 2

    for c in range(c_begin, 5):
        def fit_func(x, a, b):
            return a * np.sqrt(x) + b

        triples = list(filter(lambda x: x[1] == c, zip(mz, charge, ccs)))

        mz_tmp, charge_tmp = np.array([x[0] for x in triples]), np.array([x[1] for x in triples])
        ccs_tmp = np.array([x[2] for x in triples])

        popt, _ = curve_fit(fit_func, mz_tmp, ccs_tmp)

        slopes.append(popt[0])
        intercepts.append(popt[1])

    return np.array(slopes, np.float32), np.array(intercepts, np.float32)


class ProjectToInitialSqrtCCS(tf.keras.layers.Layer):
    """
    Simple sqrt regression layer, calculates ccs value as linear mapping from mz, charge -> ccs
    """

    def __init__(self, slopes, intercepts):
        super(ProjectToInitialSqrtCCS, self).__init__()
        self.slopes = tf.constant([slopes])
        self.intercepts = tf.constant([intercepts])

    def call(self, inputs):
        mz, charge = inputs[0], inputs[1]
        # since charge is one-hot encoded, can use it to gate linear prediction by charge state
        return tf.expand_dims(tf.reduce_sum((self.slopes * tf.sqrt(mz) + self.intercepts) * tf.squeeze(charge), axis=1),
                              1)


class GRUCCSPredictor(tf.keras.models.Model):
    """
    Deep Learning model combining initial linear fit with sequence based features, both scalar and complex
    """

    def __init__(self, slopes, intercepts, num_tokens,
                 seq_len=50,
                 emb_dim=128,
                 gru_1=128,
                 gru_2=64,
                 rdo=0.0,
                 do=0.2):
        super(GRUCCSPredictor, self).__init__()
        self.__seq_len = seq_len

        self.initial = ProjectToInitialSqrtCCS(slopes, intercepts)

        self.emb = tf.keras.layers.Embedding(input_dim=num_tokens + 1, output_dim=emb_dim, input_length=seq_len)

        self.gru1 = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_1, return_sequences=True,
                                                                      name='GRU1'))

        self.gru2 = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_2, return_sequences=False,
                                                                      name='GRU2',
                                                                      recurrent_dropout=rdo))

        self.dense1 = tf.keras.layers.Dense(128, activation='relu',
                                            kernel_regularizer=tf.keras.regularizers.l1_l2(1e-3, 1e-3))
        self.dense2 = tf.keras.layers.Dense(64, activation='relu',
                                            kernel_regularizer=tf.keras.regularizers.l1_l2(1e-3, 1e-3))

        self.dropout = tf.keras.layers.Dropout(do)

        self.out = tf.keras.layers.Dense(1, activation=None)

    def call(self, inputs):
        """
        :param inputs: should contain: (mz, charge_one_hot, seq_as_token_indices)
        """
        # get inputs
        mz, charge, seq = inputs[0], inputs[1], inputs[2]
        # sequence learning
        x_recurrent = self.gru2(self.gru1(self.emb(seq)))
        # concat to feed to dense layers
        concat = tf.keras.layers.Concatenate()([charge, x_recurrent])
        # regularize
        d1 = self.dropout(self.dense1(concat))
        d2 = self.dense2(d1)
        # combine simple linear hypotheses with deep part
        return self.initial([mz, charge]) + self.out(d2), self.out(d2)


class DeepPeptideIonMobilityApex(PeptideIonMobilityApex):
    def __init__(self, model: GRUCCSPredictor, tokenizer: tf.keras.preprocessing.text.Tokenizer):
        super(DeepPeptideIonMobilityApex, self).__init__()
        self.model = model
        self.tokenizer = tokenizer

    def _preprocess_sequences(self, sequences: list[str], pad_len: int = 50) -> NDArray:
        char_tokens = [tokenize_unimod_sequence(sequence) for sequence in sequences]
        char_tokens = self.tokenizer.texts_to_sequences(char_tokens)
        char_tokens = tf.keras.preprocessing.sequence.pad_sequences(char_tokens, pad_len, padding='post')
        return char_tokens

    def simulate_ion_mobilities(self,
                                sequences: list[str],
                                charges: list[int],
                                mz: list[float],
                                verbose: bool = False,
                                batch_size: int = 1024) -> NDArray:
        tokenized_sequences = self._preprocess_sequences(sequences)

        # prepare masses, charges, sequences
        m = np.expand_dims(mz, 1)
        charges_one_hot = tf.one_hot(np.array(charges) - 1, 4)

        ds = tf.data.Dataset.from_tensor_slices(((m, charges_one_hot, tokenized_sequences), np.zeros_like(mz))).batch(batch_size)
        ccs, _ = self.model.predict(ds, verbose=verbose)

        return np.array([ccs_to_one_over_k0(c, m, z) for c, m, z in zip(ccs, mz, charges)])
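
The new module loads a pretrained GRU-based CCS model (load_deep_ccs_predictor) and wraps it in DeepPeptideIonMobilityApex, which tokenizes UNIMOD sequences, predicts CCS, and converts the result to 1/K0. A rough usage sketch follows; the tokenizer file name, the way it is loaded, and the peptide sequences and m/z values are illustrative assumptions, not part of this commit.

import numpy as np
import tensorflow as tf

from imspy.algorithm.ccs.predictors import (
    DeepPeptideIonMobilityApex,
    load_deep_ccs_predictor,
)

# assumption: a fitted Keras tokenizer for the UNIMOD token vocabulary, saved as JSON
with open("unimod_tokenizer.json") as fh:
    tokenizer = tf.keras.preprocessing.text.tokenizer_from_json(fh.read())

predictor = DeepPeptideIonMobilityApex(load_deep_ccs_predictor(), tokenizer)

sequences = ["PEPTIDEK", "ELVISLIVESK"]   # placeholder peptides
charges = [2, 2]
mz = [435.23, 621.36]                     # placeholder precursor m/z values

# one 1/K0 value per peptide ion; the CCS -> 1/K0 conversion happens internally
one_over_k0 = predictor.simulate_ion_mobilities(sequences, charges, mz)
print(np.squeeze(one_over_k0))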