diff --git a/.github/workflows/imspy-connector-publish.yml b/.github/workflows/imspy-connector-publish.yml new file mode 100644 index 00000000..1f928158 --- /dev/null +++ b/.github/workflows/imspy-connector-publish.yml @@ -0,0 +1,62 @@ +name: Build and Publish Rust Binding + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + build-and-publish: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest, macos-13] + python-version: ['3.11'] + include: + - os: ubuntu-latest + python-version: '3.11' + publish: true + - os: windows-latest + python-version: '3.11' + publish: true + - os: macos-13 + python-version: '3.11' + publish: true + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Maturin + run: | + python -m pip install --upgrade pip + pip install maturin + + - name: Set up Rust + uses: actions-rs/toolchain@v1 + with: + profile: minimal + toolchain: stable + override: true + + - name: Change to imspy-connector directory + run: cd imspy_connector + + - name: Build with Maturin + run: | + cd imspy_connector + maturin build --release + + - name: Publish + if: matrix.publish + env: + MATURIN_PYPI_TOKEN: ${{ secrets.IMSPY_CONNECTOR_PYPI_API_TOKEN }} + run: | + cd imspy_connector + maturin publish --no-sdist diff --git a/.github/workflows/imspy-publish.yml b/.github/workflows/imspy-publish.yml new file mode 100644 index 00000000..6698472d --- /dev/null +++ b/.github/workflows/imspy-publish.yml @@ -0,0 +1,48 @@ +name: Build and Publish Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.11'] + include: + - python-version: '3.11' + publish: true + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Poetry + run: | + python -m pip install --upgrade pip + pip install poetry + + - name: Change to imspy directory + run: cd imspy + + - name: Build package + run: | + cd imspy + poetry build + + - name: Publish package + if: matrix.publish + env: + POETRY_PYPI_TOKEN_PYPI: ${{ secrets.IMSPY_PYPI_API_TOKEN }} + run: | + cd imspy + poetry config http-basic.pypi __token__ $POETRY_PYPI_TOKEN_PYPI + poetry publish diff --git a/imspy/examples/simulation/run_example_simulation.py b/imspy/examples/simulation/run_example_simulation.py index a77a2d7c..640b876e 100644 --- a/imspy/examples/simulation/run_example_simulation.py +++ b/imspy/examples/simulation/run_example_simulation.py @@ -9,16 +9,18 @@ NormalIonMobilityProfileModel, AveragineModel, BinomialIonSource - ) + ) from imspy.proteome import ProteinSample, Trypsin, ORGANISM -from imspy.chemistry import BufferGas +from imspy.chemistry.mass import BufferGas import pandas as pd import numpy as np + def irt_to_rt(irt): return irt + def scan_im_interval(scan_id): intercept = 1451.357 slope = -877.361 @@ -27,6 +29,7 @@ def scan_im_interval(scan_id): upper = ((scan_id+1) - intercept ) / slope return np.stack([1/lower, 1/upper], axis=1) + def im_to_scan(reduced_ion_mobility): intercept = 1451.357 slope = -877.361 @@ -34,6 +37,7 @@ def im_to_scan(reduced_ion_mobility): one_over_k0 = 1/reduced_ion_mobility return np.round(one_over_k0 * slope + intercept).astype(np.int16) + def build_experiment(): t = LcImsMsMs("./timstofexp1_binomial_ion_source_21_7/") # maybe rather call this class LCIMSMSExperiment @@ -60,10 +64,6 @@ def build_experiment(): t.lc_method.profile_model = NormalChromatographyProfileModel() t.lc_method.irt_to_rt_converter = irt_to_rt - - - - im_model_weights = "/home/tim/Workspaces/ionmob/pretrained-models/GRUPredictor" t.ion_mobility_separation_method.apex_model = NeuralIonMobilityApex(im_model_weights, tokenizer_path = tokenizer_path) @@ -74,10 +74,8 @@ def build_experiment(): t.ionization_method.ionization_model = BinomialIonSource() - t.mz_separation_method.model = AveragineModel() - rng = np.random.default_rng(2023) # read proteome proteome = pd.read_feather('/home/tim/Workspaces/Resources/Homo-sapiens-proteome.feather') @@ -87,7 +85,6 @@ def build_experiment(): sample = ProteinSample(proteome, ORGANISM.HOMO_SAPIENS) sample_digest = sample.digest(Trypsin()) - # to reduce computational load in example sample_digest.data = sample_digest.data.sample(100, random_state= rng) @@ -95,10 +92,10 @@ def build_experiment(): t.load_sample(sample_digest) return t -if __name__ == "__main__": +if __name__ == "__main__": t = build_experiment() #cProfile.run("t.run(10000)", filename="profiler_10000_8_process",sort="cumtime") - t.run(100,frames_per_assemble_process=10) \ No newline at end of file + t.run(100, frames_per_assemble_process=10) \ No newline at end of file diff --git a/imspy/imspy/__init__.py b/imspy/imspy/__init__.py index e69de29b..d3bbd075 100644 --- a/imspy/imspy/__init__.py +++ b/imspy/imspy/__init__.py @@ -0,0 +1,2 @@ +from imspy.core.spectrum import TimsSpectrum, MzSpectrum +from imspy.timstof.data import TimsDataset diff --git a/imspy/imspy/algorithm/__init__.py b/imspy/imspy/algorithm/__init__.py new file mode 100644 index 00000000..f9431f4a --- /dev/null +++ b/imspy/imspy/algorithm/__init__.py @@ -0,0 +1 @@ +from .mixture import GaussianMixtureModel diff --git a/imspy/imspy/algorithm/ccs/__init__.py b/imspy/imspy/algorithm/ccs/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/imspy/imspy/algorithm/ccs/predictors.py b/imspy/imspy/algorithm/ccs/predictors.py new file mode 100644 index 00000000..a810a66a --- /dev/null +++ b/imspy/imspy/algorithm/ccs/predictors.py @@ -0,0 +1,170 @@ +import numpy as np +import tensorflow as tf +from abc import ABC, abstractmethod +from numpy.typing import NDArray +from imspy.chemistry import ccs_to_one_over_k0 +from scipy.optimize import curve_fit +from imspy.utility import tokenize_unimod_sequence +from imspy.algorithm.utilities import get_model_path + + +def load_deep_ccs_predictor() -> tf.keras.models.Model: + """ Get a pretrained deep predictor model + + Returns: + The pretrained deep predictor model + """ + return tf.keras.models.load_model(get_model_path('DeepCCSPredictor')) + + +class PeptideIonMobilityApex(ABC): + """ + ABSTRACT INTERFACE for simulation of ion-mobility apex value + """ + + def __init__(self): + pass + + @abstractmethod + def simulate_ion_mobilities(self, sequences: list[str], charges: list[int]) -> NDArray: + pass + + +def get_sqrt_slopes_and_intercepts(mz: np.ndarray, charge: np.ndarray, + ccs: np.ndarray, fit_charge_state_one: bool = False) -> (np.ndarray, np.ndarray): + """ + + Args: + mz: + charge: + ccs: + fit_charge_state_one: + + Returns: + + """ + + if fit_charge_state_one: + slopes, intercepts = [], [] + else: + slopes, intercepts = [0.0], [0.0] + + if fit_charge_state_one: + c_begin = 1 + else: + c_begin = 2 + + for c in range(c_begin, 5): + def fit_func(x, a, b): + return a * np.sqrt(x) + b + + triples = list(filter(lambda x: x[1] == c, zip(mz, charge, ccs))) + + mz_tmp, charge_tmp = np.array([x[0] for x in triples]), np.array([x[1] for x in triples]) + ccs_tmp = np.array([x[2] for x in triples]) + + popt, _ = curve_fit(fit_func, mz_tmp, ccs_tmp) + + slopes.append(popt[0]) + intercepts.append(popt[1]) + + return np.array(slopes, np.float32), np.array(intercepts, np.float32) + + +class ProjectToInitialSqrtCCS(tf.keras.layers.Layer): + """ + Simple sqrt regression layer, calculates ccs value as linear mapping from mz, charge -> ccs + """ + + def __init__(self, slopes, intercepts): + super(ProjectToInitialSqrtCCS, self).__init__() + self.slopes = tf.constant([slopes]) + self.intercepts = tf.constant([intercepts]) + + def call(self, inputs): + mz, charge = inputs[0], inputs[1] + # since charge is one-hot encoded, can use it to gate linear prediction by charge state + return tf.expand_dims(tf.reduce_sum((self.slopes * tf.sqrt(mz) + self.intercepts) * tf.squeeze(charge), axis=1), + 1) + + +class GRUCCSPredictor(tf.keras.models.Model): + """ + Deep Learning model combining initial linear fit with sequence based features, both scalar and complex + """ + + def __init__(self, slopes, intercepts, num_tokens, + seq_len=50, + emb_dim=128, + gru_1=128, + gru_2=64, + rdo=0.0, + do=0.2): + super(GRUCCSPredictor, self).__init__() + self.__seq_len = seq_len + + self.initial = ProjectToInitialSqrtCCS(slopes, intercepts) + + self.emb = tf.keras.layers.Embedding(input_dim=num_tokens + 1, output_dim=emb_dim, input_length=seq_len) + + self.gru1 = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_1, return_sequences=True, + name='GRU1')) + + self.gru2 = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_2, return_sequences=False, + name='GRU2', + recurrent_dropout=rdo)) + + self.dense1 = tf.keras.layers.Dense(128, activation='relu', + kernel_regularizer=tf.keras.regularizers.l1_l2(1e-3, 1e-3)) + self.dense2 = tf.keras.layers.Dense(64, activation='relu', + kernel_regularizer=tf.keras.regularizers.l1_l2(1e-3, 1e-3)) + + self.dropout = tf.keras.layers.Dropout(do) + + self.out = tf.keras.layers.Dense(1, activation=None) + + def call(self, inputs): + """ + :param inputs: should contain: (mz, charge_one_hot, seq_as_token_indices) + """ + # get inputs + mz, charge, seq = inputs[0], inputs[1], inputs[2] + # sequence learning + x_recurrent = self.gru2(self.gru1(self.emb(seq))) + # concat to feed to dense layers + concat = tf.keras.layers.Concatenate()([charge, x_recurrent]) + # regularize + d1 = self.dropout(self.dense1(concat)) + d2 = self.dense2(d1) + # combine simple linear hypotheses with deep part + return self.initial([mz, charge]) + self.out(d2), self.out(d2) + + +class DeepPeptideIonMobilityApex(PeptideIonMobilityApex): + def __init__(self, model: GRUCCSPredictor, tokenizer: tf.keras.preprocessing.text.Tokenizer): + super(DeepPeptideIonMobilityApex, self).__init__() + self.model = model + self.tokenizer = tokenizer + + def _preprocess_sequences(self, sequences: list[str], pad_len: int = 50) -> NDArray: + char_tokens = [tokenize_unimod_sequence(sequence) for sequence in sequences] + char_tokens = self.tokenizer.texts_to_sequences(char_tokens) + char_tokens = tf.keras.preprocessing.sequence.pad_sequences(char_tokens, pad_len, padding='post') + return char_tokens + + def simulate_ion_mobilities(self, + sequences: list[str], + charges: list[int], + mz: list[float], + verbose: bool = False, + batch_size: int = 1024) -> NDArray: + tokenized_sequences = self._preprocess_sequences(sequences) + + # prepare masses, charges, sequences + m = np.expand_dims(mz, 1) + charges_one_hot = tf.one_hot(np.array(charges) - 1, 4) + + ds = tf.data.Dataset.from_tensor_slices(((m, charges_one_hot, tokenized_sequences), np.zeros_like(mz))).batch(batch_size) + ccs, _ = self.model.predict(ds, verbose=verbose) + + return np.array([ccs_to_one_over_k0(c, m, z) for c, m, z in zip(ccs, mz, charges)]) \ No newline at end of file diff --git a/imspy/imspy/algorithm/hashing.py b/imspy/imspy/algorithm/hashing.py new file mode 100644 index 00000000..3eee00b3 --- /dev/null +++ b/imspy/imspy/algorithm/hashing.py @@ -0,0 +1,80 @@ +import tensorflow as tf +import numpy as np +import warnings + + +class CosimHasher: + def __init__(self, target_vector_length, trials: int = 32, len_trial: int = 20, seed: int = 42): + + assert 0 < trials, f'trials variable needs to be greater then 1, was: {trials}' + assert 0 < len_trial, f'length trial variable needs to be greater then 1, was: {trials}' + + # check + if 0 < len_trial <= 32: + self.V = tf.constant( + np.expand_dims(np.array([np.power(2, i) for i in range(len_trial)]).astype(np.int32), 1)) + + elif 32 < len_trial <= 64: + warnings.warn(f"\nnum bits to hash set to: {len_trial}.\n" + + f"using int64 which might slow down computation significantly.") + self.V = tf.constant( + np.expand_dims(np.array([np.power(2, i) for i in range(len_trial)]).astype(np.int64), 1)) + else: + raise ValueError(f"bit number per hash cannot be greater then 64 or smaller 1, was: {len_trial}.") + + self.trials = trials + self.len_trial = len_trial + self.seed = seed + self.target_vector_length = target_vector_length + + np.random.seed(seed) + size = (len_trial * self.trials, target_vector_length) + X = np.random.normal(0, 1, size=size).astype(np.float32) + self.hash_tensor = tf.transpose(tf.constant(X)) + + def __repr__(self) -> str: + return f"CosimHasher(trials={self.trials}, len_trial={self.len_trial}, seed={self.seed}, " \ + f"target_vector_length={self.target_vector_length})" + + def calculate_keys(self, W: tf.Tensor) -> tf.Tensor: + + S = (tf.sign(W @ self.hash_tensor) + 1) / 2 + + if self.len_trial <= 32: + # reshape into window, num_hashes, len_single_hash + S = tf.cast(tf.reshape(S, shape=(S.shape[0], self.trials, self.len_trial)), tf.int32) + + # calculate int key from binary by base-transform + H = tf.squeeze(S @ self.V) + return H + else: + # reshape into window, num_hashes, len_single_hash + S = tf.cast(tf.reshape(S, shape=(S.shape[0], self.trials, self.len_trial)), tf.int64) + + # calculate int key from binary by base-transform + H = tf.squeeze(S @ self.V) + return H + + +class TimsHasher(CosimHasher): + """ + Class to create hash keys from a given set of weights. + + Args: + + trials (int): number of trials to use for random projection. + len_trial (int): length of each trial. + seed (int): seed for random projection. + resolution (int): resolution of the random projection. + num_dalton (int): number of dalton to use for random projection. + """ + def __init__(self, trials: int = 32, len_trial: int = 20, seed: int = 5671, resolution: int = 1, num_dalton: int = 10): + res_factor = 10 ** resolution + target_vector_length = num_dalton * res_factor + 1 + super().__init__(target_vector_length, trials, len_trial, seed) + self.resolution = resolution + self.num_dalton = num_dalton + + def __repr__(self) -> str: + return f"TimsHasher(trials={self.trials}, len_trial={self.len_trial}, seed={self.seed}, " \ + f"resolution={self.resolution}, num_dalton={self.num_dalton})" diff --git a/imspy/imspy/algorithm/intensity/__init__.py b/imspy/imspy/algorithm/intensity/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/imspy/imspy/algorithm/ionization/__init__.py b/imspy/imspy/algorithm/ionization/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/imspy/imspy/algorithm/ionization/predictors.py b/imspy/imspy/algorithm/ionization/predictors.py new file mode 100644 index 00000000..63030dbc --- /dev/null +++ b/imspy/imspy/algorithm/ionization/predictors.py @@ -0,0 +1,103 @@ +import tensorflow as tf +import numpy as np + +from abc import ABC, abstractmethod +from numpy.typing import NDArray + +from imspy.algorithm.utilities import get_model_path +from imspy.utility import tokenize_unimod_sequence + + +def load_deep_charge_state_predictor() -> tf.keras.models.Model: + """ Get a pretrained deep predictor model + + Returns: + The pretrained deep predictor model + """ + return tf.keras.models.load_model(get_model_path('DeepChargeStatePredictor')) + + +class PeptideChargeStateDistribution(ABC): + """ + ABSTRACT INTERFACE for ionization simulation of peptides + """ + + def __init__(self): + pass + + @abstractmethod + def simulate_ionizations(self, sequences: list[str]) -> np.array: + pass + + +class DeepChargeStateDistribution(PeptideChargeStateDistribution): + + def __init__(self, model: 'GRUChargeStatePredictor', tokenizer: tf.keras.preprocessing.text.Tokenizer): + super(DeepChargeStateDistribution, self).__init__() + self.model = model + self.tokenizer = tokenizer + + def _preprocess_sequences(self, sequences: list[str], pad_len: int = 50) -> np.array: + char_tokens = [tokenize_unimod_sequence(seq) for seq in sequences] + char_tokens = self.tokenizer.texts_to_sequences(char_tokens) + char_tokens = tf.keras.preprocessing.sequence.pad_sequences(char_tokens, pad_len, padding='post') + return char_tokens + + def simulate_ionizations(self, sequences: list[str], batch_size: int = 1024, verbose: bool = False) -> NDArray: + tokens = self._preprocess_sequences(sequences) + tf_ds = tf.data.Dataset.from_tensor_slices(tokens).batch(batch_size) + + probabilities = self.model.predict(tf_ds, verbose=verbose) + + c_list = [] + + for p in probabilities: + c_list.append(np.random.choice(range(1, len(p) + 1), 1, p=p)[0]) + + return np.array(c_list) + + +class GRUChargeStatePredictor(tf.keras.models.Model): + + def __init__(self, + num_tokens, + max_charge=4, + seq_len=50, + emb_dim=128, + gru_1=128, + gru_2=64, + rdo=0.0, + do=0.2): + super(GRUChargeStatePredictor, self).__init__() + + self.emb = tf.keras.layers.Embedding(input_dim=num_tokens + 1, output_dim=emb_dim, input_length=seq_len) + + self.gru1 = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_1, return_sequences=True, + name='GRU1')) + + self.gru2 = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_2, return_sequences=False, + name='GRU2', + recurrent_dropout=rdo)) + + self.dense1 = tf.keras.layers.Dense(128, activation='relu', name='Dense1', + kernel_regularizer=tf.keras.regularizers.l1_l2(1e-3, 1e-3)) + + self.dense2 = tf.keras.layers.Dense(64, activation='relu', name='Dense2', + kernel_regularizer=tf.keras.regularizers.l1_l2(1e-3, 1e-3)) + + self.dropout = tf.keras.layers.Dropout(do, name='Dropout') + + self.out = tf.keras.layers.Dense(max_charge, activation='softmax', name='Output') + + def call(self, inputs, **kwargs): + """ + :param inputs: should contain: (sequence) + """ + # get inputs + seq = inputs + # sequence learning + x_recurrent = self.gru2(self.gru1(self.emb(seq))) + # regularize + d1 = self.dropout(self.dense1(x_recurrent)) + # output + return self.out(self.dense2(d1)) diff --git a/imspy/imspy/mixture.py b/imspy/imspy/algorithm/mixture.py similarity index 99% rename from imspy/imspy/mixture.py rename to imspy/imspy/algorithm/mixture.py index a2e82e68..7942a8f9 100644 --- a/imspy/imspy/mixture.py +++ b/imspy/imspy/algorithm/mixture.py @@ -1,6 +1,3 @@ -import os -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - import tensorflow as tf import numpy as np import tensorflow_probability as tfp diff --git a/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/fingerprint.pb b/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/fingerprint.pb new file mode 100644 index 00000000..b04aad94 --- /dev/null +++ b/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/fingerprint.pb @@ -0,0 +1 @@ +£׆0ٽ庘 ٲӡ(G2 \ No newline at end of file diff --git a/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/keras_metadata.pb b/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/keras_metadata.pb new file mode 100644 index 00000000..739a6bd0 --- /dev/null +++ b/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/keras_metadata.pb @@ -0,0 +1,26 @@ + +root"_tf_keras_model*{"name": "gruccs_predictor", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCCSPredictor", "config": {}, "shared_object_id": 0, "build_input_shape": [{"class_name": "__tuple__", "items": [null, 1]}, {"class_name": "__tuple__", "items": [null, 4]}, {"class_name": "__tuple__", "items": [null, 50]}], "is_graph_network": false, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "__tuple__", "items": [{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "input_1"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 4]}, "float32", "input_2"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 50]}, "int32", "input_3"]}]}], {}]}, "save_spec": {"class_name": "__tuple__", "items": [{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 1]}, "float32", "input_1"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 4]}, "float32", "input_2"]}, {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 50]}, "int32", "input_3"]}]}, "keras_version": "2.15.0", "backend": "tensorflow", "model_config": {"class_name": "GRUCCSPredictor", "config": {}}, "training_config": {"loss": {"class_name": "MeanAbsoluteError", "config": {"reduction": "auto", "name": "mean_absolute_error", "fn": "mean_absolute_error"}, "shared_object_id": 1}, "metrics": [[{"class_name": "MeanMetricWrapper", "config": {"name": "output_1_mae", "dtype": "float32", "fn": "mean_absolute_error"}, "shared_object_id": 2}, {"class_name": "MeanMetricWrapper", "config": {"name": "output_1_mean_absolute_percentage_error", "dtype": "float32", "fn": "mean_absolute_percentage_error"}, "shared_object_id": 3}], [{"class_name": "MeanMetricWrapper", "config": {"name": "output_2_mae", "dtype": "float32", "fn": "mean_absolute_error"}, "shared_object_id": 4}, {"class_name": "MeanMetricWrapper", "config": {"name": "output_2_mean_absolute_percentage_error", "dtype": "float32", "fn": "mean_absolute_percentage_error"}, "shared_object_id": 5}]], "weighted_metrics": null, "loss_weights": [1.0, 0.0], "optimizer_config": {"class_name": "Custom>Adam", "config": {"name": "Adam", "weight_decay": null, "clipnorm": null, "global_clipnorm": null, "clipvalue": null, "use_ema": false, "ema_momentum": 0.99, "ema_overwrite_frequency": null, "jit_compile": true, "is_legacy_optimizer": false, "learning_rate": 0.0010000000474974513, "beta_1": 0.9, "beta_2": 0.999, "epsilon": 1e-07, "amsgrad": false}}}}2 + root.initial"_tf_keras_layer*{"name": "project_to_initial_sqrt_ccs", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "ProjectToInitialSqrtCCS", "config": {"layer was saved without config": true}, "build_input_shape": [{"class_name": "TensorShape", "items": [null, 1]}, {"class_name": "TensorShape", "items": [null, 4]}]}2 + root.emb"_tf_keras_layer*{"name": "embedding", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 50]}, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": false, "class_name": "Embedding", "config": {"name": "embedding", "trainable": true, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 50]}, "input_dim": 49, "output_dim": 128, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}, "shared_object_id": 6}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 50}, "shared_object_id": 7, "build_input_shape": {"class_name": "TensorShape", "items": [null, 50]}}2 +  + root.gru1"_tf_keras_layer* {"name": "bidirectional", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Bidirectional", "config": {"name": "bidirectional", "trainable": true, "dtype": "float32", "layer": {"class_name": "GRU", "config": {"name": "GRU1", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 8}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 9}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 10}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 12}, "merge_mode": "concat"}, "shared_object_id": 13, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 14}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 128]}}2 +   root.gru2"_tf_keras_layer* {"name": "bidirectional_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Bidirectional", "config": {"name": "bidirectional_1", "trainable": true, "dtype": "float32", "layer": {"class_name": "GRU", "config": {"name": "GRU2", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 15}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 16}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 17}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 19}, "merge_mode": "concat"}, "shared_object_id": 20, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 21}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 256]}}2 +  root.dense1"_tf_keras_layer*{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 128, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 22}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 23}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0010000000474974513, "l2": 0.0010000000474974513}, "shared_object_id": 24}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 25, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 260}}, "shared_object_id": 26}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 260]}}2 +  root.dense2"_tf_keras_layer*{"name": "dense_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 27}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 28}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0010000000474974513, "l2": 0.0010000000474974513}, "shared_object_id": 29}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 30, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 128}}, "shared_object_id": 31}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 128]}}2 + root.dropout"_tf_keras_layer*{"name": "dropout", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dropout", "config": {"name": "dropout", "trainable": true, "dtype": "float32", "rate": 0.2, "noise_shape": null, "seed": null}, "shared_object_id": 32, "build_input_shape": {"class_name": "TensorShape", "items": [null, 128]}}2 +root.out"_tf_keras_layer*{"name": "dense_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 33}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 34}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 35, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 64}}, "shared_object_id": 36}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 64]}}2 + Droot.gru1.forward_layer"_tf_keras_rnn_layer* {"name": "forward_GRU1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "forward_GRU1", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "zero_output_for_mask": true, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 37}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 38}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 39}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 41, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 128]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 42}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 128]}}2 + Eroot.gru1.backward_layer"_tf_keras_rnn_layer* {"name": "backward_GRU1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "backward_GRU1", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": true, "stateful": false, "unroll": false, "time_major": false, "zero_output_for_mask": true, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 43}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 44}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 45}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 47, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 128]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 48}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 128]}}2 + Lroot.gru2.forward_layer"_tf_keras_rnn_layer* {"name": "forward_GRU2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "forward_GRU2", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 49}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 50}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 51}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 53, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 256]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 54}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 256]}}2 + Mroot.gru2.backward_layer"_tf_keras_rnn_layer* {"name": "backward_GRU2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "backward_GRU2", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": true, "stateful": false, "unroll": false, "time_major": false, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 55}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 56}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 57}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 59, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 256]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 60}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 256]}}2 +qroot.keras_api.metrics.0"_tf_keras_metric*{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 61}2 +rroot.keras_api.metrics.1"_tf_keras_metric*{"class_name": "Mean", "name": "output_1_loss", "dtype": "float32", "config": {"name": "output_1_loss", "dtype": "float32"}, "shared_object_id": 62}2 +sroot.keras_api.metrics.2"_tf_keras_metric*{"class_name": "Mean", "name": "output_2_loss", "dtype": "float32", "config": {"name": "output_2_loss", "dtype": "float32"}, "shared_object_id": 63}2 +troot.keras_api.metrics.3"_tf_keras_metric*{"class_name": "MeanMetricWrapper", "name": "output_1_mae", "dtype": "float32", "config": {"name": "output_1_mae", "dtype": "float32", "fn": "mean_absolute_error"}, "shared_object_id": 2}2 +uroot.keras_api.metrics.4"_tf_keras_metric*{"class_name": "MeanMetricWrapper", "name": "output_1_mean_absolute_percentage_error", "dtype": "float32", "config": {"name": "output_1_mean_absolute_percentage_error", "dtype": "float32", "fn": "mean_absolute_percentage_error"}, "shared_object_id": 3}2 +vroot.keras_api.metrics.5"_tf_keras_metric*{"class_name": "MeanMetricWrapper", "name": "output_2_mae", "dtype": "float32", "config": {"name": "output_2_mae", "dtype": "float32", "fn": "mean_absolute_error"}, "shared_object_id": 4}2 +wroot.keras_api.metrics.6"_tf_keras_metric*{"class_name": "MeanMetricWrapper", "name": "output_2_mean_absolute_percentage_error", "dtype": "float32", "config": {"name": "output_2_mean_absolute_percentage_error", "dtype": "float32", "fn": "mean_absolute_percentage_error"}, "shared_object_id": 5}2 +root.gru1.forward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 37}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 38}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 39}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 40, "build_input_shape": {"class_name": "__tuple__", "items": [null, 128]}}2 +root.gru1.backward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 43}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 44}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 45}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 46, "build_input_shape": {"class_name": "__tuple__", "items": [null, 128]}}2 +root.gru2.forward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 49}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 50}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 51}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 52, "build_input_shape": {"class_name": "__tuple__", "items": [null, 256]}}2 +root.gru2.backward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 55}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 56}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 57}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 58, "build_input_shape": {"class_name": "__tuple__", "items": [null, 256]}}2 \ No newline at end of file diff --git a/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/saved_model.pb b/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/saved_model.pb new file mode 100644 index 00000000..dcce17fa Binary files /dev/null and b/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/saved_model.pb differ diff --git a/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/variables/variables.data-00000-of-00001 b/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/variables/variables.data-00000-of-00001 new file mode 100644 index 00000000..28ae37a2 Binary files /dev/null and b/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/variables/variables.data-00000-of-00001 differ diff --git a/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/variables/variables.index b/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/variables/variables.index new file mode 100644 index 00000000..2f5f6413 Binary files /dev/null and b/imspy/imspy/algorithm/pretrained/DeepCCSPredictor/variables/variables.index differ diff --git a/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/fingerprint.pb b/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/fingerprint.pb new file mode 100644 index 00000000..3244975b --- /dev/null +++ b/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/fingerprint.pb @@ -0,0 +1 @@ +êߧ֍ ֹB(󧝬2 \ No newline at end of file diff --git a/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/keras_metadata.pb b/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/keras_metadata.pb new file mode 100644 index 00000000..3a27dd24 --- /dev/null +++ b/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/keras_metadata.pb @@ -0,0 +1,20 @@ + +root"_tf_keras_model* {"name": "gru_charge_state_predictor", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUChargeStatePredictor", "config": {"num_tokens": 48}, "shared_object_id": 0, "build_input_shape": {"class_name": "__tuple__", "items": [null, 50]}, "is_graph_network": false, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 50]}, "int32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 50]}, "int32", "input_1"]}, "keras_version": "2.15.0", "backend": "tensorflow", "model_config": {"class_name": "GRUChargeStatePredictor", "config": {"num_tokens": 48}}, "training_config": {"loss": {"class_name": "CategoricalCrossentropy", "config": {"reduction": "auto", "name": "categorical_crossentropy", "from_logits": false, "label_smoothing": 0.0, "axis": -1, "fn": "categorical_crossentropy"}, "shared_object_id": 1}, "metrics": [[{"class_name": "MeanMetricWrapper", "config": {"name": "acc", "dtype": "float32", "fn": "categorical_accuracy"}, "shared_object_id": 2}]], "weighted_metrics": null, "loss_weights": null, "optimizer_config": {"class_name": "Custom>Adam", "config": {"name": "Adam", "weight_decay": null, "clipnorm": null, "global_clipnorm": null, "clipvalue": null, "use_ema": false, "ema_momentum": 0.99, "ema_overwrite_frequency": null, "jit_compile": true, "is_legacy_optimizer": false, "learning_rate": 0.0010000000474974513, "beta_1": 0.9, "beta_2": 0.999, "epsilon": 1e-07, "amsgrad": false}}}}2 +root.emb"_tf_keras_layer*{"name": "embedding", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 50]}, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": false, "class_name": "Embedding", "config": {"name": "embedding", "trainable": true, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 50]}, "input_dim": 49, "output_dim": 128, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}, "shared_object_id": 3}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 50}, "shared_object_id": 4, "build_input_shape": {"class_name": "TensorShape", "items": [null, 50]}}2 +   root.gru1"_tf_keras_layer* {"name": "bidirectional", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Bidirectional", "config": {"name": "bidirectional", "trainable": true, "dtype": "float32", "layer": {"class_name": "GRU", "config": {"name": "GRU1", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 5}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 6}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 7}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 9}, "merge_mode": "concat"}, "shared_object_id": 10, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 11}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 128]}}2 +  + root.gru2"_tf_keras_layer* {"name": "bidirectional_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Bidirectional", "config": {"name": "bidirectional_1", "trainable": true, "dtype": "float32", "layer": {"class_name": "GRU", "config": {"name": "GRU2", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 64, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 12}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 13}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 14}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 16}, "merge_mode": "concat"}, "shared_object_id": 17, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 18}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 256]}}2 +  root.dense1"_tf_keras_layer*{"name": "Dense1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "Dense1", "trainable": true, "dtype": "float32", "units": 128, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 19}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 20}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0010000000474974513, "l2": 0.0010000000474974513}, "shared_object_id": 21}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 22, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 128}}, "shared_object_id": 23}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 128]}}2 +  root.dense2"_tf_keras_layer*{"name": "Dense2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "Dense2", "trainable": true, "dtype": "float32", "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 24}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 25}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0010000000474974513, "l2": 0.0010000000474974513}, "shared_object_id": 26}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 27, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 128}}, "shared_object_id": 28}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 128]}}2 +  root.dropout"_tf_keras_layer*{"name": "Dropout", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dropout", "config": {"name": "Dropout", "trainable": true, "dtype": "float32", "rate": 0.2, "noise_shape": null, "seed": null}, "shared_object_id": 29, "build_input_shape": {"class_name": "TensorShape", "items": [null, 128]}}2 +root.out"_tf_keras_layer*{"name": "Output", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "Output", "trainable": true, "dtype": "float32", "units": 4, "activation": "softmax", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 30}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 31}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 32, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 64}}, "shared_object_id": 33}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 64]}}2 + ;root.gru1.forward_layer"_tf_keras_rnn_layer* {"name": "forward_GRU1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "forward_GRU1", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "zero_output_for_mask": true, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 34}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 35}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 36}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 38, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 128]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 39}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 128]}}2 + <root.gru1.backward_layer"_tf_keras_rnn_layer* {"name": "backward_GRU1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "backward_GRU1", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": true, "stateful": false, "unroll": false, "time_major": false, "zero_output_for_mask": true, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 40}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 41}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 42}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 44, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 128]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 45}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 128]}}2 + Croot.gru2.forward_layer"_tf_keras_rnn_layer* {"name": "forward_GRU2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "forward_GRU2", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 64, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 46}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 47}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 48}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 50, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 256]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 51}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 256]}}2 + Droot.gru2.backward_layer"_tf_keras_rnn_layer* {"name": "backward_GRU2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "backward_GRU2", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": true, "stateful": false, "unroll": false, "time_major": false, "units": 64, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 52}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 53}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 54}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 56, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 256]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 57}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 256]}}2 +hroot.keras_api.metrics.0"_tf_keras_metric*{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 58}2 +iroot.keras_api.metrics.1"_tf_keras_metric*{"class_name": "MeanMetricWrapper", "name": "acc", "dtype": "float32", "config": {"name": "acc", "dtype": "float32", "fn": "categorical_accuracy"}, "shared_object_id": 2}2 +root.gru1.forward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 34}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 35}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 36}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 37, "build_input_shape": {"class_name": "__tuple__", "items": [null, 128]}}2 +root.gru1.backward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 40}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 41}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 42}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 43, "build_input_shape": {"class_name": "__tuple__", "items": [null, 128]}}2 +root.gru2.forward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 64, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 46}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 47}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 48}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 49, "build_input_shape": {"class_name": "__tuple__", "items": [null, 256]}}2 +root.gru2.backward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 64, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 52}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 53}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 54}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 55, "build_input_shape": {"class_name": "__tuple__", "items": [null, 256]}}2 \ No newline at end of file diff --git a/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/saved_model.pb b/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/saved_model.pb new file mode 100644 index 00000000..c9e824f3 Binary files /dev/null and b/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/saved_model.pb differ diff --git a/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/variables/variables.data-00000-of-00001 b/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/variables/variables.data-00000-of-00001 new file mode 100644 index 00000000..01a6af68 Binary files /dev/null and b/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/variables/variables.data-00000-of-00001 differ diff --git a/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/variables/variables.index b/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/variables/variables.index new file mode 100644 index 00000000..7dae5205 Binary files /dev/null and b/imspy/imspy/algorithm/pretrained/DeepChargeStatePredictor/variables/variables.index differ diff --git a/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/fingerprint.pb b/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/fingerprint.pb new file mode 100644 index 00000000..0e4619bb --- /dev/null +++ b/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/fingerprint.pb @@ -0,0 +1 @@ +ľێև ؓU(ּʵ2 \ No newline at end of file diff --git a/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/keras_metadata.pb b/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/keras_metadata.pb new file mode 100644 index 00000000..a8a396ec --- /dev/null +++ b/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/keras_metadata.pb @@ -0,0 +1,19 @@ + + root"_tf_keras_model* {"name": "grurt_predictor", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRURTPredictor", "config": {"num_tokens": 48}, "shared_object_id": 0, "build_input_shape": {"class_name": "TensorShape", "items": [null, 50]}, "is_graph_network": false, "full_save_spec": {"class_name": "__tuple__", "items": [[{"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 50]}, "int32", "input_1"]}], {}]}, "save_spec": {"class_name": "TypeSpec", "type_spec": "tf.TensorSpec", "serialized": [{"class_name": "TensorShape", "items": [null, 50]}, "int32", "input_1"]}, "keras_version": "2.15.0", "backend": "tensorflow", "model_config": {"class_name": "GRURTPredictor", "config": {"num_tokens": 48}}, "training_config": {"loss": {"class_name": "MeanAbsoluteError", "config": {"reduction": "auto", "name": "mean_absolute_error", "fn": "mean_absolute_error"}, "shared_object_id": 1}, "metrics": null, "weighted_metrics": null, "loss_weights": [1.0], "optimizer_config": {"class_name": "Custom>Adam", "config": {"name": "Adam", "weight_decay": null, "clipnorm": null, "global_clipnorm": null, "clipvalue": null, "use_ema": false, "ema_momentum": 0.99, "ema_overwrite_frequency": null, "jit_compile": true, "is_legacy_optimizer": false, "learning_rate": 0.0010000000474974513, "beta_1": 0.9, "beta_2": 0.999, "epsilon": 1e-07, "amsgrad": false}}}}2 +root.emb"_tf_keras_layer*{"name": "embedding", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 50]}, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": false, "class_name": "Embedding", "config": {"name": "embedding", "trainable": true, "dtype": "float32", "batch_input_shape": {"class_name": "__tuple__", "items": [null, 50]}, "input_dim": 49, "output_dim": 128, "embeddings_initializer": {"class_name": "RandomUniform", "config": {"minval": -0.05, "maxval": 0.05, "seed": null}, "shared_object_id": 2}, "embeddings_regularizer": null, "activity_regularizer": null, "embeddings_constraint": null, "mask_zero": false, "input_length": 50}, "shared_object_id": 3, "build_input_shape": {"class_name": "TensorShape", "items": [null, 50]}}2 +   root.gru1"_tf_keras_layer* {"name": "bidirectional", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Bidirectional", "config": {"name": "bidirectional", "trainable": true, "dtype": "float32", "layer": {"class_name": "GRU", "config": {"name": "GRU1", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 4}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 5}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 6}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 8}, "merge_mode": "concat"}, "shared_object_id": 9, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 10}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 128]}}2 +  + root.gru2"_tf_keras_layer* {"name": "bidirectional_1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Bidirectional", "config": {"name": "bidirectional_1", "trainable": true, "dtype": "float32", "layer": {"class_name": "GRU", "config": {"name": "GRU2", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 64, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 11}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 12}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 13}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 15}, "merge_mode": "concat"}, "shared_object_id": 16, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 17}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 256]}}2 +  root.dense1"_tf_keras_layer*{"name": "dense", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense", "trainable": true, "dtype": "float32", "units": 128, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 18}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 19}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0010000000474974513, "l2": 0.0010000000474974513}, "shared_object_id": 20}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 21, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 128}}, "shared_object_id": 22}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 128]}}2 +  root.dense2"_tf_keras_layer*{"name": "dense_1", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_1", "trainable": true, "dtype": "float32", "units": 64, "activation": "relu", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 23}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 24}, "kernel_regularizer": {"class_name": "L1L2", "config": {"l1": 0.0010000000474974513, "l2": 0.0010000000474974513}, "shared_object_id": 25}, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 26, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 128}}, "shared_object_id": 27}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 128]}}2 +  root.dropout"_tf_keras_layer*{"name": "dropout", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dropout", "config": {"name": "dropout", "trainable": true, "dtype": "float32", "rate": 0.2, "noise_shape": null, "seed": null}, "shared_object_id": 28, "build_input_shape": {"class_name": "TensorShape", "items": [null, 128]}}2 +root.out"_tf_keras_layer*{"name": "dense_2", "trainable": true, "expects_training_arg": false, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "Dense", "config": {"name": "dense_2", "trainable": true, "dtype": "float32", "units": 1, "activation": "linear", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 29}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 30}, "kernel_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "bias_constraint": null}, "shared_object_id": 31, "input_spec": {"class_name": "InputSpec", "config": {"dtype": null, "shape": null, "ndim": null, "max_ndim": null, "min_ndim": 2, "axes": {"-1": 64}}, "shared_object_id": 32}, "build_input_shape": {"class_name": "TensorShape", "items": [null, 64]}}2 + ;root.gru1.forward_layer"_tf_keras_rnn_layer* {"name": "forward_GRU1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "forward_GRU1", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "zero_output_for_mask": true, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 33}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 34}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 35}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 37, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 128]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 38}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 128]}}2 + <root.gru1.backward_layer"_tf_keras_rnn_layer* {"name": "backward_GRU1", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "backward_GRU1", "trainable": true, "dtype": "float32", "return_sequences": true, "return_state": false, "go_backwards": true, "stateful": false, "unroll": false, "time_major": false, "zero_output_for_mask": true, "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 39}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 40}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 41}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 43, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 128]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 44}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 128]}}2 + Croot.gru2.forward_layer"_tf_keras_rnn_layer* {"name": "forward_GRU2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "forward_GRU2", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": false, "stateful": false, "unroll": false, "time_major": false, "units": 64, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 45}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 46}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 47}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 49, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 256]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 50}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 256]}}2 + Droot.gru2.backward_layer"_tf_keras_rnn_layer* {"name": "backward_GRU2", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRU", "config": {"name": "backward_GRU2", "trainable": true, "dtype": "float32", "return_sequences": false, "return_state": false, "go_backwards": true, "stateful": false, "unroll": false, "time_major": false, "units": 64, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 51}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 52}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 53}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "activity_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 55, "input_spec": [{"class_name": "InputSpec", "config": {"dtype": null, "shape": {"class_name": "__tuple__", "items": [null, null, 256]}, "ndim": 3, "max_ndim": null, "min_ndim": null, "axes": {}}, "shared_object_id": 56}], "build_input_shape": {"class_name": "TensorShape", "items": [null, 50, 256]}}2 +hroot.keras_api.metrics.0"_tf_keras_metric*{"class_name": "Mean", "name": "loss", "dtype": "float32", "config": {"name": "loss", "dtype": "float32"}, "shared_object_id": 57}2 +root.gru1.forward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 33}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 34}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 35}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 36, "build_input_shape": {"class_name": "__tuple__", "items": [null, 128]}}2 +root.gru1.backward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 128, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 39}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 40}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 41}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 42, "build_input_shape": {"class_name": "__tuple__", "items": [null, 128]}}2 +root.gru2.forward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 64, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 45}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 46}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 47}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 48, "build_input_shape": {"class_name": "__tuple__", "items": [null, 256]}}2 +root.gru2.backward_layer.cell"_tf_keras_layer*{"name": "gru_cell", "trainable": true, "expects_training_arg": true, "dtype": "float32", "batch_input_shape": null, "stateful": false, "must_restore_from_config": false, "preserve_input_structure_in_config": false, "autocast": true, "class_name": "GRUCell", "config": {"name": "gru_cell", "trainable": true, "dtype": "float32", "units": 64, "activation": "tanh", "recurrent_activation": "sigmoid", "use_bias": true, "kernel_initializer": {"class_name": "GlorotUniform", "config": {"seed": null}, "shared_object_id": 51}, "recurrent_initializer": {"class_name": "Orthogonal", "config": {"gain": 1.0, "seed": null}, "shared_object_id": 52}, "bias_initializer": {"class_name": "Zeros", "config": {}, "shared_object_id": 53}, "kernel_regularizer": null, "recurrent_regularizer": null, "bias_regularizer": null, "kernel_constraint": null, "recurrent_constraint": null, "bias_constraint": null, "dropout": 0.0, "recurrent_dropout": 0.0, "implementation": 2, "reset_after": true}, "shared_object_id": 54, "build_input_shape": {"class_name": "__tuple__", "items": [null, 256]}}2 \ No newline at end of file diff --git a/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/saved_model.pb b/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/saved_model.pb new file mode 100644 index 00000000..4a6779a7 Binary files /dev/null and b/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/saved_model.pb differ diff --git a/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/variables/variables.data-00000-of-00001 b/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/variables/variables.data-00000-of-00001 new file mode 100644 index 00000000..29488a6c Binary files /dev/null and b/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/variables/variables.data-00000-of-00001 differ diff --git a/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/variables/variables.index b/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/variables/variables.index new file mode 100644 index 00000000..0f1d1ee4 Binary files /dev/null and b/imspy/imspy/algorithm/pretrained/DeepRetentionTimePredictor/variables/variables.index differ diff --git a/imspy/imspy/algorithm/pretrained/__init__.py b/imspy/imspy/algorithm/pretrained/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/imspy/imspy/algorithm/pretrained/tokenizer-ptm.json b/imspy/imspy/algorithm/pretrained/tokenizer-ptm.json new file mode 100644 index 00000000..358fcb5c --- /dev/null +++ b/imspy/imspy/algorithm/pretrained/tokenizer-ptm.json @@ -0,0 +1 @@ +"{\"class_name\": \"Tokenizer\", \"config\": {\"num_words\": null, \"filters\": \"!\\\"#$%&()*+,-./:;<=>?@[\\\\]^_`{|}~\\t\\n\", \"lower\": false, \"split\": \" \", \"char_level\": false, \"oov_token\": null, \"document_count\": 800315, \"word_counts\": \"{\\\"\\\": 792568, \\\"E\\\": 1150285, \\\"T\\\": 708378, \\\"I\\\": 647675, \\\"D\\\": 814486, \\\"G\\\": 801249, \\\"L\\\": 1243541, \\\"W\\\": 68294, \\\"S\\\": 986226, \\\"A\\\": 970370, \\\"R\\\": 487728, \\\"\\\": 800315, \\\"Q\\\": 624996, \\\"V\\\": 822187, \\\"P\\\": 777177, \\\"H\\\": 356941, \\\"K\\\": 635852, \\\"Y\\\": 323049, \\\"M\\\": 112508, \\\"N\\\": 537145, \\\"F\\\": 437148, \\\"C[UNIMOD:4]\\\": 144034, \\\"M[UNIMOD:35]\\\": 150500, \\\"K[UNIMOD:1363]\\\": 363, \\\"[UNIMOD:1]\\\": 7747, \\\"S[UNIMOD:21]\\\": 23097, \\\"K[UNIMOD:747]\\\": 325, \\\"K[UNIMOD:1849]\\\": 429, \\\"Y[UNIMOD:21]\\\": 233, \\\"K[UNIMOD:64]\\\": 471, \\\"T[UNIMOD:43]\\\": 74, \\\"K[UNIMOD:121]\\\": 589, \\\"R[UNIMOD:34]\\\": 270, \\\"R[UNIMOD:7]\\\": 439, \\\"K[UNIMOD:34]\\\": 524, \\\"T[UNIMOD:21]\\\": 1855, \\\"R[UNIMOD:36]\\\": 344, \\\"K[UNIMOD:1289]\\\": 440, \\\"K[UNIMOD:1]\\\": 297, \\\"K[UNIMOD:1848]\\\": 482, \\\"K[UNIMOD:122]\\\": 348, \\\"C[UNIMOD:312]\\\": 136, \\\"K[UNIMOD:37]\\\": 457, \\\"K[UNIMOD:58]\\\": 180, \\\"S[UNIMOD:43]\\\": 103, \\\"K[UNIMOD:3]\\\": 236, \\\"K[UNIMOD:36]\\\": 441, \\\"Y[UNIMOD:354]\\\": 222}\", \"word_docs\": \"{\\\"E\\\": 571986, \\\"T\\\": 446549, \\\"I\\\": 433228, \\\"A\\\": 506744, \\\"D\\\": 494531, \\\"G\\\": 451662, \\\"S\\\": 516302, \\\"L\\\": 628195, \\\"R\\\": 427701, \\\"\\\": 800315, \\\"W\\\": 64054, \\\"\\\": 792568, \\\"P\\\": 438886, \\\"V\\\": 495048, \\\"Q\\\": 405053, \\\"K\\\": 532020, \\\"H\\\": 285269, \\\"Y\\\": 254978, \\\"N\\\": 371606, \\\"M\\\": 98241, \\\"F\\\": 331117, \\\"C[UNIMOD:4]\\\": 118333, \\\"M[UNIMOD:35]\\\": 130818, \\\"K[UNIMOD:1363]\\\": 362, \\\"[UNIMOD:1]\\\": 7747, \\\"S[UNIMOD:21]\\\": 20402, \\\"K[UNIMOD:747]\\\": 322, \\\"K[UNIMOD:1849]\\\": 427, \\\"Y[UNIMOD:21]\\\": 232, \\\"K[UNIMOD:64]\\\": 461, \\\"T[UNIMOD:43]\\\": 74, \\\"K[UNIMOD:121]\\\": 587, \\\"R[UNIMOD:34]\\\": 270, \\\"R[UNIMOD:7]\\\": 383, \\\"K[UNIMOD:34]\\\": 512, \\\"T[UNIMOD:21]\\\": 1781, \\\"R[UNIMOD:36]\\\": 343, \\\"K[UNIMOD:1289]\\\": 437, \\\"K[UNIMOD:1]\\\": 297, \\\"K[UNIMOD:1848]\\\": 477, \\\"K[UNIMOD:122]\\\": 341, \\\"C[UNIMOD:312]\\\": 134, \\\"K[UNIMOD:37]\\\": 456, \\\"K[UNIMOD:58]\\\": 179, \\\"S[UNIMOD:43]\\\": 103, \\\"K[UNIMOD:3]\\\": 236, \\\"K[UNIMOD:36]\\\": 441, \\\"Y[UNIMOD:354]\\\": 214}\", \"index_docs\": \"{\\\"2\\\": 571986, \\\"11\\\": 446549, \\\"12\\\": 433228, \\\"4\\\": 506744, \\\"6\\\": 494531, \\\"7\\\": 451662, \\\"3\\\": 516302, \\\"1\\\": 628195, \\\"16\\\": 427701, \\\"8\\\": 800315, \\\"23\\\": 64054, \\\"9\\\": 792568, \\\"10\\\": 438886, \\\"5\\\": 495048, \\\"14\\\": 405053, \\\"13\\\": 532020, \\\"18\\\": 285269, \\\"19\\\": 254978, \\\"15\\\": 371606, \\\"22\\\": 98241, \\\"17\\\": 331117, \\\"21\\\": 118333, \\\"20\\\": 130818, \\\"36\\\": 362, \\\"25\\\": 7747, \\\"24\\\": 20402, \\\"39\\\": 322, \\\"35\\\": 427, \\\"43\\\": 232, \\\"30\\\": 461, \\\"48\\\": 74, \\\"27\\\": 587, \\\"41\\\": 270, \\\"34\\\": 383, \\\"28\\\": 512, \\\"26\\\": 1781, \\\"38\\\": 343, \\\"33\\\": 437, \\\"40\\\": 297, \\\"29\\\": 477, \\\"37\\\": 341, \\\"46\\\": 134, \\\"31\\\": 456, \\\"45\\\": 179, \\\"47\\\": 103, \\\"42\\\": 236, \\\"32\\\": 441, \\\"44\\\": 214}\", \"index_word\": \"{\\\"1\\\": \\\"L\\\", \\\"2\\\": \\\"E\\\", \\\"3\\\": \\\"S\\\", \\\"4\\\": \\\"A\\\", \\\"5\\\": \\\"V\\\", \\\"6\\\": \\\"D\\\", \\\"7\\\": \\\"G\\\", \\\"8\\\": \\\"\\\", \\\"9\\\": \\\"\\\", \\\"10\\\": \\\"P\\\", \\\"11\\\": \\\"T\\\", \\\"12\\\": \\\"I\\\", \\\"13\\\": \\\"K\\\", \\\"14\\\": \\\"Q\\\", \\\"15\\\": \\\"N\\\", \\\"16\\\": \\\"R\\\", \\\"17\\\": \\\"F\\\", \\\"18\\\": \\\"H\\\", \\\"19\\\": \\\"Y\\\", \\\"20\\\": \\\"M[UNIMOD:35]\\\", \\\"21\\\": \\\"C[UNIMOD:4]\\\", \\\"22\\\": \\\"M\\\", \\\"23\\\": \\\"W\\\", \\\"24\\\": \\\"S[UNIMOD:21]\\\", \\\"25\\\": \\\"[UNIMOD:1]\\\", \\\"26\\\": \\\"T[UNIMOD:21]\\\", \\\"27\\\": \\\"K[UNIMOD:121]\\\", \\\"28\\\": \\\"K[UNIMOD:34]\\\", \\\"29\\\": \\\"K[UNIMOD:1848]\\\", \\\"30\\\": \\\"K[UNIMOD:64]\\\", \\\"31\\\": \\\"K[UNIMOD:37]\\\", \\\"32\\\": \\\"K[UNIMOD:36]\\\", \\\"33\\\": \\\"K[UNIMOD:1289]\\\", \\\"34\\\": \\\"R[UNIMOD:7]\\\", \\\"35\\\": \\\"K[UNIMOD:1849]\\\", \\\"36\\\": \\\"K[UNIMOD:1363]\\\", \\\"37\\\": \\\"K[UNIMOD:122]\\\", \\\"38\\\": \\\"R[UNIMOD:36]\\\", \\\"39\\\": \\\"K[UNIMOD:747]\\\", \\\"40\\\": \\\"K[UNIMOD:1]\\\", \\\"41\\\": \\\"R[UNIMOD:34]\\\", \\\"42\\\": \\\"K[UNIMOD:3]\\\", \\\"43\\\": \\\"Y[UNIMOD:21]\\\", \\\"44\\\": \\\"Y[UNIMOD:354]\\\", \\\"45\\\": \\\"K[UNIMOD:58]\\\", \\\"46\\\": \\\"C[UNIMOD:312]\\\", \\\"47\\\": \\\"S[UNIMOD:43]\\\", \\\"48\\\": \\\"T[UNIMOD:43]\\\"}\", \"word_index\": \"{\\\"L\\\": 1, \\\"E\\\": 2, \\\"S\\\": 3, \\\"A\\\": 4, \\\"V\\\": 5, \\\"D\\\": 6, \\\"G\\\": 7, \\\"\\\": 8, \\\"\\\": 9, \\\"P\\\": 10, \\\"T\\\": 11, \\\"I\\\": 12, \\\"K\\\": 13, \\\"Q\\\": 14, \\\"N\\\": 15, \\\"R\\\": 16, \\\"F\\\": 17, \\\"H\\\": 18, \\\"Y\\\": 19, \\\"M[UNIMOD:35]\\\": 20, \\\"C[UNIMOD:4]\\\": 21, \\\"M\\\": 22, \\\"W\\\": 23, \\\"S[UNIMOD:21]\\\": 24, \\\"[UNIMOD:1]\\\": 25, \\\"T[UNIMOD:21]\\\": 26, \\\"K[UNIMOD:121]\\\": 27, \\\"K[UNIMOD:34]\\\": 28, \\\"K[UNIMOD:1848]\\\": 29, \\\"K[UNIMOD:64]\\\": 30, \\\"K[UNIMOD:37]\\\": 31, \\\"K[UNIMOD:36]\\\": 32, \\\"K[UNIMOD:1289]\\\": 33, \\\"R[UNIMOD:7]\\\": 34, \\\"K[UNIMOD:1849]\\\": 35, \\\"K[UNIMOD:1363]\\\": 36, \\\"K[UNIMOD:122]\\\": 37, \\\"R[UNIMOD:36]\\\": 38, \\\"K[UNIMOD:747]\\\": 39, \\\"K[UNIMOD:1]\\\": 40, \\\"R[UNIMOD:34]\\\": 41, \\\"K[UNIMOD:3]\\\": 42, \\\"Y[UNIMOD:21]\\\": 43, \\\"Y[UNIMOD:354]\\\": 44, \\\"K[UNIMOD:58]\\\": 45, \\\"C[UNIMOD:312]\\\": 46, \\\"S[UNIMOD:43]\\\": 47, \\\"T[UNIMOD:43]\\\": 48}\"}}" \ No newline at end of file diff --git a/imspy/imspy/algorithm/rt/__init__.py b/imspy/imspy/algorithm/rt/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/imspy/imspy/algorithm/rt/predictors.py b/imspy/imspy/algorithm/rt/predictors.py new file mode 100644 index 00000000..875c4e93 --- /dev/null +++ b/imspy/imspy/algorithm/rt/predictors.py @@ -0,0 +1,93 @@ +import tensorflow as tf +from abc import ABC, abstractmethod +from numpy.typing import NDArray + +from imspy.algorithm.utilities import get_model_path +from imspy.utility import tokenize_unimod_sequence + + +def load_deep_retention_time() -> tf.keras.models.Model: + """ Get a pretrained deep predictor model + + Returns: + The pretrained deep predictor model + """ + return tf.keras.models.load_model(get_model_path('DeepRetentionTimePredictor')) + + +class PeptideChromatographyApex(ABC): + """ + ABSTRACT INTERFACE for a chromatographic separation for peptides + """ + + def __init__(self): + pass + + @abstractmethod + def simulate_separation_times(self, sequences: list[str]) -> NDArray: + pass + + +class GRURetentionTimePredictor(tf.keras.models.Model): + + def __init__(self, + num_tokens, + seq_len=50, + emb_dim=128, + gru_1=128, + gru_2=64, + rdo=0.0, + do=0.2): + super(GRURetentionTimePredictor, self).__init__() + + self.emb = tf.keras.layers.Embedding(input_dim=num_tokens + 1, output_dim=emb_dim, input_length=seq_len) + + self.gru1 = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_1, return_sequences=True, + name='GRU1')) + + self.gru2 = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(gru_2, return_sequences=False, + name='GRU2', + recurrent_dropout=rdo)) + + self.dense1 = tf.keras.layers.Dense(128, activation='relu', name='Dense1', + kernel_regularizer=tf.keras.regularizers.l1_l2(1e-3, 1e-3)) + + self.dense2 = tf.keras.layers.Dense(64, activation='relu', name='Dense2', + kernel_regularizer=tf.keras.regularizers.l1_l2(1e-3, 1e-3)) + + self.dropout = tf.keras.layers.Dropout(do, name='Dropout') + + self.out = tf.keras.layers.Dense(1, activation=None, name='Output') + + def call(self, inputs): + """ + :param inputs: should contain: (sequence) + """ + # get inputs + seq = inputs + # sequence learning + x_recurrent = self.gru2(self.gru1(self.emb(seq))) + # regularize + d1 = self.dropout(self.dense1(x_recurrent)) + # output + return self.out(self.dense2(d1)) + + +class DeepChromatographyApex(PeptideChromatographyApex): + + def __init__(self, model: GRURetentionTimePredictor, tokenizer: tf.keras.preprocessing.text.Tokenizer): + super(DeepChromatographyApex, self).__init__() + self.model = model + self.tokenizer = tokenizer + + def _preprocess_sequences(self, sequences: list[str], pad_len: int = 50) -> NDArray: + char_tokens = [tokenize_unimod_sequence(seq) for seq in sequences] + char_tokens = self.tokenizer.texts_to_sequences(char_tokens) + char_tokens = tf.keras.preprocessing.sequence.pad_sequences(char_tokens, pad_len, padding='post') + return char_tokens + + def simulate_separation_times(self, sequences: list[str], batch_size: int = 1024, verbose: bool = False) -> NDArray: + tokens = self._preprocess_sequences(sequences) + tf_ds = tf.data.Dataset.from_tensor_slices(tokens).batch(batch_size) + + return self.model.predict(tf_ds, verbose=verbose) diff --git a/imspy/imspy/algorithm/utilities.py b/imspy/imspy/algorithm/utilities.py new file mode 100644 index 00000000..61287ff4 --- /dev/null +++ b/imspy/imspy/algorithm/utilities.py @@ -0,0 +1,28 @@ +import tensorflow as tf +import importlib.resources as resources +from imspy.utility.utilities import tokenizer_from_json +from importlib.abc import Traversable + + +def get_model_path(model_name: str) -> Traversable: + """ Get the path to a pretrained model + + Args: + model_name: The name of the model to load + + Returns: + The path to the pretrained model + """ + return resources.files('imspy.algorithm.pretrained').joinpath(model_name) + + +def load_tokenizer_from_resources() -> tf.keras.preprocessing.text.Tokenizer: + """ Load a tokenizer from resources + + Args: + model_name: The name of the model to load + + Returns: + The pretrained tokenizer + """ + return tokenizer_from_json(resources.files('imspy.algorithm.pretrained').joinpath('tokenizer-ptm.json')) diff --git a/imspy/imspy/chemistry.py b/imspy/imspy/chemistry.py deleted file mode 100644 index b5b7d6b3..00000000 --- a/imspy/imspy/chemistry.py +++ /dev/null @@ -1,213 +0,0 @@ -import numpy as np -import mendeleev as me - -AMINO_ACIDS = {'Lysine': 'K', 'Alanine': 'A', 'Glycine': 'G', 'Valine': 'V', 'Tyrosine': 'Y', - 'Arginine': 'R', 'Glutamic Acid': 'E', 'Phenylalanine': 'F', 'Tryptophan': 'W', - 'Leucine': 'L', 'Threonine': 'T', 'Cysteine': 'C', 'Serine': 'S', 'Glutamine': 'Q', - 'Methionine': 'M', 'Isoleucine': 'I', 'Asparagine': 'N', 'Proline': 'P', 'Histidine': 'H', - 'Aspartic Acid': 'D'} - -AA_MASSES = {'A': 71.03711, 'C': 103.00919, 'D': 115.02694, 'E': 129.04259, 'F': 147.06841, 'G': 57.02146, - 'H': 137.05891, 'I': 113.08406, 'K': 128.09496, 'L': 113.08406, 'M': 131.04049, 'N': 114.04293, - 'P': 97.05276, 'Q': 128.05858, 'R': 156.10111, 'S': 87.03203, 'T': 101.04768, 'V': 99.06841, - 'W': 186.07931, 'Y': 163.06333, '[UNIMOD:1]': 42.010565, '[UNIMOD:35]': 15.994915, 'U': 168.964203, - '[UNIMOD:4]': 57.021464, '[UNIMOD:21]': 79.966331, '[UNIMOD:312]': 119.004099, '': 0.0, '': 0.0} - -VARIANT_DICT = {'L': ['L'], 'E': ['E'], 'S': ['S', 'S[UNIMOD:21]'], 'A': ['A'], 'V': ['V'], 'D': ['D'], 'G': ['G'], - '': [''], 'P': ['P'], '': ['', '[UNIMOD:1]'], 'T': ['T', 'T[UNIMOD:21]'], - 'I': ['I'], 'Q': ['Q'], 'K': ['K', 'K[UNIMOD:1]'], 'N': ['N'], 'R': ['R'], 'F': ['F'], 'H': ['H'], - 'Y': ['Y', 'Y[UNIMOD:21]'], 'M': ['M', 'M[UNIMOD:35]'], - 'W': ['W'], 'C': ['C', 'C[UNIMOD:312]', 'C[UNIMOD:4]'], 'C[UNIMOD:4]': ['C', 'C[UNIMOD:312]', 'C[UNIMOD:4]']} - -MASS_PROTON = 1.007276466583 - -MASS_WATER = 18.010564684 -# IUPAC standard in Kelvin -STANDARD_TEMPERATURE = 273.15 -# IUPAC standard in Pa -STANDARD_PRESSURE = 1e5 -# IUPAC elementary charge -ELEMENTARY_CHARGE = 1.602176634e-19 -# IUPAC BOLTZMANN'S CONSTANT -K_BOLTZMANN = 1.380649e-23 -# constant part of Mason-Schamp equation -# 3/16*sqrt(2π/kb)*e/N0 * -# 1e20 (correction for using A² instead of m²) * -# 1/sqrt(1.660 5402(10)×10−27 kg) (correction for using Da instead of kg) * -# 10000 * (to get cm²/Vs from m²/Vs) -# TODO CITATION -CCS_K0_CONVERSION_CONSTANT = 18509.8632163405 - -def get_monoisotopic_token_weight(token:str): - """ - Gets monoisotopic weight of token - - :param token: Token of aa sequence e.g. "[UNIMOD:1]" - :type token: str - :return: Weight in Dalton. - :rtype: float - """ - splits = token.split("[") - for i in range(1, len(splits)): - splits[i] = "["+splits[i] - - mass = 0 - for split in splits: - mass += AA_MASSES[split] - return mass - - -def get_mono_isotopic_weight(sequence_tokenized: list[str]) -> float: - mass = 0 - for token in sequence_tokenized: - mass += get_monoisotopic_token_weight(token) - return mass + MASS_WATER - - -def get_mass_over_charge(mass: float, charge: int) -> float: - return (mass / charge) + MASS_PROTON - -def get_num_protonizable_sites(sequence: str) -> int: - """ - Gets number of sites that can be protonized. - This function does not yet account for PTMs. - - :param sequence: Amino acid sequence - :type sequence: str - :return: Number of protonizable sites - :rtype: int - """ - sites = 1 # n-terminus - for s in sequence: - if s in ["H","R","K"]: - sites += 1 - return sites - - -def reduced_mobility_to_ccs(one_over_k0, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): - """ - convert reduced ion mobility (1/k0) to CCS - :param one_over_k0: reduced ion mobility - :param charge: charge state of the ion - :param mz: mass-over-charge of the ion - :param mass_gas: mass of drift gas - :param temp: temperature of the drift gas in C° - :param t_diff: factor to translate from C° to K - """ - reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) - return (CCS_K0_CONVERSION_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / one_over_k0) - - -def ccs_to_one_over_reduced_mobility(ccs, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): - """ - convert CCS to 1 over reduced ion mobility (1/k0) - :param ccs: collision cross-section - :param charge: charge state of the ion - :param mz: mass-over-charge of the ion - :param mass_gas: mass of drift gas (N2) - :param temp: temperature of the drift gas in C° - :param t_diff: factor to translate from C° to K - """ - reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) - return ((np.sqrt(reduced_mass * (temp + t_diff))) * ccs) / (CCS_K0_CONVERSION_CONSTANT * charge) - - -class ChemicalCompound: - - def _calculate_molecular_mass(self): - mass = 0 - for (atom, abundance) in self.element_composition.items(): - mass += me.element(atom).atomic_weight * abundance - return mass - - def __init__(self, formula): - self.element_composition = self.get_composition(formula) - self.mass = self._calculate_molecular_mass() - - def get_composition(self, formula:str): - """ - Parse chemical formula into Dict[str:int] with - atoms as keys and the respective counts as values. - - :param formula: Chemical formula of compound e.g. 'C6H12O6' - :type formula: str - :return: Dictionary Atom: Count - :rtype: Dict[str:int] - """ - if formula.startswith("("): - assert formula.endswith(")") - formula = formula[1:-1] - - tmp_group = "" - tmp_group_count = "" - depth = 0 - comp_list = [] - comp_counts = [] - - # extract components: everything in brackets and atoms - # extract component counts: number behind component or 1 - for (i,e) in enumerate(formula): - if e == "(": - depth += 1 - if depth == 1: - if tmp_group != "": - comp_list.append(tmp_group) - tmp_group = "" - if tmp_group_count == "": - comp_counts.append(1) - else: - comp_counts.append(int(tmp_group_count)) - tmp_group_count = "" - tmp_group += e - continue - if e == ")": - depth -= 1 - tmp_group += e - continue - if depth > 0: - tmp_group += e - continue - if e.isupper(): - if tmp_group != "": - comp_list.append(tmp_group) - tmp_group = "" - if tmp_group_count == "": - comp_counts.append(1) - else: - comp_counts.append(int(tmp_group_count)) - tmp_group_count = "" - tmp_group += e - continue - if e.islower(): - tmp_group += e - continue - if e.isnumeric(): - tmp_group_count += e - if tmp_group != "": - comp_list.append(tmp_group) - if tmp_group_count == "": - comp_counts.append(1) - else: - comp_counts.append(int(tmp_group_count)) - - # assemble dictionary from component lists - atom_dict = {} - for (comp,count) in zip(comp_list,comp_counts): - if not comp.startswith("("): - atom_dict[comp] = count - else: - atom_dicts_depth = self.get_composition(comp) - for atom in atom_dicts_depth: - atom_dicts_depth[atom] *= count - if atom in atom_dict: - atom_dict[atom] += atom_dicts_depth[atom] - else: - atom_dict[atom] = atom_dicts_depth[atom] - atom_dicts_depth = {} - return atom_dict - -class BufferGas(ChemicalCompound): - - def __init__(self, formula: str): - super().__init__(formula) - diff --git a/imspy/imspy/chemistry/__init__.py b/imspy/imspy/chemistry/__init__.py new file mode 100644 index 00000000..da4dd4f4 --- /dev/null +++ b/imspy/imspy/chemistry/__init__.py @@ -0,0 +1,2 @@ +from .mass import calculate_monoisotopic_mass, calculate_mz, calculate_mz_from_sequence +from .mobility import ccs_to_one_over_k0, one_over_k0_to_ccs diff --git a/imspy/imspy/chemistry/mass.py b/imspy/imspy/chemistry/mass.py new file mode 100644 index 00000000..a1819638 --- /dev/null +++ b/imspy/imspy/chemistry/mass.py @@ -0,0 +1,334 @@ +import re +from collections import defaultdict +import numpy as np +import mendeleev as me + +MASS_PROTON = 1.007276466583 +MASS_NEUTRON = 1.00866491597 +MASS_ELECTRON = 0.00054857990946 +MASS_WATER = 18.0105646863 + +# IUPAC standard in Kelvin +STANDARD_TEMPERATURE = 273.15 +# IUPAC standard in Pa +STANDARD_PRESSURE = 1e5 +# IUPAC elementary charge +ELEMENTARY_CHARGE = 1.602176634e-19 +# IUPAC BOLTZMANN'S CONSTANT +K_BOLTZMANN = 1.380649e-23 + +AMINO_ACIDS = { + 'Lysine': 'K', + 'Alanine': 'A', + 'Glycine': 'G', + 'Valine': 'V', + 'Tyrosine': 'Y', + 'Arginine': 'R', + 'Glutamic Acid': 'E', + 'Phenylalanine': 'F', + 'Tryptophan': 'W', + 'Leucine': 'L', + 'Threonine': 'T', + 'Cysteine': 'C', + 'Serine': 'S', + 'Glutamine': 'Q', + 'Methionine': 'M', + 'Isoleucine': 'I', + 'Asparagine': 'N', + 'Proline': 'P', + 'Histidine': 'H', + 'Aspartic Acid': 'D' +} + +AMINO_ACID_MASSES = { + 'A': 71.037114, + 'R': 156.101111, + 'N': 114.042927, + 'D': 115.026943, + 'C': 103.009185, + 'E': 129.042593, + 'Q': 128.058578, + 'G': 57.021464, + 'H': 137.058912, + 'I': 113.084064, + 'L': 113.084064, + 'K': 128.094963, + 'M': 131.040485, + 'F': 147.068414, + 'P': 97.052764, + 'S': 87.032028, + 'T': 101.047679, + 'W': 186.079313, + 'Y': 163.063329, + 'V': 99.068414 +} + +MODIFICATIONS_MZ = { + # currently unclear if correct, TODO: check + '[UNIMOD:58]': 56.026215, '[UNIMOD:408]': 148.037173, + # correct + '[UNIMOD:43]': 203.079373, '[UNIMOD:7]': 0.984016, + '[UNIMOD:1]': 42.010565, '[UNIMOD:35]': 15.994915, '[UNIMOD:1289]': 70.041865, + '[UNIMOD:3]': 226.077598, '[UNIMOD:1363]': 68.026215, '[UNIMOD:36]': 28.031300, + '[UNIMOD:122]': 27.994915, '[UNIMOD:1848]': 114.031694, '[UNIMOD:1849]': 86.036779, + '[UNIMOD:64]': 100.016044, '[UNIMOD:37]': 42.046950, '[UNIMOD:121]': 114.042927, + '[UNIMOD:747]': 86.000394, '[UNIMOD:34]': 14.015650, '[UNIMOD:354]': 44.985078, + '[UNIMOD:4]': 57.021464, '[UNIMOD:21]': 79.966331, '[UNIMOD:312]': 119.004099 +} + + +MODIFICATIONS_MZ_NUMERICAL = { + # currently unclear if correct, TODO: check + 58: 56.026215, 408: 148.037173, + # correct + 43: 203.079373, 7: 0.984016, + 1: 42.010565, 35: 15.994915, 1289: 70.041865, + 3: 226.077598, 1363: 68.026215, 36: 28.031300, + 122: 27.994915, 1848: 114.031694, 1849: 86.036779, + 64: 100.016044, 37: 42.046950, 121: 114.042927, + 747: 86.000394, 34: 14.015650, 354: 44.985078, + 4: 57.021464, 21: 79.966331, 312: 119.004099 +} + + +def tokenize_amino_acids(sequence): + """ + Tokenizes a sequence of modified amino acids. + + Each character stands for itself, and if a modification is at the beginning, + the modification stands for itself. Otherwise, it should be the suffix of the amino acid. + + Args: + sequence (str): A string representing the sequence of amino acids with modifications. + + Returns: + List[str]: A list of tokenized amino acids. + """ + # Regular expression pattern to match amino acids with or without modifications + pattern = r'(\[UNIMOD:\d+\])?([A-Z])(\[[A-Z]+:\d+\])?' + + # Find all matches using the regular expression + matches = re.findall(pattern, sequence) + + # Process the matches to form the tokenized list + tokens = [] + for match in matches: + mod1, aa, mod2 = match + if mod1: + # If there's a modification at the beginning, it stands for itself + tokens.append(mod1) + # Add the amino acid, with or without the suffix modification + tokens.append(aa + mod2 if mod2 else aa) + + return tokens + + +def calculate_monoisotopic_mass(sequence): + """ + Calculates the monoisotopic mass of a sequence of amino acids with modifications. + + Args: + sequence (str): A string representing the sequence of amino acids with modifications. + + Returns: + float: The monoisotopic mass of the sequence. + """ + # Regex pattern to find modifications in the format [UNIMOD:number] + pattern = r"\[UNIMOD:(\d+)\]" + + # Find all occurrences of the pattern + modifications = re.findall(pattern, sequence) + + # Count occurrences of each modification number + mod_counts = defaultdict(int) + for mod in modifications: + mod_counts[int(mod)] += 1 + + # Remove the modifications from the sequence + sequence = re.sub(pattern, '', sequence) + + # Count occurrences of each amino acid + aa_counts = defaultdict(int) + for char in sequence: + aa_counts[char] += 1 + + # mass of amino acids and modifications + mass_sequence = np.sum([AMINO_ACID_MASSES[amino_acid] * count for amino_acid, count in aa_counts.items()]) + mass_modifics = np.sum([MODIFICATIONS_MZ_NUMERICAL[mod] * count for mod, count in mod_counts.items()]) + + return mass_sequence + mass_modifics + MASS_WATER + + +def calculate_mz(monoisotopic_mass, charge): + """ + Calculates the m/z of a sequence of amino acids with modifications. + + Args: + sequence (str): A string representing the sequence of amino acids with modifications. + + Returns: + float: The m/z of the sequence. + """ + + return (monoisotopic_mass + charge * MASS_PROTON) / charge + + +def calculate_mz_from_sequence(sequence, charge): + """ + Calculates the m/z of a sequence of amino acids with modifications. + + Args: + sequence (str): A string representing the sequence of amino acids with modifications. + + Returns: + float: The m/z of the sequence. + """ + return calculate_mz(calculate_monoisotopic_mass(sequence), charge) + + +def get_monoisotopic_token_weight(token:str): + """ + Gets monoisotopic weight of token + + :param token: Token of aa sequence e.g. "[UNIMOD:1]" + :type token: str + :return: Weight in Dalton. + :rtype: float + """ + splits = token.split("[") + for i in range(1, len(splits)): + splits[i] = "["+splits[i] + + mass = 0 + for split in splits: + mass += AMINO_ACID_MASSES[split] + return mass + + +def get_mono_isotopic_weight(sequence_tokenized: list[str]) -> float: + mass = 0 + for token in sequence_tokenized: + mass += get_monoisotopic_token_weight(token) + return mass + MASS_WATER + + +def get_mass_over_charge(mass: float, charge: int) -> float: + return (mass / charge) + MASS_PROTON + + +def get_num_protonizable_sites(sequence: str) -> int: + """ + Gets number of sites that can be protonized. + This function does not yet account for PTMs. + + :param sequence: Amino acid sequence + :type sequence: str + :return: Number of protonizable sites + :rtype: int + """ + sites = 1 # n-terminus + for s in sequence: + if s in ["H", "R", "K"]: + sites += 1 + return sites + + +class ChemicalCompound: + + def _calculate_molecular_mass(self): + mass = 0 + for (atom, abundance) in self.element_composition.items(): + mass += me.element(atom).atomic_weight * abundance + return mass + + def __init__(self, formula): + self.element_composition = self.get_composition(formula) + self.mass = self._calculate_molecular_mass() + + def get_composition(self, formula: str): + """ + Parse chemical formula into Dict[str:int] with + atoms as keys and the respective counts as values. + + :param formula: Chemical formula of compound e.g. 'C6H12O6' + :type formula: str + :return: Dictionary Atom: Count + :rtype: Dict[str:int] + """ + if formula.startswith("("): + assert formula.endswith(")") + formula = formula[1:-1] + + tmp_group = "" + tmp_group_count = "" + depth = 0 + comp_list = [] + comp_counts = [] + + # extract components: everything in brackets and atoms + # extract component counts: number behind component or 1 + for (i, e) in enumerate(formula): + if e == "(": + depth += 1 + if depth == 1: + if tmp_group != "": + comp_list.append(tmp_group) + tmp_group = "" + if tmp_group_count == "": + comp_counts.append(1) + else: + comp_counts.append(int(tmp_group_count)) + tmp_group_count = "" + tmp_group += e + continue + if e == ")": + depth -= 1 + tmp_group += e + continue + if depth > 0: + tmp_group += e + continue + if e.isupper(): + if tmp_group != "": + comp_list.append(tmp_group) + tmp_group = "" + if tmp_group_count == "": + comp_counts.append(1) + else: + comp_counts.append(int(tmp_group_count)) + tmp_group_count = "" + tmp_group += e + continue + if e.islower(): + tmp_group += e + continue + if e.isnumeric(): + tmp_group_count += e + if tmp_group != "": + comp_list.append(tmp_group) + if tmp_group_count == "": + comp_counts.append(1) + else: + comp_counts.append(int(tmp_group_count)) + + # assemble dictionary from component lists + atom_dict = {} + for (comp, count) in zip(comp_list, comp_counts): + if not comp.startswith("("): + atom_dict[comp] = count + else: + atom_dicts_depth = self.get_composition(comp) + for atom in atom_dicts_depth: + atom_dicts_depth[atom] *= count + if atom in atom_dict: + atom_dict[atom] += atom_dicts_depth[atom] + else: + atom_dict[atom] = atom_dicts_depth[atom] + atom_dicts_depth = {} + return atom_dict + + +class BufferGas(ChemicalCompound): + + def __init__(self, formula: str): + super().__init__(formula) diff --git a/imspy/imspy/chemistry/mobility.py b/imspy/imspy/chemistry/mobility.py new file mode 100644 index 00000000..d668d527 --- /dev/null +++ b/imspy/imspy/chemistry/mobility.py @@ -0,0 +1,31 @@ +import numpy as np + +SUMMARY_CONSTANT = 18509.8632163405 + + +def one_over_k0_to_ccs(one_over_k0, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): + """ + convert reduced ion mobility (1/k0) to CCS + :param one_over_k0: reduced ion mobility + :param charge: charge state of the ion + :param mz: mass-over-charge of the ion + :param mass_gas: mass of drift gas + :param temp: temperature of the drift gas in C° + :param t_diff: factor to translate from C° to K + """ + reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) + return (SUMMARY_CONSTANT * charge) / (np.sqrt(reduced_mass * (temp + t_diff)) * 1 / one_over_k0) + + +def ccs_to_one_over_k0(ccs, mz, charge, mass_gas=28.013, temp=31.85, t_diff=273.15): + """ + convert CCS to 1 over reduced ion mobility (1/k0) + :param ccs: collision cross-section + :param charge: charge state of the ion + :param mz: mass-over-charge of the ion + :param mass_gas: mass of drift gas (N2) + :param temp: temperature of the drift gas in C° + :param t_diff: factor to translate from C° to K + """ + reduced_mass = (mz * charge * mass_gas) / (mz * charge + mass_gas) + return ((np.sqrt(reduced_mass * (temp + t_diff))) * ccs) / (SUMMARY_CONSTANT * charge) diff --git a/imspy/imspy/core/__init__.py b/imspy/imspy/core/__init__.py new file mode 100644 index 00000000..34a5f1a0 --- /dev/null +++ b/imspy/imspy/core/__init__.py @@ -0,0 +1,4 @@ +from .frame import TimsFrame, TimsFrameVectorized +from .slice import TimsSlice, TimsSliceVectorized +from .spectrum import TimsSpectrum, MzSpectrum, MzSpectrumVectorized, IndexedMzSpectrum + diff --git a/imspy/imspy/data/frame.py b/imspy/imspy/core/frame.py similarity index 87% rename from imspy/imspy/data/frame.py rename to imspy/imspy/core/frame.py index 6cd4ba17..7a6ef5a1 100644 --- a/imspy/imspy/data/frame.py +++ b/imspy/imspy/core/frame.py @@ -7,9 +7,10 @@ import numpy as np import imspy_connector as pims -from imspy.data.spectrum import MzSpectrum, TimsSpectrum, IndexedMzSpectrum -from imspy.utilities import re_index_indices +from imspy.core.spectrum import MzSpectrum, TimsSpectrum, IndexedMzSpectrum + +from imspy.utility.utilities import re_index_indices class TimsFrame: @@ -37,6 +38,17 @@ def __init__(self, frame_id: int, ms_type: int, retention_time: float, scan: NDA self.__frame_ptr = pims.PyTimsFrame(frame_id, ms_type, retention_time, scan, mobility, tof, mz, intensity) + def __add__(self, other: 'TimsFrame') -> 'TimsFrame': + """Add two TimsFrames together. + + Args: + other (TimsFrame): TimsFrame to add. + + Returns: + TimsFrame: Sum of the two TimsFrames. + """ + return TimsFrame.from_py_tims_frame(self.__frame_ptr + other.__frame_ptr) + @classmethod def from_py_tims_frame(cls, frame: pims.PyTimsFrame): """Create a TimsFrame from a PyTimsFrame. @@ -208,7 +220,7 @@ def to_tims_spectra(self) -> List['TimsSpectrum']: return [TimsSpectrum.from_py_tims_spectrum(spec) for spec in self.__frame_ptr.to_tims_spectra()] def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5, - min_intensity: float = 1) -> List[MzSpectrum]: + min_intensity: float = 1) -> List[TimsSpectrum]: """Convert the frame to a list of windows. Args: @@ -220,9 +232,35 @@ def to_windows(self, window_length: float = 10, overlapping: bool = True, min_nu Returns: List[MzSpectrum]: List of windows. """ - return [MzSpectrum.from_py_mz_spectrum(spec) for spec in self.__frame_ptr.to_windows( + return [TimsSpectrum.from_py_tims_spectrum(spec) for spec in self.__frame_ptr.to_windows( window_length, overlapping, min_num_peaks, min_intensity)] + @classmethod + def from_windows(cls, windows: List[TimsSpectrum]) -> 'TimsFrame': + """Create a TimsFrame from a list of windows. + + Args: + windows (List[TimsSpectrum]): List of windows. + + Returns: + TimsFrame: TimsFrame created from the windows. + """ + return TimsFrame.from_py_tims_frame(pims.PyTimsFrame.from_windows( + [spec.get_spec_ptr() for spec in windows] + )) + + def to_dense_windows(self, window_length: float = 10, resolution: int = 1, overlapping: bool = True, + min_num_peaks: int = 5, min_intensity: float = 0.0) -> NDArray[np.float64]: + + rows, cols, values, scans, window_indices = self.__frame_ptr.to_dense_windows(window_length, resolution, + overlapping, min_num_peaks, + min_intensity) + + return scans, window_indices, np.reshape(values, (rows, cols)) + + def get_fragment_ptr(self): + return self.__frame_ptr + def __repr__(self): return (f"TimsFrame(frame_id={self.__frame_ptr.frame_id}, ms_type={self.__frame_ptr.ms_type}, " f"num_peaks={len(self.__frame_ptr.mz)})") diff --git a/imspy/imspy/data/slice.py b/imspy/imspy/core/slice.py similarity index 91% rename from imspy/imspy/data/slice.py rename to imspy/imspy/core/slice.py index 9e43bd5c..fc5f3a3b 100644 --- a/imspy/imspy/data/slice.py +++ b/imspy/imspy/core/slice.py @@ -1,15 +1,16 @@ import numpy as np import pandas as pd -from typing import List +from typing import List, Tuple, Any from numpy.typing import NDArray from tensorflow import sparse as sp -from imspy.utilities import re_index_indices +from imspy.utility.utilities import re_index_indices import imspy_connector as pims -from imspy.data.frame import TimsFrame, TimsFrameVectorized -from imspy.data.spectrum import MzSpectrum + +from imspy.core.frame import TimsFrame, TimsFrameVectorized +from imspy.core.spectrum import MzSpectrum, TimsSpectrum class TimsSlice: @@ -131,7 +132,7 @@ def to_resolution(self, resolution: int, num_threads: int = 4) -> 'TimsSlice': return TimsSlice.from_py_tims_slice(self.__slice_ptr.to_resolution(resolution, num_threads)) def to_windows(self, window_length: float = 10, overlapping: bool = True, min_num_peaks: int = 5, - min_intensity: float = 1, num_threads: int = 4) -> List[MzSpectrum]: + min_intensity: float = 1, num_threads: int = 4) -> List[TimsSpectrum]: """Convert the slice to a list of windows. Args: @@ -144,9 +145,26 @@ def to_windows(self, window_length: float = 10, overlapping: bool = True, min_nu Returns: List[MzSpectrum]: List of windows. """ - return [MzSpectrum.from_py_mz_spectrum(spec) for spec in self.__slice_ptr.to_windows( + return [TimsSpectrum.from_py_tims_spectrum(spec) for spec in self.__slice_ptr.to_windows( window_length, overlapping, min_num_peaks, min_intensity, num_threads)] + def to_dense_windows(self, window_length: float = 10, resolution: int = 1, overlapping: bool = True, + min_num_peaks: int = 5, min_intensity: float = 0.0, num_theads: int = 4) -> ( + tuple)[list[NDArray], list[NDArray], list[NDArray]]: + + DW = self.__slice_ptr.to_dense_windows(window_length, overlapping, min_num_peaks, min_intensity, resolution, + num_theads) + + scan_list, window_indices_list, values_list = [], [], [] + + for values, scans, bins, row, col in DW: + W = np.reshape(values, (row, col)) + scan_list.append(scans) + window_indices_list.append(bins) + values_list.append(W) + + return scan_list, window_indices_list, values_list + @property def df(self) -> pd.DataFrame: """Get the data as a pandas DataFrame. diff --git a/imspy/imspy/data/spectrum.py b/imspy/imspy/core/spectrum.py similarity index 92% rename from imspy/imspy/data/spectrum.py rename to imspy/imspy/core/spectrum.py index dcd29bd5..1d229e50 100644 --- a/imspy/imspy/data/spectrum.py +++ b/imspy/imspy/core/spectrum.py @@ -24,6 +24,7 @@ def get_peak_integral(peaks: NDArray[np.int32], peak_info: dict) -> NDArray[np.f integrals = np.sqrt(2*np.pi) * h * FWHM / (2*np.sqrt(2*np.log(2))) return integrals + class IndexedMzSpectrum: def __init__(self, index: NDArray[np.int32], mz: NDArray[np.float64], intensity: NDArray[np.float64]): """IndexedMzSpectrum class. @@ -81,6 +82,22 @@ def intensity(self) -> NDArray[np.float64]: """ return self.__spec_ptr.intensity + def filter(self, mz_min: float = 0.0, mz_max: float = 2000.0, intensity_min: float = 0.0, + intensity_max: float = 1e9) -> 'IndexedMzSpectrum': + """Filter the spectrum for a given m/z range and intensity range. + + Args: + mz_min (float): Minimum m/z value. + mz_max (float): Maximum m/z value. + intensity_min (float, optional): Minimum intensity value. Defaults to 0.0. + intensity_max (float, optional): Maximum intensity value. Defaults to 1e9. + + Returns: + IndexedMzSpectrum: Filtered spectrum. + """ + return IndexedMzSpectrum.from_py_indexed_mz_spectrum( + self.__spec_ptr.filter_ranged(mz_min, mz_max, intensity_min, intensity_max)) + @property def df(self) -> pd.DataFrame: """Data. @@ -91,6 +108,14 @@ def df(self) -> pd.DataFrame: return pd.DataFrame({'index': self.index, 'mz': self.mz, 'intensity': self.intensity}) + def get_spec_ptr(self) -> pims.PyIndexedMzSpectrum: + """Get the spec_ptr. + + Returns: + pims.PyIndexedMzSpectrum: spec_ptr. + """ + return self.__spec_ptr + def __repr__(self): return f"IndexedMzSpectrum(num_peaks={len(self.index)})" @@ -98,7 +123,7 @@ def __repr__(self): class MzSpectrum: @classmethod - def from_jsons(cls, jsons:str) -> MzSpectrum: + def from_jsons(cls, jsons: str) -> MzSpectrum: json_dict:dict = json.loads(jsons) mz = json_dict["mz"] intensity = json_dict["intensity"] @@ -323,8 +348,7 @@ def to_centroided(self, integrate_method: Callable = get_peak_integral) -> MzSpe integrals = integrate_method(peaks, peak_info) # then create a new spectrum with the peak indices and the integrals return MzSpectrum.from_py_mz_spectrum(pims.PyMzSpectrum(dense_spectrum.indices[peaks]/np.power(10,dense_spectrum.resolution), integrals)) - - + def __repr__(self): return f"MzSpectrumVectorized(num_values={len(self.values)})" @@ -463,3 +487,11 @@ def __repr__(self): return (f"TimsSpectrum(id={self.frame_id}, retention_time={np.round(self.retention_time, 2)}, " f"scan={self.scan}, mobility={np.round(self.mobility, 2)}, ms_type={self.ms_type}, " f"num_peaks={len(self.index)})") + + def get_spec_ptr(self) -> pims.PyTimsSpectrum: + """Get the spec_ptr. + + Returns: + pims.PyTimsSpectrum: spec_ptr. + """ + return self.__spec_ptr diff --git a/imspy/imspy/data/__init__.py b/imspy/imspy/data/__init__.py deleted file mode 100644 index 75c253f3..00000000 --- a/imspy/imspy/data/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from imspy.data.frame import TimsFrame -from imspy.data.spectrum import TimsSpectrum, MzSpectrum -from imspy.data.handle import TimsDataset, TimsDatasetDDA, TimsDatasetDIA -from imspy.data.slice import TimsSlice diff --git a/imspy/imspy/simulation/__init__.py b/imspy/imspy/simulation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/imspy/imspy/simulation/experiment.py b/imspy/imspy/simulation/experiment.py index 663f7efe..df60d5d8 100644 --- a/imspy/imspy/simulation/experiment.py +++ b/imspy/imspy/simulation/experiment.py @@ -1,18 +1,16 @@ import os -import json from multiprocessing import Pool import functools from abc import ABC, abstractmethod -import pandas as pd import numpy as np import pyarrow as pa import pyarrow.parquet as pq from tqdm import tqdm -from imspy.data import MzSpectrum, TimsFrame -from imspy.proteome import PeptideDigest, ProteomicsExperimentSampleSlice, ProteomicsExperimentDatabaseHandle -from imspy.isotopes import AveragineGenerator +from imspy.core import MzSpectrum +from imspy.proteome import PeptideDigest, ProteomicsExperimentDatabaseHandle import imspy.simulation.hardware_models as hardware + class ProteomicsExperiment(ABC): def __init__(self, path: str): diff --git a/imspy/imspy/simulation/hardware_models.py b/imspy/imspy/simulation/hardware_models.py index a4d24e3e..b2e29b1c 100644 --- a/imspy/imspy/simulation/hardware_models.py +++ b/imspy/imspy/simulation/hardware_models.py @@ -9,14 +9,16 @@ import pandas as pd from scipy.stats import exponnorm, norm, binom, gamma -from imspy.chemistry import STANDARD_TEMPERATURE, STANDARD_PRESSURE, CCS_K0_CONVERSION_CONSTANT, BufferGas, get_num_protonizable_sites +from imspy.chemistry.mass import STANDARD_TEMPERATURE, STANDARD_PRESSURE, BufferGas, get_num_protonizable_sites +from imspy.chemistry.mobility import SUMMARY_CONSTANT as CCS_K0_CONVERSION_CONSTANT from imspy.proteome import ProteomicsExperimentSampleSlice from imspy.feature import RTProfile, ScanProfile, ChargeProfile from imspy.isotopes import AveragineGenerator -from imspy.utility import tokenizer_from_json +from imspy.utility.utilities import tokenizer_from_json + class Device(ABC): - def __init__(self, name:str): + def __init__(self, name: str): self.name = name self._temperature = STANDARD_TEMPERATURE self._pressure = STANDARD_PRESSURE @@ -64,6 +66,7 @@ def pressure(self, p:float): def run(self, sample: ProteomicsExperimentSampleSlice): pass + class Model(ABC): def __init__(self): pass @@ -72,8 +75,9 @@ def __init__(self): def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Device): pass + class Chromatography(Device): - def __init__(self, name:str="ChromatographyDevice"): + def __init__(self, name: str="ChromatographyDevice"): super().__init__(name) self._apex_model = None self._profile_model = None @@ -151,6 +155,7 @@ def frame_time_end(self, frame_id: ArrayLike): def frame_time_start(self, frame_id: ArrayLike): return self.frame_time_interval(frame_id)[:,0] + class LiquidChromatography(Chromatography): def __init__(self, name: str = "LiquidChromatographyDevice"): super().__init__(name) @@ -174,6 +179,7 @@ def rt_to_frame_id(self, rt_minutes: ArrayLike): frame_id = (rt_minutes/self.frame_length*1000*60).astype(np.int64)+1 return frame_id + class ChromatographyApexModel(Model): def __init__(self): self._device = None @@ -182,6 +188,7 @@ def __init__(self): def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> NDArray[np.float64]: pass + class ChromatographyProfileModel(Model): def __init__(self): pass @@ -190,6 +197,7 @@ def __init__(self): def simulate(self, sample: ProteomicsExperimentSampleSlice, device: Chromatography) -> List[RTProfile]: pass + class EMGChromatographyProfileModel(ChromatographyProfileModel): def __init__(self): diff --git a/imspy/imspy/timstof/__init__.py b/imspy/imspy/timstof/__init__.py new file mode 100644 index 00000000..4395ebeb --- /dev/null +++ b/imspy/imspy/timstof/__init__.py @@ -0,0 +1,3 @@ +from .data import TimsDataset +from .dia import TimsDatasetDIA +from .dda import TimsDatasetDDA, FragmentDDA diff --git a/imspy/imspy/data/handle.py b/imspy/imspy/timstof/data.py similarity index 64% rename from imspy/imspy/data/handle.py rename to imspy/imspy/timstof/data.py index 2a5f052c..a2121910 100644 --- a/imspy/imspy/data/handle.py +++ b/imspy/imspy/timstof/data.py @@ -1,5 +1,3 @@ -from typing import List - import numpy as np import pandas as pd import sqlite3 @@ -10,8 +8,9 @@ from abc import ABC -from imspy.data.frame import TimsFrame -from imspy.data.slice import TimsSlice +from imspy.core.frame import TimsFrame +from imspy.core.slice import TimsSlice + class TimsDataset(ABC): def __init__(self, data_path: str): @@ -20,19 +19,21 @@ def __init__(self, data_path: str): Args: data_path (str): Path to the data. """ + self.__dataset = None + self.binary_path = None + self.data_path = data_path - self.bp: List[str] = obb.get_so_paths() self.meta_data = self.__load_meta_data() self.precursor_frames = self.meta_data[self.meta_data["MsMsType"] == 0].Id.values.astype(np.int32) self.fragment_frames = self.meta_data[self.meta_data["MsMsType"] > 0].Id.values.astype(np.int32) - self.__handle = None self.__current_index = 1 # Try to load the data with the first binary found appropriate_found = False - for so_path in self.bp: + for so_path in obb.get_so_paths(): try: - self.__handle = pims.PyTimsDataHandle(self.data_path, so_path) + self.__dataset = pims.PyTimsDataset(self.data_path, so_path) + self.binary_path = so_path appropriate_found = True break except Exception: @@ -47,7 +48,7 @@ def acquisition_mode(self) -> str: Returns: str: Acquisition mode. """ - return self.__handle.get_acquisition_mode_as_string() + return self.__dataset.get_acquisition_mode_as_string() @property def acquisition_mode_numerical(self) -> int: @@ -56,7 +57,7 @@ def acquisition_mode_numerical(self) -> int: Returns: int: Acquisition mode as a numerical value. """ - return self.__handle.get_acquisition_mode() + return self.__dataset.get_acquisition_mode() @property def frame_count(self) -> int: @@ -65,7 +66,7 @@ def frame_count(self) -> int: Returns: int: Number of frames. """ - return self.__handle.frame_count + return self.__dataset.frame_count() def __load_meta_data(self) -> pd.DataFrame: """Get the meta data. @@ -84,7 +85,7 @@ def get_tims_frame(self, frame_id: int) -> TimsFrame: Returns: TimsFrame: TimsFrame. """ - return TimsFrame.from_py_tims_frame(self.__handle.get_tims_frame(frame_id)) + return TimsFrame.from_py_tims_frame(self.__dataset.get_frame(frame_id)) def get_tims_slice(self, frame_ids: NDArray[np.int32]) -> TimsSlice: """Get a TimsFrame. @@ -95,14 +96,14 @@ def get_tims_slice(self, frame_ids: NDArray[np.int32]) -> TimsSlice: Returns: TimsFrame: TimsFrame. """ - return TimsSlice.from_py_tims_slice(self.__handle.get_tims_slice(frame_ids)) + return TimsSlice.from_py_tims_slice(self.__dataset.get_slice(frame_ids)) def __iter__(self): return self def __next__(self): if self.__current_index <= self.frame_count: - frame_ptr = self.__handle.get_tims_frame(self.__current_index) + frame_ptr = self.__dataset.get_frame(self.__current_index) self.__current_index += 1 if frame_ptr is not None: return TimsFrame.from_py_tims_frame(frame_ptr) @@ -116,36 +117,3 @@ def __getitem__(self, index): if isinstance(index, slice): return self.get_tims_slice(np.arange(index.start, index.stop, index.step).astype(np.int32)) return self.get_tims_frame(index) - - -class TimsDatasetDDA(TimsDataset): - @property - def selected_precursors(self): - """Get precursors selected for fragmentation. - - Returns: - pd.DataFrame: Precursors selected for fragmentation. - """ - return pd.read_sql_query("SELECT * from Precursors", sqlite3.connect(self.data_path + "/analysis.tdf")) - - @property - def pasef_meta_data(self): - """Get PASEF meta data for DDA. - - Returns: - pd.DataFrame: PASEF meta data. - """ - return pd.read_sql_query("SELECT * from PasefFrameMsMsInfo", - sqlite3.connect(self.data_path + "/analysis.tdf")) - - -class TimsDatasetDIA(TimsDataset): - @property - def pasef_meta_data(self): - """Get PASEF meta data for DIA. - - Returns: - pd.DataFrame: PASEF meta data. - """ - return pd.read_sql_query("SELECT * from DiaFrameMsMsWindows", - sqlite3.connect(self.data_path + "/analysis.tdf")) diff --git a/imspy/imspy/timstof/dda.py b/imspy/imspy/timstof/dda.py new file mode 100644 index 00000000..368454bd --- /dev/null +++ b/imspy/imspy/timstof/dda.py @@ -0,0 +1,125 @@ +import sqlite3 +from imspy.timstof.data import TimsDataset +import pandas as pd + +import imspy_connector as pims +from imspy.core.frame import TimsFrame + + +class TimsDatasetDDA(TimsDataset): + + def __init__(self, data_path: str): + super().__init__(data_path=data_path) + self.__dataset = pims.PyTimsDatasetDDA(self.data_path, self.binary_path) + self.meta_data = self.meta_data.rename(columns={"Id": "frame_id"}) + self.fragmented_precursors = self._load_selected_precursors().rename( + columns={ + 'Id': 'precursor_id', + 'LargestPeakMz': 'largest_peak_mz', + 'AverageMz': 'average_mz', + 'MonoisotopicMz': 'monoisotopic_mz', + 'Charge': 'charge', + 'ScanNumber': 'average_scan', + 'Intensity': 'intensity', + 'Parent': 'parent_id', + } + ) + self.pasef_meta_data = self._load_pasef_meta_data().rename( + columns={ + 'Frame': 'frame_id', + 'ScanNumBegin': 'scan_begin', + 'ScanNumEnd': 'scan_end', + 'IsolationMz': 'isolation_mz', + 'IsolationWidth': 'isolation_width', + 'CollisionEnergy': 'collision_energy', + 'Precursor': 'precursor_id' + } + ) + + def _load_selected_precursors(self): + """Get precursors selected for fragmentation. + + Returns: + pd.DataFrame: Precursors selected for fragmentation. + """ + return pd.read_sql_query("SELECT * from Precursors", sqlite3.connect(self.data_path + "/analysis.tdf")) + + def _load_pasef_meta_data(self): + """Get PASEF meta data for DDA. + + Returns: + pd.DataFrame: PASEF meta data. + """ + return pd.read_sql_query("SELECT * from PasefFrameMsMsInfo", + sqlite3.connect(self.data_path + "/analysis.tdf")) + + def get_pasef_fragments(self) -> pd.DataFrame: + """Get PASEF fragments. + + Args: + num_threads (int, optional): Number of threads. Defaults to 4. + + Returns: + List[FragmentDDA]: List of PASEF fragments. + """ + pasef_fragments = [FragmentDDA.from_py_tims_fragment_dda(fragment) + for fragment in self.__dataset.get_pasef_fragments(1)] + + pasef_fragments = pd.DataFrame({ + 'frame_id': [s.frame_id for s in pasef_fragments], + 'precursor_id': [s.precursor_id for s in pasef_fragments], + 'raw_data': [s.selected_fragment for s in pasef_fragments] + }) + + A = pd.merge( + pasef_fragments, self.pasef_meta_data, + left_on=['precursor_id', 'frame_id'], + right_on=['precursor_id', 'frame_id'], + how='inner', + ) + + B = pd.merge( + A, self.fragmented_precursors, + left_on=['precursor_id'], + right_on=['precursor_id'], + how='inner' + ) + + time = self.meta_data[['frame_id']] + time.insert(time.shape[1], "time", self.meta_data['Time'] / 60) + + return pd.merge(time, B, left_on=['frame_id'], right_on=['frame_id'], how='inner') + + def __repr__(self): + return (f"TimsDatasetDDA(data_path={self.data_path}, num_frames={self.frame_count}, " + f"fragmented_precursors={self.fragmented_precursors.shape[0]})") + + +class FragmentDDA: + def __init__(self, frame_id: int, precursor_id: int, selected_fragment: TimsFrame): + self._fragment_ptr = pims.PyTimsFragmentDDA(frame_id, precursor_id, selected_fragment.get_fragment_ptr()) + + @classmethod + def from_py_tims_fragment_dda(cls, fragment: pims.PyTimsFragmentDDA): + instance = cls.__new__(cls) + instance._fragment_ptr = fragment + return instance + + @property + def frame_id(self) -> int: + return self._fragment_ptr.frame_id + + @property + def precursor_id(self) -> int: + return self._fragment_ptr.precursor_id + + @property + def selected_fragment(self) -> TimsFrame: + return TimsFrame.from_py_tims_frame(self._fragment_ptr.selected_fragment) + + def __repr__(self): + return f"FragmentDDA(frame_id={self.frame_id}, precursor_id={self.precursor_id}, " \ + f"selected_fragment={self.selected_fragment})" + + def get_fragment_ptr(self): + return self._fragment_ptr diff --git a/imspy/imspy/timstof/dia.py b/imspy/imspy/timstof/dia.py new file mode 100644 index 00000000..19402b35 --- /dev/null +++ b/imspy/imspy/timstof/dia.py @@ -0,0 +1,21 @@ +import sqlite3 +from imspy.timstof.data import TimsDataset +import pandas as pd + +import imspy_connector as pims + + +class TimsDatasetDIA(TimsDataset): + def __init__(self, data_path: str): + super().__init__(data_path=data_path) + self.__dataset = pims.PyTimsDatasetDIA(self.data_path, self.binary_path) + + @property + def pasef_meta_data(self): + """Get PASEF meta data for DIA. + + Returns: + pd.DataFrame: PASEF meta data. + """ + return pd.read_sql_query("SELECT * from DiaFrameMsMsWindows", + sqlite3.connect(self.data_path + "/analysis.tdf")) diff --git a/imspy/imspy/utilities.py b/imspy/imspy/utilities.py deleted file mode 100644 index 2b5910f8..00000000 --- a/imspy/imspy/utilities.py +++ /dev/null @@ -1,6 +0,0 @@ -import numpy as np - - -def re_index_indices(ids): - _, inverse = np.unique(ids, return_inverse=True) - return inverse diff --git a/imspy/imspy/utility/__init__.py b/imspy/imspy/utility/__init__.py new file mode 100644 index 00000000..5b216e1f --- /dev/null +++ b/imspy/imspy/utility/__init__.py @@ -0,0 +1,2 @@ +from .utilities import re_index_indices +from .sequence import tokenize_unimod_sequence diff --git a/imspy/imspy/utility/sequence.py b/imspy/imspy/utility/sequence.py new file mode 100644 index 00000000..ad12b032 --- /dev/null +++ b/imspy/imspy/utility/sequence.py @@ -0,0 +1,24 @@ +import re +from typing import List + + +def tokenize_unimod_sequence(unimod_sequence: str) -> List[str]: + """ + Tokenizes a sequence of modified amino acids. + Args: + unimod_sequence: A string representing the sequence of amino acids with modifications. + + Returns: + A list of tokenized amino acids. + """ + token_pattern = r'[A-Z](?:\[UNIMOD:\d+\])?' + + # Special case handling for [UNIMOD:1] at the beginning + if unimod_sequence.startswith("[UNIMOD:1]"): + special_token = "[UNIMOD:1]" + rest_of_string = unimod_sequence[len("[UNIMOD:1]"):] + other_tokens = re.findall(token_pattern, rest_of_string) + return [special_token] + other_tokens + [''] + else: + tokens = re.findall(token_pattern, unimod_sequence) + return [''] + tokens + [''] diff --git a/imspy/imspy/utility/utilities.py b/imspy/imspy/utility/utilities.py new file mode 100644 index 00000000..b46b4c43 --- /dev/null +++ b/imspy/imspy/utility/utilities.py @@ -0,0 +1,38 @@ +import io +import json +import numpy as np +import tensorflow as tf + + +def re_index_indices(ids): + """Re-index indices, i.e. replace gaps in indices with consecutive numbers. + Can be used, e.g., to re-index frame IDs from precursors for visualization. + Args: + ids: Indices. + Returns: + Indices. + """ + _, inverse = np.unique(ids, return_inverse=True) + return inverse + + +def tokenizer_to_json(tokenizer: tf.keras.preprocessing.text.Tokenizer, path: str): + """ + save a fit keras tokenizer to json for later use + :param tokenizer: fit keras tokenizer to save + :param path: path to save json to + """ + tokenizer_json = tokenizer.to_json() + with io.open(path, 'w', encoding='utf-8') as f: + f.write(json.dumps(tokenizer_json, ensure_ascii=False)) + + +def tokenizer_from_json(path: str): + """ + load a pre-fit tokenizer from a json file + :param path: path to tokenizer as json file + :return: a keras tokenizer loaded from json + """ + with open(path) as f: + data = json.load(f) + return tf.keras.preprocessing.text.tokenizer_from_json(data) diff --git a/imspy/pyproject.toml b/imspy/pyproject.toml index f18cf6d7..f95a7f9d 100644 --- a/imspy/pyproject.toml +++ b/imspy/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "imspy" -version = "0.2.2" +version = "0.2.13" description = "" authors = ["theGreatHerrLebert "] readme = "README.md" @@ -11,14 +11,13 @@ pandas = ">=2.1" numpy = ">=1.21" tensorflow = ">=2.14" tensorflow-probability = ">=0.22.1" -imspy-connector = ">=0.2.0" -mendeleev = ">=0.14" -pyopenms = ">=3.1" -scipy = ">=1.11.2" + +imspy-connector = ">=0.2.7" +tqdm = ">=4.62.0" +scipy = ">=1.7.1" tqdm = ">=4.66" pyarrow =">=13.0" -numba = ">=0.57" - +mendeleev = ">=0.7.0" [build-system] requires = ["poetry-core"] diff --git a/imspy/tests/test_s_curve.py b/imspy/tests/test_s_curve.py new file mode 100644 index 00000000..7092a530 --- /dev/null +++ b/imspy/tests/test_s_curve.py @@ -0,0 +1,106 @@ +import numpy as np +from sklearn.metrics.pairwise import cosine_similarity +from scipy.stats import binom +from tqdm import tqdm + +from imspy.algorithm.hashing import TimsHasher + +np.random.seed(0) + + +def get_signal_noise(sigma, n_windows, n_bins): + # generate F and F_prime + F = np.random.randn(n_windows, n_bins) + + noise = np.random.randn(1, n_bins) * sigma + F_prime = F + noise + + signal = np.zeros((n_windows, n_bins)) + noise = np.zeros_like(signal) + + for i in np.arange(n_windows): + mat_i = get_random_rotation(n_bins) + + new_sig = mat_i @ F[0, :] + signal[i, :] = new_sig + + new_noise = mat_i @ F_prime[0, :] + noise[i, :] = new_noise + + return signal, noise + + +def get_p_estimate(ors, ands, F, F_prime, seed): + hasher = TimsHasher(trials=ors, len_trial=ands, seed=seed, num_dalton=10, resolution=1) + H = hasher.calculate_keys(F) + H_p = hasher.calculate_keys(F_prime) + return H[np.any((H == H_p).numpy(), axis=1)].shape[0] / H.shape[0], H, H_p + + +def and_or(p, n, m): + # n times AND + # m times OR + return 1.0 - np.power((1.0 - np.power(p, n)), m) + + +def get_p_model(s, ands, ors): + pSim = 1.0 - (np.arccos(s)) / np.pi + return and_or(pSim, ands, ors) + + +def get_random_rotation(dim): + m = np.random.randn(dim, dim) + m_s = (m + m.T) / np.sqrt(2) + v, mat = np.linalg.eig(m_s) + return mat + + +def get_estimates(ors, ands, signal, noise, seed, n_windows): + p_est, H, H_p = get_p_estimate(ors, ands, signal, noise, seed) + + sims = [] + for (s, n) in zip(signal, noise): + sim = cosine_similarity(s.reshape(1, -1), n.reshape(1, -1))[0][0] + sims.append(sim) + + sim_median = np.median(sims) + + p_mod = get_p_model(sim_median, ands, ors) + inter = binom.ppf([0.025, 0.975], n_windows, p_mod) / n_windows + + return sim_median, p_est, p_mod, inter + + +def test_s_curve(): + """ + """ + # Arrange + n_windows = 3000 + n_bins = 101 + + ors = 128 + ands = 31 + + seed = 1354656 + + # Action + dict_values = {} + + for sigma in tqdm([0.2, 0.25, 0.3, 0.35, 0.4, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75], + ncols=100, + desc="iterating collision probabilities"): + signal, noise = get_signal_noise(sigma, n_windows, n_bins) + sim_median, p_est, p_mod, inter = get_estimates(ors, ands, signal, noise, seed, n_windows) + dict_values[sigma] = (sim_median, p_est, p_mod, inter) + + # Assert + for (k, (sim_median, p_est, p_mod, inter)) in dict_values.items(): + assert inter[0] <= p_est <= inter[1] + + +def main(): + test_s_curve() + + +if __name__ == '__main__': + main() diff --git a/imspy_connector/Cargo.toml b/imspy_connector/Cargo.toml index 83818278..05f0e083 100644 --- a/imspy_connector/Cargo.toml +++ b/imspy_connector/Cargo.toml @@ -1,9 +1,8 @@ [package] -name = "imspy_connector" -version = "0.2.2" +name = "imspy-connector" +version = "0.2.13" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] name = "imspy_connector" crate-type = ["cdylib"] diff --git a/imspy_connector/src/lib.rs b/imspy_connector/src/lib.rs index f27f65fa..6656437c 100644 --- a/imspy_connector/src/lib.rs +++ b/imspy_connector/src/lib.rs @@ -1,25 +1,31 @@ -mod py_handle; +mod py_dataset; mod py_mz_spectrum; mod py_tims_frame; mod py_tims_slice; +mod py_dda; +mod py_dia; use pyo3::prelude::*; -use crate::py_handle::PyTimsDataHandle; +use crate::py_dataset::PyTimsDataset; use crate::py_mz_spectrum::{PyMzSpectrum, PyIndexedMzSpectrum, PyTimsSpectrum, PyMzSpectrumVectorized}; -use crate::py_tims_frame::{PyTimsFrame, PyTimsFrameVectorized}; +use crate::py_tims_frame::{PyTimsFrame, PyTimsFrameVectorized, PyRawTimsFrame}; use crate::py_tims_slice::{PyTimsPlane, PyTimsSlice, PyTimsSliceVectorized}; +use crate::py_dda::{PyTimsDatasetDDA, PyTimsFragmentDDA}; #[pymodule] fn imspy_connector(_py: Python, m: &PyModule) -> PyResult<()> { - m.add_class::()?; + m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; + m.add_class::()?; Ok(()) } diff --git a/imspy_connector/src/py_dataset.rs b/imspy_connector/src/py_dataset.rs new file mode 100644 index 00000000..d76d2240 --- /dev/null +++ b/imspy_connector/src/py_dataset.rs @@ -0,0 +1,48 @@ +use pyo3::prelude::*; +use rustdf::data::dataset::TimsDataset; +use rustdf::data::handle::TimsData; + +use crate::py_tims_frame::{PyTimsFrame}; +use crate::py_tims_slice::PyTimsSlice; + +#[pyclass] +pub struct PyTimsDataset { + inner: TimsDataset, +} + +#[pymethods] +impl PyTimsDataset { + #[new] + pub fn new(data_path: &str, bruker_lib_path: &str) -> Self { + let dataset = TimsDataset::new(bruker_lib_path, data_path); + PyTimsDataset { inner: dataset } + } + + pub fn get_frame(&self, frame_id: u32) -> PyTimsFrame { + PyTimsFrame { inner: self.inner.get_frame(frame_id) } + } + + pub fn get_slice(&self, frame_ids: Vec) -> PyTimsSlice { + PyTimsSlice { inner: self.inner.get_slice(frame_ids) } + } + + pub fn get_aquisition_mode(&self) -> String { + self.inner.get_aquisition_mode().to_string() + } + + pub fn get_frame_count(&self) -> i32 { + self.inner.get_frame_count() + } + + pub fn get_data_path(&self) -> &str { + self.inner.get_data_path() + } + + pub fn get_bruker_lib_path(&self) -> &str { + self.inner.get_bruker_lib_path() + } + + pub fn frame_count(&self) -> i32 { + self.inner.get_frame_count() + } +} \ No newline at end of file diff --git a/imspy_connector/src/py_dda.rs b/imspy_connector/src/py_dda.rs new file mode 100644 index 00000000..d47be3ba --- /dev/null +++ b/imspy_connector/src/py_dda.rs @@ -0,0 +1,77 @@ +use pyo3::prelude::*; + +use rustdf::data::dda::{PASEFDDAFragment, TimsDatasetDDA}; +use rustdf::data::handle::TimsData; +use crate::py_tims_frame::PyTimsFrame; +use crate::py_tims_slice::PyTimsSlice; + +#[pyclass] +pub struct PyTimsDatasetDDA { + inner: TimsDatasetDDA, +} + +#[pymethods] +impl PyTimsDatasetDDA { + #[new] + pub fn new(data_path: &str, bruker_lib_path: &str) -> Self { + let dataset = TimsDatasetDDA::new(bruker_lib_path, data_path); + PyTimsDatasetDDA { inner: dataset } + } + pub fn get_frame(&self, frame_id: u32) -> PyTimsFrame { + PyTimsFrame { inner: self.inner.get_frame(frame_id) } + } + + pub fn get_slice(&self, frame_ids: Vec) -> PyTimsSlice { + PyTimsSlice { inner: self.inner.get_slice(frame_ids) } + } + + pub fn get_aquisition_mode(&self) -> String { + self.inner.get_aquisition_mode().to_string() + } + + pub fn get_frame_count(&self) -> i32 { + self.inner.get_frame_count() + } + + pub fn get_data_path(&self) -> &str { + self.inner.get_data_path() + } + + pub fn get_bruker_lib_path(&self) -> &str { + self.inner.get_bruker_lib_path() + } + + pub fn get_pasef_fragments(&self, num_threads: usize) -> Vec { + let pasef_fragments = self.inner.get_pasef_fragments(num_threads); + pasef_fragments.iter().map(|pasef_fragment| PyTimsFragmentDDA { inner: pasef_fragment.clone() }).collect() + } +} + +#[pyclass] +pub struct PyTimsFragmentDDA { + inner: PASEFDDAFragment, +} + +#[pymethods] +impl PyTimsFragmentDDA { + #[new] + pub fn new(frame_id: u32, precursor_id: u32, selected_fragment: &PyTimsFrame) -> PyResult { + + let pasef_fragment = PASEFDDAFragment { + frame_id, + precursor_id, + selected_fragment: selected_fragment.inner.clone(), + }; + + Ok(PyTimsFragmentDDA { inner: pasef_fragment }) + } + + #[getter] + pub fn frame_id(&self) -> u32 { self.inner.frame_id } + + #[getter] + pub fn precursor_id(&self) -> u32 { self.inner.precursor_id } + + #[getter] + pub fn selected_fragment(&self) -> PyTimsFrame { PyTimsFrame { inner: self.inner.selected_fragment.clone() } } +} \ No newline at end of file diff --git a/imspy_connector/src/py_dia.rs b/imspy_connector/src/py_dia.rs new file mode 100644 index 00000000..ba64c1ab --- /dev/null +++ b/imspy_connector/src/py_dia.rs @@ -0,0 +1,43 @@ +use pyo3::prelude::*; + +use rustdf::data::dia::TimsDatasetDIA; +use rustdf::data::handle::TimsData; +use crate::py_tims_frame::PyTimsFrame; +use crate::py_tims_slice::PyTimsSlice; + +#[pyclass] +pub struct PyTimsDatasetDIA { + inner: TimsDatasetDIA, +} + +#[pymethods] +impl PyTimsDatasetDIA { + #[new] + pub fn new(data_path: &str, bruker_lib_path: &str) -> Self { + let dataset = TimsDatasetDIA::new(bruker_lib_path, data_path); + PyTimsDatasetDIA { inner: dataset } + } + pub fn get_frame(&self, frame_id: u32) -> PyTimsFrame { + PyTimsFrame { inner: self.inner.get_frame(frame_id) } + } + + pub fn get_slice(&self, frame_ids: Vec) -> PyTimsSlice { + PyTimsSlice { inner: self.inner.get_slice(frame_ids) } + } + + pub fn get_aquisition_mode(&self) -> String { + self.inner.get_aquisition_mode().to_string() + } + + pub fn get_frame_count(&self) -> i32 { + self.inner.get_frame_count() + } + + pub fn get_data_path(&self) -> &str { + self.inner.get_data_path() + } + + pub fn get_bruker_lib_path(&self) -> &str { + self.inner.get_bruker_lib_path() + } +} \ No newline at end of file diff --git a/imspy_connector/src/py_handle.rs b/imspy_connector/src/py_handle.rs deleted file mode 100644 index d3976282..00000000 --- a/imspy_connector/src/py_handle.rs +++ /dev/null @@ -1,52 +0,0 @@ -use pyo3::prelude::*; -use numpy::{PyArray1}; - -use rustdf::data::handle::{TimsDataHandle}; -use crate::py_tims_frame::{PyTimsFrame}; -use crate::py_tims_slice::PyTimsSlice; - -#[pyclass] -pub struct PyTimsDataHandle { - inner: TimsDataHandle, -} - -#[pymethods] -impl PyTimsDataHandle { - #[new] - pub fn new(data_path: &str, bruker_lib_path: &str) -> Self { - let dataset = TimsDataHandle::new(bruker_lib_path, data_path).unwrap(); - PyTimsDataHandle { inner: dataset } - } - - #[getter] - pub fn get_data_path(&self) -> &str { - &self.inner.data_path - } - - #[getter] - pub fn get_bruker_lib_path(&self) -> &str { - &self.inner.bruker_lib_path - } - #[getter] - pub fn frame_count(&self) -> i32 { - self.inner.get_frame_count() - } - - pub fn get_tims_frame(&self, frame_id: u32) -> PyResult { - let frame = self.inner.get_frame(frame_id).unwrap(); - Ok(PyTimsFrame { inner: frame }) - } - - pub fn get_acquisition_mode(&self) -> i32 { - self.inner.acquisition_mode.to_i32() - } - - pub fn get_acquisition_mode_as_string(&self) -> String { - self.inner.acquisition_mode.to_string() - } - - pub fn get_tims_slice(&self, frame_ids: &PyArray1) -> PyTimsSlice { - let frames = self.inner.get_tims_slice(frame_ids.to_vec().unwrap().iter().map(|f| *f as u32).collect()); - PyTimsSlice { inner: frames } - } -} \ No newline at end of file diff --git a/imspy_connector/src/py_mz_spectrum.rs b/imspy_connector/src/py_mz_spectrum.rs index 5c90f626..4efc8ec8 100644 --- a/imspy_connector/src/py_mz_spectrum.rs +++ b/imspy_connector/src/py_mz_spectrum.rs @@ -184,6 +184,14 @@ impl PyIndexedMzSpectrum { pub fn intensity(&self, py: Python) -> Py> { self.inner.mz_spectrum.intensity.clone().into_pyarray(py).to_owned() } + + pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min: f64, intensity_max: f64) -> PyResult { + let filtered = self.inner.filter_ranged(mz_min, mz_max, intensity_min, intensity_max); + let py_filtered = PyIndexedMzSpectrum { + inner: filtered, + }; + Ok(py_filtered) + } } #[pyclass] diff --git a/imspy_connector/src/py_tims_frame.rs b/imspy_connector/src/py_tims_frame.rs index a5f391c5..00a3f94a 100644 --- a/imspy_connector/src/py_tims_frame.rs +++ b/imspy_connector/src/py_tims_frame.rs @@ -1,9 +1,72 @@ use pyo3::prelude::*; -use numpy::{PyArray1, IntoPyArray}; -use mscore::{TimsFrame, ImsFrame, MsType, TimsFrameVectorized, ImsFrameVectorized, ToResolution, Vectorized}; use pyo3::types::PyList; +use pyo3::types::PyTuple; +use numpy::{PyArray1, IntoPyArray}; +use mscore::{TimsFrame, ImsFrame, MsType, TimsFrameVectorized, ImsFrameVectorized, ToResolution, Vectorized, RawTimsFrame, TimsSpectrum}; -use crate::py_mz_spectrum::{PyIndexedMzSpectrum, PyMzSpectrum, PyTimsSpectrum}; +use crate::py_mz_spectrum::{PyIndexedMzSpectrum, PyTimsSpectrum}; + +#[pyclass] +#[derive(Clone)] +pub struct PyRawTimsFrame { + pub inner: RawTimsFrame, +} + +#[pymethods] +impl PyRawTimsFrame { + #[new] + pub unsafe fn new(frame_id: i32, + ms_type: i32, + retention_time: f64, + scan: &PyArray1, + tof: &PyArray1, + intensity: &PyArray1) -> PyResult { + Ok(PyRawTimsFrame { + inner: RawTimsFrame { + frame_id, + retention_time, + ms_type: MsType::new(ms_type), + scan: scan.as_slice()?.to_vec(), + tof: tof.as_slice()?.to_vec(), + intensity: intensity.as_slice()?.to_vec(), + }, + }) + } + + #[getter] + pub fn intensity(&self, py: Python) -> Py> { + self.inner.intensity.clone().into_pyarray(py).to_owned() + } + #[getter] + pub fn scan(&self, py: Python) -> Py> { + self.inner.scan.clone().into_pyarray(py).to_owned() + } + + #[getter] + pub fn tof(&self, py: Python) -> Py> { + self.inner.tof.clone().into_pyarray(py).to_owned() + } + + #[getter] + pub fn frame_id(&self) -> i32 { + self.inner.frame_id + } + + #[getter] + pub fn ms_type_numeric(&self) -> i32 { + self.inner.ms_type.ms_type_numeric() + } + + #[getter] + pub fn ms_type(&self) -> String { + self.inner.ms_type.to_string() + } + + #[getter] + pub fn retention_time(&self) -> f64 { + self.inner.retention_time + } +} #[pyclass] #[derive(Clone)] @@ -97,7 +160,7 @@ impl PyTimsFrame { let list: Py = PyList::empty(py).into(); for window in windows { - let py_mz_spectrum = Py::new(py, PyMzSpectrum { inner: window })?; + let py_mz_spectrum = Py::new(py, PyTimsSpectrum { inner: window })?; list.as_ref(py).append(py_mz_spectrum)?; } @@ -119,6 +182,33 @@ impl PyTimsFrame { pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, scan_min: i32, scan_max: i32, inv_mob_min: f64, inv_mob_max: f64, intensity_min: f64, intensity_max: f64) -> PyTimsFrame { return PyTimsFrame { inner: self.inner.filter_ranged(mz_min, mz_max, scan_min, scan_max, inv_mob_min, inv_mob_max, intensity_min, intensity_max) } } + + #[staticmethod] + pub fn from_windows(_py: Python, windows: &PyList) -> PyResult { + let mut spectra: Vec = Vec::new(); + for window in windows.iter() { + let window: PyRef = window.extract()?; + spectra.push(window.inner.clone()); + } + + Ok(PyTimsFrame { inner: TimsFrame::from_windows(spectra) }) + } + + pub fn to_dense_windows(&self, py: Python, window_length: f64, resolution: i32, overlapping: bool, min_peaks: usize, min_intensity: f64) -> PyResult { + + let (data, scans, window_indices, rows, cols) = self.inner.to_dense_windows(window_length, overlapping, min_peaks, min_intensity, resolution); + let py_array: &PyArray1 = data.into_pyarray(py); + let py_scans: &PyArray1 = scans.into_pyarray(py); + let py_window_indices: &PyArray1 = window_indices.into_pyarray(py); + let tuple = PyTuple::new(py, &[rows.into_py(py), cols.into_py(py), py_array.to_owned().into_py(py), py_scans.to_owned().into_py(py), py_window_indices.to_owned().into_py(py)]); + + Ok(tuple.into()) + } + + fn __add__(&self, other: PyTimsFrame) -> PyTimsFrame { + let result = self.inner.clone() + other.inner.clone(); + return PyTimsFrame { inner: result } + } } #[pyclass] diff --git a/imspy_connector/src/py_tims_slice.rs b/imspy_connector/src/py_tims_slice.rs index 4885d0b3..ec3771ba 100644 --- a/imspy_connector/src/py_tims_slice.rs +++ b/imspy_connector/src/py_tims_slice.rs @@ -2,7 +2,7 @@ use pyo3::prelude::*; use mscore::{MsType, TimsPlane, TimsSlice, TimsSliceVectorized}; use pyo3::types::{PyList}; use numpy::{IntoPyArray, PyArray1}; -use crate::py_mz_spectrum::PyMzSpectrum; +use crate::py_mz_spectrum::{PyTimsSpectrum}; use crate::py_tims_frame::{PyTimsFrame, PyTimsFrameVectorized}; @@ -90,12 +90,17 @@ impl PyTimsSlice { let list: Py = PyList::empty(py).into(); for window in windows { - let py_mz_spectrum = Py::new(py, PyMzSpectrum { inner: window })?; + let py_mz_spectrum = Py::new(py, PyTimsSpectrum { inner: window })?; list.as_ref(py).append(py_mz_spectrum)?; } Ok(list.into()) } + + pub fn to_dense_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, resolution: i32, num_threads: usize) -> Vec<(Vec, Vec, Vec, usize, usize)> { + self.inner.to_dense_windows(window_length, overlapping, min_peaks, min_intensity, resolution, num_threads) + } + pub fn get_frame_at_index(&self, index: i32) -> PyTimsFrame { PyTimsFrame { inner: self.inner.frames[index as usize].clone() } } diff --git a/mscore/src/chemistry.rs b/mscore/src/chemistry.rs new file mode 100644 index 00000000..3c737fb3 --- /dev/null +++ b/mscore/src/chemistry.rs @@ -0,0 +1,72 @@ +/// convert 1 over reduced ion mobility (1/k0) to CCS +/// +/// Arguments: +/// +/// * `one_over_k0` - 1 over reduced ion mobility (1/k0) +/// * `charge` - charge state of the ion +/// * `mz` - mass-over-charge of the ion +/// * `mass_gas` - mass of drift gas (N2) +/// * `temp` - temperature of the drift gas in C° +/// * `t_diff` - factor to translate from C° to K +/// +/// Returns: +/// +/// * `ccs` - collision cross-section +/// +/// # Examples +/// +/// ``` +/// use mscore::one_over_reduced_mobility_to_ccs; +/// +/// let ccs = one_over_reduced_mobility_to_ccs(0.5, 1000.0, 2, 28.013, 31.85, 273.15); +/// assert_eq!(ccs, 806.5918693771381); +/// ``` +pub fn one_over_reduced_mobility_to_ccs( + one_over_k0: f64, + mz: f64, + charge: u32, + mass_gas: f64, + temp: f64, + t_diff: f64, +) -> f64 { + let summary_constant = 18509.8632163405; + let reduced_mass = (mz * charge as f64 * mass_gas) / (mz * charge as f64 + mass_gas); + summary_constant * charge as f64 / (reduced_mass * (temp + t_diff)).sqrt() / one_over_k0 +} + + +/// convert CCS to 1 over reduced ion mobility (1/k0) +/// +/// Arguments: +/// +/// * `ccs` - collision cross-section +/// * `charge` - charge state of the ion +/// * `mz` - mass-over-charge of the ion +/// * `mass_gas` - mass of drift gas (N2) +/// * `temp` - temperature of the drift gas in C° +/// * `t_diff` - factor to translate from C° to K +/// +/// Returns: +/// +/// * `one_over_k0` - 1 over reduced ion mobility (1/k0) +/// +/// # Examples +/// +/// ``` +/// use mscore::ccs_to_reduced_mobility; +/// +/// let k0 = ccs_to_reduced_mobility(806.5918693771381, 1000.0, 2, 28.013, 31.85, 273.15); +/// assert_eq!(1.0 / k0, 0.5); +/// ``` +pub fn ccs_to_reduced_mobility( + ccs: f64, + mz: f64, + charge: u32, + mass_gas: f64, + temp: f64, + t_diff: f64, +) -> f64 { + let summary_constant = 18509.8632163405; + let reduced_mass = (mz * charge as f64 * mass_gas) / (mz * charge as f64 + mass_gas); + ((reduced_mass * (temp + t_diff)).sqrt() * ccs) / (summary_constant * charge as f64) +} diff --git a/mscore/src/lib.rs b/mscore/src/lib.rs index 05641038..2e4155c9 100644 --- a/mscore/src/lib.rs +++ b/mscore/src/lib.rs @@ -1,8 +1,13 @@ pub mod mz_spectrum; mod tims_frame; mod tims_slice; +mod chemistry; pub use { + + chemistry::one_over_reduced_mobility_to_ccs, + chemistry::ccs_to_reduced_mobility, + mz_spectrum::MsType, mz_spectrum::MzSpectrum, @@ -17,6 +22,7 @@ pub use { tims_frame::ImsFrame, tims_frame::ImsFrameVectorized, + tims_frame::RawTimsFrame, tims_frame::TimsFrame, tims_frame::TimsFrameVectorized, tims_frame::ToResolution, diff --git a/mscore/src/mz_spectrum.rs b/mscore/src/mz_spectrum.rs index 9307a097..1f2728bb 100644 --- a/mscore/src/mz_spectrum.rs +++ b/mscore/src/mz_spectrum.rs @@ -409,6 +409,21 @@ impl IndexedMzSpectrum { } } } + + pub fn filter_ranged(&self, mz_min: f64, mz_max: f64, intensity_min:f64, intensity_max: f64) -> Self { + let mut mz_vec: Vec = Vec::new(); + let mut intensity_vec: Vec = Vec::new(); + let mut index_vec: Vec = Vec::new(); + + for ((&mz, &intensity), &index) in self.mz_spectrum.mz.iter().zip(self.mz_spectrum.intensity.iter()).zip(self.index.iter()) { + if mz_min <= mz && mz <= mz_max && intensity >= intensity_min && intensity <= intensity_max { + mz_vec.push(mz); + intensity_vec.push(intensity); + index_vec.push(index); + } + } + IndexedMzSpectrum { index: index_vec, mz_spectrum: MzSpectrum { mz: mz_vec, intensity: intensity_vec } } + } } impl Display for IndexedMzSpectrum { @@ -463,6 +478,74 @@ impl TimsSpectrum { let vector = self.spectrum.vectorized(resolution); TimsSpectrumVectorized { frame_id: self.frame_id, scan: self.scan, retention_time: self.retention_time, mobility: self.mobility, ms_type: self.ms_type.clone(), vector } } + + pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> BTreeMap { + + let mut splits: BTreeMap = BTreeMap::new(); + + for (i, &mz) in self.spectrum.mz_spectrum.mz.iter().enumerate() { + let intensity = self.spectrum.mz_spectrum.intensity[i]; + let tof = self.spectrum.index[i]; + + let tmp_key = (mz / window_length).floor() as i32; + + splits.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new( + Vec::new(), Vec::new(), Vec::new())) + ).spectrum.mz_spectrum.mz.push(mz); + + splits.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new( + Vec::new(), Vec::new(), Vec::new())) + ).spectrum.mz_spectrum.intensity.push(intensity); + + splits.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new( + Vec::new(), Vec::new(), Vec::new())) + ).spectrum.index.push(tof); + } + + if overlapping { + let mut splits_offset = BTreeMap::new(); + + for (i, &mmz) in self.spectrum.mz_spectrum.mz.iter().enumerate() { + let intensity = self.spectrum.mz_spectrum.intensity[i]; + let tof = self.spectrum.index[i]; + + let tmp_key = -((mmz + window_length / 2.0) / window_length).floor() as i32; + + splits_offset.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new( + Vec::new(), Vec::new(), Vec::new())) + ).spectrum.mz_spectrum.mz.push(mmz); + + splits_offset.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new( + Vec::new(), Vec::new(), Vec::new())) + ).spectrum.mz_spectrum.intensity.push(intensity); + + splits_offset.entry(tmp_key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new( + Vec::new(), Vec::new(), Vec::new())) + ).spectrum.index.push(tof); + } + + for (key, val) in splits_offset { + splits.entry(key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new( + Vec::new(), Vec::new(), Vec::new())) + ).spectrum.mz_spectrum.mz.extend(val.spectrum.mz_spectrum.mz); + + splits.entry(key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new( + Vec::new(), Vec::new(), Vec::new())) + ).spectrum.mz_spectrum.intensity.extend(val.spectrum.mz_spectrum.intensity); + + splits.entry(key).or_insert_with(|| TimsSpectrum::new(self.frame_id, self.scan, self.retention_time, self.mobility, self.ms_type.clone(), IndexedMzSpectrum::new( + Vec::new(), Vec::new(), Vec::new())) + ).spectrum.index.extend(val.spectrum.index); + } + } + + splits.retain(|_, spectrum| { + spectrum.spectrum.mz_spectrum.mz.len() >= min_peaks && spectrum.spectrum.mz_spectrum.intensity.iter().cloned().max_by( + |a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal)).unwrap_or(0.0) >= min_intensity + }); + + splits + } } impl Display for TimsSpectrum { diff --git a/mscore/src/tims_frame.rs b/mscore/src/tims_frame.rs index a93a5105..f078c40e 100644 --- a/mscore/src/tims_frame.rs +++ b/mscore/src/tims_frame.rs @@ -2,6 +2,7 @@ use std::fmt; use std::collections::BTreeMap; use std::fmt::{Formatter}; use itertools; +use itertools::izip; use crate::mz_spectrum::{MsType, MzSpectrum, IndexedMzSpectrum, TimsSpectrum}; @@ -13,6 +14,16 @@ pub trait Vectorized { fn vectorized(&self, resolution: i32) -> T; } +#[derive(Clone)] +pub struct RawTimsFrame { + pub frame_id: i32, + pub retention_time: f64, + pub ms_type: MsType, + pub scan: Vec, + pub tof: Vec, + pub intensity: Vec, +} + #[derive(Clone)] pub struct ImsFrame { pub retention_time: f64, @@ -187,25 +198,90 @@ impl TimsFrame { TimsFrame::new(self.frame_id, self.ms_type.clone(), self.ims_frame.retention_time, scan_vec, mobility_vec, tof_vec, mz_vec, intensity_vec) } - pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> Vec { - + pub fn to_windows_indexed(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> (Vec, Vec, Vec) { + // split by scan (ion mobility) let spectra = self.to_tims_spectra(); let windows: Vec<_> = spectra.iter().map(|spectrum| - spectrum.spectrum.mz_spectrum.to_windows(window_length, overlapping, min_peaks, min_intensity)) + spectrum.to_windows(window_length, overlapping, min_peaks, min_intensity)) .collect(); - let mut result: Vec = Vec::new(); + let mut scan_indices = Vec::new(); + + for tree in windows.iter() { + for (_, window) in tree { + scan_indices.push(window.scan) + } + } + + let mut spectra = Vec::new(); + let mut window_indices = Vec::new(); for window in windows { - for (_, spectrum) in window { - result.push(spectrum); + for (i, spectrum) in window { + spectra.push(spectrum); + window_indices.push(i); } } - result + (scan_indices, window_indices, spectra) } + pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64) -> Vec { + let (_, _, widows) = self.to_windows_indexed(window_length, overlapping, min_peaks, min_intensity); + widows + } + + pub fn from_windows(windows: Vec) -> TimsFrame { + + let first_window = windows.first().unwrap(); + + let mut scan = Vec::new(); + let mut tof = Vec::new(); + let mut mzs = Vec::new(); + let mut intensity = Vec::new(); + let mut mobility = Vec::new(); + + for window in windows.iter() { + for (i, mz) in window.spectrum.mz_spectrum.mz.iter().enumerate() { + scan.push(window.scan); + mobility.push(window.mobility); + tof.push(window.spectrum.index[i]); + mzs.push(*mz); + intensity.push(window.spectrum.mz_spectrum.intensity[i]); + } + } + + TimsFrame::new(first_window.frame_id, first_window.ms_type.clone(), first_window.retention_time, scan, mobility, tof, mzs, intensity) + } + + pub fn to_dense_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, resolution: i32) -> (Vec, Vec, Vec, usize, usize) { + let factor = (10.0f64).powi(resolution); + let num_colums = ((window_length * factor).round() + 1.0) as usize; + + let (scans, window_indices, spectra) = self.to_windows_indexed(window_length, overlapping, min_peaks, min_intensity); + let vectorized_spectra = spectra.iter().map(|spectrum| spectrum.vectorized(resolution)).collect::>(); + + let mut flat_matrix: Vec = vec![0.0; spectra.len() * num_colums]; + + for (row_index, (window_index, spectrum)) in itertools::multizip((&window_indices, vectorized_spectra)).enumerate() { + + let vectorized_window_index = match *window_index >= 0 { + true => (*window_index as f64 * window_length * factor).round() as i32, + false => (((-1.0 * (*window_index as f64)) * window_length - (0.5 * window_length)) * factor).round() as i32, + }; + + for (i, index) in spectrum.vector.mz_vector.indices.iter().enumerate() { + let zero_based_index = (index - vectorized_window_index) as usize; + let current_index = row_index * num_colums + zero_based_index; + flat_matrix[current_index] = spectrum.vector.mz_vector.values[i]; + } + + } + (flat_matrix, scans, window_indices, spectra.len(), num_colums) + } + + pub fn to_indexed_mz_spectrum(&self) -> IndexedMzSpectrum { let mut grouped_data: BTreeMap> = BTreeMap::new(); @@ -232,7 +308,68 @@ impl TimsFrame { mz_spectrum: MzSpectrum { mz, intensity }, } } +} + +struct AggregateData { + intensity_sum: f64, + ion_mobility_sum: f64, + tof_sum: i32, + count: i32, +} + +impl std::ops::Add for TimsFrame { + type Output = Self; + fn add(self, other: Self) -> TimsFrame { + let mut combined_map: BTreeMap<(i64, i32), AggregateData> = BTreeMap::new(); + + let quantize = |mz: f64| -> i64 { + (mz * 1_000_000.0).round() as i64 + }; + let add_to_map = |map: &mut BTreeMap<(i64, i32), AggregateData>, mz, ion_mobility, tof, scan, intensity| { + let key = (quantize(mz), scan); + let entry = map.entry(key).or_insert(AggregateData { intensity_sum: 0.0, ion_mobility_sum: 0.0, tof_sum: 0, count: 0 }); + entry.intensity_sum += intensity; + entry.ion_mobility_sum += ion_mobility; + entry.tof_sum += tof; + entry.count += 1; + }; + + for (mz, tof, ion_mobility, scan, intensity) in izip!(&self.ims_frame.mz, &self.tof, &self.ims_frame.mobility, &self.scan, &self.ims_frame.intensity) { + add_to_map(&mut combined_map, *mz, *ion_mobility, *tof, *scan, *intensity); + } + + for (mz, tof, ion_mobility, scan, intensity) in izip!(&other.ims_frame.mz, &other.tof, &other.ims_frame.mobility, &other.scan, &other.ims_frame.intensity) { + add_to_map(&mut combined_map, *mz, *ion_mobility, *tof, *scan, *intensity); + } + + let mut mz_combined = Vec::new(); + let mut tof_combined = Vec::new(); + let mut ion_mobility_combined = Vec::new(); + let mut scan_combined = Vec::new(); + let mut intensity_combined = Vec::new(); + + for ((quantized_mz, scan), data) in combined_map { + mz_combined.push(quantized_mz as f64 / 1_000_000.0); + tof_combined.push(data.tof_sum / data.count); + ion_mobility_combined.push(data.ion_mobility_sum / data.count as f64); + scan_combined.push(scan); + intensity_combined.push(data.intensity_sum); + } + + TimsFrame { + frame_id: self.frame_id + other.frame_id, + ms_type: if self.ms_type == other.ms_type { self.ms_type.clone() } else { MsType::Unknown }, + scan: scan_combined, + tof: tof_combined, + ims_frame: ImsFrame { + retention_time: (self.ims_frame.retention_time + other.ims_frame.retention_time) / 2.0, + mobility: ion_mobility_combined, + mz: mz_combined, + intensity: intensity_combined, + }, + } + } } impl fmt::Display for TimsFrame { diff --git a/mscore/src/tims_slice.rs b/mscore/src/tims_slice.rs index 5ed18361..30177698 100644 --- a/mscore/src/tims_slice.rs +++ b/mscore/src/tims_slice.rs @@ -4,9 +4,8 @@ use rayon::ThreadPoolBuilder; use std::collections::BTreeMap; use std::collections::BTreeSet; use itertools::multizip; -use crate::MsType; +use crate::{MsType, TimsSpectrum}; -use crate::mz_spectrum::{MzSpectrum}; use crate::tims_frame::{ImsFrame, TimsFrame, TimsFrameVectorized, Vectorized, ToResolution}; #[derive(Clone)] @@ -204,7 +203,7 @@ impl TimsSlice { } } - pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, num_threads: usize) -> Vec { + pub fn to_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, num_threads: usize) -> Vec { // Create a thread pool let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); // Set to the desired number of threads @@ -219,6 +218,17 @@ impl TimsSlice { windows } + pub fn to_dense_windows(&self, window_length: f64, overlapping: bool, min_peaks: usize, min_intensity: f64, resolution: i32, num_threads: usize) -> Vec<(Vec, Vec, Vec, usize, usize)> { + let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + + let result = pool.install(|| { + let t = self.frames.par_iter().map(|f| f.to_dense_windows(window_length, overlapping, min_peaks, min_intensity, resolution)).collect::>(); + t + }); + + result + } + pub fn to_tims_planes(&self, tof_max_value: i32, num_chunks: i32, num_threads: usize) -> Vec { let flat_slice = self.flatten(); diff --git a/rustdf/Cargo.toml b/rustdf/Cargo.toml index 70ae3b65..3a8cf327 100644 --- a/rustdf/Cargo.toml +++ b/rustdf/Cargo.toml @@ -9,10 +9,12 @@ path = "src/lib.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -libloading = "0.8" -rusqlite = "0.29.0" +libloading = "0.8.1" +rusqlite = { version = "0.30.0", features = ["bundled"] } lzf = "1.0.0" -zstd = "0.12.4" +zstd = "0.13.0" byteorder = "1.4.3" anyhow = "1.0.75" -mscore = {path = "../mscore"} \ No newline at end of file +mscore = {path = "../mscore"} +rayon = "1.8.0" +futures = { version = "0.3.29", features = ["thread-pool"] } \ No newline at end of file diff --git a/rustdf/src/data/dataset.rs b/rustdf/src/data/dataset.rs new file mode 100644 index 00000000..c7f8e0fa --- /dev/null +++ b/rustdf/src/data/dataset.rs @@ -0,0 +1,34 @@ +use mscore::{TimsFrame, TimsSlice}; +use crate::data::handle::{TimsDataHandle, TimsData, AcquisitionMode}; + +pub struct TimsDataset { + pub handle: TimsDataHandle, +} + +impl TimsDataset { + pub fn new(bruker_lib_path: &str, data_path: &str) -> Self { + let handle = TimsDataHandle::new(bruker_lib_path, data_path).unwrap(); + TimsDataset { handle } + } +} + +impl TimsData for TimsDataset { + fn get_frame(&self, frame_id: u32) -> TimsFrame { + self.handle.get_frame(frame_id).unwrap() + } + fn get_slice(&self, frame_ids: Vec) -> TimsSlice { + self.handle.get_tims_slice(frame_ids) + } + fn get_aquisition_mode(&self) -> AcquisitionMode { + self.handle.acquisition_mode.clone() + } + fn get_frame_count(&self) -> i32 { + self.handle.get_frame_count() + } + fn get_data_path(&self) -> &str { + &self.handle.data_path + } + fn get_bruker_lib_path(&self) -> &str { + &self.handle.bruker_lib_path + } +} \ No newline at end of file diff --git a/rustdf/src/data/dda.rs b/rustdf/src/data/dda.rs new file mode 100644 index 00000000..19a5f181 --- /dev/null +++ b/rustdf/src/data/dda.rs @@ -0,0 +1,100 @@ +use mscore::{TimsFrame, TimsSlice}; +use rayon::prelude::*; +use rayon::ThreadPoolBuilder; +use crate::data::handle::{TimsDataHandle, TimsData, AcquisitionMode}; +use crate::data::meta::{DDAPrecursorMeta, PasefMsMsMeta, read_dda_precursor_meta, read_pasef_frame_ms_ms_info}; + +#[derive(Clone)] +pub struct PASEFDDAFragment { + pub frame_id: u32, + pub precursor_id: u32, + pub selected_fragment: TimsFrame, +} + +pub struct TimsDatasetDDA { + pub handle: TimsDataHandle, +} + +impl TimsDatasetDDA { + + pub fn new(bruker_lib_path: &str, data_path: &str) -> Self { + let handle = TimsDataHandle::new(bruker_lib_path, data_path).unwrap(); + TimsDatasetDDA { handle } + } + + pub fn get_selected_precursors(&self) -> Vec { + read_dda_precursor_meta(&self.handle.data_path).unwrap() + } + + pub fn get_pasef_frame_ms_ms_info(&self) -> Vec { + read_pasef_frame_ms_ms_info(&self.handle.data_path).unwrap() + } + + /// Get the fragment spectra for all PASEF selected precursors + pub fn get_pasef_fragments(&self, num_threads: usize) -> Vec { + // extract fragment spectra information + let pasef_info = self.get_pasef_frame_ms_ms_info(); + + let pool = ThreadPoolBuilder::new().num_threads(num_threads).build().unwrap(); + + + + let filtered_frames = pool.install(|| { + + let result: Vec<_> = pasef_info.par_iter().map(|pasef_info| { + + // get the frame + let frame = self.handle.get_frame(pasef_info.frame_id as u32).unwrap(); + + // get the fragment spectrum by scan range + let filtered_frame = frame.filter_ranged( + 0.0, + 2000.0, + pasef_info.scan_num_begin as i32, + pasef_info.scan_num_end as i32, + 0.0, + 5.0, + 0.0, + 1e9, + ); + + PASEFDDAFragment { + frame_id: pasef_info.frame_id as u32, + precursor_id: pasef_info.precursor_id as u32, + // flatten the spectrum + selected_fragment: filtered_frame, + } + }).collect(); + + result + }); + + filtered_frames + } +} + +impl TimsData for TimsDatasetDDA { + fn get_frame(&self, frame_id: u32) -> TimsFrame { + self.handle.get_frame(frame_id).unwrap() + } + + fn get_slice(&self, frame_ids: Vec) -> TimsSlice { + self.handle.get_tims_slice(frame_ids) + } + + fn get_aquisition_mode(&self) -> AcquisitionMode { + self.handle.acquisition_mode.clone() + } + + fn get_frame_count(&self) -> i32 { + self.handle.get_frame_count() + } + + fn get_data_path(&self) -> &str { + &self.handle.data_path + } + + fn get_bruker_lib_path(&self) -> &str { + &self.handle.bruker_lib_path + } +} \ No newline at end of file diff --git a/rustdf/src/data/dia.rs b/rustdf/src/data/dia.rs new file mode 100644 index 00000000..08c06e69 --- /dev/null +++ b/rustdf/src/data/dia.rs @@ -0,0 +1,37 @@ +use mscore::{TimsFrame, TimsSlice}; +use crate::data::handle::{AcquisitionMode, TimsData, TimsDataHandle}; + +pub struct TimsDatasetDIA { + pub handle: TimsDataHandle, +} + +impl TimsDatasetDIA { + pub fn new(bruker_lib_path: &str, data_path: &str) -> Self { + let handle = TimsDataHandle::new(bruker_lib_path, data_path).unwrap(); + TimsDatasetDIA { handle } + } +} + +impl TimsData for TimsDatasetDIA { + fn get_frame(&self, frame_id: u32) -> TimsFrame { + self.handle.get_frame(frame_id).unwrap() + } + fn get_slice(&self, frame_ids: Vec) -> TimsSlice { + self.handle.get_tims_slice(frame_ids) + } + fn get_aquisition_mode(&self) -> AcquisitionMode { + self.handle.acquisition_mode.clone() + } + + fn get_frame_count(&self) -> i32 { + self.handle.get_frame_count() + } + + fn get_data_path(&self) -> &str { + &self.handle.data_path + } + + fn get_bruker_lib_path(&self) -> &str { + &self.handle.bruker_lib_path + } +} \ No newline at end of file diff --git a/rustdf/src/data/handle.rs b/rustdf/src/data/handle.rs index 67e5f35f..ce2da169 100644 --- a/rustdf/src/data/handle.rs +++ b/rustdf/src/data/handle.rs @@ -8,7 +8,16 @@ use std::fs::File; use std::io::{Seek, SeekFrom, Cursor}; use byteorder::{LittleEndian, ByteOrder, ReadBytesExt}; -use mscore::{TimsFrame, ImsFrame, MsType, TimsSlice}; +use mscore::{TimsFrame, RawTimsFrame, ImsFrame, MsType, TimsSlice}; + +pub trait TimsData { + fn get_frame(&self, frame_id: u32) -> TimsFrame; + fn get_slice(&self, frame_ids: Vec) -> TimsSlice; + fn get_aquisition_mode(&self) -> AcquisitionMode; + fn get_frame_count(&self) -> i32; + fn get_data_path(&self) -> &str; + fn get_bruker_lib_path(&self) -> &str; +} /// Decompresses a ZSTD compressed byte array /// @@ -101,7 +110,7 @@ fn parse_decompressed_bruker_binary_data(decompressed_bytes: &[u8]) -> Result<(V -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum AcquisitionMode { DDA, DIA, @@ -269,6 +278,65 @@ impl TimsDataHandle { .into_iter()).collect() } + pub fn get_raw_frame(&self, frame_id: u32) -> Result> { + + let frame_index = (frame_id - 1) as usize; + let offset = self.tims_offset_values[frame_index] as u64; + + let mut file_path = PathBuf::from(&self.data_path); + file_path.push("analysis.tdf_bin"); + let mut infile = File::open(&file_path)?; + + infile.seek(SeekFrom::Start(offset))?; + + let mut bin_buffer = [0u8; 4]; + infile.read_exact(&mut bin_buffer)?; + let bin_size = Cursor::new(bin_buffer).read_i32::()?; + + infile.read_exact(&mut bin_buffer)?; + + match self.global_meta_data.tims_compression_type { + // TODO: implement + _ if self.global_meta_data.tims_compression_type == 1 => { + return Err("Decompression Type1 not implemented.".into()); + }, + + // Extract from ZSTD compressed binary + _ if self.global_meta_data.tims_compression_type == 2 => { + + let mut compressed_data = vec![0u8; bin_size as usize - 8]; + infile.read_exact(&mut compressed_data)?; + + let decompressed_bytes = zstd_decompress(&compressed_data)?; + + let (scan, tof, intensity) = parse_decompressed_bruker_binary_data(&decompressed_bytes)?; + + let ms_type_raw = self.frame_meta_data[frame_index].ms_ms_type; + + let ms_type = match ms_type_raw { + 0 => MsType::Precursor, + 8 => MsType::FragmentDda, + 9 => MsType::FragmentDia, + _ => MsType::Unknown, + }; + + Ok(RawTimsFrame { + frame_id: frame_id as i32, + retention_time: self.frame_meta_data[(frame_id - 1) as usize].time, + ms_type, + scan: scan.iter().map(|&x| x as i32).collect(), + tof: tof.iter().map(|&x| x as i32).collect(), + intensity: intensity.iter().map(|&x| x as f64).collect(), + }) + }, + + // Error on unknown compression algorithm + _ => { + return Err("TimsCompressionType is not 1 or 2.".into()); + } + } + } + /// get a frame from the tims dataset /// /// # Arguments diff --git a/rustdf/src/data/meta.rs b/rustdf/src/data/meta.rs index 55f1a218..b65aabc9 100644 --- a/rustdf/src/data/meta.rs +++ b/rustdf/src/data/meta.rs @@ -3,12 +3,24 @@ extern crate rusqlite; use rusqlite::{Connection, Result}; use std::path::Path; -pub struct DDAPrecursorInfo { +#[derive(Debug, Clone)] +pub struct PasefMsMsMeta { + pub frame_id: i64, + pub scan_num_begin: i64, + pub scan_num_end: i64, + pub isolation_mz: f64, + pub isolation_width: f64, + pub collision_energy: f64, + pub precursor_id: i64, +} + +#[derive(Debug, Clone)] +pub struct DDAPrecursorMeta { pub precursor_id: i64, pub precursor_mz_highest_intensity: f64, pub precursor_mz_average: f64, - pub precursor_mz_monoisotopic: f64, - pub precursor_charge: i64, + pub precursor_mz_monoisotopic: Option, + pub precursor_charge: Option, pub precursor_average_scan_number: f64, pub precursor_total_intensity: f64, pub precursor_frame_id: i64, @@ -75,6 +87,58 @@ struct GlobalMetaInternal { value: String, } +pub fn read_dda_precursor_meta(bruker_d_folder_name: &str) -> Result, Box> { + // Connect to the database + let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf"); + let conn = Connection::open(db_path)?; + + // prepare the query + let rows: Vec<&str> = vec!["Id", "LargestPeakMz", "AverageMz", "MonoisotopicMz", "Charge", "ScanNumber", "Intensity", "Parent"]; + let query = format!("SELECT {} FROM Precursors", rows.join(", ")); + + // execute the query + let frames_rows: Result, _> = conn.prepare(&query)?.query_map([], |row| { + Ok(DDAPrecursorMeta { + precursor_id: row.get(0)?, + precursor_mz_highest_intensity: row.get(1)?, + precursor_mz_average: row.get(2)?, + precursor_mz_monoisotopic: row.get(3)?, // Now using Option + precursor_charge: row.get(4)?, // Now using Option + precursor_average_scan_number: row.get(5)?, + precursor_total_intensity: row.get(6)?, + precursor_frame_id: row.get(7)?, + }) + })?.collect(); + + // return the frames + Ok(frames_rows?) +} + +pub fn read_pasef_frame_ms_ms_info(bruker_d_folder_name: &str) -> Result, Box> { + // Connect to the database + let db_path = Path::new(bruker_d_folder_name).join("analysis.tdf"); + let conn = Connection::open(db_path)?; + + // prepare the query + let rows: Vec<&str> = vec!["Frame", "ScanNumBegin", "ScanNumEnd", "IsolationMz", "IsolationWidth", "CollisionEnergy", "Precursor"]; + let query = format!("SELECT {} FROM PasefFrameMsMsInfo", rows.join(", ")); + + // execute the query + let frames_rows: Result, _> = conn.prepare(&query)?.query_map([], |row| { + Ok(PasefMsMsMeta { + frame_id: row.get(0)?, + scan_num_begin: row.get(1)?, + scan_num_end: row.get(2)?, + isolation_mz: row.get(3)?, + isolation_width: row.get(4)?, + collision_energy: row.get(5)?, + precursor_id: row.get(6)?, }) + })?.collect(); + + // return the frames + Ok(frames_rows?) +} + // Read the global meta data from the analysis.tdf file pub fn read_global_meta_sql(bruker_d_folder_name: &str) -> Result> { diff --git a/rustdf/src/data/mod.rs b/rustdf/src/data/mod.rs index 54d9f656..5b73abb5 100644 --- a/rustdf/src/data/mod.rs +++ b/rustdf/src/data/mod.rs @@ -1,3 +1,6 @@ pub mod raw; pub mod meta; -pub mod handle; \ No newline at end of file +pub mod handle; +pub mod dataset; +pub mod dda; +pub mod dia; \ No newline at end of file diff --git a/rustdf/src/lib.rs b/rustdf/src/lib.rs index 06389443..092cf493 100644 --- a/rustdf/src/lib.rs +++ b/rustdf/src/lib.rs @@ -2,5 +2,9 @@ pub mod data { pub mod raw; pub mod meta; + pub mod dataset; + pub mod dda; + pub mod dia; + pub mod handle; } \ No newline at end of file diff --git a/rustdf/src/main.rs b/rustdf/src/main.rs index 97c3914c..0d1ce2fe 100644 --- a/rustdf/src/main.rs +++ b/rustdf/src/main.rs @@ -1,33 +1,21 @@ -use rustdf::data::handle::TimsDataHandle; use std::env; +use rustdf::data::meta::{read_dda_precursor_meta}; -fn main() { - let args: Vec = env::args().collect(); - // args[0] is always the path to the program itself - if args.len() <= 1 { - eprintln!("Please provide a frame id."); - return; - } +fn main() { + let _args: Vec = env::args().collect(); - let frame_id: u32 = match args[1].parse() { - Ok(id) => id, - Err(_) => { - eprintln!("Invalid frame id provided."); - return; - } - }; + let data_path = "/media/hd01/CCSPred/M210115_001_Slot1-1_1_850.d"; - println!("Frame ID: {}", frame_id); + let result = read_dda_precursor_meta(data_path); - let data_path = "/media/hd01/CCSPred/M210115_001_Slot1-1_1_850.d"; - let bruker_lib_path = "/home/administrator/Documents/promotion/ENV/lib/python3.8/site-packages/opentims_bruker_bridge/libtimsdata.so"; - let tims_data = TimsDataHandle::new(bruker_lib_path, data_path); - match tims_data { - Ok(tims_data) => { - let _frame = tims_data.get_frame(frame_id); + match result { + Ok(precursors) => { + println!("Precursors: {:?}", precursors); + }, + Err(e) => { + println!("Error: {:?}", e); } + } - Err(e) => println!("error: {}", e), - }; }