Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add correlate_single usage mode (API only) #232

Merged
merged 1 commit into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions docs/source/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,14 @@ For instance:
ms2pip correlate --psm-filetype sage results.sage.tsv spectra.mgf


``correlate-single``
--------------------

Predict spectrum intensities for a single peptide and correlate them with the observed intensities
from an :py:class:`~ms2pip.spectrum.ObservedSpectrum` object. The peptidoform must be set on the
observed spectrum. This mode is only available through the Python API, not through the
command-line interface.


``get-training-data``
---------------------

Expand Down
73 changes: 58 additions & 15 deletions ms2pip/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,17 +17,17 @@
from rich.progress import track

import ms2pip.exceptions as exceptions
from ms2pip import spectrum_output
from ms2pip._cython_modules import ms2pip_pyx
from ms2pip._utils.encoder import Encoder
from ms2pip._utils.feature_names import get_feature_names
from ms2pip._utils.ion_mobility import IonMobility
from ms2pip._utils.psm_input import read_psms
from ms2pip._utils.retention_time import RetentionTime
from ms2pip._utils.ion_mobility import IonMobility
from ms2pip._utils.xgb_models import get_predictions_xgb, validate_requested_xgb_model
from ms2pip.constants import MODELS
from ms2pip.result import ProcessingResult, calculate_correlations
from ms2pip.search_space import ProteomeSearchSpace
from ms2pip.spectrum import ObservedSpectrum
from ms2pip.spectrum_input import read_spectrum_file
from ms2pip.spectrum_output import SUPPORTED_FORMATS

Expand Down Expand Up @@ -291,6 +291,62 @@ def correlate(
return results


def correlate_single(
    observed_spectrum: ObservedSpectrum,
    ms2_tolerance: float = 0.02,
    model: str = "HCD",
) -> ProcessingResult:
    """
    Correlate single observed spectrum with predicted intensities.\f

    The observed peaks are matched against the encoded peptidoform to obtain target
    intensities per ion type. A spectrum is then predicted for the same peptidoform, the
    target intensities are attached to the prediction result, and the correlation is
    computed on that result.

    Parameters
    ----------
    observed_spectrum
        ObservedSpectrum instance with observed m/z and intensity values and peptidoform.
    ms2_tolerance
        MS2 tolerance in Da for observed spectrum peak annotation. By default, 0.02 Da.
    model
        Model to use for prediction; used as key into ``MODELS``. Default: "HCD".

    Returns
    -------
    result: ProcessingResult
        Result of ``predict_single`` with ``observed_intensity`` set and passed through
        ``calculate_correlations``.

    Raises
    ------
    ValueError
        If ``observed_spectrum.peptidoform`` is not a ``Peptidoform`` instance.

    """
    peptidoform = observed_spectrum.peptidoform

    # Without a peptidoform there is nothing to annotate or predict
    if not isinstance(peptidoform, Peptidoform):
        raise ValueError("Peptidoform must be set in observed spectrum to correlate.")

    # Match observed peaks against the encoded peptidoform
    with Encoder.from_peptidoform(peptidoform) as encoder:
        ms2pip_pyx.ms2pip_init(*encoder.encoder_files)
        encoded = encoder.encode_peptidoform(peptidoform)
        mz_values = observed_spectrum.mz.astype(np.float32)
        intensity_values = observed_spectrum.intensity.astype(np.float32)
        tolerance = float(ms2_tolerance)
        targets = ms2pip_pyx.get_targets(
            encoded,
            mz_values,
            intensity_values,
            tolerance,
            MODELS[model]["peaks_version"],
        )

    # Map lower-cased ion type -> float32 target array, bounded below by log2(0.001)
    # NOTE(review): presumably log2(0.001) is the transformed value of a zero-intensity
    # peak - confirm against the target extraction code.
    intensity_floor = np.log2(0.001)
    observed_intensity = {}
    for ion_type, peaks in zip(MODELS[model]["ion_types"], targets):
        peak_array = np.array(peaks, dtype=np.float32)
        observed_intensity[ion_type.lower()] = peak_array.clip(min=intensity_floor)

    # Predict for the same peptidoform and attach the observed targets
    result = predict_single(peptidoform, model=model)
    result.observed_intensity = observed_intensity

    # Correlation is computed on the result object itself; the return value is not used
    calculate_correlations([result])

    return result


def get_training_data(
psms: Union[PSMList, str, Path],
spectrum_file: Union[str, Path],
Expand Down Expand Up @@ -704,19 +760,6 @@ def _add_xgboost_predictions(self, results: List[ProcessingResult]) -> List[Proc

return results

# TODO IMPLEMENT
def write_predictions(
    self, all_preds: pd.DataFrame, peptides: pd.DataFrame, output_filename: str
):
    """
    Write predictions to file (not implemented).

    Parameters
    ----------
    all_preds
        Currently unused; the method raises before reading it.
    peptides
        Currently unused; the method raises before reading it.
    output_filename
        Currently unused; the method raises before reading it.

    Raises
    ------
    NotImplementedError
        Always.

    """
    # NOTE(review): an earlier draft placed a ``spectrum_output.SpectrumOutput(...)``
    # construction and ``write_results(self.output_formats)`` call after the raise. Those
    # statements could never run and were removed; reinstate them when implementing.
    raise NotImplementedError


def _process_peptidoform(
psm_index: int,
Expand Down