Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug fixing #9

Merged
merged 6 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name = "quantms-rescoring"
description = "quantms-rescoring: Python scripts and helpers for the quantMS workflow"
readme = "README.md"
license = "MIT"
version = "0.0.3"
version = "0.0.4"
authors = [
"Yasset Perez-Riverol <[email protected]>",
"Dai Chengxin <[email protected]>",
Expand Down
2 changes: 1 addition & 1 deletion quantmsrescore/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.3"
__version__ = "0.0.4"
111 changes: 68 additions & 43 deletions quantmsrescore/ms2rescore.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import importlib.resources
import json
import logging

import os
import click
import pyopenms as oms
from ms2rescore import package_data, rescore
Expand Down Expand Up @@ -141,24 +141,24 @@ def _parse_psm(


def parse_cli_arguments_to_config(
config_file: str = None,
feature_generators: str = None,
ms2pip_model_dir: str = None,
ms2pip_model: str = None,
ms2_tolerance: float = None,
calibration_set_size: float = None,
rescoring_engine: str = None,
rng: int = None,
test_fdr: float = None,
processes: int = None,
spectrum_path: str = None,
fasta_file: str = None,
id_decoy_pattern: str = None,
lower_score_is_better: bool = None,
output_path: str = None,
log_level: str = None,
spectrum_id_pattern: str = None,
psm_id_pattern: str = None
config_file: str = None,
feature_generators: str = None,
ms2pip_model_dir: str = None,
ms2pip_model: str = None,
ms2_tolerance: float = None,
calibration_set_size: float = None,
rescoring_engine: str = None,
rng: int = None,
test_fdr: float = None,
processes: int = None,
spectrum_path: str = None,
fasta_file: str = None,
id_decoy_pattern: str = None,
lower_score_is_better: bool = None,
output_path: str = None,
log_level: str = None,
spectrum_id_pattern: str = None,
psm_id_pattern: str = None
) -> dict:
if config_file is None:
config = json.load(
Expand Down Expand Up @@ -254,6 +254,34 @@ def rescore_idxml(input_file, output_file, config) -> None:
peptide_ids = reader.new_peptide_ids
else:
peptide_ids = reader.peptide_ids

# check if any spectrum is empty
exp = oms.MSExperiment()
oms.MzMLFile().load(config["ms2rescore"]["spectrum_path"], exp)
empty_spectra = 0
spec = []
for spectrum in exp:
peaks_tuple = spectrum.get_peaks()
if len(peaks_tuple[0]) == 0 and spectrum.getMSLevel() == 2:
logging.warning(
f"{spectrum.getNativeID()} spectra don't have spectra information!"
)
empty_spectra += 1
continue
spec.append(spectrum)

if empty_spectra != 0:
logging.warning(
f"Removed {empty_spectra} spectra without spectra information!"
)
exp.setSpectra(spec)
output_dir = os.path.dirname(config["ms2rescore"]["output_path"])
mzml_output = os.path.join(output_dir,
os.path.splitext(os.path.basename(config["ms2rescore"]["spectrum_path"]))[0] + "_clear.mzML")
oms.MzMLFile().store(mzml_output, exp)
config["ms2rescore"]["spectrum_path"] = mzml_output
# TODO: Add cleanup of temporary file after processing

# Rescore
rescore(config, psm_list)

Expand All @@ -266,7 +294,7 @@ def rescore_idxml(input_file, output_file, config) -> None:


def filter_out_artifact_psms(
psm_list: PSMList, peptide_ids: List[oms.PeptideIdentification]
psm_list: PSMList, peptide_ids: List[oms.PeptideIdentification]
) -> List[oms.PeptideIdentification]:
"""Filter out PeptideHits that could not be processed by all feature generators"""
num_mandatory_features = max([len(psm.rescoring_features) for psm in psm_list])
Expand Down Expand Up @@ -424,26 +452,26 @@ def filter_out_artifact_psms(
)
@click.pass_context
def ms2rescore(
ctx,
psm_file: str,
spectrum_path,
output_path: str,
log_level,
processes,
fasta_file,
test_fdr,
feature_generators,
ms2pip_model_dir,
ms2pip_model,
ms2_tolerance,
calibration_set_size,
rescoring_engine,
rng,
id_decoy_pattern,
lower_score_is_better,
config_file: str,
spectrum_id_pattern: str,
psm_id_pattern: str
ctx,
psm_file: str,
spectrum_path,
output_path: str,
log_level,
processes,
fasta_file,
test_fdr,
feature_generators,
ms2pip_model_dir,
ms2pip_model,
ms2_tolerance,
calibration_set_size,
rescoring_engine,
rng,
id_decoy_pattern,
lower_score_is_better,
config_file: str,
spectrum_id_pattern: str,
psm_id_pattern: str
):
"""
Rescore PSMs in an idXML file and keep other information unchanged.
Expand Down Expand Up @@ -505,6 +533,3 @@ def ms2rescore(
logging.info("MS²Rescore config:")
logging.info(config)
rescore_idxml(psm_file, output_path, config)



Loading