Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Msangel compatibility #543

Merged
merged 51 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
a297739
update msangel param test files
mlocardpaulet Jan 21, 2025
ed7e65d
amend precedent
mlocardpaulet Jan 21, 2025
6a1846d
change test file MSAngel
mlocardpaulet Jan 21, 2025
7d484c0
Params
RobbinBouwmeester Jan 21, 2025
7305234
works for Mascot only, no test set up
mlocardpaulet Jan 21, 2025
e822cbb
generate csv for testing
mlocardpaulet Jan 21, 2025
555d0d4
parse MSAngel X!Tandem outputs
mlocardpaulet Jan 21, 2025
80d5471
make csv for test MSAngel XTandem param parsing
mlocardpaulet Jan 21, 2025
9063d59
create test py for msangel
mlocardpaulet Jan 21, 2025
6e09697
Update proteobench/io/params/MSAngel.py
mlocardpaulet Jan 21, 2025
e43946e
add default modifications of X!Tandem
mlocardpaulet Jan 21, 2025
a724ec2
Merge branch 'msangel-compatibility' of https://github.com/Proteobenc…
mlocardpaulet Jan 21, 2025
84329d6
Change the rows based on json for tests
RobbinBouwmeester Jan 21, 2025
34e5536
Merge branch 'main' into msangel-compatibility
RobbinBouwmeester Jan 21, 2025
8b96394
Update MSAngel.py
RobbinBouwmeester Jan 22, 2025
d3ff2a2
Update __init__.py
RobbinBouwmeester Jan 22, 2025
1233e43
Update MSAngel.py
RobbinBouwmeester Jan 22, 2025
5c4b972
Delete MSAngel.py
RobbinBouwmeester Jan 22, 2025
c3484e7
Create msangel.py
RobbinBouwmeester Jan 22, 2025
993a817
Update __init__.py
RobbinBouwmeester Jan 22, 2025
0f6a1db
Merge branch 'main' into param_input_forms
RobbinBouwmeester Jan 22, 2025
51f595e
Update test_parse_params_maxquant.py
RobbinBouwmeester Jan 22, 2025
cdcc561
Change
RobbinBouwmeester Jan 22, 2025
c7d633d
fix wrong tolerance window reporting
mlocardpaulet Jan 22, 2025
18ec505
Change param parsing proline
RobbinBouwmeester Jan 23, 2025
d4f38da
Update test_parse_params_proline.py
RobbinBouwmeester Jan 23, 2025
0e650ad
Change what is done with none and rem MQ tests
RobbinBouwmeester Jan 23, 2025
26cc09f
MQ files and new nan
RobbinBouwmeester Jan 23, 2025
f6d0e88
Merge branch 'msangel-compatibility' into param_input_forms
RobbinBouwmeester Jan 23, 2025
1af76c7
Merge pull request #546 from Proteobench/param_input_forms
RobbinBouwmeester Jan 23, 2025
7c9a5c1
Fix tests
RobbinBouwmeester Jan 23, 2025
4dd78b6
Update __init__.py
RobbinBouwmeester Jan 23, 2025
f7c3c7b
Delete test_proline.csv
RobbinBouwmeester Jan 23, 2025
9f18fca
Allow manual input
RobbinBouwmeester Jan 24, 2025
4172243
Merge branch 'main' into split_json_results
RobbinBouwmeester Jan 26, 2025
f0b2cd8
Merge branch 'main' into msangel-compatibility
RobbinBouwmeester Jan 26, 2025
2c74619
Changes to gh individual json
RobbinBouwmeester Jan 26, 2025
8cca5b6
Support individual json files write, read and shallow gh clone
RobbinBouwmeester Jan 26, 2025
0ab2d34
alternatively read results.json
RobbinBouwmeester Jan 26, 2025
04e5647
Fix cloning into existing dir
RobbinBouwmeester Jan 26, 2025
765e6ea
Merge branch 'msangel-compatibility' into split_json_results
RobbinBouwmeester Jan 26, 2025
16b5ea0
Merge pull request #552 from Proteobench/split_json_results
RobbinBouwmeester Jan 26, 2025
1be4d5f
add parameter json configs
RobbinBouwmeester Jan 26, 2025
1494341
change page variables
RobbinBouwmeester Jan 26, 2025
e92e79d
Update fields.json
RobbinBouwmeester Jan 26, 2025
0d15f43
Update fields.json
RobbinBouwmeester Jan 26, 2025
ba2a1bf
Change to text input for optimal flexibility
RobbinBouwmeester Jan 27, 2025
3a8cd41
Remove default search engine version
RobbinBouwmeester Jan 27, 2025
3c557fc
Remove default fragment tol
RobbinBouwmeester Jan 27, 2025
3003507
Code PR highlighting manual changes
RobbinBouwmeester Jan 27, 2025
8f8f2c7
Fix changed params in PR
RobbinBouwmeester Jan 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 57 additions & 6 deletions proteobench/github/gh.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,31 +78,82 @@ def clone(remote_url: str, clone_dir: str) -> Repo:
try:
repo = Repo(clone_dir)
except (exc.NoSuchPathError, exc.InvalidGitRepositoryError):
repo = Repo.clone_from(remote_url.rstrip("/"), clone_dir)
repo = Repo.clone_from(remote_url.rstrip("/"), clone_dir, depth=1, no_single_branch=True)
return repo

@staticmethod
def shallow_clone(remote_url: str, clone_dir: str) -> Repo:
    """
    Performs a shallow clone of the repository (only the latest commit).

    If ``clone_dir`` already exists and holds a valid git repository, that
    repository is reused and nothing is cloned.

    Args:
        remote_url (str): The repository URL; trailing slashes are stripped before cloning.
        clone_dir (str): The target directory for cloning.

    Returns:
        Repo: The existing repository found in ``clone_dir`` or the freshly cloned one.

    Raises:
        RuntimeError: If the git clone command fails. The underlying
            ``GitCommandError`` is attached as the exception cause.
    """
    if os.path.exists(clone_dir):
        print(f"Repository already exists in {clone_dir}. Trying to use existing files.")
        try:
            return Repo(clone_dir)
        except exc.InvalidGitRepositoryError:
            # Not a usable repository: fall through and attempt a fresh clone.
            print("Repository invalid, will clone again.")

    try:
        # depth=1 keeps only the latest commit; no_single_branch=True is passed so
        # the clone is not restricted to a single branch.
        return Repo.clone_from(remote_url.rstrip("/"), clone_dir, depth=1, no_single_branch=True)
    except exc.GitCommandError as e:
        # Chain explicitly so the original git error stays visible in tracebacks.
        raise RuntimeError(f"Failed to clone the repository: {e}") from e

def clone_repo_anonymous(self) -> Repo:
"""
Clones the Proteobench repository anonymously (without authentication).
Clones the Proteobench repository anonymously with a shallow clone (without authentication).

Returns:
Repo: The local repository object.
Repo: The cloned repository object.
"""
remote_url = self.get_remote_url_anon()
repo = self.clone(remote_url, self.clone_dir)
return repo
self.repo = self.shallow_clone(remote_url, self.clone_dir)
return self.repo

def read_results_json_repo(self) -> pd.DataFrame:
def read_results_json_repo_single_file(self) -> pd.DataFrame:
    """
    Loads the aggregated `results.json` file from the clone directory.

    Returns:
        pd.DataFrame: The content of `results.json` as parsed by `pd.read_json`.

    Raises:
        FileNotFoundError: If `results.json` is not present in the clone directory.
    """
    results_path = os.path.join(self.clone_dir, "results.json")
    if os.path.exists(results_path):
        return pd.read_json(results_path)
    raise FileNotFoundError(f"File '{results_path}' does not exist.")

def read_results_json_repo(self) -> pd.DataFrame:
    """
    Reads all per-datapoint JSON result files from the cloned Proteobench repository.

    Every `*.json` file in the clone directory except `results.json` is parsed as
    one pandas Series and becomes one row of the returned DataFrame. If no such
    file exists, the aggregated `results.json` is read instead.

    Returns:
        pd.DataFrame: A Pandas DataFrame containing aggregated results from multiple
            JSON files, or the content of `results.json` as a fallback.

    Raises:
        FileNotFoundError: If the clone directory does not exist, or if the fallback
            is needed and `results.json` is missing as well.
    """
    if not os.path.exists(self.clone_dir):
        raise FileNotFoundError(f"Clone directory '{self.clone_dir}' does not exist.")

    data = []
    for file in os.listdir(self.clone_dir):
        if file.endswith(".json") and file != "results.json":
            file_path = os.path.join(self.clone_dir, file)
            with open(file_path, "r", encoding="utf-8") as f:
                data.append(pd.read_json(f, typ="series"))

    if not data:
        # Return the fallback result; previously it was computed and discarded,
        # so callers always received an empty DataFrame in this case.
        return self.read_results_json_repo_single_file()

    return pd.DataFrame(data)

def clone_repo(self) -> Repo:
"""
Clones the Proteobench repository using either an anonymous or authenticated GitHub access token.
Expand Down
122 changes: 0 additions & 122 deletions proteobench/io/params/MSAngel.py

This file was deleted.

141 changes: 53 additions & 88 deletions proteobench/io/params/__init__.py
Original file line number Diff line number Diff line change
@@ -1,95 +1,60 @@
from dataclasses import dataclass
# Reference for parameter names
# https://github.com/bigbio/proteomics-sample-metadata/blob/master/sdrf-proteomics/assets/param2sdrf.yml
import json
import os
from dataclasses import dataclass, field
from typing import Optional

import numpy as np


# Reference for parameter names
# https://github.com/bigbio/proteomics-sample-metadata/blob/master/sdrf-proteomics/assets/param2sdrf.yml
@dataclass
class ProteoBenchParameters:
"""
Parameters for a proteomics search engine.
def __init__(
    self, filename=os.path.join(os.path.dirname(__file__), "json/Quant/lfq/ion/DDA/fields.json"), **kwargs
):
    """
    Reads the JSON file and initializes only the attributes present in the file.

    Each top-level key of the JSON file becomes an attribute. Its initial value is
    the entry's "value" if present, otherwise its "placeholder", otherwise None.
    Keyword arguments then override attributes that already exist; the string
    "None" is stored as np.nan. Keyword arguments naming unknown attributes are
    ignored.

    Parameters
    ----------
    filename : str
        Path to the JSON file describing the fields. If the file does not exist,
        an error message is printed and no attribute is initialized.
    **kwargs
        Attribute overrides, keyed by attribute name.
    """
    if not os.path.isfile(filename):
        print(f"Error: File '{filename}' not found.")
        return  # No initialization happens if the file is missing

    with open(filename, "r", encoding="utf-8") as file:
        json_dict = json.load(file)

    # Initialize only the fields present in the JSON
    for key, value in json_dict.items():
        if "value" in value:
            default = value["value"]
        elif "placeholder" in value:
            default = value["placeholder"]
        else:
            default = None
        setattr(self, key, default)

    for key, value in kwargs.items():
        if not hasattr(self, key):
            continue
        # Check the type first so values whose `==` does not yield a plain bool
        # (e.g. pandas.NA, arrays) cannot break the comparison.
        is_none_marker = isinstance(value, str) and value == "None"
        setattr(self, key, np.nan if is_none_marker else value)

def __repr__(self):
    """
    Render the instance as the string form of a dict holding its non-None attributes.
    """
    visible = {}
    for name, current in vars(self).items():
        if current is not None:
            visible[name] = current
    return str(visible)

def fill_none(self):
    """
    Replace every attribute whose value is the string "None" with np.nan.

    Only the literal string "None" is replaced; attributes holding the actual
    ``None`` object are left unchanged.
    """
    # Snapshot the items so attributes can be reassigned safely while looping.
    for key, value in list(self.__dict__.items()):
        # Check the type first so values whose `==` does not yield a plain bool
        # (e.g. pandas.NA, arrays) cannot break the comparison.
        if isinstance(value, str) and value == "None":
            setattr(self, key, np.nan)

Attributes
----------
software_name : Optional[str]
Name of the software tool / pipeline used for this benchmark run
(examples: "MaxQuant", "AlphaPept", "Proline", ...).
software_version : Optional[str]
Version of the software tool / pipeline used for this benchmark run
search_engine: Optional[str]
Search engine used for this benchmark run
(examples: "Andromeda", "Mascot", ...).
search_engine_version : Optional[str]
Version of the search engine used for this benchmark run.
ident_fdr_psm : Optional[str]
False discovery rate (FDR) threshold for peptide-spectrum match
(PSM) validation ("0.01" = 1%).
ident_fdr_peptide : Optional[str]
False discovery rate (FDR) threshold for peptide validation ("0.01" = 1%).
ident_fdr_protein : Optional[str]
False discovery rate (FDR) threshold for protein validation ("0.01" = 1%).
enable_match_between_runs : Optional[bool]
Match between run (also named cross assignment) is enabled.
precursor_mass_tolerance : Optional[str]
Precursor mass tolerance used for the search.
Given as an interval of upper and lower tolerance, e.g. [-20 ppm, 20 ppm].
fragment_mass_tolerance : Optional[str]
Fragment mass tolerance used for the search.
Given as an interval of upper and lower tolerance, e.g. [-0.02 Da, 0.02 Da].
enzyme : Optional[str]
Enzyme used as parameter for the search. If several, use "|".
allowed_miscleavages : Optional[int]
Maximal number of missed cleavages allowed.
min_peptide_length : Optional[str]
Minimum peptide length (number of residues) allowed for the search.
max_peptide_length : Optional[str]
Maximum peptide length (number of residues) allowed for the search.
fixed_mods : Optional[str]
Fixed modifications searched for in the search. If several, separate with "|".
variable_mods : Optional[str]
Variable modifications searched for in the search. If several, separate with "|".
max_mods : Optional[int]
Maximal number of modifications per peptide
(including fixed and variable modifications).
min_precursor_charge : Optional[int]
Minimum precursor charge allowed.
max_precursor_charge : Optional[int]
Maximum precursor charge allowed.
spectral_library_generation : Optional[dict]
Models used to generate spectral library (DIA-specific).
scan_window : Optional[int]
Scan window radius. Ideally corresponds to approximate
average number of data points per peak (DIA-specific).
quantification_method_DIANN : Optional[str]
Quantification strategy used in the DIA-NN engine (DIANN-specific).
second_pass : Optional[bool]
Whether second pass search is enabled (DIANN-specific).
protein_inference : Optional[str]
Protein inference method used.
"""

software_name: Optional[str] = None
software_version: Optional[str] = None
search_engine: Optional[str] = None
search_engine_version: Optional[str] = None
ident_fdr_psm: Optional[str] = None # fdr_psm
ident_fdr_peptide: Optional[float] = None # fdr_peptide
ident_fdr_protein: Optional[float] = None # fdr_protein
enable_match_between_runs: Optional[bool] = None # MBR
precursor_mass_tolerance: Optional[str] = None # precursor_tol, precursor_tol_unit
fragment_mass_tolerance: Optional[str] = None # fragment_tol, fragment_tol_unit
enzyme: Optional[str] = None # enzyme_name
allowed_miscleavages: Optional[int] = None # missed_cleavages
min_peptide_length: Optional[int] = None # min_pep_length
max_peptide_length: Optional[int] = None # max_pep_length
fixed_mods: Optional[str] = None # fixed_modifications
variable_mods: Optional[str] = None # variable_modifications
max_mods: Optional[int] = None # max_num_modifications
min_precursor_charge: Optional[int] = None # precursor_charge
max_precursor_charge: Optional[int] = None
scan_window: Optional[int] = None # DIA-specific
quantification_method: Optional[str] = None #
second_pass: Optional[bool] = None # DIANN specific
protein_inference: Optional[str] = None # example occams razor, proteinprophet
predictors_library: Optional[dict] = None # type of model used to generate spectral library
abundance_normalization_ions: Optional[str] = None # tic, median etc.
# Automatically initialize from fields.json if run directly:
# build the parameters with the default filename and print their representation.
if __name__ == "__main__":
    proteo_params = ProteoBenchParameters()
    print(proteo_params)
1 change: 1 addition & 0 deletions proteobench/io/params/alphapept.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def extract_params(fname: pathlib.Path) -> ProteoBenchParameters:
params.max_precursor_charge = record["features"]["iso_charge_max"]
params.enable_match_between_runs = record["workflow"]["match"] # Check if matching is enabled
params.abundance_normalization_ions = None # No normalization in AlphaPept
params.fill_none()
return params


Expand Down
2 changes: 2 additions & 0 deletions proteobench/io/params/fragger.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ def extract_params(file: BytesIO) -> ProteoBenchParameters:
if fragpipe_params.loc["protein-prophet.run-protein-prophet"] == "true":
params.protein_inference = f"ProteinProphet: {fragpipe_params.loc['protein-prophet.cmd-opts']}"

params.fill_none()

return params


Expand Down
2 changes: 2 additions & 0 deletions proteobench/io/params/i2masschroq.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def _extract_xtandem_params(params: pd.Series) -> ProteoBenchParameters:
min_precursor_charge=1,
max_precursor_charge=int(params.loc["spectrum, maximum parent charge"]),
)
params.fill_none()
return params


Expand Down Expand Up @@ -120,6 +121,7 @@ def _extract_sage_params(params: pd.Series) -> ProteoBenchParameters:
min_precursor_charge=int(min_precursor_charge),
max_precursor_charge=int(max_precursor_charge),
)
params.fill_none()
return params


Expand Down
Loading
Loading