Skip to content

Commit

Permalink
Merge pull request #552 from Proteobench/split_json_results
Browse files Browse the repository at this point in the history
Split json results
  • Loading branch information
RobbinBouwmeester authored Jan 26, 2025
2 parents f0b2cd8 + 765e6ea commit 16b5ea0
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 13 deletions.
63 changes: 57 additions & 6 deletions proteobench/github/gh.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,31 +78,82 @@ def clone(remote_url: str, clone_dir: str) -> Repo:
try:
repo = Repo(clone_dir)
except (exc.NoSuchPathError, exc.InvalidGitRepositoryError):
repo = Repo.clone_from(remote_url.rstrip("/"), clone_dir)
repo = Repo.clone_from(remote_url.rstrip("/"), clone_dir, depth=1, no_single_branch=True)
return repo

@staticmethod
def shallow_clone(remote_url: str, clone_dir: str) -> Repo:
    """
    Perform a shallow clone of the repository (only the latest commit).

    If ``clone_dir`` already exists and can be opened as a Git repository, that
    repository is returned as-is and nothing is cloned.

    Args:
        remote_url (str): The repository URL.
        clone_dir (str): The target directory for cloning.

    Returns:
        Repo: The cloned (or reused) repository object.

    Raises:
        RuntimeError: If the clone command fails; the original
            ``GitCommandError`` is attached as the cause.
    """
    if os.path.exists(clone_dir):
        print(f"Repository already exists in {clone_dir}. Trying to use existing files.")
        try:
            return Repo(clone_dir)
        except exc.InvalidGitRepositoryError:
            # Fall through to a fresh clone below.
            print("Repository invalid, will clone again.")

    try:
        # depth=1 limits history to the tip commit; no_single_branch=True keeps
        # the tips of the other branches available despite the shallow depth.
        return Repo.clone_from(remote_url.rstrip("/"), clone_dir, depth=1, no_single_branch=True)
    except exc.GitCommandError as e:
        # Chain the cause so the underlying git failure stays in the traceback.
        raise RuntimeError(f"Failed to clone the repository: {e}") from e

def clone_repo_anonymous(self) -> Repo:
    """
    Clone the Proteobench repository anonymously with a shallow clone (without authentication).

    The repository object is stored on ``self.repo`` before being returned.

    Returns:
        Repo: The cloned repository object.
    """
    remote_url = self.get_remote_url_anon()
    self.repo = self.shallow_clone(remote_url, self.clone_dir)
    return self.repo

def read_results_json_repo(self) -> pd.DataFrame:
def read_results_json_repo_single_file(self) -> pd.DataFrame:
    """
    Load the single aggregate `results.json` file from the cloned Proteobench repository.

    Returns:
        pd.DataFrame: The contents of `results.json` parsed into a Pandas DataFrame.

    Raises:
        FileNotFoundError: If `results.json` is not present in the clone directory.
    """
    results_path = os.path.join(self.clone_dir, "results.json")

    if os.path.exists(results_path):
        return pd.read_json(results_path)

    raise FileNotFoundError(f"File '{results_path}' does not exist.")

def read_results_json_repo(self) -> pd.DataFrame:
    """
    Read all per-datapoint JSON result files from the cloned Proteobench repository.

    Every ``*.json`` file in the clone directory other than ``results.json`` is
    parsed as a single record. When no such file is found, the aggregate
    ``results.json`` is read instead via ``read_results_json_repo_single_file``.

    Returns:
        pd.DataFrame: One row per JSON file (ordered by file name), or the
            content of ``results.json`` when falling back.

    Raises:
        FileNotFoundError: If the clone directory does not exist.
    """
    if not os.path.exists(self.clone_dir):
        raise FileNotFoundError(f"Clone directory '{self.clone_dir}' does not exist.")

    # os.listdir yields entries in arbitrary order; sort so row order is reproducible.
    json_files = sorted(
        name for name in os.listdir(self.clone_dir) if name.endswith(".json") and name != "results.json"
    )

    data = []
    for name in json_files:
        with open(os.path.join(self.clone_dir, name), "r", encoding="utf-8") as f:
            data.append(pd.read_json(f, typ="series"))

    if not data:
        # The fallback frame must be returned, otherwise callers get an empty DataFrame.
        return self.read_results_json_repo_single_file()

    return pd.DataFrame(data)

def clone_repo(self) -> Repo:
"""
Clones the Proteobench repository using either an anonymous or authenticated GitHub access token.
Expand Down
14 changes: 8 additions & 6 deletions proteobench/modules/quant/quant_base/quant_base_module.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

import json
import logging
import os
import zipfile
Expand All @@ -26,17 +27,13 @@
)
from proteobench.io.params.maxquant import extract_params as extract_params_maxquant
from proteobench.io.params.msaid import extract_params as extract_params_msaid
from proteobench.io.params.proline import extract_params as extract_params_proline

from proteobench.io.params.msangel import extract_params as extract_params_msangel
from proteobench.io.params.peaks import read_peaks_settings as extract_params_peaks
from proteobench.io.params.proline import extract_params as extract_params_proline
from proteobench.io.params.sage import extract_params as extract_params_sage
from proteobench.io.params.spectronaut import (
read_spectronaut_settings as extract_params_spectronaut,
)
from proteobench.io.params.peaks import (
read_peaks_settings as extract_params_peaks,
)

from proteobench.io.parsing.parse_ion import load_input_file
from proteobench.io.parsing.parse_settings import ParseSettingsBuilder
from proteobench.score.quant.quantscores import QuantScores
Expand Down Expand Up @@ -330,6 +327,11 @@ def clone_pr(
with open(path_write, "w") as f:
all_datapoints.to_json(f, orient="records", indent=2)

path_write_individual_point = os.path.join(self.t_dir_pr, current_datapoint["intermediate_hash"] + ".json")
logging.info(f"Writing the json (single point) to: {path_write_individual_point}")
with open(path_write_individual_point, "w") as f:
json.dump(current_datapoint.to_dict(), f, indent=2)

commit_name = f"Added new run with id {branch_name}"
commit_message = f"User comments: {submission_comments}"

Expand Down
1 change: 0 additions & 1 deletion webinterface/pages/base_pages/quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def __init__(
self.stop_duplicating = False

if self.variables_quant.params_file_dict not in st.session_state.keys():
input("stop")
st.session_state[self.variables_quant.params_file_dict] = dict()

def display_submission_form(self) -> None:
Expand Down

0 comments on commit 16b5ea0

Please sign in to comment.