diff --git a/proteobench/github/gh.py b/proteobench/github/gh.py index 5bef9349..a4613327 100644 --- a/proteobench/github/gh.py +++ b/proteobench/github/gh.py @@ -78,21 +78,47 @@ def clone(remote_url: str, clone_dir: str) -> Repo: try: repo = Repo(clone_dir) except (exc.NoSuchPathError, exc.InvalidGitRepositoryError): - repo = Repo.clone_from(remote_url.rstrip("/"), clone_dir) + repo = Repo.clone_from(remote_url.rstrip("/"), clone_dir, depth=1, no_single_branch=True) + return repo + + @staticmethod + def shallow_clone(remote_url: str, clone_dir: str) -> Repo: + """ + Performs a shallow clone of the repository (only the latest commit). + + Args: + remote_url (str): The repository URL. + clone_dir (str): The target directory for cloning. + + Returns: + Repo: The cloned repository object. + """ + if os.path.exists(clone_dir): + print(f"Repository already exists in {clone_dir}. Trying to use existing files.") + try: + return Repo(clone_dir) + except exc.InvalidGitRepositoryError: + print(f"Repository invalid, will clone again.") + + try: + repo = Repo.clone_from(remote_url.rstrip("/"), clone_dir, depth=1, no_single_branch=True) + except exc.GitCommandError as e: + raise RuntimeError(f"Failed to clone the repository: {e}") + return repo def clone_repo_anonymous(self) -> Repo: """ - Clones the Proteobench repository anonymously (without authentication). + Clones the Proteobench repository anonymously with a shallow clone (without authentication). Returns: - Repo: The local repository object. + Repo: The cloned repository object. """ remote_url = self.get_remote_url_anon() - repo = self.clone(remote_url, self.clone_dir) - return repo + self.repo = self.shallow_clone(remote_url, self.clone_dir) + return self.repo - def read_results_json_repo(self) -> pd.DataFrame: + def read_results_json_repo_single_file(self) -> pd.DataFrame: """ Reads the `results.json` file from the cloned Proteobench repository and returns the data as a DataFrame. @@ -100,9 +126,34 @@ def read_results_json_repo(self) -> pd.DataFrame: pd.DataFrame: A Pandas DataFrame containing the results from `results.json`. """ f_name = os.path.join(self.clone_dir, "results.json") + + if not os.path.exists(f_name): + raise FileNotFoundError(f"File '{f_name}' does not exist.") + all_datapoints = pd.read_json(f_name) return all_datapoints + def read_results_json_repo(self) -> pd.DataFrame: + """ + Reads all JSON result files from the cloned Proteobench repository. + + Returns: + pd.DataFrame: A Pandas DataFrame containing aggregated results from multiple JSON files. + """ + data = [] + if not os.path.exists(self.clone_dir): + raise FileNotFoundError(f"Clone directory '{self.clone_dir}' does not exist.") + + for file in os.listdir(self.clone_dir): + if file.endswith(".json") and file != "results.json": + file_path = os.path.join(self.clone_dir, file) + with open(file_path, "r") as f: + data.append(pd.read_json(f, typ="series")) + if not data: + self.read_results_json_repo_single_file() + + return pd.DataFrame(data) + def clone_repo(self) -> Repo: """ Clones the Proteobench repository using either an anonymous or authenticated GitHub access token. diff --git a/proteobench/modules/quant/quant_base/quant_base_module.py b/proteobench/modules/quant/quant_base/quant_base_module.py index ac360b5a..1e5b3431 100644 --- a/proteobench/modules/quant/quant_base/quant_base_module.py +++ b/proteobench/modules/quant/quant_base/quant_base_module.py @@ -1,5 +1,6 @@ from __future__ import annotations +import json import logging import os import zipfile @@ -26,17 +27,13 @@ ) from proteobench.io.params.maxquant import extract_params as extract_params_maxquant from proteobench.io.params.msaid import extract_params as extract_params_msaid -from proteobench.io.params.proline import extract_params as extract_params_proline - from proteobench.io.params.msangel import extract_params as extract_params_msangel +from proteobench.io.params.peaks import read_peaks_settings as extract_params_peaks +from proteobench.io.params.proline import extract_params as extract_params_proline from proteobench.io.params.sage import extract_params as extract_params_sage from proteobench.io.params.spectronaut import ( read_spectronaut_settings as extract_params_spectronaut, ) -from proteobench.io.params.peaks import ( - read_peaks_settings as extract_params_peaks, -) - from proteobench.io.parsing.parse_ion import load_input_file from proteobench.io.parsing.parse_settings import ParseSettingsBuilder from proteobench.score.quant.quantscores import QuantScores @@ -330,6 +327,11 @@ def clone_pr( with open(path_write, "w") as f: all_datapoints.to_json(f, orient="records", indent=2) + path_write_individual_point = os.path.join(self.t_dir_pr, current_datapoint["intermediate_hash"] + ".json") + logging.info(f"Writing the json (single point) to: {path_write_individual_point}") + with open(path_write_individual_point, "w") as f: + json.dump(current_datapoint.to_dict(), f, indent=2) + commit_name = f"Added new run with id {branch_name}" commit_message = f"User comments: {submission_comments}" diff --git a/webinterface/pages/base_pages/quant.py b/webinterface/pages/base_pages/quant.py index 48ece59e..02ba8a4e 100644 --- a/webinterface/pages/base_pages/quant.py +++ b/webinterface/pages/base_pages/quant.py @@ -52,7 +52,6 @@ def __init__( self.stop_duplicating = False if self.variables_quant.params_file_dict not in st.session_state.keys(): - input("stop") st.session_state[self.variables_quant.params_file_dict] = dict() def display_submission_form(self) -> None: