From 12f56ed6b56e913dfdb3c7a117f3fb2d02e24687 Mon Sep 17 00:00:00 2001
From: RobbinBouwmeester
Date: Sat, 25 Nov 2023 13:51:10 +0100
Subject: [PATCH 1/2] Minor changes

---
 proteobench/modules/dda_quant/datapoint.py |  7 ++++---
 proteobench/modules/dda_quant/module.py    | 13 ++++++-------
 proteobench/modules/dda_quant/parse.py     | 11 +++++------
 webinterface/pages/DDA_Quant.py            |  3 +--
 4 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/proteobench/modules/dda_quant/datapoint.py b/proteobench/modules/dda_quant/datapoint.py
index 5349aa25..bf886881 100644
--- a/proteobench/modules/dda_quant/datapoint.py
+++ b/proteobench/modules/dda_quant/datapoint.py
@@ -1,4 +1,5 @@
 import json
+import logging
 from dataclasses import asdict, dataclass
 from datetime import datetime
 
@@ -48,16 +49,16 @@ def calculate_plot_data(self, df):
         self.nr_prec = len(df)
 
     def generate_id(self):
+        time_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         self.id = (
             self.search_engine
             + "_"
             + str(self.software_version)
             + "_"
-            + str(datetime.timestamp(datetime.now()))
+            + str(time_stamp)
         )
-        print(self.id)
+        logging.info(f"Assigned the following ID to this run: {self.id}")
 
-    # TODO, not used? Can be removed?
     def dump_json_object(self, file_name):
         f = open(file_name, "a")
         f.write(json.dumps(asdict(self)))
diff --git a/proteobench/modules/dda_quant/module.py b/proteobench/modules/dda_quant/module.py
index 23625162..46d7c186 100644
--- a/proteobench/modules/dda_quant/module.py
+++ b/proteobench/modules/dda_quant/module.py
@@ -2,10 +2,9 @@
 
 import datetime
 import hashlib
-import itertools
+import logging
 import os
 import re
-import shutil
 from dataclasses import asdict
 from tempfile import TemporaryDirectory
 
@@ -189,7 +188,6 @@ def generate_datapoint(
         )
         result_datapoint.generate_id()
         result_datapoint.calculate_plot_data(intermediate)
-        # result_metadata.dump_json_object(json_dump_path)
 
         df = pd.Series(asdict(result_datapoint))
 
         return df
@@ -301,8 +299,9 @@ def clone_pr(
 
         branch_name = current_datapoint["id"]
 
-        print(os.path.join(t_dir, "results.json"))
-        f = open(os.path.join(t_dir, "results.json"), "w")
+        path_write = os.path.join(t_dir, "results.json")
+        logging.info(f"Writing the json to: {path_write}")
+        f = open(path_write, "w")
 
         all_datapoints.to_json(f, orient="records", indent=2)
 
@@ -328,7 +327,7 @@ def write_json_local_development(self, temporary_datapoints):
 
         # TODO write below to logger instead of std.out
         fname = os.path.join(t_dir, "results.json")
-        print(f"Writing the json to: {fname}")
+        logging.info(f"Writing the json to: {fname}")
 
         f = open(os.path.join(t_dir, "results.json"), "w")
 
@@ -343,7 +342,7 @@ def write_intermediate_raw(
         try:
             os.mkdir(path_write)
         except:
-            print("Could not make directory")
+            logging.warning(f"Could not make directory: {path_write}")
 
         outfile_param = open(os.path.join(path_write, "params.csv"), "w")
         outfile_param.write(str(param_loc.getvalue()))
diff --git a/proteobench/modules/dda_quant/parse.py b/proteobench/modules/dda_quant/parse.py
index cd9b9f59..b3cf3d66 100644
--- a/proteobench/modules/dda_quant/parse.py
+++ b/proteobench/modules/dda_quant/parse.py
@@ -131,7 +131,6 @@ def convert_to_standard_format(
     df = df[df["MULTI_SPEC"] == False]
 
     # If there is "Raw file" then it is a long format, otherwise short format
-    # TODO we might need to generalize this with toml
    if "Raw file" not in parse_settings.mapper.values():
         meltvars = parse_settings.replicate_mapper.keys()
         df = df.melt(
@@ -155,7 +154,6 @@ def convert_to_standard_format(
         modification_dict=parse_settings.modifications_mapper,
     )
 
-    # TODO, if "Charge" is not available return a sensible error
     try:
         df.loc[df.index, "peptidoform"] = (
             df.loc[df.index, "proforma"]
@@ -163,13 +161,14 @@ def convert_to_standard_format(
             + df.loc[df.index, "Charge"].map(str)
         )
     except KeyError:
-        # TODO if charge is not available it is now replaced with 2
-        df.loc[df.index, "peptidoform"] = df.loc[df.index, "proforma"] + "|Z=2"
+        raise KeyError(
+            "Not all columns required for making the ion are available. "
+            "Is the charge available in the input file?"
+        )
 
-    # TODO use peptide_ion or peptidoform here
     # TODO move this to datapoint, keep a count here of quantified AA
     count_non_zero = (
-        df.groupby(["Sequence", "Raw file"])["Intensity"].sum() > 0.0
+        df.groupby(["peptidoform", "Raw file"])["Intensity"].sum() > 0.0
     ).groupby(level=[0]).sum() == 6
 
     allowed_peptidoforms = list(count_non_zero.index[count_non_zero])
diff --git a/webinterface/pages/DDA_Quant.py b/webinterface/pages/DDA_Quant.py
index 867c6a90..0a5f0c82 100644
--- a/webinterface/pages/DDA_Quant.py
+++ b/webinterface/pages/DDA_Quant.py
@@ -23,8 +23,6 @@
 import streamlit_utils
 from streamlit_extras.let_it_rain import rain
 
-# from proteobench.github.gh import clone_pr, write_json_local_development
-
 logger = logging.getLogger(__name__)
 
 ALL_DATAPOINTS = "all_datapoints"
@@ -394,4 +392,5 @@ class Errors:
 
 
 if __name__ == "__main__":
+    logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
     StreamlitUI()

From c1812c004d887d170f2e785af9b9e5ce61e70c5b Mon Sep 17 00:00:00 2001
From: RobbinBouwmeester
Date: Sat, 25 Nov 2023 13:57:18 +0100
Subject: [PATCH 2/2] Make sure we check on peptidoform/ion

---
 proteobench/modules/dda_quant/parse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/proteobench/modules/dda_quant/parse.py b/proteobench/modules/dda_quant/parse.py
index b3cf3d66..1ac7acc1 100644
--- a/proteobench/modules/dda_quant/parse.py
+++ b/proteobench/modules/dda_quant/parse.py
@@ -172,6 +172,6 @@ def convert_to_standard_format(
     ).groupby(level=[0]).sum() == 6
 
     allowed_peptidoforms = list(count_non_zero.index[count_non_zero])
-    filtered_df = df[df["Sequence"].isin(allowed_peptidoforms)]
+    filtered_df = df[df["peptidoform"].isin(allowed_peptidoforms)]
 
     return filtered_df, replicate_to_raw
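
Illustrative note (not part of the patch series): a minimal sketch of the peptidoform/ion construction that [PATCH 1/2] adds to parse.py and that [PATCH 2/2] filters on. The column names "proforma" and "Charge" come from the diffs above; the toy data frame and the build_peptidoform helper are hypothetical and only show the intended happy path plus the explicit KeyError now raised when the charge column is missing.

import pandas as pd


def build_peptidoform(df: pd.DataFrame) -> pd.DataFrame:
    # Mirrors the patched logic: peptidoform = proforma + "|Z=" + charge.
    try:
        df.loc[df.index, "peptidoform"] = (
            df.loc[df.index, "proforma"] + "|Z=" + df.loc[df.index, "Charge"].map(str)
        )
    except KeyError:
        raise KeyError(
            "Not all columns required for making the ion are available. "
            "Is the charge available in the input file?"
        )
    return df


# Happy path: both columns are present.
df = pd.DataFrame({"proforma": ["PEPTIDE", "PEPTIDEK"], "Charge": [2, 3]})
print(build_peptidoform(df)["peptidoform"].tolist())  # ['PEPTIDE|Z=2', 'PEPTIDEK|Z=3']

# Missing charge column: the explicit error replaces the old silent "|Z=2" fallback.
try:
    build_peptidoform(pd.DataFrame({"proforma": ["PEPTIDE"]}))
except KeyError as err:
    print(err)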