From 12f56ed6b56e913dfdb3c7a117f3fb2d02e24687 Mon Sep 17 00:00:00 2001
From: RobbinBouwmeester
Date: Sat, 25 Nov 2023 13:51:10 +0100
Subject: [PATCH 1/2] Minor changes

---
 proteobench/modules/dda_quant/datapoint.py |  7 ++++---
 proteobench/modules/dda_quant/module.py    | 13 ++++++-------
 proteobench/modules/dda_quant/parse.py     | 11 +++++------
 webinterface/pages/DDA_Quant.py            |  3 +--
 4 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/proteobench/modules/dda_quant/datapoint.py b/proteobench/modules/dda_quant/datapoint.py
index 5349aa25..bf886881 100644
--- a/proteobench/modules/dda_quant/datapoint.py
+++ b/proteobench/modules/dda_quant/datapoint.py
@@ -1,4 +1,5 @@
 import json
+import logging
 from dataclasses import asdict, dataclass
 from datetime import datetime
 
@@ -48,16 +49,16 @@ def calculate_plot_data(self, df):
         self.nr_prec = len(df)
 
     def generate_id(self):
+        time_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
         self.id = (
             self.search_engine
             + "_"
             + str(self.software_version)
             + "_"
-            + str(datetime.timestamp(datetime.now()))
+            + str(time_stamp)
         )
-        print(self.id)
+        logging.info(f"Assigned the following ID to this run: {self.id}")
 
-    # TODO, not used? Can be removed?
     def dump_json_object(self, file_name):
         f = open(file_name, "a")
         f.write(json.dumps(asdict(self)))
diff --git a/proteobench/modules/dda_quant/module.py b/proteobench/modules/dda_quant/module.py
index 23625162..46d7c186 100644
--- a/proteobench/modules/dda_quant/module.py
+++ b/proteobench/modules/dda_quant/module.py
@@ -2,10 +2,9 @@
 
 import datetime
 import hashlib
-import itertools
+import logging
 import os
 import re
-import shutil
 from dataclasses import asdict
 from tempfile import TemporaryDirectory
 
@@ -189,7 +188,6 @@ def generate_datapoint(
         )
         result_datapoint.generate_id()
         result_datapoint.calculate_plot_data(intermediate)
-        # result_metadata.dump_json_object(json_dump_path)
 
         df = pd.Series(asdict(result_datapoint))
 
         return df
@@ -301,8 +299,9 @@ def clone_pr(
 
         branch_name = current_datapoint["id"]
 
-        print(os.path.join(t_dir, "results.json"))
-        f = open(os.path.join(t_dir, "results.json"), "w")
+        path_write = os.path.join(t_dir, "results.json")
+        logging.info(f"Writing the json to: {path_write}")
+        f = open(path_write, "w")
 
         all_datapoints.to_json(f, orient="records", indent=2)
 
@@ -328,7 +327,7 @@ def write_json_local_development(self, temporary_datapoints):
 
         # TODO write below to logger instead of std.out
         fname = os.path.join(t_dir, "results.json")
-        print(f"Writing the json to: {fname}")
+        logging.info(f"Writing the json to: {fname}")
 
         f = open(os.path.join(t_dir, "results.json"), "w")
 
@@ -343,7 +342,7 @@ def write_intermediate_raw(
         try:
             os.mkdir(path_write)
         except:
-            print("Could not make directory")
+            logging.warning(f"Could not make directory: {path_write}")
 
         outfile_param = open(os.path.join(path_write, "params.csv"), "w")
         outfile_param.write(str(param_loc.getvalue()))
diff --git a/proteobench/modules/dda_quant/parse.py b/proteobench/modules/dda_quant/parse.py
index cd9b9f59..b3cf3d66 100644
--- a/proteobench/modules/dda_quant/parse.py
+++ b/proteobench/modules/dda_quant/parse.py
@@ -131,7 +131,6 @@ def convert_to_standard_format(
     df = df[df["MULTI_SPEC"] == False]
 
     # If there is "Raw file" then it is a long format, otherwise short format
-    # TODO we might need to generalize this with toml
    if "Raw file" not in parse_settings.mapper.values():
         meltvars = parse_settings.replicate_mapper.keys()
         df = df.melt(
@@ -155,7 +154,6 @@ def convert_to_standard_format(
         modification_dict=parse_settings.modifications_mapper,
     )
 
-    # TODO, if "Charge" is not available return a sensible error
     try:
         df.loc[df.index, "peptidoform"] = (
             df.loc[df.index, "proforma"]
@@ -163,13 +161,14 @@ def convert_to_standard_format(
             + df.loc[df.index, "Charge"].map(str)
         )
     except KeyError:
-        # TODO if charge is not available it is now replaced with 2
-        df.loc[df.index, "peptidoform"] = df.loc[df.index, "proforma"] + "|Z=2"
+        raise KeyError(
+            "Not all columns required for making the ion are available. "
+            "Is the charge available in the input file?"
+        )
 
-    # TODO use peptide_ion or peptidoform here
     # TODO move this to datapoint, keep a count here of quantified AA
     count_non_zero = (
-        df.groupby(["Sequence", "Raw file"])["Intensity"].sum() > 0.0
+        df.groupby(["peptidoform", "Raw file"])["Intensity"].sum() > 0.0
     ).groupby(level=[0]).sum() == 6
 
     allowed_peptidoforms = list(count_non_zero.index[count_non_zero])
diff --git a/webinterface/pages/DDA_Quant.py b/webinterface/pages/DDA_Quant.py
index 867c6a90..0a5f0c82 100644
--- a/webinterface/pages/DDA_Quant.py
+++ b/webinterface/pages/DDA_Quant.py
@@ -23,8 +23,6 @@
 import streamlit_utils
 from streamlit_extras.let_it_rain import rain
 
-# from proteobench.github.gh import clone_pr, write_json_local_development
-
 logger = logging.getLogger(__name__)
 
 ALL_DATAPOINTS = "all_datapoints"
@@ -394,4 +392,5 @@ class Errors:
 
 
 if __name__ == "__main__":
+    logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
     StreamlitUI()

From c1812c004d887d170f2e785af9b9e5ce61e70c5b Mon Sep 17 00:00:00 2001
From: RobbinBouwmeester
Date: Sat, 25 Nov 2023 13:57:18 +0100
Subject: [PATCH 2/2] Make sure we check on peptidoform/ion

---
 proteobench/modules/dda_quant/parse.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/proteobench/modules/dda_quant/parse.py b/proteobench/modules/dda_quant/parse.py
index b3cf3d66..1ac7acc1 100644
--- a/proteobench/modules/dda_quant/parse.py
+++ b/proteobench/modules/dda_quant/parse.py
@@ -172,6 +172,6 @@ def convert_to_standard_format(
     ).groupby(level=[0]).sum() == 6
 
     allowed_peptidoforms = list(count_non_zero.index[count_non_zero])
-    filtered_df = df[df["Sequence"].isin(allowed_peptidoforms)]
+    filtered_df = df[df["peptidoform"].isin(allowed_peptidoforms)]
 
     return filtered_df, replicate_to_raw
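
Illustrative note (not part of the patch series): a minimal sketch of the peptidoform/ion construction that [PATCH 1/2] adds to parse.py and that [PATCH 2/2] filters on. The column names "proforma" and "Charge" come from the diffs above; the toy data frame and the build_peptidoform helper are hypothetical and only show the intended happy path plus the explicit KeyError now raised when the charge column is missing.

import pandas as pd


def build_peptidoform(df: pd.DataFrame) -> pd.DataFrame:
    # Mirrors the patched logic: peptidoform = proforma + "|Z=" + charge.
    try:
        df.loc[df.index, "peptidoform"] = (
            df.loc[df.index, "proforma"] + "|Z=" + df.loc[df.index, "Charge"].map(str)
        )
    except KeyError:
        raise KeyError(
            "Not all columns required for making the ion are available. "
            "Is the charge available in the input file?"
        )
    return df


# Happy path: both columns are present.
df = pd.DataFrame({"proforma": ["PEPTIDE", "PEPTIDEK"], "Charge": [2, 3]})
print(build_peptidoform(df)["peptidoform"].tolist())  # ['PEPTIDE|Z=2', 'PEPTIDEK|Z=3']

# Missing charge column: the explicit error replaces the old silent "|Z=2" fallback.
try:
    build_peptidoform(pd.DataFrame({"proforma": ["PEPTIDE"]}))
except KeyError as err:
    print(err)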