Skip to content

Commit

Permalink
Merge pull request #142 from Proteobench/minor-changes-before-alpha
Browse files Browse the repository at this point in the history
Minor changes
  • Loading branch information
RobbinBouwmeester authored Nov 25, 2023
2 parents 7e4640e + c1812c0 commit 17666ba
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 19 deletions.
7 changes: 4 additions & 3 deletions proteobench/modules/dda_quant/datapoint.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
from dataclasses import asdict, dataclass
from datetime import datetime

Expand Down Expand Up @@ -48,16 +49,16 @@ def calculate_plot_data(self, df):
self.nr_prec = len(df)

def generate_id(self):
    """Assign a unique, human-readable run ID to this datapoint.

    The ID is composed of the search engine name, the software version and
    a second-resolution timestamp, e.g. ``MaxQuant_1.0_20231125_143005``.
    Sets ``self.id`` as a side effect and returns ``None``.
    """
    # Human-readable timestamp (second resolution) instead of a raw epoch
    # float, so the ID doubles as a readable label — presumably also used
    # downstream as a git branch name (see clone_pr); confirm with callers.
    time_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # strftime already returns str, so no extra str() conversion is needed
    # for the timestamp; software_version may be non-str, hence str() there.
    self.id = (
        self.search_engine
        + "_"
        + str(self.software_version)
        + "_"
        + time_stamp
    )
    logging.info(f"Assigned the following ID to this run: {self.id}")

# TODO, not used? Can be removed?
def dump_json_object(self, file_name):
f = open(file_name, "a")
f.write(json.dumps(asdict(self)))
Expand Down
13 changes: 6 additions & 7 deletions proteobench/modules/dda_quant/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

import datetime
import hashlib
import itertools
import logging
import os
import re
import shutil
from dataclasses import asdict
from tempfile import TemporaryDirectory

Expand Down Expand Up @@ -189,7 +188,6 @@ def generate_datapoint(
)
result_datapoint.generate_id()
result_datapoint.calculate_plot_data(intermediate)
# result_metadata.dump_json_object(json_dump_path)
df = pd.Series(asdict(result_datapoint))

return df
Expand Down Expand Up @@ -301,8 +299,9 @@ def clone_pr(

branch_name = current_datapoint["id"]

print(os.path.join(t_dir, "results.json"))
f = open(os.path.join(t_dir, "results.json"), "w")
path_write = os.path.join(t_dir, "results.json")
logging.info(f"Writing the json to: {path_write}")
f = open(path_write, "w")

all_datapoints.to_json(f, orient="records", indent=2)

Expand All @@ -328,7 +327,7 @@ def write_json_local_development(self, temporary_datapoints):

# TODO write below to logger instead of std.out
fname = os.path.join(t_dir, "results.json")
print(f"Writing the json to: {fname}")
logging.info(f"Writing the json to: {fname}")

f = open(os.path.join(t_dir, "results.json"), "w")

Expand All @@ -343,7 +342,7 @@ def write_intermediate_raw(
try:
os.mkdir(path_write)
except:
print("Could not make directory")
logging.warning(f"Could not make directory: {path_write}")

outfile_param = open(os.path.join(path_write, "params.csv"), "w")
outfile_param.write(str(param_loc.getvalue()))
Expand Down
13 changes: 6 additions & 7 deletions proteobench/modules/dda_quant/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ def convert_to_standard_format(
df = df[df["MULTI_SPEC"] == False]

# If there is "Raw file" then it is a long format, otherwise short format
# TODO we might need to generalize this with toml
if "Raw file" not in parse_settings.mapper.values():
meltvars = parse_settings.replicate_mapper.keys()
df = df.melt(
Expand All @@ -155,24 +154,24 @@ def convert_to_standard_format(
modification_dict=parse_settings.modifications_mapper,
)

# TODO, if "Charge" is not available return a sensible error
try:
df.loc[df.index, "peptidoform"] = (
df.loc[df.index, "proforma"]
+ "|Z="
+ df.loc[df.index, "Charge"].map(str)
)
except KeyError:
# TODO if charge is not available it is now replaced with 2
df.loc[df.index, "peptidoform"] = df.loc[df.index, "proforma"] + "|Z=2"
raise KeyError(
f"Not all columns required for making the ion are available."
"Is the charge available in the input file?"
)

# TODO use peptide_ion or peptidoform here
# TODO move this to datapoint, keep a count here of quantified AA
count_non_zero = (
df.groupby(["Sequence", "Raw file"])["Intensity"].sum() > 0.0
df.groupby(["peptidoform", "Raw file"])["Intensity"].sum() > 0.0
).groupby(level=[0]).sum() == 6

allowed_peptidoforms = list(count_non_zero.index[count_non_zero])
filtered_df = df[df["Sequence"].isin(allowed_peptidoforms)]
filtered_df = df[df["peptidoform"].isin(allowed_peptidoforms)]

return filtered_df, replicate_to_raw
3 changes: 1 addition & 2 deletions webinterface/pages/DDA_Quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
import streamlit_utils
from streamlit_extras.let_it_rain import rain

# from proteobench.github.gh import clone_pr, write_json_local_development

logger = logging.getLogger(__name__)

ALL_DATAPOINTS = "all_datapoints"
Expand Down Expand Up @@ -394,4 +392,5 @@ class Errors:


if __name__ == "__main__":
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
StreamlitUI()

0 comments on commit 17666ba

Please sign in to comment.