Skip to content

Commit

Permalink
Merge pull request #142 from Proteobench/minor-changes-before-alpha
Browse files Browse the repository at this point in the history
Minor changes
  • Loading branch information
RobbinBouwmeester authored Nov 25, 2023
2 parents 7e4640e + c1812c0 commit 17666ba
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 19 deletions.
7 changes: 4 additions & 3 deletions proteobench/modules/dda_quant/datapoint.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import logging
from dataclasses import asdict, dataclass
from datetime import datetime

Expand Down Expand Up @@ -48,16 +49,16 @@ def calculate_plot_data(self, df):
self.nr_prec = len(df)

def generate_id(self):
    """Assign a unique, human-readable run ID to this datapoint.

    The ID is composed of the search engine name, the software version and
    a second-resolution timestamp, e.g. ``MaxQuant_1.0_20231125_143005``.
    Sets ``self.id`` as a side effect and returns ``None``.
    """
    # Human-readable timestamp (second resolution) instead of a raw epoch
    # float, so the ID doubles as a readable label — presumably also used
    # downstream as a git branch name (see clone_pr); confirm with callers.
    time_stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # strftime already returns str, so no extra str() conversion is needed
    # for the timestamp; software_version may be non-str, hence str() there.
    self.id = (
        self.search_engine
        + "_"
        + str(self.software_version)
        + "_"
        + time_stamp
    )
    logging.info(f"Assigned the following ID to this run: {self.id}")

# TODO, not used? Can be removed?
def dump_json_object(self, file_name):
f = open(file_name, "a")
f.write(json.dumps(asdict(self)))
Expand Down
13 changes: 6 additions & 7 deletions proteobench/modules/dda_quant/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

import datetime
import hashlib
import itertools
import logging
import os
import re
import shutil
from dataclasses import asdict
from tempfile import TemporaryDirectory

Expand Down Expand Up @@ -189,7 +188,6 @@ def generate_datapoint(
)
result_datapoint.generate_id()
result_datapoint.calculate_plot_data(intermediate)
# result_metadata.dump_json_object(json_dump_path)
df = pd.Series(asdict(result_datapoint))

return df
Expand Down Expand Up @@ -301,8 +299,9 @@ def clone_pr(

branch_name = current_datapoint["id"]

print(os.path.join(t_dir, "results.json"))
f = open(os.path.join(t_dir, "results.json"), "w")
path_write = os.path.join(t_dir, "results.json")
logging.info(f"Writing the json to: {path_write}")
f = open(path_write, "w")

all_datapoints.to_json(f, orient="records", indent=2)

Expand All @@ -328,7 +327,7 @@ def write_json_local_development(self, temporary_datapoints):

# TODO write below to logger instead of std.out
fname = os.path.join(t_dir, "results.json")
print(f"Writing the json to: {fname}")
logging.info(f"Writing the json to: {fname}")

f = open(os.path.join(t_dir, "results.json"), "w")

Expand All @@ -343,7 +342,7 @@ def write_intermediate_raw(
try:
os.mkdir(path_write)
except:
print("Could not make directory")
logging.warning(f"Could not make directory: {path_write}")

outfile_param = open(os.path.join(path_write, "params.csv"), "w")
outfile_param.write(str(param_loc.getvalue()))
Expand Down
13 changes: 6 additions & 7 deletions proteobench/modules/dda_quant/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ def convert_to_standard_format(
df = df[df["MULTI_SPEC"] == False]

# If there is "Raw file" then it is a long format, otherwise short format
# TODO we might need to generalize this with toml
if "Raw file" not in parse_settings.mapper.values():
meltvars = parse_settings.replicate_mapper.keys()
df = df.melt(
Expand All @@ -155,24 +154,24 @@ def convert_to_standard_format(
modification_dict=parse_settings.modifications_mapper,
)

# TODO, if "Charge" is not available return a sensible error
try:
df.loc[df.index, "peptidoform"] = (
df.loc[df.index, "proforma"]
+ "|Z="
+ df.loc[df.index, "Charge"].map(str)
)
except KeyError:
# TODO if charge is not available it is now replaced with 2
df.loc[df.index, "peptidoform"] = df.loc[df.index, "proforma"] + "|Z=2"
raise KeyError(
f"Not all columns required for making the ion are available."
"Is the charge available in the input file?"
)

# TODO use peptide_ion or peptidoform here
# TODO move this to datapoint, keep a count here of quantified AA
count_non_zero = (
df.groupby(["Sequence", "Raw file"])["Intensity"].sum() > 0.0
df.groupby(["peptidoform", "Raw file"])["Intensity"].sum() > 0.0
).groupby(level=[0]).sum() == 6

allowed_peptidoforms = list(count_non_zero.index[count_non_zero])
filtered_df = df[df["Sequence"].isin(allowed_peptidoforms)]
filtered_df = df[df["peptidoform"].isin(allowed_peptidoforms)]

return filtered_df, replicate_to_raw
3 changes: 1 addition & 2 deletions webinterface/pages/DDA_Quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
import streamlit_utils
from streamlit_extras.let_it_rain import rain

# from proteobench.github.gh import clone_pr, write_json_local_development

logger = logging.getLogger(__name__)

ALL_DATAPOINTS = "all_datapoints"
Expand Down Expand Up @@ -394,4 +392,5 @@ class Errors:


if __name__ == "__main__":
logging.basicConfig(format="%(levelname)s:%(message)s", level=logging.INFO)
StreamlitUI()

0 comments on commit 17666ba

Please sign in to comment.