From c666cc431043315f46888f39ec43b20387870b80 Mon Sep 17 00:00:00 2001
From: Henry Webel <heweb@dtu.dk>
Date: Sat, 25 Jan 2025 18:23:08 +0100
Subject: [PATCH 1/8] :white_check_mark: start reading the data

- downstream ion parsing does not work yet
---
 proteobench/io/parsing/parse_ion.py           |  14 ++-
 ..._quantms.sdrf_openms_design_msstats_in.csv | 101 ++++++++++++++++++
 test/test_module_dda_quant.py                 |   1 +
 3 files changed, 114 insertions(+), 2 deletions(-)
 create mode 100644 test/data/dda_quant/sample_dda_quantms.sdrf_openms_design_msstats_in.csv

diff --git a/proteobench/io/parsing/parse_ion.py b/proteobench/io/parsing/parse_ion.py
index 82a1a2f7..a4f71fe6 100644
--- a/proteobench/io/parsing/parse_ion.py
+++ b/proteobench/io/parsing/parse_ion.py
@@ -1,7 +1,6 @@
 import math
 import os
 import re
-from typing import Dict, List, Optional
 
 import pandas as pd
 
@@ -109,7 +108,18 @@ def load_input_file(input_csv: str, input_format: str) -> pd.DataFrame:
         input_data_frame = pd.read_csv(input_csv, low_memory=False, sep="\t")
     elif input_format == "PEAKS":
         input_data_frame = pd.read_csv(input_csv, low_memory=False, sep=",")
-
+    elif input_format == "quantms":
+        input_data_frame = pd.read_csv(input_csv, low_memory=False)
+        input_data_frame = input_data_frame.assign(
+            Sequence=input_data_frame["PeptideSequence"].str.replace(
+                r"\(([^)]+)\)",
+                r"",
+                regex=True,
+            ),
+        )
+        input_data_frame["Sequence"] = input_data_frame["PeptideSequence"].str.replace(r"\(([^)]+)\)", r"", regex=True)
+    else:
+        raise ValueError(f"Input format '{input_format}' not recognized.")
     return input_data_frame
 
 
diff --git a/test/data/dda_quant/sample_dda_quantms.sdrf_openms_design_msstats_in.csv b/test/data/dda_quant/sample_dda_quantms.sdrf_openms_design_msstats_in.csv
new file mode 100644
index 00000000..ee6ce861
--- /dev/null
+++ b/test/data/dda_quant/sample_dda_quantms.sdrf_openms_design_msstats_in.csv
@@ -0,0 +1,101 @@
+ProteinName,PeptideSequence,PrecursorCharge,FragmentIon,ProductCharge,IsotopeLabelType,Condition,BioReplicate,Run,Intensity,Reference
+sp|P09733|TBA1_YEAST,DLFHPEQLISGK,3,,0,L,cond_A,1,1,131708800.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P32119|PRDX2_HUMAN,TDEGIAYR,2,,0,L,cond_B,4,4,845348600.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P49321|NASP_HUMAN,EQVYDAMGEK,2,,0,L,cond_B,4,4,48666640.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P50990|TCPQ_HUMAN,TVGATALPR,2,,0,L,cond_A,1,1,958554700.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|Q99832|TCPH_HUMAN,SLHDAIMIVR,3,,0,L,cond_B,4,4,329470100.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P58004|SESN2_HUMAN,GPSAFIPVEEVLR,2,,0,L,cond_B,5,5,29033340.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|P02545|LMNA_HUMAN,ASASGSGAQVGGPISSGSSASSVTVTR,3,,0,L,cond_B,4,4,97719470.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|Q8WWM7|ATX2L_HUMAN,FTDSAIAMNSK,2,,0,L,cond_B,6,6,47579640.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|P50552|VASP_HUMAN,QQPGPSEHIER,3,,0,L,cond_B,4,4,118654400.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|Q7L014|DDX46_HUMAN,ASYPC(Carbamidomethyl)MSLHGGIDQYDR,3,,0,L,cond_B,4,4,38434830.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P30154|2AAB_HUMAN,MAGDQVANVR,2,,0,L,cond_A,1,1,47908580.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|Q04726|TLE3_HUMAN,AELTSSAPAC(Carbamidomethyl)YALAISPDAK,2,,0,L,cond_A,2,2,22015490.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|P35658|NU214_HUMAN,SSLLAVSNK,2,,0,L,cond_A,1,1,91677080.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|Q8WUM4|PDC6I_HUMAN,LLDEEEATDNDLR,2,,0,L,cond_A,1,1,245312700.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P38910|CH10_YEAST,LNQAEVVAVGPGFTDANGNK,3,,0,L,cond_B,4,4,81358820.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|Q9UQR0|SCML2_HUMAN,SEAPSYIAVPDPSVLK,2,,0,L,cond_A,2,2,43908620.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|O43390|HNRPR_HUMAN,LC(Carbamidomethyl)DSYEIRPGK,2,,0,L,cond_A,1,1,50041590.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P05373|HEM2_YEAST,GLINANLAHK,2,,0,L,cond_B,5,5,32199620.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|Q04438|SPG4_YEAST,NVDISNMSQGEFLR,2,,0,L,cond_B,4,4,70438430.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|Q9NUQ3|TXLNG_HUMAN,ALGAHLEAEPK,3,,0,L,cond_A,1,1,83372190.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P49411|EFTU_HUMAN,NMITGTAPLDGC(Carbamidomethyl)ILVVAANDGPMPQTR,3,,0,L,cond_A,1,1,128703300.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|Q14657|LAGE3_HUMAN,FGPPVSR,2,,0,L,cond_B,6,6,57360970.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|P38426|TPS3_YEAST,NPNLSFDSHPPR,3,,0,L,cond_A,1,1,61451870.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P21333|FLNA_HUMAN,SPFEVYVDK,2,,0,L,cond_B,5,5,297051000.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|Q14684|RRP1B_HUMAN,AGPGSLELC(Carbamidomethyl)GLPSQK,2,,0,L,cond_B,6,6,60428020.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|Q07666|KHDR1_HUMAN,ILGPQGNTIK,2,,0,L,cond_A,1,1,507243000.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P48735|IDHP_HUMAN,VC(Carbamidomethyl)VETVESGAMTK,2,,0,L,cond_B,4,4,49347380.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|Q15645|PCH2_HUMAN,IDVAFVDR,2,,0,L,cond_A,2,2,70441630.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|P00558|PGK1_HUMAN,AC(Carbamidomethyl)ANPAAGSVILLENLR,3,,0,L,cond_A,3,3,639706700.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|P46367|ALDH4_YEAST,HIYQSAAAGLK,2,,0,L,cond_A,2,2,190671900.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|Q5T653|RM02_HUMAN,QMQVLETC(Carbamidomethyl)VATVGR,2,,0,L,cond_A,2,2,38343660.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|Q9NWV4|CZIB_HUMAN,TIVEFEC(Carbamidomethyl)R,2,,0,L,cond_A,1,1,82191550.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P09972|ALDOC_HUMAN,YTPEEIAMATVTALR,2,,0,L,cond_B,6,6,351006400.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|P42696|RBM34_HUMAN,IQINQEEER,2,,0,L,cond_B,5,5,17101070.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|Q8WUM4|PDC6I_HUMAN,FYNELTEILVR,2,,0,L,cond_A,2,2,220731500.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|Q15833|STXB2_HUMAN,EPIPSLEAIYLLSPTEK,2,,0,L,cond_A,3,3,26323180.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|Q9BWS9|CHID1_HUMAN,GLVVTDLK,2,,0,L,cond_A,3,3,25301540.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|Q02486|ABF2_YEAST,LYSEYQK,2,,0,L,cond_B,5,5,48212330.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|Cont_Q3SX14|GELS_BOVIN,AQPVQVAEGSEPDSFWEALGGK,2,,0,L,cond_A,1,1,29316550.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|Q92841|DDX17_HUMAN,FVINYDYPNSSEDYVHR,3,,0,L,cond_B,4,4,331407500.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|Q13428|TCOF_HUMAN,LGAGEGGEASVSPEK,2,,0,L,cond_B,6,6,37681520.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|P11177|ODPB_HUMAN,DAINQGMDEELERDEK,3,,0,L,cond_A,2,2,69755510.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|P08758|ANXA5_HUMAN,ALLLLC(Carbamidomethyl)GEDD,2,,0,L,cond_A,1,1,41767580.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|Q9UHD8|SEPT9_HUMAN,FINDQYEK,2,,0,L,cond_A,2,2,93834380.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|Q9UNZ2|NSF1C_HUMAN,SPNELVDDLFK,2,,0,L,cond_A,2,2,38462320.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|Q1KMD3|HNRL2_HUMAN,NGEDLGVAFWISK,2,,0,L,cond_B,4,4,124289300.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P16157|ANK1_HUMAN,LGYISVTDVLK,2,,0,L,cond_B,4,4,78834210.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P30050|RL12_HUMAN,QAQIEVVPSASALIIK,2,,0,L,cond_A,1,1,610790300.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P15880|RS2_HUMAN,LSIVPVR,2,,0,L,cond_B,4,4,618046500.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P27616|PUR7_YEAST,TELDGILPLVAR,2,,0,L,cond_A,3,3,435745100.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|P26641|EF1G_HUMAN,STFVLDEFK,2,,0,L,cond_B,4,4,1932950000.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|O15498|YKT6_HUMAN,VAFTLLEK,2,,0,L,cond_B,5,5,38799580.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|P38088|SYG_YEAST,YDIGNPVTGETLESPR,2,,0,L,cond_B,4,4,186166200.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P49589|SYCC_HUMAN,VSEYVPEIVNFVQK,3,,0,L,cond_B,5,5,26276280.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|P38764|RPN1_YEAST,VGQAVETVGQAGRPK,2,,0,L,cond_A,3,3,22935790.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|P28482|MK01_HUMAN,VADPDHDHTGFLTEYVATR,4,,0,L,cond_B,4,4,230327400.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P23381|SYWC_HUMAN,MSASDPNSSIFLTDTAK,2,,0,L,cond_A,3,3,346003800.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|P38701|RS20_YEAST,YIDLEAPVQIVK,3,,0,L,cond_A,1,1,49868200.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P16120|THRC_YEAST,ADVELVK,2,,0,L,cond_B,6,6,3600227000.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|P09110|THIK_HUMAN,QDTFALASQQK,2,,0,L,cond_A,3,3,84843690.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|P30044|PRDX5_HUMAN,THLPGFVEQAEALK,3,,0,L,cond_B,5,5,286970800.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|P35197|GCS1_YEAST,NFNGNAEDSSTAGNTTHTEYQK,3,,0,L,cond_A,1,1,30576350.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|Q99567|NUP88_HUMAN,NQSPTEAEKPASSSLPSSPPPQLLTR,3,,0,L,cond_B,6,6,122309400.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|P0A6G7|CLPP_ECOLI,FLSAPEAVEYGLVDSILTHR,3,,0,L,cond_A,3,3,97005950.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|Q14690|RRP5_HUMAN,VVILNVDLLK,2,,0,L,cond_B,5,5,15405070.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|P0A6P1|EFTS_ECOLI,VAALEGDVLGSYQHGAR,2,,0,L,cond_B,5,5,68910670.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|O43583|DENR_HUMAN,QEAGISEGQGTAGEEEEK,2,,0,L,cond_B,6,6,74275150.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|Q6PJ69|TRI65_HUMAN,ASLEVTQQQATQAEGQLLELR,3,,0,L,cond_A,2,2,14652530.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|O15397|IPO8_HUMAN,ETENDDVTNVIQK,2,,0,L,cond_A,3,3,41540000.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|P0A9G6|ACEA_ECOLI,ADQIQWSAGIEPGDPR,2,,0,L,cond_B,5,5,106234000.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|Q15393|SF3B3_HUMAN,MQGQEAVLAMSSR,2,,0,L,cond_B,6,6,285025400.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|P07814|SYEP_HUMAN,EAPC(Carbamidomethyl)VLIYIPDGHTK,2,,0,L,cond_A,3,3,49156330.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|Q9H9A7|RMI1_HUMAN,DLEHPLLPDGILEIPK,3,,0,L,cond_B,4,4,13267190.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|O94880|PHF14_HUMAN,LNIPAILR,2,,0,L,cond_B,6,6,18349690.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|Q08J23|NSUN2_HUMAN,LAQEGIYTLYPFINSR,3,,0,L,cond_B,5,5,41253360.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|P32929|CGL_HUMAN,VIYPGLPSHPQHELVK,3,,0,L,cond_B,6,6,96001410.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|P0AGE9|SUCD_ECOLI,SGTLTYEAVK,2,,0,L,cond_B,4,4,131380700.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|O15355|PPM1G_HUMAN,GTEAGQVGEPGIPTGEAGPSC(Carbamidomethyl)SSASDK,2,,0,L,cond_A,2,2,32767620.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|Q99614|TTC1_HUMAN,SNEDVNSSELDEEYLIELEK,2,,0,L,cond_B,4,4,44210590.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P62316|SMD2_HUMAN,REEEEFNTGPLSVLTQSVK,2,,0,L,cond_A,3,3,216058000.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML
+sp|O14828|SCAM3_HUMAN,ELQHAALGGTATR,2,,0,L,cond_B,4,4,8710374.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|Q9BX10|GTPB2_HUMAN,VGADITVLR,2,,0,L,cond_B,5,5,30394810.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|P31539|HS104_YEAST,YAIDMTEQAR,2,,0,L,cond_A,2,2,309601800.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|Q08723|RPN8_YEAST,VTNSFALPFEEDEK,2,,0,L,cond_A,1,1,53689080.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P53250|TWF1_YEAST,SFEELVQLASQER,2,,0,L,cond_B,6,6,17220740.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|P14922|CYC8_YEAST,ANEIYFR,2,,0,L,cond_A,1,1,19421460.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|Q9UJV9|DDX41_HUMAN,GVEAVAIHGGK,2,,0,L,cond_B,4,4,12814100.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|Q99856|ARI3A_HUMAN,MALVADEQQR,2,,0,L,cond_A,1,1,207930200.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|Q9Y613|FHOD1_HUMAN,FSGVAGEAPSNPSVPVAVSSGPGR,2,,0,L,cond_B,4,4,14475940.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P46778|RL21_HUMAN,VYNVTQHAVGIVVNK,3,,0,L,cond_A,1,1,817014100.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|P31939|PUR9_HUMAN,NLTALGLNLVASGGTAK,2,,0,L,cond_B,6,6,409515000.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML
+sp|P09105|HBAT_HUMAN,LGSNVGVYTTEALER,2,,0,L,cond_A,2,2,47899360.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|Q96T37|RBM15_HUMAN,SSGAASSAPGGGDGAEYK,2,,0,L,cond_B,5,5,20204150.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
+sp|Q96G03|PGM2_HUMAN,MNDLTIIQTTQGFC(Carbamidomethyl)R,2,,0,L,cond_B,4,4,44245960.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|Q9BXP5|SRRT_HUMAN,EEEWEKPK,2,,0,L,cond_B,4,4,25720120.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P42704|LPPRC_HUMAN,MEEANIQPNR,2,,0,L,cond_B,4,4,104089900.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|Q86V81|THOC4_HUMAN,SLGTADVHFER,3,,0,L,cond_B,4,4,527266100.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML
+sp|P22336|RFA1_YEAST,EEDPNEFTK,2,,0,L,cond_A,1,1,20258070.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML
+sp|Cont_Q7SIH1|A2MG_BOVIN,NEESLVFVQTDKPIYKPEQTVK,4,,0,L,cond_A,2,2,120535100.0,LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML
+sp|Q8WUM4|PDC6I_HUMAN,DTIVLLC(Carbamidomethyl)KPEPELNAAIPSANPAK,2,,0,L,cond_B,5,5,27655920.0,LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML
diff --git a/test/test_module_dda_quant.py b/test/test_module_dda_quant.py
index adbd4c7b..74f7fb2d 100644
--- a/test/test_module_dda_quant.py
+++ b/test/test_module_dda_quant.py
@@ -28,6 +28,7 @@
     "ProlineStudio": os.path.join(TESTDATA_DIR, "Proline_DDA_quan_ions_subset.xlsx"),
     "MSAngel": os.path.join(TESTDATA_DIR, "MSAngel_DDA_quan_ions_subset.xlsx"),
     "i2MassChroQ": os.path.join(TESTDATA_DIR, "i2MassChroQ_DDA_quant_ions_test_new_random_subset.tsv"),
+    "quantms": os.path.join(TESTDATA_DIR, "sample_dda_quantms.sdrf_openms_design_msstats_in.csv"),
 }
 
 

From e280fc65f05babc55cdd2963a674bd159165915e Mon Sep 17 00:00:00 2001
From: Henry Webel <heweb@dtu.dk>
Date: Sat, 25 Jan 2025 18:35:16 +0100
Subject: [PATCH 2/8] :wrench::construction: add not entirely correct
 configuration

---
 .../lfq/ion/DDA/parse_settings_msstats.toml   | 33 +++++++++++++++++++
 .../parse_settings_files.toml                 |  1 +
 2 files changed, 34 insertions(+)
 create mode 100644 proteobench/io/parsing/io_parse_settings/Quant/lfq/ion/DDA/parse_settings_msstats.toml

diff --git a/proteobench/io/parsing/io_parse_settings/Quant/lfq/ion/DDA/parse_settings_msstats.toml b/proteobench/io/parsing/io_parse_settings/Quant/lfq/ion/DDA/parse_settings_msstats.toml
new file mode 100644
index 00000000..10a1cfa7
--- /dev/null
+++ b/proteobench/io/parsing/io_parse_settings/Quant/lfq/ion/DDA/parse_settings_msstats.toml
@@ -0,0 +1,33 @@
+[mapper]
+"ProteinName" = "Proteins"
+"Sequence" = "Sequence"
+"PrecursorCharge" = "Charge"
+"Reference" = "Raw file"
+"PeptideSequence" = "Modified sequence"
+
+[condition_mapper]
+"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML" = "A"
+"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML" = "A"
+"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML" = "A"
+"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML" = "B"
+"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML" = "B"
+"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML" = "B"
+
+[run_mapper]
+"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.mzML" = "Condition_A_Sample_Alpha_01"
+"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.mzML" = "Condition_A_Sample_Alpha_02"
+"LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.mzML" = "Condition_A_Sample_Alpha_03"
+"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.mzML" = "Condition_B_Sample_Alpha_01"
+"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.mzML" = "Condition_B_Sample_Alpha_02"
+"LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.mzML" = "Condition_B_Sample_Alpha_03"
+
+
+[species_mapper]
+"_YEAST" = "YEAST"
+"_ECOLI" = "ECOLI"
+"_HUMAN" = "HUMAN"
+
+
+[general]
+"contaminant_flag" = "Cont_"
+"decoy_flag" = false
diff --git a/proteobench/io/parsing/io_parse_settings/parse_settings_files.toml b/proteobench/io/parsing/io_parse_settings/parse_settings_files.toml
index 2b25755a..c6b9bcee 100644
--- a/proteobench/io/parsing/io_parse_settings/parse_settings_files.toml
+++ b/proteobench/io/parsing/io_parse_settings/parse_settings_files.toml
@@ -7,6 +7,7 @@
 "MSAngel" = "parse_settings_msangel.toml"
 "Sage" = "parse_settings_sage.toml"
 "PEAKS" = "parse_settings_peaks.toml"
+"quantms" = "parse_settings_msstats.toml"
 "Custom" = "parse_settings_custom.toml"
 
 [quant_lfq_peptidoform_DDA]

From 9d0ddc08d7cf5a537113fdd59d3354e344b986e0 Mon Sep 17 00:00:00 2001
From: Henry Webel <heweb@dtu.dk>
Date: Sun, 26 Jan 2025 16:45:55 +0100
Subject: [PATCH 3/8] :art: raise errors if proforma is missing and explicit
 error handling

---
 proteobench/io/parsing/parse_settings.py      |  8 +++----
 .../quant/lfq/ion/DDA/quant_lfq_ion_DDA.py    | 23 +++++++++++--------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/proteobench/io/parsing/parse_settings.py b/proteobench/io/parsing/parse_settings.py
index f45377e6..1e99dc3c 100644
--- a/proteobench/io/parsing/parse_settings.py
+++ b/proteobench/io/parsing/parse_settings.py
@@ -171,14 +171,14 @@ def convert_to_standard_format(self, df: pd.DataFrame) -> tuple[pd.DataFrame, Di
                     df_filtered_melted["proforma"] + "|Z=" + df_filtered_melted["Charge"].astype(str)
                 )
             else:
-                print("Not all columns required for making the ion are available.")
+                raise ValueError("Not all columns required for making the ion are available: 'proforma' and 'Charge'.")
             return df_filtered_melted, replicate_to_raw
 
         elif self.analysis_level == "peptidoform":
             if "proforma" in df_filtered_melted.columns:
                 df_filtered_melted["peptidoform"] = df_filtered_melted["proforma"]
             else:
-                print("Not all columns required for making the peptidoform are available.")
+                raise ValueError("Not all columns required for making the peptidoform are available: 'proforma'.")
             return df_filtered_melted, replicate_to_raw
 
         else:
@@ -244,11 +244,11 @@ def convert_to_standard_format(self, df: pd.DataFrame) -> tuple[pd.DataFrame, Di
         if self.parser.analysis_level == "ion":
             try:
                 df["precursor ion"] = df["proforma"] + "|Z=" + df["Charge"].astype(str)
-            except KeyError:
+            except KeyError as e:
                 raise KeyError(
                     "Not all columns required for making the ion are available."
                     " Is the charge available in the input file?"
-                )
+                ) from e
 
             return df, replicate_to_raw
 
diff --git a/proteobench/modules/quant/lfq/ion/DDA/quant_lfq_ion_DDA.py b/proteobench/modules/quant/lfq/ion/DDA/quant_lfq_ion_DDA.py
index a2a6af77..2766784a 100644
--- a/proteobench/modules/quant/lfq/ion/DDA/quant_lfq_ion_DDA.py
+++ b/proteobench/modules/quant/lfq/ion/DDA/quant_lfq_ion_DDA.py
@@ -113,42 +113,45 @@ def benchmarking(
         except pd.errors.ParserError as e:
             raise ParseError(
                 f"Error parsing {input_format} file, please make sure the format is correct and the correct software tool is chosen: {e}"
-            )
+            ) from e
         except Exception as e:
-            raise ParseSettingsError(f"Error parsing the input file: {e}")
+            raise ParseSettingsError("Error parsing the input file.") from e
 
+        msg = f"Folder: {self.parse_settings_dir}, Module: {self.module_id}"
         # Parse settings file
         try:
             parse_settings = ParseSettingsBuilder(
                 parse_settings_dir=self.parse_settings_dir, module_id=self.module_id
             ).build_parser(input_format)
         except KeyError as e:
-            raise ParseSettingsError(f"Error parsing settings file for parsing, settings seem to be missing: {e}")
+            raise ParseSettingsError(
+                f"Error parsing settings file for parsing, settings seem to be missing: {msg}"
+            ) from e
         except FileNotFoundError as e:
-            raise ParseSettingsError(f"Could not find the parsing settings file: {e}")
+            raise ParseSettingsError(f"Could not find the parsing settings file: {msg}") from e
         except Exception as e:
-            raise ParseSettingsError(f"Error parsing settings file for parsing: {e}")
+            raise ParseSettingsError(f"Error parsing settings file for parsing: {msg}") from e
 
         try:
             standard_format, replicate_to_raw = parse_settings.convert_to_standard_format(input_df)
         except KeyError as e:
-            raise ConvertStandardFormatError(f"Error converting to standard format, key missing: {e}")
+            raise ConvertStandardFormatError("Error converting to standard format, key missing.") from e
         except Exception as e:
-            raise ConvertStandardFormatError(f"Error converting to standard format: {e}")
+            raise ConvertStandardFormatError("Error converting to standard format.") from e
 
-        # calculate quantification scores
+        # instantiate quantification scores
         try:
             quant_score = QuantScores(
                 self.precursor_name, parse_settings.species_expected_ratio(), parse_settings.species_dict()
             )
         except Exception as e:
-            raise QuantificationError(f"Error generating quantification scores: {e}")
+            raise QuantificationError("Error generating quantification scores.") from e
 
         # generate intermediate data structure
         try:
             intermediate_data_structure = quant_score.generate_intermediate(standard_format, replicate_to_raw)
         except Exception as e:
-            raise IntermediateFormatGenerationError(f"Error generating intermediate data structure: {e}")
+            raise IntermediateFormatGenerationError("Error generating intermediate data structure.") from e
 
         # try:
         current_datapoint = Datapoint.generate_datapoint(

From 03215953f6986f7f2daa866761c24aeaec91e825 Mon Sep 17 00:00:00 2001
From: Henry Webel <heweb@dtu.dk>
Date: Sun, 26 Jan 2025 16:47:26 +0100
Subject: [PATCH 4/8] :bug: add proforma manually as no modification parsing is
 specified.

ParseModifications. logic in
proteobench/io/parsing/parse_settings.py

add 'proforma' column which is required.
---
 proteobench/io/parsing/parse_ion.py | 2 +-
 proteobench/plotting/plot_quant.py  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/proteobench/io/parsing/parse_ion.py b/proteobench/io/parsing/parse_ion.py
index a4f71fe6..3e41c8fd 100644
--- a/proteobench/io/parsing/parse_ion.py
+++ b/proteobench/io/parsing/parse_ion.py
@@ -111,7 +111,7 @@ def load_input_file(input_csv: str, input_format: str) -> pd.DataFrame:
     elif input_format == "quantms":
         input_data_frame = pd.read_csv(input_csv, low_memory=False)
         input_data_frame = input_data_frame.assign(
-            Sequence=input_data_frame["PeptideSequence"].str.replace(
+            proforma=input_data_frame["PeptideSequence"].str.replace(
                 r"\(([^)]+)\)",
                 r"",
                 regex=True,
diff --git a/proteobench/plotting/plot_quant.py b/proteobench/plotting/plot_quant.py
index b714e189..fe9598a3 100644
--- a/proteobench/plotting/plot_quant.py
+++ b/proteobench/plotting/plot_quant.py
@@ -89,6 +89,7 @@ def plot_metric(
             "MSAID": "#afff57",
             "Proteome Discoverer": "#8c564b",
             "PEAKS": "#f781bf",
+            "quantms": "#03fc39",
         },
         mapping: Dict[str, int] = {"old": 10, "new": 20},
         highlight_color: str = "#d30067",

From 49e792998e5368066eea0efedd04fa061e466152 Mon Sep 17 00:00:00 2001
From: Henry Webel <heweb@dtu.dk>
Date: Sun, 26 Jan 2025 17:01:49 +0100
Subject: [PATCH 5/8] :bug: move to separate issue #556

---
 proteobench/io/parsing/parse_settings.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/proteobench/io/parsing/parse_settings.py b/proteobench/io/parsing/parse_settings.py
index 1e99dc3c..033354c8 100644
--- a/proteobench/io/parsing/parse_settings.py
+++ b/proteobench/io/parsing/parse_settings.py
@@ -171,14 +171,16 @@ def convert_to_standard_format(self, df: pd.DataFrame) -> tuple[pd.DataFrame, Di
                     df_filtered_melted["proforma"] + "|Z=" + df_filtered_melted["Charge"].astype(str)
                 )
             else:
-                raise ValueError("Not all columns required for making the ion are available: 'proforma' and 'Charge'.")
+                # ! raise ValueError
+                print("Not all columns required for making the ion are available: 'proforma' and 'Charge'.")
             return df_filtered_melted, replicate_to_raw
 
         elif self.analysis_level == "peptidoform":
             if "proforma" in df_filtered_melted.columns:
                 df_filtered_melted["peptidoform"] = df_filtered_melted["proforma"]
             else:
-                raise ValueError("Not all columns required for making the peptidoform are available: 'proforma'.")
+                # ! raise ValueError
+                print("Not all columns required for making the peptidoform are available: 'proforma'.")
             return df_filtered_melted, replicate_to_raw
 
         else:

From 38058a82342ae5d83dc77b09fa26a5b5e7f2d81e Mon Sep 17 00:00:00 2001
From: Henry Webel <heweb@dtu.dk>
Date: Wed, 29 Jan 2025 12:54:57 +0100
Subject: [PATCH 6/8] =?UTF-8?q?=F0=9F=9A=A7=20Start=20file=20reading=20of?=
 =?UTF-8?q?=20quantms=20parameter=20files?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 proteobench/io/params/quantms.py              | 147 ++++++++++++++++
 ....nf_core_quantms_software_mqc_versions.yml |  30 ++++
 test/params/quantms_1-3.sdrf_config.tsv       |   7 +
 test/params/quantms_1-3_dev.json              | 162 ++++++++++++++++++
 4 files changed, 346 insertions(+)
 create mode 100644 proteobench/io/params/quantms.py
 create mode 100644 test/params/quantms_1-3.nf_core_quantms_software_mqc_versions.yml
 create mode 100644 test/params/quantms_1-3.sdrf_config.tsv
 create mode 100644 test/params/quantms_1-3_dev.json

diff --git a/proteobench/io/params/quantms.py b/proteobench/io/params/quantms.py
new file mode 100644
index 00000000..ab6b9845
--- /dev/null
+++ b/proteobench/io/params/quantms.py
@@ -0,0 +1,147 @@
+"""quantms is a nextflow pipeline that execution depends on the settings in an
+SDRF file. It is executed using a parameters file in JSON format.
+
+However, the versions of the packages are dumped to a versions YAML file, and some
+parameters are taken from the SDRF file.
+"""
+
+import json
+import logging
+import pathlib
+from typing import IO, Union
+
+import pandas as pd
+import yaml
+
+from proteobench.io.params import ProteoBenchParameters
+
+logger = logging.getLogger(__name__)
+
+
+def load_versions(file: IO) -> dict:
+    """Parse the YAML file of tool versions written by the quantms pipeline.
+
+    The content of ``file`` is handed to ``yaml.safe_load`` and returned as is.
+    """
+    return yaml.safe_load(file)
+
+
+def load_parsed_sdrf(file: Union[str, pathlib.Path, IO]) -> pd.DataFrame:
+    """Read the parsed SDRF table (tab-separated) from a path or open handle."""
+    sdrf_table = pd.read_csv(file, sep="\t")
+    # Returned untouched; no column selection or renaming happens here.
+    return sdrf_table
+
+
+def load_files(file1: IO, file2: IO, file3: IO) -> tuple[dict, pd.DataFrame, dict]:
+    """Load versions (YAML), parsed SDRF (TSV) and pipeline parameters (JSON) in any order.
+
+    Every handle is rewound before each parse attempt, as the previous attempt may have consumed it.
+    Returns ``(versions, sdrf, pipeline_params)``; ``ValueError`` is raised for duplicated kinds.
+    """
+    versions = None
+    sdrf = None
+    pipeline_params = None
+    for file in [file1, file2, file3]:
+        try:
+            _versions = load_versions(file)
+            if not isinstance(_versions, dict) or "Workflow" not in _versions:
+                logger.debug("Loaded other file.")
+            elif versions is None:
+                versions = _versions
+                continue
+            elif "custom_config_base" in _versions:
+                logger.debug("Loaded nextflow parameters file.")
+            else:
+                raise ValueError("Multiple version files provided.")
+        except yaml.YAMLError as e:
+            logger.debug("Not a YAML file: %s", e)
+        file.seek(0)
+        try:
+            _pipeline_params = json.load(file)
+            if pipeline_params is not None:
+                raise ValueError("Multiple parameter files provided.")
+            pipeline_params = _pipeline_params
+            continue
+        except json.JSONDecodeError as e:
+            logger.debug("Not a JSON file: %s", e)
+        file.seek(0)
+        try:
+            _sdrf = load_parsed_sdrf(file)
+            if _sdrf.shape[1] == 1:
+                logger.debug("Loaded version or parameter file. Skip")
+                continue
+            elif sdrf is None:
+                sdrf = _sdrf
+            else:
+                raise ValueError("Multiple SDRF files provided.")
+        except pd.errors.EmptyDataError as e:
+            logger.debug("Empty file: %s", e)
+    return versions, sdrf, pipeline_params
+
+
+def extract_params(file1: IO, file2: IO, file3: IO) -> tuple[dict, pd.DataFrame, dict, ProteoBenchParameters]:
+    """
+    Extract parameters from the versions YAML, parsed SDRF and JSON parameters files.
+
+    The files may be passed in any order, as the streamlit interface lets the user
+    select them in any order. This might be changed in a newer quantms version with
+    one central parameters file. Returns ``(versions, sdrf, pipeline_params, params)``
+    and raises ``ValueError`` if no versions file is found or an engine is unknown.
+    """
+    versions, sdrf, pipeline_params = load_files(file1, file2, file3)
+    if versions is None:
+        raise ValueError("No quantms versions file provided.")
+
+    params = ProteoBenchParameters()
+    params.software_name = "quantms"
+    params.software_version = versions["Workflow"]["bigbio/quantms"]
+    engines = []
+    engines_version = []
+    for key in versions:
+        if key.startswith("SEARCHENGINE"):
+            _engine = key.split("SEARCHENGINE")[-1].lower()
+            engines.append(_engine)
+            if _engine == "comet":
+                engines_version.append(versions[key]["Comet"])
+            elif _engine == "msgf":
+                engines_version.append(versions[key]["msgf_plus"])
+            else:
+                raise ValueError(f"Unknown search engine: {_engine}")
+    if engines:
+        params.search_engine = ",".join(engines)
+    if engines_version:
+        params.search_engine_version = ",".join(engines_version)
+    return (versions, sdrf, pipeline_params, params)
+
+
+if __name__ == "__main__":
+    # Manual smoke test; the relative paths assume this file's directory as the cwd.
+    from pathlib import Path
+
+    fpath1 = Path("../../../test/params/quantms_1-3.sdrf_config.tsv")
+    fpath2 = Path("../../../test/params/quantms_1-3.nf_core_quantms_software_mqc_versions.yml")
+    fpath3 = Path("../../../test/params/quantms_1-3_dev.json")
+
+    # Extract parameters from the files
+    with open(fpath1, "r") as file1, open(fpath2, "r") as file2, open(fpath3, "r") as file3:
+        versions, sdrf, pipeline_params, params = extract_params(file1, file2, file3)
+        print(params.__dict__)
+
+    import itertools
+
+    permutations_fpath = list(itertools.permutations([fpath1, fpath2, fpath3]))
+    for file1, file2, file3 in permutations_fpath:
+        print(file1.name, file2.name, file3.name)
+        with open(file1, "r") as f1, open(file2, "r") as f2, open(file3, "r") as f3:
+            _versions, _sdrf, _pipeline_params, params = extract_params(f1, f2, f3)
+            assert _versions == versions
+            assert _sdrf.equals(sdrf)
+            assert _pipeline_params == pipeline_params
+            # print(params.__dict__)
+
+    # Convert the extracted parameters to a dictionary and then to a pandas Series
+    # data_dict = params.__dict__
+    # series = pd.Series(data_dict)
+    # # Write the Series to a CSV file
+    # series.to_csv(file.with_suffix(".csv"))
diff --git a/test/params/quantms_1-3.nf_core_quantms_software_mqc_versions.yml b/test/params/quantms_1-3.nf_core_quantms_software_mqc_versions.yml
new file mode 100644
index 00000000..7a53f716
--- /dev/null
+++ b/test/params/quantms_1-3.nf_core_quantms_software_mqc_versions.yml
@@ -0,0 +1,30 @@
+DECOYDATABASE:
+  DecoyDatabase: 3.2.0-pre-exported-20240919
+EXTRACTPSMFEATURES:
+  PSMFeatureExtractor: 3.2.0-pre-exported-20240919
+IDFILTER:
+  IDFilter: 3.2.0-pre-exported-20240919
+IDSCORESWITCHER:
+  IDScoreSwitcher: 3.2.0-pre-exported-20240919
+MSSTATS:
+  r-base: 4.3.2
+  bioconductor-msstats: 4.10.0
+MZMLSTATISTICS:
+  quantms-utils: 0.0.18
+PERCOLATOR:
+  PercolatorAdapter: 3.2.0-pre-exported-20240919
+  percolator: 3.05.0, Build Date Aug 31 2020 19:03:04
+PROTEOMICSLFQ:
+  ProteomicsLFQ: 3.2.0-pre-exported-20240919
+SAMPLESHEET_CHECK:
+  quantms-utils: 0.0.18
+SDRFPARSING:
+  sdrf-pipelines: 0.0.31
+SEARCHENGINECOMET:
+  CometAdapter: 3.2.0-pre-exported-20240919
+  Comet: 2023.01 rev. 2
+THERMORAWFILEPARSER:
+  ThermoRawFileParser: 1.3.4
+Workflow:
+    bigbio/quantms: v1.3.1dev-g70337bc
+    Nextflow: 24.10.3
diff --git a/test/params/quantms_1-3.sdrf_config.tsv b/test/params/quantms_1-3.sdrf_config.tsv
new file mode 100644
index 00000000..8cdc8655
--- /dev/null
+++ b/test/params/quantms_1-3.sdrf_config.tsv
@@ -0,0 +1,7 @@
+URI	Filename	FixedModifications	VariableModifications	Proteomics Data Acquisition Method	Label	PrecursorMassTolerance	PrecursorMassToleranceUnit	FragmentMassTolerance	FragmentMassToleranceUnit	DissociationMethod	Enzyme
+ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/10/PXD010000/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw	LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw	Carbamidomethyl (C)		Data-Dependent Acquisition	label free sample	10	ppm	20	ppm	HCD	Trypsin
+ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/10/PXD010000/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw	LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw	Carbamidomethyl (C)		Data-Dependent Acquisition	label free sample	10	ppm	20	ppm	HCD	Trypsin
+ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/10/PXD010000/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw	LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw	Carbamidomethyl (C)		Data-Dependent Acquisition	label free sample	10	ppm	20	ppm	HCD	Trypsin
+ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/10/PXD010000/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw	LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw	Carbamidomethyl (C)		Data-Dependent Acquisition	label free sample	10	ppm	20	ppm	HCD	Trypsin
+ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/10/PXD010000/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw	LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw	Carbamidomethyl (C)		Data-Dependent Acquisition	label free sample	10	ppm	20	ppm	HCD	Trypsin
+ftp://ftp.pride.ebi.ac.uk/pride/data/archive/2021/10/PXD010000/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw	LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw	Carbamidomethyl (C)		Data-Dependent Acquisition	label free sample	10	ppm	20	ppm	HCD	Trypsin
diff --git a/test/params/quantms_1-3_dev.json b/test/params/quantms_1-3_dev.json
new file mode 100644
index 00000000..29326c7f
--- /dev/null
+++ b/test/params/quantms_1-3_dev.json
@@ -0,0 +1,162 @@
+{
+    "custom_config_base": "https://raw.githubusercontent.com/nf-core/configs/master",
+    "min_peptide_length": 6,
+    "alignment_order": "star",
+    "fdr_level": "psm_level_fdrs",
+    "msstatslfq_removeFewMeasurements": true,
+    "plaintext_email": false,
+    "luciphor_debug": 0,
+    "isotope_correction": false,
+    "extractpsmfeature_debug": 0,
+    "msstatsiso_rmpsm_withfewmea_withinrun": true,
+    "subset_max_train": 300000,
+    "protein_score": "best",
+    "feature_with_id_min_score": 0.1,
+    "ms2rescore": false,
+    "shuffle_sequence_identity_threshold": 0.5,
+    "protein_inference_method": "aggregation",
+    "min_corr": 2,
+    "reindex_mzml": true,
+    "min_reporter_intensity": 0,
+    "normalize": false,
+    "skip_preliminary_analysis": false,
+    "description_correct_features": 0,
+    "decoy_string": "DECOY_",
+    "variable_mods": "Oxidation (M)",
+    "fragment_mass_tolerance": 0.03,
+    "msstatslfq_quant_summary_method": "TMP",
+    "skip_factor_validation": true,
+    "psm_level_fdr_cutoff": 0.01,
+    "skip_table_plots": false,
+    "scan_window_automatic": true,
+    "corr_diff": 1,
+    "enable_diann_mztab": true,
+    "pmultiqc_idxml_skip": true,
+    "version": false,
+    "openms_peakpicking": false,
+    "publish_dir_mode": "copy",
+    "input": "az://seqera/proteobench_dda_quant/dda_lfq_proteobench_v1.sdrf.tsv",
+    "feature_without_id_min_score": 0.75,
+    "msstatsiso_remove_norm_channel": true,
+    "min_precursor_charge": 2,
+    "consensusid_algorithm": "best",
+    "protein_quant": "unique_peptides",
+    "min_peptides_per_protein": 1,
+    "precursor_isotope_deviation": 10,
+    "num_hits": 1,
+    "precursor_mass_tolerance": 5,
+    "average": "median",
+    "decoy_method": "reverse",
+    "allowed_missed_cleavages": 2,
+    "max_peptide_length": 40,
+    "iso_normalization": false,
+    "protein_level_fdr_cutoff": 0.01,
+    "random_preanalysis": false,
+    "diann_debug": 3,
+    "mass_acc_automatic": true,
+    "custom_config_version": "master",
+    "update_PSM_probabilities": false,
+    "feature_generators": "deeplc,ms2pip",
+    "msstats_remove_one_feat_prot": true,
+    "top": 3,
+    "fixed_mods": "Carbamidomethyl (C)",
+    "msstatsiso_summaryformultiple_psm": "sum",
+    "msstats_plot_profile_qc": false,
+    "root_folder": "az://seqera/test-data/LFQ_DDA/raw/",
+    "pp_debug": 0,
+    "email": "heweb@dtu.dk",
+    "fix_peptides": false,
+    "pg_level": 2,
+    "use_ols_cache_only": false,
+    "IL_equivalent": true,
+    "acquisition_method": "dda",
+    "empirical_assembly_ms_n": 200,
+    "peakpicking_inmemory": false,
+    "run_fdr_cutoff": 0.1,
+    "lfq_intensity_threshold": 1000,
+    "protein_inference_debug": 0,
+    "local_input_type": "raw",
+    "quantification_method": "feature_intensity",
+    "enable_pmultiqc": true,
+    "outdir": "az://seqera/results_msquant_proteobench_raw",
+    "use_shared_peptides": true,
+    "pipelines_testdata_base_path": "https://raw.githubusercontent.com/nf-core/test-datasets/",
+    "sage_processes": 1,
+    "help": false,
+    "min_precursor_purity": 0,
+    "enable_mod_localization": false,
+    "train_FDR": 0.05,
+    "skip_ms_validation": false,
+    "export_mztab": true,
+    "klammer": false,
+    "search_engines": "comet",
+    "idfilter_debug": 0,
+    "msstats_threshold": 0.05,
+    "monochrome_logs": false,
+    "diann_normalize": true,
+    "test_FDR": 0.05,
+    "precursor_mass_tolerance_unit": "ppm",
+    "protocol": "automatic",
+    "skip_experimental_design_validation": false,
+    "add_triqler_output": false,
+    "targeted_only": true,
+    "max_multiqc_email_size": "25.MB",
+    "msstatsiso_useunique_peptide": true,
+    "min_consensus_support": 0,
+    "time_corr_only": true,
+    "max_precursor_charge": 4,
+    "validate_params": true,
+    "consensusid_debug": 0,
+    "min_peaks": 10,
+    "select_activation": "HCD",
+    "isotope_error_range": "0,1",
+    "best_charge_and_fraction": false,
+    "mod_localization": "Phospho (S),Phospho (T),Phospho (Y)",
+    "num_enzyme_termini": "fully",
+    "add_decoys": true,
+    "percolator_debug": 0,
+    "fragment_mass_tolerance_unit": "Da",
+    "msstatsiso_summarization_method": "msstats",
+    "trace_report_suffix": "2025-01-23_16-38-37",
+    "db_debug": 0,
+    "min_precursor_intensity": 1,
+    "export_decoy_psm": true,
+    "species_genes": false,
+    "ms2pip_model": "HCD2021",
+    "picked_fdr": true,
+    "scan_window": 8,
+    "iso_debug": 0,
+    "msstatsiso_global_norm": true,
+    "msstatslfq_feature_subset_protein": "top3",
+    "mass_recalibration": false,
+    "shuffle_max_attempts": 30,
+    "protein_quant_debug": 0,
+    "id_only": false,
+    "database": "az://seqera/proteobench_dda_quant/ProteoBenchFASTA_DDAQuantification.fasta",
+    "reporter_mass_shift": 0.002,
+    "rescore_range": "independent_run",
+    "random_preanalysis_seed": 42,
+    "calibration_set_size": 0.15,
+    "quantify_decoys": false,
+    "reference_channel": "126",
+    "contrasts": "pairwise",
+    "idmapper_debug": 0,
+    "ratios": false,
+    "enzyme": "Trypsin",
+    "include_all": true,
+    "add_snr_feature_percolator": false,
+    "validate_ontologies": true,
+    "consensusid_considered_top_hits": 0,
+    "top_PSMs": 1,
+    "convert_dotd": false,
+    "unmatched_action": "warn",
+    "skip_rescoring": false,
+    "msstatsiso_reference_normalization": true,
+    "idscoreswitcher_debug": 0,
+    "skip_post_msstats": false,
+    "decoy_string_position": "prefix",
+    "max_mods": 3,
+    "decoydatabase_debug": 0,
+    "plfq_debug": 0,
+    "fragment_method": "HCD"
+}
\ No newline at end of file

From 05f71b072c11613930d4689d33ed4b45444306af Mon Sep 17 00:00:00 2001
From: Henry Webel <heweb@dtu.dk>
Date: Sun, 2 Feb 2025 16:55:53 +0100
Subject: [PATCH 7/8] :bug: json could not be reloaded

---
 proteobench/io/params/quantms.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/proteobench/io/params/quantms.py b/proteobench/io/params/quantms.py
index ab6b9845..b48daf4c 100644
--- a/proteobench/io/params/quantms.py
+++ b/proteobench/io/params/quantms.py
@@ -43,6 +43,7 @@ def load_files(file1: IO, file2: IO, file3: IO) -> [dict, pd.DataFrame]:
             _versions = load_versions(file)
             if "Workflow" not in _versions:
                 logger.debug("Loaded other file.")
+                file.seek(0)
             elif versions is None:
                 versions = _versions
                 continue
@@ -77,6 +78,11 @@ def load_files(file1: IO, file2: IO, file3: IO) -> [dict, pd.DataFrame]:
                 raise ValueError("Multiple SDRF files provided.")
         except pd.errors.EmptyDataError as e:
             pass
+
+    assert versions is not None
+    assert sdrf is not None
+    assert pipeline_params is not None
+
     return versions, sdrf, pipeline_params
 
 

From 038fcf8509f4bfcb7c8ed19c0070ec0183a3d460 Mon Sep 17 00:00:00 2001
From: Henry Webel <heweb@dtu.dk>
Date: Sun, 2 Feb 2025 16:56:17 +0100
Subject: [PATCH 8/8] =?UTF-8?q?=F0=9F=9A=A7=20continue=20mapping=20paramet?=
 =?UTF-8?q?ers?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 proteobench/io/params/quantms.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/proteobench/io/params/quantms.py b/proteobench/io/params/quantms.py
index b48daf4c..02511e5f 100644
--- a/proteobench/io/params/quantms.py
+++ b/proteobench/io/params/quantms.py
@@ -118,6 +118,27 @@ def extract_params(file1: IO, file2: IO, file3: IO) -> ProteoBenchParameters:
     if engines_version:
         params.search_engine_version = ",".join(engines_version)
 
+    # "fdr_level": "psm_level_fdrs",
+    params.ident_fdr_psm = pipeline_params["psm_level_fdr_cutoff"]
+    params.ident_fdr_protein = pipeline_params["protein_level_fdr_cutoff"]
+    params.variable_mods = pipeline_params["variable_mods"]
+    params.fixed_mods = pipeline_params["fixed_mods"]
+    params.max_mods = pipeline_params["max_mods"]
+    params.min_precursor_charge = pipeline_params["min_precursor_charge"]
+    params.max_precursor_charge = pipeline_params["max_precursor_charge"]
+    params.max_peptide_length = pipeline_params["max_peptide_length"]
+    params.min_peptide_length = pipeline_params["min_peptide_length"]
+    params.precursor_mass_tolerance = pipeline_params["precursor_mass_tolerance"]
+    params.fragment_mass_tolerance = pipeline_params["fragment_mass_tolerance"]
+    params.allowed_miscleavages = pipeline_params["allowed_missed_cleavages"]
+    params.quantification_method = pipeline_params["quantification_method"]
+    params.protein_inference = pipeline_params["protein_inference_method"]
+
+    # maybe (also) in sdrf infos?
+    # params.quantification_method =
+    # params.protein_inference =
+    # params.abundance_normalization_ions =
+
     return (versions, sdrf, pipeline_params, params)