From 415ac5c1b63b6466eee034d5ab1534e65d43fc36 Mon Sep 17 00:00:00 2001 From: Henry Date: Sat, 9 Dec 2023 18:28:46 +0100 Subject: [PATCH 1/2] :sparkles: add second proline example --- proteobench/io/params/proline.py | 10 +++++++++- test/params/Proline_example_2.csv | 20 ++++++++++++++++++++ test/params/Proline_example_2.xlsx | Bin 0 -> 13902 bytes test/test_parse_params_proline.py | 14 +++++++++++--- 4 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 test/params/Proline_example_2.csv create mode 100644 test/params/Proline_example_2.xlsx diff --git a/proteobench/io/params/proline.py b/proteobench/io/params/proline.py index 2883f9cd..5dd13ad4 100644 --- a/proteobench/io/params/proline.py +++ b/proteobench/io/params/proline.py @@ -87,7 +87,15 @@ def extract_params(fname) -> ProteoBenchParameters: if __name__ == "__main__": - file = pathlib.Path("test/params/Proline_example_w_Mascot_wo_proteinSets.xlsx") + file = pathlib.Path( + "../../../test/params/Proline_example_w_Mascot_wo_proteinSets.xlsx" + ) + params = extract_params(file) + data_dict = params.__dict__ + series = pd.Series(data_dict) + series.to_csv(file.with_suffix(".csv")) + + file = pathlib.Path("../../../test/params/Proline_example_2.xlsx") params = extract_params(file) data_dict = params.__dict__ series = pd.Series(data_dict) diff --git a/test/params/Proline_example_2.csv b/test/params/Proline_example_2.csv new file mode 100644 index 00000000..100f04c1 --- /dev/null +++ b/test/params/Proline_example_2.csv @@ -0,0 +1,20 @@ +,0 +software_name,Proline +software_version,X! Tandem Vengeance (2015.12.15.2) +search_engine,XTandem +search_engine_version, +ident_fdr_psm,1 +ident_fdr_peptide, +ident_fdr_protein, +enable_match_between_runs,False +precursor_mass_tolerance,10.0 ppm +fragment_mass_tolerance,0.02 Da +enzyme,Trypsin +allowed_miscleavages,2 +min_peptide_length,7 +max_peptide_length, +fixed_mods,Carbamidomethyl (C) +variable_mods,Acetyl (Protein N-term); Gln->pyro-Glu (Any N-term Q); Ammonia-loss (Any N-term C); Glu->pyro-Glu (Any N-term E); Oxidation (M) +max_mods, +min_precursor_charge, +max_precursor_charge, diff --git a/test/params/Proline_example_2.xlsx b/test/params/Proline_example_2.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..f51738a3996bd11209ba8ac136603fb02e49de39 GIT binary patch literal 13902 zcmeIZWq94lvNddmZHU>9nVBJGcFfF7v14XtW@e_uF*7qWGcz;BOs{9=oI5i)bANu{ zpL?_)Nso4SEptE#(7vJ#+RC_oTEP(VOH1VHZQ$!I3PKtS2xKtRYqP#_wDR+jdL zmi9V|F4l&2TC~m<=J?;hK*+O!K;G;Bcl$s51V-Z5O}pq(`pzNUf?}5^g>b?dO0-YL zcS%RM`R3A>bW{D)4}P9HVLF6QQhcjeWv&@~Xw#92wPMp|A=MDzJ2mfoZ0TpL_@S^g z2Dts?F=V9{D5p*mQ7v1+f{ztNOBXm@3prA-q~Cdj@sfd{IyWJ@Gv@arWhg*c*cFax z(~v!h3BgbMTsVGAxPl?76aotlFFO_nxw;?KV=WqtexT}Uv}>+dcgpR%#K#at>e9!} zSSuVhr92i@vnUn+PN953&gjb0!F6rny7`-ire$FK8ndf*2mnJc4Bwp{*5ea%{N4m-0Kg7+%oI2 z#CE!%^kr}xjZu0?+&h}XKu0VP;-S@`?(=K?{CCDk=_2CsK>bd7=pQW{&cvVuW-d@M zwUW+IBoA0E$8Nq7NL8U_d}`Z=gW3f78%fC3>Qt@54^=T>)XPt_Ih6(`J=!=xQj*#x3fq)y_8>aW-pt;n zh={w8JGMqq7B>{+NDQtM2~S@NRU(Yjs$fDR|G*AJ<4*BW?~_tn)4wkRo)u6yEe))w zXU#r}8&C6^Ps}@n;|*q)Jef*C9kA0kS}b!PFekWqz*JH&W;dzO&$Q(rbkQ*|@30yQLE*`iT^bNx+SbH~C|EVMa!4+yh z-xXvG0to2C`;&1tr*pEhHPg4UGW*Shi&j5vCi9eJ%g7?Z~*l|^(!dYo?CopaB#bFRmOt26v)L}9p<)(lV1X57_B z++I!-GxCo4B$S&Q?b~}FW8%*9{J3jQGiyt`iv;XF!7vt4NZ3EgY(z3iR;3R@ouWv> zK(7cRq7oXTxF4QUW$iu>ml132Qx6s6`XO+HQF>8zs>z_soF9cM82FK6Kf!WR{1LYy zw!l!!p@5poJNeo-tsesAd#vzuKy>H`jJ4bL&4h3#oqzkjz;->E&*4EqtO7~x$Otqa zr^b}Xt6SN&3nak;cK?QlsL%I>&}Hm1)xrzEk}e$Od%#{}mhd&7dW13|)EeZZE2kw7 zZl+<2u9L)x0b!umG5!;B5jD z#zK3p!jh4uNgg>Nj{Usuc zcX)Fp3L=6)ffr-f+S=jZtyScx##7{=*7CQ{qraI|`TF}%PbIl#3JABH`DIgH;byvS z7iZ!%y^P^w+;Q@+W7*UC#wD{a`Lxz<9Ybe_Z}B(?n0lnacVWQ{DUY7;o#LAb>=q2Qu4v9N@Gfwy9;7!87xui% zt3?_bJ$#&}i6#|CScpRRM47&9%*{j5y7hb}-K4chWKruB{d}v+BE^m6@8NBtCT5uG zAsZ`;`NZjSZJGb(xL=;icOy%|*tWn~+FYN%Y+SM{CB~m4WMO2|7*Y9x0{8Zh3bF?M zLJsm?G6M<(gb4%%{H`E>7?QuL$-j*X@VmSBUibg@))F^v`cA5S=YFkzlO8s)mWJ^h zt>OIT8h1dXa$6HUpAv7$ta#%DoA?H4bI|>WpFMad@?SRd&wJ1zOf!_CNhqLdeiW-O zli#zo@^Ja#&(%$3R2S&-`3l;2)q8bI+dT{A-4K5ZB>@jyJkQOY(k-X3!L>-Aq5YZq zZNl7j@LYAlg7r z0Bpof?8|oQEw&6a-Psq4ey63N^i};!n17;Jbk@7$_>*Goj13L#?dX0# zG5p50>2af$%k;3`=b(@9zGork`G=T9(sJ1cbIbH4t7l9=IJYn zYJABQGk~!mg8hUcLf7Y;Zpc#Q<~PFam&-E9nP~{-nkF|V1lgG=y%tsNZC7R2o_;D* z);TdVx*CzOzB~@ZO%CPsQ%Z(V2PXF)()9?4I*>+Ow*%tV0FdV#XcxXiZD{fwywz1O z=C_|0RzIRe4ew^q!o0>{XF-pepbUOax6g8~rDjoL``U}G2<|0$jko9;^>_Z%$yrQ|;I(Pq z_qX=;_WL1z>D2PwAVO?t@N0Gq*a$$QepcU#I_85lX>Tf2Ty?yY2wt|<#@pKFokro6 z7{0W5?mMx5acOK>o_;!Xc7D1sy1A{&yE%G%`ss@EdfTE={wiv?*lfcvccq`jM;BW$UVEUrd!aEk{@< zfRQ8s^K#hr{<;{TnlHxe1Tq{SN<)_`)r*Wyi^@ssPN_~SKzXam;ouBE+GR>r zggfEAX3O_m`pH$HGfvYF$g415q)Py>uRYN3I}F%Y;#~hC7je}ef!xyVx56VDp_Iw} zmW(G9N}FHA85^F7{f=cA8-PgssAL&GB@W) zJcx^IfrnU*aCii`$GTJ?D;l#W9Y#q*0xWZ7-20vcJ0cN1!o^@tAg?4}krp^d2n&nI zQf~l=n5TnV`oUk%e2@gu?Z>YjR0WnPY)v=<`ZL9pl_BA75H(HHW)L+?6Z%f&KA$|a zKYs5E;YtaS>D+bxefYO56?1(}+g>u2t{2o-icmu8A0HE;RBVK9O34bBtf=nOxzUB- zHmBix$}AYHHn@w>oa7Ls!rD!MCa2COhiHb%;~`WDog*C(rKs)-*G}mZcfCh}lR$YO zyFfOy6j3xNMoSH>=fi{BVLAm{=JAX9rTtnU0${fssKhJd`&1b#j2sXz8-Rz4|5i3s zL+r2a_rOcIW_1sd&9Cg24&ed^_}Fu-9B&~8RZqI;^20fe>jHJ453ipSeLa(Tz)W@k zU%UQWfrP*u%OEWUn24Vq>skmaNn5&E^fZ1sKY`ypw7%RH%lE(@jqNrB#N$}ZJ8-xv z%$@%c^*8tT*Zi%>?8h%fRYYb4aq|sy$v^3nO;^yF2TcD3>AV`c1`>XOCi^Zn`4i1M zUr@r&(PV-K?(s+Gg>LkG4*5P7!T$WcG&-V}w%**|yZF{UZ#JkV8*wgzSh=s{M~01Q zz%pPGJ)a%T-(vasVIAtBy|a5UShbyH@DVvO3Q=t|E9sMmaT}7 z<%NB&$yGf2dr#k+b=8h|<>f~fjd)ux*aHd4LnFPmu&KVdDPpiuNp5svmJZ1F!Sdok(muK&x>0Uk;=*t4#ecf-R0(T| zVDGbF0OJ2@V;KLiFNH9~3f;aW_ z?l@P*JDEpISqLV0TB3j!d>E ziXCKZ&UYiG%+C)g4l^=Fb{=3P=8U6&m)S_F*dEUFrnHuy%DDr>4HqYA^Wfq$Zs#wl8x;ZoEVcJ>W?2^typb>cmufIT=?_=;@d(#)BJ%JVT9dTdd-~ z+*>{o-2P1)%=Y$B2N0w6^t%)Bhq0O0u=Jxx0G}uo(&mGX{z($7cr^bBA!4Sy1|Wym zT&@+gky6QZp%O7JQ&t|a9Dd%v{rNWObt0X&jdfo+UZ2VHHh=Qv=9&Bx?`|1h=DO#u z4R&O{1XZ2WtTuLP=Q4VfMkP8{J$=*Re2!&kxu629$|Sg)$T#1ugisum*idei&`>vW zX4DU|7?ij<6^dM|LYrI7hN`856WhuwBG3sMDEj!#$?@d9VEp{UVEoNLkiaYgr4eed zNckrm7r;{PKn)Iys#~IPyLl3gk(>EUZ6M)mYUG3 z6n6#1l7*6`k_D1|lHjvFt5eTR?=Milr$QXCjEWT%|?YsluThiFvNwS`4<^(M!2M!w?= z0Bf`j(#e|hwr=Suu|q+{VF>q&w^bbM;pSGB6L9y51IkPprm+r+lG`lJbE(p7hP_~wDM0uyB3l+9^kGWIkLAY(BAR1FF9UNb{9q$xgq%GfeZH__X%L^ z1_^xAy#1VC52kPZ^L+*9-+pS6hGiE$!smM57H^2O1$c6PN_`^oh#5L{M4{!?c{;su z*hDTbRBru-lw-e-^W%n%S#)>9HZ2}6=9Y2M`qiE~WyMd&**$wIw@D8dmrmMqE{$ts zl^zP}*!_#u=eU{vs91MVv#pP8XD18XkzZKJc2HupLSSWb?F6&pR%>ntaWi{l+fH$% zB1LuZuWoejHkTmXFoe(Qjje@-20RR_o~%#fxM7Lz0Pi(EHmwj2j%0b^rBozwToH0nCYl!)k#Xd4^S;$Wq=kdbWzPD1 z7n;HqR)eu>*^_-C9ff1|y&JK+)qLL*z66owNm|;H=-DPt5j&&~K85E~GwcqTRjbLz z&6oz-XPA!8G>u)Q=IS%$9?zJQIwA2mBrXvWjKL4j5!7`k!8HLzDHah}OObti?Z7uEnnJ~~?%o?DU^>i@64Nhdl&n=~X4P#^wp zk+R>H810P>Eez>?KmI=TJy9DC!(v5gM}OpnwR3uA*^VMv+ZZ>GTO~Hgh{vw2JCRdn zVvKFULIxpWzmX@;{gEVK#T`Gz2MX8vGYXox<`8d^Y;=LBbeA~ITwL7|E==ULH9!9@ z)6@0Z{&K1{*;OPNrYkm4r%m>XJ3V|MnRq(U%Z!bj*MNt9s*@|8jfq1Y@8=qmy z`6+lbyMr>m<{KT)XQSw7bk9qiYRlIDG?Nt)vuy~Z4X8DBYFt7!=sJi*UW4`HZ;hoG zmy3ogK9N_DR3&8HRS!R7|OnYTR{WNOQu`a=+kd5MEu;8?X8SF6H zdUcABmeCOlxrS0)KYpS1m)PS7y20dc2f%K_KjCWie2SH@J(y|?!(fWxc3g;%<+59n zj2-ClB!wsV9OL9ChVCA<$SK4vh%H;iuu>0Q*Y)z1umOC>G&P{NciId~HFkz_~L%}}055ujV$4+%Zk7;^K5S%_zwg?aW=vn#$9vx?fjkjr+6>?4!HsNBrCR+ff8LGhZ&fKqvq# z`l@d*>Z_(uMzOm2S$x_;+NTeoD8kCE$Z202inW=)hnNnblhMNCT@HI_7`7Q%qXJ~&foMhGT}=N+hX zpRbk99D5!pI;kZ=kS|VS=t+LXl5@b2(}5R2(KhneUQAJ)v{#*!Jxa<^7q;02&|9FQ zI`7_Yd~V$h;$Pwq{E8jF{{=O_CZ#333^0}=>>9^Zm;x-TPsfBGWv{Tc1MB4)D(?AS?T*Z*fl*aSfR4B84E$UszDIs3O z?rO!0h-wAp*k?t|RaK>?T-5>hrYDMN*aA+wm;nR86bm0SdR+bXlliZ?rC2Vt^jl-EmnG-Drk7{s{)S9~Qn zQ$_j1@|0zhKU*qR%YhYy@i7? z8~0@y8Sg;Gh`IwG^|5%y{o~L|%j{Q3Cm$|XYcIBZ`2$%2t0W@w$0@9V_)ofFD{*QO zQ0p!1WHfJ{6y}ht;tMqKn0oC9ihlArh;_vQju^_rFBO_c4wN4@bYtW+;ir!ufi?9e zK0BI}_s`RwZST^I1|WuoN$QNC*KR&;wP`N8+ngifXFSyoWvf{@ng)EWo=ek-kiOqGwR z3R3e#6PirqIZevplY5GIqir~*-q!=>`H;TVnawG=0YB+yxt+E0wDs&qY#jI;rBbBm zzKwJT4{4%GmLWZ#(%O+?%mxnlT9cUg*4OmyANGH+v8YR5E2Ch6!Z*m+sAZTPaX5CD z0xuj_{7m9bk$~3Ip;*4sG!>6hyGq=E<-E3l>MyoJFe2m$2*!W(RA6sJLS--?WMf)J zgCU9;FH?~>soLSIy%dc7-iLP##NxW2Tg!7vq*|s_DsBhSP(~?{IGt~we>Ow%01O?y z9=Ib?hK}94cECy)P?q;$B=JMOO<``KT-m27Vm7dEpTMi#3Oxr#JbeLo#ZwKQLLdD7 zdiyOg_d}^7lBtaa!@y)pq>%WVS(5y#e|()&Me4k-V}hKUm}}M{!!TIK)Eds+j_Z?X zJ{nlA9(Z$+IWB{U48Qgm6aK`lEBb{^Gte}p6YOTY*#f*@+!wwHVcDU)MBAk5#_!-K zT8(T)s(5^=_$UC%o?Sq%LRZ;2}4-y zJ7!u8H43qY(iKY)UZ90^TTipy*H*4BOL1aWLgYP6X;@9{y1izCi=!YMZPv4tpn1Z1r+qEJ{O8Lbhj8Of2$0+{))$@z@HFsh7+6_;!a9GC_zL~%!t%zfrD^EH zbh(F2hCMluB77Zy9FTO~OPJ>BRzpkQxn zVrlfds`aQ@*e&&;y{@G^;oV;4Y)iw;QCU^4%JDTw(h~?DTI3%Y&*}SE5SKPJhe~4-PiJ2chjNgmh z|E+}#cMpc}*aN8wwMma@>a&8=)(S`FGTm|>;TW)Lr|DU;_l;3fo$UFzCya6nS<_714}_!B^i4TYSR0#WN{ zkfuN?ofK{Cdik+P;nQj4nrZRkh?>nPb}=AI$+%y$6#gQ5!a#!{54oNd8`iC}CnNqtSDzw6 zBRfzqi721aH#sHG0J9aLu3T zQKvcg-2=;sTBeKWq-ZQ|k|ftD}fefpI!0O znY0mW*gF|Pn=$ea$L1t3k%`s7sDh}&`5$MP(^=$2MIKLqHDOgtt*=V5$)0jMb_DaGbuxg*RyES!H0C?hVo^HO;i*X zH(cf#u2(Ca3tXJ#&}xmaFD^dRQlNMXtYcxQDVsdORIWuAJjXl*AYbTXY84MjSb}7$ zyR{yut5^}Po~b*|D~rPXJPso9Rp5BLTFI|@q{DCvRku-$F6?ku%wX3m@nz35YU?U|Ij!h{6IIuK<0G6>6OF*H%cYUyjL<4)`%mU9{A|W7EXe9+vNRlZ zWvvVOvnMrD&0jtyF3wpj&!z4S+oYZ*&YIV%H^B!lXE^Tf@;G!KF{dS3rhH=NZ-S_F zW*Dn8F|p8MI<5O!$0v8u!B-F~G$dU1iM1%QCC4@ikDNz+SJMp!_7*IjF5W1##1iTY^M-M@{q<=hGW(Pgs zL>`9kzMN7D7;zgq@#<^;9CGXDB#3#a(8tQD$DPm73gP!`j*NZ0gL$CTvQci2WE}Wm zQ0o%D=Hr9hPo2|%yKC`y)~(IJ!O67*?pOc8B?-b7j-y!)!D@|WryP}l4n!2Puutgi zlFXo^-#_Z+3%d@kR>!eCBFdGxb%Tj7yjxZY!@jdzdeuG~FdoDam5W3#M{(9gMz`B) z@q23MKN#&J0&q0N_i)XJ$(kRapyV$K_@0d#SiJKiP)LvjxT6b8Goelc!pBLM8`6F<4-hJ4T)Hl12s`E@nEs&Fgt5kh=`s?L8AW z`)c$=lj)M*5HG<*n^*REiC+HEf7IjLyf1toifZoz72&T#QP0}?za#N|2>xqJk6Sle zrhgAYK|bO6uVFQgk`oP3eqTM%{qj+)*+`0~|67%TeNyyE`TAmB+4Px-Rem#_4$qz7 z%bfC9fs@u7igxBGUuSA-(kyu@4-QQQS2o^|hqvvsV{PZC$Q;&YGu zr}yq-6zQ|%eadf@u;OzFG{)7Kd+BhZzq%0dgi zi6fGn^UhmA4UMeH1JF|V4RS-D*AG#r;-TwkIsFv7onNhp7?kO-@C30ra?L$|Jp01| z5Xm-UCg1?==z3a?hQu}`u{58S(PtpE& zRs1F0`B&k;Cx8Aa3Iucj{k!o0k_!4O&aWwee}z9lwSkBf1(Va{Ugfn;oo0TehupUi9(6>JIb#io?iieU6%S2ppfu)fIk Date: Sat, 9 Dec 2023 18:30:39 +0100 Subject: [PATCH 2/2] :sparkles: Add alphapept parameter parsing --- proteobench/io/params/alphapept.py | 49 +++ test/params/alphapept_0.4.9.csv | 20 + test/params/alphapept_0.4.9.yaml | 393 ++++++++++++++++++ test/params/alphapept_0.4.9_unnormalized.csv | 20 + test/params/alphapept_0.4.9_unnormalized.yaml | 393 ++++++++++++++++++ test/test_parse_params_alphapept.py | 25 ++ 6 files changed, 900 insertions(+) create mode 100644 proteobench/io/params/alphapept.py create mode 100644 test/params/alphapept_0.4.9.csv create mode 100644 test/params/alphapept_0.4.9.yaml create mode 100644 test/params/alphapept_0.4.9_unnormalized.csv create mode 100644 test/params/alphapept_0.4.9_unnormalized.yaml create mode 100644 test/test_parse_params_alphapept.py diff --git a/proteobench/io/params/alphapept.py b/proteobench/io/params/alphapept.py new file mode 100644 index 00000000..e4b65900 --- /dev/null +++ b/proteobench/io/params/alphapept.py @@ -0,0 +1,49 @@ +"""Alphapept uses the yaml format to save configuration.""" +import pathlib + +import pandas as pd +import yaml + +from proteobench.io.params import ProteoBenchParameters + + +def extract_params(fname) -> ProteoBenchParameters: + with open(fname) as f: + record = yaml.safe_load(f) + summary = record["summary"] + params = ProteoBenchParameters() + params.software_name = "AlphaPept" + params.software_version = summary["version"] + params.search_engine = params.software_name + params.search_engine_version = params.software_version + fasta = record["fasta"] + params.enzyme = fasta["protease"] + params.allowed_miscleavages = fasta["n_missed_cleavages"] + params.fixed_mods = ",".join(fasta["mods_fixed"]) + params.variable_mods = ",".join(fasta["mods_variable"]) + params.max_mods = fasta["n_modifications_max"] + params.min_peptide_length = fasta["pep_length_min"] + params.max_peptide_length = fasta["pep_length_max"] + search = record["search"] + params.precursor_mass_tolerance = search["prec_tol"] + params.fragment_mass_tolerance = search["frag_tol"] + params.ident_fdr_protein = search["protein_fdr"] + params.ident_fdr_peptide = search["peptide_fdr"] + # params.ident_fdr_psm = search + params.min_precursor_charge = record["features"]["iso_charge_min"] + params.max_precursor_charge = record["features"]["iso_charge_max"] + params.enable_match_between_runs = record["workflow"]["match"] # ! check + + return params + + +if __name__ == "__main__": + for fname in [ + "../../../test/params/alphapept_0.4.9.yaml", + "../../../test/params/alphapept_0.4.9_unnormalized.yaml", + ]: + file = pathlib.Path(fname) + params = extract_params(file) + data_dict = params.__dict__ + series = pd.Series(data_dict) + series.to_csv(file.with_suffix(".csv")) diff --git a/test/params/alphapept_0.4.9.csv b/test/params/alphapept_0.4.9.csv new file mode 100644 index 00000000..e72f1535 --- /dev/null +++ b/test/params/alphapept_0.4.9.csv @@ -0,0 +1,20 @@ +,0 +software_name,AlphaPept +software_version,0.4.9 +search_engine,AlphaPept +search_engine_version,0.4.9 +ident_fdr_psm, +ident_fdr_peptide,0.01 +ident_fdr_protein,0.01 +enable_match_between_runs,False +precursor_mass_tolerance,20 +fragment_mass_tolerance,50 +enzyme,trypsin +allowed_miscleavages,2 +min_peptide_length,7 +max_peptide_length,27 +fixed_mods,cC +variable_mods,oxM +max_mods,3 +min_precursor_charge,1 +max_precursor_charge,6 diff --git a/test/params/alphapept_0.4.9.yaml b/test/params/alphapept_0.4.9.yaml new file mode 100644 index 00000000..737e20a7 --- /dev/null +++ b/test/params/alphapept_0.4.9.yaml @@ -0,0 +1,393 @@ +calibration: + calib_mob_range: 0.3 + calib_mz_range: 2000 + calib_n_neighbors: 100 + calib_rt_range: 0.5 + outlier_std: 3 +experiment: + database_path: /home/alphapept/processing_challenge/database.hdf + fasta_paths: + - /home/alphapept/processing_challenge/combinedForSearch_ModuleDDA_quan.fasta + file_paths: + - /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw + - /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw + - /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw + - /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw + - /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw + - /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw + fraction: + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + matching_group: + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + results_path: /home/alphapept/processing_challenge/results.hdf + sample_group: + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01 + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02 + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 + shortnames: + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01 + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02 + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 +failed: + calibrate_hdf: [] + find_features: [] + raw_conversion: [] + score_hdf: [] + search_db: [] + search_db_2: [] +fasta: + AL_swap: false + KR_swap: false + fasta_block: 1000 + fasta_size_max: 100 + isoforms_max: 1024 + mods_fixed: + - cC + mods_fixed_terminal: [] + mods_fixed_terminal_prot: [] + mods_variable: + - oxM + mods_variable_terminal: [] + mods_variable_terminal_prot: + - a<^ + n_missed_cleavages: 2 + n_modifications_max: 3 + pep_length_max: 27 + pep_length_min: 7 + protease: trypsin + pseudo_reverse: true + save_db: true + spectra_block: 100000 +features: + centroid_tol: 8 + hill_check_large: 40 + hill_length_min: 3 + hill_nboot: 150 + hill_nboot_max: 300 + hill_smoothing: 1 + hill_split_level: 1.3 + iso_charge_max: 6 + iso_charge_min: 1 + iso_corr_min: 0.6 + iso_mass_range: 5 + iso_n_seeds: 100 + iso_split_level: 1.3 + map_mob_range: 0.3 + map_mz_range: 1.5 + map_n_neighbors: 5 + map_rt_range: 0.5 + max_gap: 2 + search_unidentified: false +general: + modfile_hash: c5a35c77af837322c672586ce65695c9 + n_processes: 60 +matching: + match_d_min: 3 + match_group_tol: 0 + match_p_min: 0.05 +quantification: + lfq_ratio_min: 1 + max_lfq: true + mode: ms1_int_sum_apex +raw: + n_most_abundant: 400 + use_profile_ms1: false +score: + method: random_forest +search: + calibrate: true + calibration_std_frag: 5 + calibration_std_prec: 5 + frag_tol: 50 + min_frag_hits: 7 + parallel: true + peptide_fdr: 0.01 + ppm: true + prec_tol: 20 + protein_fdr: 0.01 + recalibration_min: 100 +summary: + LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01: + acquisition_date_time: '2021-02-15T21:12:15.1229978Z' + feature_cluster_mapping (n in table): 438670 + feature_table (n in table): 165602 + feature_table_idx (n in table): 37641628 + features (n in table): 267813 + first_search (n in table): 454194 + fragment_ions (n in table): 2148384 + fwhm (feature_table, median): 0.12623177777778238 + fwhm (peptide_fdr, median): 0.20272416161616036 + fwhm (protein_fdr, median): 0.20314581818182376 + id_rate (0.01): 0.16 + identifications (n in table): 90666 + ms1_int_max_apex (feature_table, median): 844654.0 + ms1_int_max_apex (peptide_fdr, median): 2336117.0 + ms1_int_max_apex (protein_fdr, median): 2362316.0 + ms1_int_max_area (feature_table, median): 112547.37615274914 + ms1_int_max_area (peptide_fdr, median): 500624.62272100535 + ms1_int_max_area (protein_fdr, median): 507627.9758549973 + ms1_int_sum_apex (feature_table, median): 1396117.0101404912 + ms1_int_sum_apex (peptide_fdr, median): 4623983.026912997 + ms1_int_sum_apex (protein_fdr, median): 4700047.711603966 + ms1_int_sum_area (feature_table, median): 179186.34458981827 + ms1_int_sum_area (peptide_fdr, median): 987189.932214783 + ms1_int_sum_area (protein_fdr, median): 1006590.1215038294 + peptide_fdr (n in table): 35060 + prec_offset_ppm (peptide_fdr, median): -5.89969033626403e-07 + prec_offset_ppm (protein_fdr, median): -5.895853973925114e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.44801682233810425 + prec_offset_raw_ppm (protein_fdr, median): 0.454866498708725 + precursor (protein_fdr, n unique): 33307 + protein (protein_fdr, n unique): 5279 + protein_group (protein_fdr, n unique): 5279 + rt_length (feature_table, median): 0.26126634343434674 + rt_tail (feature_table, median): 1.1590909090909585 + second_search (n in table): 137437 + sequence (protein_fdr, n unique): 30242 + sequence_naked (protein_fdr, n unique): 29434 + LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02: + acquisition_date_time: '2021-02-17T05:58:09.9313599Z' + feature_cluster_mapping (n in table): 490116 + feature_table (n in table): 182019 + feature_table_idx (n in table): 36934614 + features (n in table): 269330 + first_search (n in table): 588859 + fragment_ions (n in table): 2868074 + fwhm (feature_table, median): 0.10678636363637395 + fwhm (peptide_fdr, median): 0.2029154343434385 + fwhm (protein_fdr, median): 0.20294414141415018 + id_rate (0.01): 0.23 + identifications (n in table): 105131 + ms1_int_max_apex (feature_table, median): 1091019.0 + ms1_int_max_apex (peptide_fdr, median): 2947208.0 + ms1_int_max_apex (protein_fdr, median): 2969663.0 + ms1_int_max_area (feature_table, median): 124483.75116599361 + ms1_int_max_area (peptide_fdr, median): 619675.2671249988 + ms1_int_max_area (protein_fdr, median): 624143.199694999 + ms1_int_sum_apex (feature_table, median): 1846733.9274262264 + ms1_int_sum_apex (peptide_fdr, median): 5814098.347923319 + ms1_int_sum_apex (protein_fdr, median): 5885988.774501283 + ms1_int_sum_area (feature_table, median): 202082.9838195806 + ms1_int_sum_area (peptide_fdr, median): 1205227.0687600963 + ms1_int_sum_area (protein_fdr, median): 1220265.7854634204 + peptide_fdr (n in table): 35725 + prec_offset_ppm (peptide_fdr, median): -5.982350899103039e-07 + prec_offset_ppm (protein_fdr, median): -5.976991133138654e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.4017564058303833 + prec_offset_raw_ppm (protein_fdr, median): 0.4127162992954254 + precursor (protein_fdr, n unique): 34284 + protein (protein_fdr, n unique): 5319 + protein_group (protein_fdr, n unique): 5319 + rt_length (feature_table, median): 0.22284888888888332 + rt_tail (feature_table, median): 1.1521739130434867 + second_search (n in table): 181774 + sequence (protein_fdr, n unique): 30654 + sequence_naked (protein_fdr, n unique): 29768 + LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03: + acquisition_date_time: '2021-02-18T22:31:16.2898136Z' + feature_cluster_mapping (n in table): 388797 + feature_table (n in table): 144932 + feature_table_idx (n in table): 36996665 + features (n in table): 298727 + first_search (n in table): 642138 + fragment_ions (n in table): 3252433 + fwhm (feature_table, median): 0.18050616161616517 + fwhm (peptide_fdr, median): 0.23415876767676735 + fwhm (protein_fdr, median): 0.2344743232323232 + id_rate (0.01): 0.26 + identifications (n in table): 117226 + ms1_int_max_apex (feature_table, median): 1226170.0 + ms1_int_max_apex (peptide_fdr, median): 2952926.0 + ms1_int_max_apex (protein_fdr, median): 2974708.5 + ms1_int_max_area (feature_table, median): 214662.9903085017 + ms1_int_max_area (peptide_fdr, median): 732499.0719202487 + ms1_int_max_area (protein_fdr, median): 739024.1325862431 + ms1_int_sum_apex (feature_table, median): 2024339.101863 + ms1_int_sum_apex (peptide_fdr, median): 5788108.372898562 + ms1_int_sum_apex (protein_fdr, median): 5870760.345829593 + ms1_int_sum_area (feature_table, median): 348704.6126305632 + ms1_int_sum_area (peptide_fdr, median): 1449997.8613120955 + ms1_int_sum_area (protein_fdr, median): 1472705.9034050903 + peptide_fdr (n in table): 37100 + prec_offset_ppm (peptide_fdr, median): -6.529426173074171e-07 + prec_offset_ppm (protein_fdr, median): -6.512607910735824e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.48788803815841675 + prec_offset_raw_ppm (protein_fdr, median): 0.49879640340805054 + precursor (protein_fdr, n unique): 35645 + protein (protein_fdr, n unique): 5416 + protein_group (protein_fdr, n unique): 5416 + rt_length (feature_table, median): 0.33942324242423894 + rt_tail (feature_table, median): 1.1707317073170755 + second_search (n in table): 207722 + sequence (protein_fdr, n unique): 31579 + sequence_naked (protein_fdr, n unique): 30644 + LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01: + acquisition_date_time: '2021-02-16T00:35:33.4711979Z' + feature_cluster_mapping (n in table): 422957 + feature_table (n in table): 159696 + feature_table_idx (n in table): 38266599 + features (n in table): 282141 + first_search (n in table): 472753 + fragment_ions (n in table): 2339045 + fwhm (feature_table, median): 0.1479760151515137 + fwhm (peptide_fdr, median): 0.22121849494948975 + fwhm (protein_fdr, median): 0.2216950909090869 + id_rate (0.01): 0.17 + identifications (n in table): 97913 + ms1_int_max_apex (feature_table, median): 761059.0 + ms1_int_max_apex (peptide_fdr, median): 2168794.0 + ms1_int_max_apex (protein_fdr, median): 2192301.0 + ms1_int_max_area (feature_table, median): 113011.02941624864 + ms1_int_max_area (peptide_fdr, median): 507020.0490622483 + ms1_int_max_area (protein_fdr, median): 515177.21989950276 + ms1_int_sum_apex (feature_table, median): 1240894.3861163845 + ms1_int_sum_apex (peptide_fdr, median): 4309569.974162634 + ms1_int_sum_apex (protein_fdr, median): 4386704.252986056 + ms1_int_sum_area (feature_table, median): 178141.18505316257 + ms1_int_sum_area (peptide_fdr, median): 1008435.8474547872 + ms1_int_sum_area (protein_fdr, median): 1027178.6029087919 + peptide_fdr (n in table): 35860 + prec_offset_ppm (peptide_fdr, median): -5.915183010074543e-07 + prec_offset_ppm (protein_fdr, median): -5.871704047422099e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.43428438901901245 + prec_offset_raw_ppm (protein_fdr, median): 0.44429290294647217 + precursor (protein_fdr, n unique): 34304 + protein (protein_fdr, n unique): 5277 + protein_group (protein_fdr, n unique): 5277 + rt_length (feature_table, median): 0.28941702020201454 + rt_tail (feature_table, median): 1.1818181818181683 + second_search (n in table): 150560 + sequence (protein_fdr, n unique): 30830 + sequence_naked (protein_fdr, n unique): 29841 + LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02: + acquisition_date_time: '2021-02-17T17:08:43.6657345Z' + feature_cluster_mapping (n in table): 420122 + feature_table (n in table): 160015 + feature_table_idx (n in table): 43445155 + features (n in table): 297720 + first_search (n in table): 546310 + fragment_ions (n in table): 2640669 + fwhm (feature_table, median): 0.1657121616161703 + fwhm (peptide_fdr, median): 0.2420684848484811 + fwhm (protein_fdr, median): 0.24239437373737616 + id_rate (0.01): 0.2 + identifications (n in table): 105030 + ms1_int_max_apex (feature_table, median): 804525.0 + ms1_int_max_apex (peptide_fdr, median): 2287129.0 + ms1_int_max_apex (protein_fdr, median): 2309060.0 + ms1_int_max_area (feature_table, median): 127269.71073899744 + ms1_int_max_area (peptide_fdr, median): 589055.1713864943 + ms1_int_max_area (protein_fdr, median): 596105.1278289948 + ms1_int_sum_apex (feature_table, median): 1320877.302977677 + ms1_int_sum_apex (peptide_fdr, median): 4494544.66858951 + ms1_int_sum_apex (protein_fdr, median): 4564185.635985839 + ms1_int_sum_area (feature_table, median): 202527.9342573708 + ms1_int_sum_area (peptide_fdr, median): 1155869.2869869529 + ms1_int_sum_area (protein_fdr, median): 1177593.0371076728 + peptide_fdr (n in table): 35375 + prec_offset_ppm (peptide_fdr, median): -6.740981461916817e-07 + prec_offset_ppm (protein_fdr, median): -6.74145837820106e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.49986252188682556 + prec_offset_raw_ppm (protein_fdr, median): 0.510723352432251 + precursor (protein_fdr, n unique): 34164 + protein (protein_fdr, n unique): 5192 + protein_group (protein_fdr, n unique): 5192 + rt_length (feature_table, median): 0.3026909090909129 + rt_tail (feature_table, median): 1.1500000000000123 + second_search (n in table): 170273 + sequence (protein_fdr, n unique): 30330 + sequence_naked (protein_fdr, n unique): 29299 + LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03: + acquisition_date_time: '2021-02-19T01:54:34.5516971Z' + feature_cluster_mapping (n in table): 399472 + feature_table (n in table): 148953 + feature_table_idx (n in table): 37870929 + features (n in table): 303009 + first_search (n in table): 620353 + fragment_ions (n in table): 3129119 + fwhm (feature_table, median): 0.1805095959595988 + fwhm (peptide_fdr, median): 0.237176969696975 + fwhm (protein_fdr, median): 0.23747596969697327 + id_rate (0.01): 0.25 + identifications (n in table): 117036 + ms1_int_max_apex (feature_table, median): 1071251.0 + ms1_int_max_apex (peptide_fdr, median): 2711603.0 + ms1_int_max_apex (protein_fdr, median): 2730784.0 + ms1_int_max_area (feature_table, median): 188128.80485600044 + ms1_int_max_area (peptide_fdr, median): 682523.0138275052 + ms1_int_max_area (protein_fdr, median): 688575.141182492 + ms1_int_sum_apex (feature_table, median): 1774145.218433667 + ms1_int_sum_apex (peptide_fdr, median): 5378542.6572872065 + ms1_int_sum_apex (protein_fdr, median): 5446152.496879385 + ms1_int_sum_area (feature_table, median): 305880.1365647374 + ms1_int_sum_area (peptide_fdr, median): 1355160.395593183 + ms1_int_sum_area (protein_fdr, median): 1375114.4792273701 + peptide_fdr (n in table): 38249 + prec_offset_ppm (peptide_fdr, median): -6.402689791684679e-07 + prec_offset_ppm (protein_fdr, median): -6.399289986802614e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.5020662546157837 + prec_offset_raw_ppm (protein_fdr, median): 0.5113118290901184 + precursor (protein_fdr, n unique): 36192 + protein (protein_fdr, n unique): 5415 + protein_group (protein_fdr, n unique): 5415 + rt_length (feature_table, median): 0.33933919191917994 + rt_tail (feature_table, median): 1.1666666666666436 + second_search (n in table): 202816 + sequence (protein_fdr, n unique): 32135 + sequence_naked (protein_fdr, n unique): 30873 + file_sizes: + files: + /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.ms_data.hdf: 2294.0040550231934 + /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.ms_data.hdf: 2556.17316532135 + /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.ms_data.hdf: 2664.7953567504883 + /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.ms_data.hdf: 2352.241373062134 + /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.ms_data.hdf: 2659.7218141555786 + /home/alphapept/processing_challenge/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.ms_data.hdf: 2636.2522497177124 + results: 375.19372272491455 + processed_files: + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw + time: '2023-02-07 12:17:58.889918' + timing: + create_database (min): 3.5521597623825074 + feature_finding (min): 8.045105290412902 + import_raw_data (min): 5.708554915587107 + isobaric_labeling (min): 2.094109853108724e-06 + protein_grouping (min): 0.10644924640655518 + quantification (min): 1.845250387986501 + recalibrate_data (min): 2.120876407623291 + score (min): 2.3479329705238343 + search_data (min): 4.872584227720896 + search_data_2 (min): 2.499837418397268 + total (min): 31.098779396216074 + version: 0.4.9 +workflow: + align: false + continue_runs: false + create_database: true + find_features: true + import_raw_data: true + lfq_quantification: true + match: false + recalibrate_data: true + search_data: true diff --git a/test/params/alphapept_0.4.9_unnormalized.csv b/test/params/alphapept_0.4.9_unnormalized.csv new file mode 100644 index 00000000..e72f1535 --- /dev/null +++ b/test/params/alphapept_0.4.9_unnormalized.csv @@ -0,0 +1,20 @@ +,0 +software_name,AlphaPept +software_version,0.4.9 +search_engine,AlphaPept +search_engine_version,0.4.9 +ident_fdr_psm, +ident_fdr_peptide,0.01 +ident_fdr_protein,0.01 +enable_match_between_runs,False +precursor_mass_tolerance,20 +fragment_mass_tolerance,50 +enzyme,trypsin +allowed_miscleavages,2 +min_peptide_length,7 +max_peptide_length,27 +fixed_mods,cC +variable_mods,oxM +max_mods,3 +min_precursor_charge,1 +max_precursor_charge,6 diff --git a/test/params/alphapept_0.4.9_unnormalized.yaml b/test/params/alphapept_0.4.9_unnormalized.yaml new file mode 100644 index 00000000..7d86408f --- /dev/null +++ b/test/params/alphapept_0.4.9_unnormalized.yaml @@ -0,0 +1,393 @@ +calibration: + calib_mob_range: 0.3 + calib_mz_range: 2000 + calib_n_neighbors: 100 + calib_rt_range: 0.5 + outlier_std: 3 +experiment: + database_path: /home/alphapept/processing_challenge_2/database.hdf + fasta_paths: + - /home/alphapept/processing_challenge_2/BenchmarkFASTAModule1_DDA.fasta + file_paths: + - /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw + - /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw + - /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw + - /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw + - /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw + - /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw + fraction: + - 1 + - 1 + - 1 + - 1 + - 1 + - 1 + matching_group: + - 0 + - 0 + - 0 + - 0 + - 0 + - 0 + results_path: /home/alphapept/processing_challenge_2/results.hdf + sample_group: + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01 + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02 + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 + shortnames: + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01 + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02 + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02 + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03 +failed: + calibrate_hdf: [] + find_features: [] + raw_conversion: [] + score_hdf: [] + search_db: [] + search_db_2: [] +fasta: + AL_swap: false + KR_swap: false + fasta_block: 1000 + fasta_size_max: 100 + isoforms_max: 1024 + mods_fixed: + - cC + mods_fixed_terminal: [] + mods_fixed_terminal_prot: [] + mods_variable: + - oxM + mods_variable_terminal: [] + mods_variable_terminal_prot: + - a<^ + n_missed_cleavages: 2 + n_modifications_max: 3 + pep_length_max: 27 + pep_length_min: 7 + protease: trypsin + pseudo_reverse: true + save_db: true + spectra_block: 100000 +features: + centroid_tol: 8 + hill_check_large: 40 + hill_length_min: 3 + hill_nboot: 150 + hill_nboot_max: 300 + hill_smoothing: 1 + hill_split_level: 1.3 + iso_charge_max: 6 + iso_charge_min: 1 + iso_corr_min: 0.6 + iso_mass_range: 5 + iso_n_seeds: 100 + iso_split_level: 1.3 + map_mob_range: 0.3 + map_mz_range: 1.5 + map_n_neighbors: 5 + map_rt_range: 0.5 + max_gap: 2 + search_unidentified: false +general: + modfile_hash: c5a35c77af837322c672586ce65695c9 + n_processes: 60 +matching: + match_d_min: 3 + match_group_tol: 0 + match_p_min: 0.05 +quantification: + lfq_ratio_min: 1 + max_lfq: true + mode: ms1_int_sum_apex +raw: + n_most_abundant: 400 + use_profile_ms1: false +score: + method: random_forest +search: + calibrate: true + calibration_std_frag: 5 + calibration_std_prec: 5 + frag_tol: 50 + min_frag_hits: 7 + parallel: true + peptide_fdr: 0.01 + ppm: true + prec_tol: 20 + protein_fdr: 0.01 + recalibration_min: 100 +summary: + LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01: + acquisition_date_time: '2021-02-15T21:12:15.1229978Z' + feature_cluster_mapping (n in table): 438670 + feature_table (n in table): 165602 + feature_table_idx (n in table): 37641628 + features (n in table): 267813 + first_search (n in table): 454194 + fragment_ions (n in table): 2148384 + fwhm (feature_table, median): 0.12623177777778238 + fwhm (peptide_fdr, median): 0.20272416161616036 + fwhm (protein_fdr, median): 0.20314228282828495 + id_rate (0.01): 0.16 + identifications (n in table): 90666 + ms1_int_max_apex (feature_table, median): 844654.0 + ms1_int_max_apex (peptide_fdr, median): 2336117.0 + ms1_int_max_apex (protein_fdr, median): 2362284.0 + ms1_int_max_area (feature_table, median): 112547.37615274914 + ms1_int_max_area (peptide_fdr, median): 500624.62272100535 + ms1_int_max_area (protein_fdr, median): 507599.0745082491 + ms1_int_sum_apex (feature_table, median): 1396117.0101404912 + ms1_int_sum_apex (peptide_fdr, median): 4623983.026912997 + ms1_int_sum_apex (protein_fdr, median): 4699908.052512772 + ms1_int_sum_area (feature_table, median): 179186.34458981827 + ms1_int_sum_area (peptide_fdr, median): 987189.932214783 + ms1_int_sum_area (protein_fdr, median): 1006571.5946226772 + peptide_fdr (n in table): 35060 + prec_offset_ppm (peptide_fdr, median): -5.89969033626403e-07 + prec_offset_ppm (protein_fdr, median): -5.897688879485941e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.44801682233810425 + prec_offset_raw_ppm (protein_fdr, median): 0.4548822045326233 + precursor (protein_fdr, n unique): 33306 + protein (protein_fdr, n unique): 5281 + protein_group (protein_fdr, n unique): 5281 + rt_length (feature_table, median): 0.26126634343434674 + rt_tail (feature_table, median): 1.1590909090909585 + second_search (n in table): 137437 + sequence (protein_fdr, n unique): 30241 + sequence_naked (protein_fdr, n unique): 29433 + LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02: + acquisition_date_time: '2021-02-17T05:58:09.9313599Z' + feature_cluster_mapping (n in table): 490116 + feature_table (n in table): 182019 + feature_table_idx (n in table): 36934614 + features (n in table): 269330 + first_search (n in table): 588859 + fragment_ions (n in table): 2868074 + fwhm (feature_table, median): 0.10678636363637395 + fwhm (peptide_fdr, median): 0.2029154343434385 + fwhm (protein_fdr, median): 0.20294414141415018 + id_rate (0.01): 0.23 + identifications (n in table): 105131 + ms1_int_max_apex (feature_table, median): 1091019.0 + ms1_int_max_apex (peptide_fdr, median): 2947208.0 + ms1_int_max_apex (protein_fdr, median): 2969663.0 + ms1_int_max_area (feature_table, median): 124483.75116599361 + ms1_int_max_area (peptide_fdr, median): 619675.2671249988 + ms1_int_max_area (protein_fdr, median): 624143.199694999 + ms1_int_sum_apex (feature_table, median): 1846733.9274262264 + ms1_int_sum_apex (peptide_fdr, median): 5814098.347923319 + ms1_int_sum_apex (protein_fdr, median): 5885988.774501283 + ms1_int_sum_area (feature_table, median): 202082.9838195806 + ms1_int_sum_area (peptide_fdr, median): 1205227.0687600963 + ms1_int_sum_area (protein_fdr, median): 1220265.7854634204 + peptide_fdr (n in table): 35725 + prec_offset_ppm (peptide_fdr, median): -5.982350899103039e-07 + prec_offset_ppm (protein_fdr, median): -5.976991133138654e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.4017564058303833 + prec_offset_raw_ppm (protein_fdr, median): 0.41256070137023926 + precursor (protein_fdr, n unique): 34286 + protein (protein_fdr, n unique): 5321 + protein_group (protein_fdr, n unique): 5321 + rt_length (feature_table, median): 0.22284888888888332 + rt_tail (feature_table, median): 1.1521739130434867 + second_search (n in table): 181774 + sequence (protein_fdr, n unique): 30656 + sequence_naked (protein_fdr, n unique): 29770 + LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03: + acquisition_date_time: '2021-02-18T22:31:16.2898136Z' + feature_cluster_mapping (n in table): 388797 + feature_table (n in table): 144932 + feature_table_idx (n in table): 36996665 + features (n in table): 298727 + first_search (n in table): 642138 + fragment_ions (n in table): 3252433 + fwhm (feature_table, median): 0.18050616161616517 + fwhm (peptide_fdr, median): 0.23415876767676735 + fwhm (protein_fdr, median): 0.23447397979798268 + id_rate (0.01): 0.26 + identifications (n in table): 117226 + ms1_int_max_apex (feature_table, median): 1226170.0 + ms1_int_max_apex (peptide_fdr, median): 2952926.0 + ms1_int_max_apex (protein_fdr, median): 2974648.0 + ms1_int_max_area (feature_table, median): 214662.9903085017 + ms1_int_max_area (peptide_fdr, median): 732499.0719202487 + ms1_int_max_area (protein_fdr, median): 738911.5653044912 + ms1_int_sum_apex (feature_table, median): 2024339.101863 + ms1_int_sum_apex (peptide_fdr, median): 5788108.372898562 + ms1_int_sum_apex (protein_fdr, median): 5870379.0630417075 + ms1_int_sum_area (feature_table, median): 348704.6126305632 + ms1_int_sum_area (peptide_fdr, median): 1449997.8613120955 + ms1_int_sum_area (protein_fdr, median): 1472675.4111828352 + peptide_fdr (n in table): 37100 + prec_offset_ppm (peptide_fdr, median): -6.529426173074171e-07 + prec_offset_ppm (protein_fdr, median): -6.512795494018064e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.48788803815841675 + prec_offset_raw_ppm (protein_fdr, median): 0.49874216318130493 + precursor (protein_fdr, n unique): 35646 + protein (protein_fdr, n unique): 5417 + protein_group (protein_fdr, n unique): 5417 + rt_length (feature_table, median): 0.33942324242423894 + rt_tail (feature_table, median): 1.1707317073170755 + second_search (n in table): 207722 + sequence (protein_fdr, n unique): 31580 + sequence_naked (protein_fdr, n unique): 30645 + LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01: + acquisition_date_time: '2021-02-16T00:35:33.4711979Z' + feature_cluster_mapping (n in table): 422957 + feature_table (n in table): 159696 + feature_table_idx (n in table): 38266599 + features (n in table): 282141 + first_search (n in table): 472753 + fragment_ions (n in table): 2339045 + fwhm (feature_table, median): 0.1479760151515137 + fwhm (peptide_fdr, median): 0.22121849494948975 + fwhm (protein_fdr, median): 0.22169181818181372 + id_rate (0.01): 0.17 + identifications (n in table): 97913 + ms1_int_max_apex (feature_table, median): 761059.0 + ms1_int_max_apex (peptide_fdr, median): 2168794.0 + ms1_int_max_apex (protein_fdr, median): 2192379.5 + ms1_int_max_area (feature_table, median): 113011.02941624864 + ms1_int_max_area (peptide_fdr, median): 507020.0490622483 + ms1_int_max_area (protein_fdr, median): 515208.3705772543 + ms1_int_sum_apex (feature_table, median): 1240894.3861163845 + ms1_int_sum_apex (peptide_fdr, median): 4309569.974162634 + ms1_int_sum_apex (protein_fdr, median): 4386765.138914232 + ms1_int_sum_area (feature_table, median): 178141.18505316257 + ms1_int_sum_area (peptide_fdr, median): 1008435.8474547872 + ms1_int_sum_area (protein_fdr, median): 1027189.0526562632 + peptide_fdr (n in table): 35860 + prec_offset_ppm (peptide_fdr, median): -5.915183010074543e-07 + prec_offset_ppm (protein_fdr, median): -5.871924031453091e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.43428438901901245 + prec_offset_raw_ppm (protein_fdr, median): 0.44402003288269043 + precursor (protein_fdr, n unique): 34303 + protein (protein_fdr, n unique): 5277 + protein_group (protein_fdr, n unique): 5277 + rt_length (feature_table, median): 0.28941702020201454 + rt_tail (feature_table, median): 1.1818181818181683 + second_search (n in table): 150560 + sequence (protein_fdr, n unique): 30829 + sequence_naked (protein_fdr, n unique): 29840 + LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02: + acquisition_date_time: '2021-02-17T17:08:43.6657345Z' + feature_cluster_mapping (n in table): 420122 + feature_table (n in table): 160015 + feature_table_idx (n in table): 43445155 + features (n in table): 297720 + first_search (n in table): 546310 + fragment_ions (n in table): 2640669 + fwhm (feature_table, median): 0.1657121616161703 + fwhm (peptide_fdr, median): 0.2420684848484811 + fwhm (protein_fdr, median): 0.24239437373737616 + id_rate (0.01): 0.2 + identifications (n in table): 105030 + ms1_int_max_apex (feature_table, median): 804525.0 + ms1_int_max_apex (peptide_fdr, median): 2287129.0 + ms1_int_max_apex (protein_fdr, median): 2309060.0 + ms1_int_max_area (feature_table, median): 127269.71073899744 + ms1_int_max_area (peptide_fdr, median): 589055.1713864943 + ms1_int_max_area (protein_fdr, median): 596105.1278289948 + ms1_int_sum_apex (feature_table, median): 1320877.302977677 + ms1_int_sum_apex (peptide_fdr, median): 4494544.66858951 + ms1_int_sum_apex (protein_fdr, median): 4564185.635985839 + ms1_int_sum_area (feature_table, median): 202527.9342573708 + ms1_int_sum_area (peptide_fdr, median): 1155869.2869869529 + ms1_int_sum_area (protein_fdr, median): 1177593.0371076728 + peptide_fdr (n in table): 35375 + prec_offset_ppm (peptide_fdr, median): -6.740981461916817e-07 + prec_offset_ppm (protein_fdr, median): -6.74145837820106e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.49986252188682556 + prec_offset_raw_ppm (protein_fdr, median): 0.510723352432251 + precursor (protein_fdr, n unique): 34164 + protein (protein_fdr, n unique): 5192 + protein_group (protein_fdr, n unique): 5192 + rt_length (feature_table, median): 0.3026909090909129 + rt_tail (feature_table, median): 1.1500000000000123 + second_search (n in table): 170273 + sequence (protein_fdr, n unique): 30330 + sequence_naked (protein_fdr, n unique): 29299 + LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03: + acquisition_date_time: '2021-02-19T01:54:34.5516971Z' + feature_cluster_mapping (n in table): 399472 + feature_table (n in table): 148953 + feature_table_idx (n in table): 37870929 + features (n in table): 303009 + first_search (n in table): 620353 + fragment_ions (n in table): 3129119 + fwhm (feature_table, median): 0.1805095959595988 + fwhm (peptide_fdr, median): 0.237176969696975 + fwhm (protein_fdr, median): 0.23747596969697327 + id_rate (0.01): 0.25 + identifications (n in table): 117036 + ms1_int_max_apex (feature_table, median): 1071251.0 + ms1_int_max_apex (peptide_fdr, median): 2711603.0 + ms1_int_max_apex (protein_fdr, median): 2730784.0 + ms1_int_max_area (feature_table, median): 188128.80485600044 + ms1_int_max_area (peptide_fdr, median): 682523.0138275052 + ms1_int_max_area (protein_fdr, median): 688575.141182492 + ms1_int_sum_apex (feature_table, median): 1774145.218433667 + ms1_int_sum_apex (peptide_fdr, median): 5378542.6572872065 + ms1_int_sum_apex (protein_fdr, median): 5446152.496879385 + ms1_int_sum_area (feature_table, median): 305880.1365647374 + ms1_int_sum_area (peptide_fdr, median): 1355160.395593183 + ms1_int_sum_area (protein_fdr, median): 1375114.4792273701 + peptide_fdr (n in table): 38249 + prec_offset_ppm (peptide_fdr, median): -6.402689791684679e-07 + prec_offset_ppm (protein_fdr, median): -6.399289986802614e-07 + prec_offset_raw_ppm (peptide_fdr, median): 0.5020662546157837 + prec_offset_raw_ppm (protein_fdr, median): 0.5113118290901184 + precursor (protein_fdr, n unique): 36192 + protein (protein_fdr, n unique): 5415 + protein_group (protein_fdr, n unique): 5415 + rt_length (feature_table, median): 0.33933919191917994 + rt_tail (feature_table, median): 1.1666666666666436 + second_search (n in table): 202816 + sequence (protein_fdr, n unique): 32135 + sequence_naked (protein_fdr, n unique): 30873 + file_sizes: + files: + /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.ms_data.hdf: 2294.0040550231934 + /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.ms_data.hdf: 2556.17316532135 + /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.ms_data.hdf: 2664.7953567504883 + /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.ms_data.hdf: 2352.241373062134 + /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.ms_data.hdf: 2659.7218141555786 + /home/alphapept/processing_challenge_2/LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.ms_data.hdf: 2636.2522497177124 + results: 377.3486204147339 + processed_files: + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_01.raw + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_02.raw + - LFQ_Orbitrap_DDA_Condition_A_Sample_Alpha_03.raw + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_01.raw + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_02.raw + - LFQ_Orbitrap_DDA_Condition_B_Sample_Alpha_03.raw + time: '2023-02-07 14:57:04.377694' + timing: + create_database (min): 3.633662752310435 + feature_finding (min): 8.496953483422597 + import_raw_data (min): 6.33388987382253 + isobaric_labeling (min): 2.6226043701171875e-06 + protein_grouping (min): 0.1000998576482137 + quantification (min): 3.426512809594472 + recalibrate_data (min): 2.1994417190551756 + score (min): 5.557692555586497 + search_data (min): 5.309630779425303 + search_data_2 (min): 2.649403250217438 + total (min): 37.7073157787323 + version: 0.4.9 +workflow: + align: false + continue_runs: false + create_database: true + find_features: true + import_raw_data: true + lfq_quantification: true + match: false + recalibrate_data: true + search_data: true diff --git a/test/test_parse_params_alphapept.py b/test/test_parse_params_alphapept.py new file mode 100644 index 00000000..0bc3c8a9 --- /dev/null +++ b/test/test_parse_params_alphapept.py @@ -0,0 +1,25 @@ +import io +import json +from pathlib import Path + +import pandas as pd +import pytest + +import proteobench.io.params.alphapept as alpahpept_params + +TESTDATA_DIR = Path(__file__).parent / "params" + +fnames = [ + "alphapept_0.4.9_unnormalized.yaml", + "alphapept_0.4.9.yaml", +] +fnames = [TESTDATA_DIR / f for f in fnames] + + +@pytest.mark.parametrize("file", fnames) +def test_extract_params(file): + expected = pd.read_csv(file.with_suffix(".csv"), index_col=0).squeeze("columns") + actual = alpahpept_params.extract_params(file) + actual = pd.Series(actual.__dict__) + actual = pd.read_csv(io.StringIO(actual.to_csv()), index_col=0).squeeze("columns") + assert expected.equals(actual)