From 777f64e3b8b73beead161d438fa35ee940145db3 Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 9 Feb 2024 17:16:44 +0100 Subject: [PATCH 1/6] Use more accurate mass values --- ms2pip/_cython_modules/ms2pip_peaks_c.c | 100 +++++++++++++----------- 1 file changed, 53 insertions(+), 47 deletions(-) diff --git a/ms2pip/_cython_modules/ms2pip_peaks_c.c b/ms2pip/_cython_modules/ms2pip_peaks_c.c index 607e4250..20ff6bce 100644 --- a/ms2pip/_cython_modules/ms2pip_peaks_c.c +++ b/ms2pip/_cython_modules/ms2pip_peaks_c.c @@ -5,8 +5,14 @@ #include "ms2pip_init_c.c" #include "ms2pip_features_c.c" -#include "../_models_c/HCD-2019.h" -#include "../_models_c/TMT.h" +#define H_MASS 1.00782503207f +#define H2O_MASS 18.0105646837f +#define NH3_MASS 17.02654910101f +#define OH_MASS 17.002739651629998f +#define NH_MASS 15.01089903687f +#define CO_MASS 27.99491461956f + +#define ZERO_INTENSITY -9.96578428466f float membuffer[10000]; float ions[2000]; @@ -81,7 +87,7 @@ float* get_mz_ms2pip_general(int peplen, unsigned short* modpeptide) } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = mz+1.007236; //b-ion + membuffer[j++] = mz+H_MASS; //b-ion } mz = 0; @@ -90,7 +96,7 @@ float* get_mz_ms2pip_general(int peplen, unsigned short* modpeptide) } for (i=peplen; i > 1; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = 18.0105647 + mz + 1.007236; //y-ion + membuffer[j++] = H2O_MASS + mz + H_MASS; //y-ion } mz = 0; @@ -99,7 +105,7 @@ float* get_mz_ms2pip_general(int peplen, unsigned short* modpeptide) } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = (mz + 1.007236 + 1.007236)/2; //b2-ion: (b-ion + H)/2 + membuffer[j++] = (mz + H_MASS + H_MASS)/2; //b2-ion: (b-ion + H)/2 } mz = 0; @@ -108,7 +114,7 @@ float* get_mz_ms2pip_general(int peplen, unsigned short* modpeptide) } for (i=peplen; i > 1; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = (18.0105647 + mz + 1.007236 + 1.007236)/2; //y2-ion: (y-ion + H)/2 + membuffer[j++] = (H2O_MASS + mz + H_MASS + H_MASS)/2; //y2-ion: (y-ion + H)/2 } return membuffer; @@ -128,7 +134,7 @@ float* get_mz_ms2pip_etd(int peplen, unsigned short* modpeptide) } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = mz + 1.007236; //b-ion + membuffer[j++] = mz + H_MASS; //b-ion } mz = 0; @@ -137,7 +143,7 @@ float* get_mz_ms2pip_etd(int peplen, unsigned short* modpeptide) } for (i=peplen; i > 1; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = 18.0105647 + mz + 1.007236; //y-ion + membuffer[j++] = H2O_MASS + mz + H_MASS; //y-ion } mz = 0; @@ -146,7 +152,7 @@ float* get_mz_ms2pip_etd(int peplen, unsigned short* modpeptide) } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = mz + 1.007825032 + 17.0265491; //c-ion: peptide + H + NH3 + membuffer[j++] = mz + H_MASS + NH3_MASS; //c-ion: peptide + H + NH3 } mz = 0; @@ -155,7 +161,7 @@ float* get_mz_ms2pip_etd(int peplen, unsigned short* modpeptide) } for (i=peplen; i > 1; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = mz + 17.00273965 - 15.01089904 + 1.007825032; //z-ion: peptide + OH - NH + membuffer[j++] = mz + OH_MASS - NH_MASS + H_MASS; //z-ion: peptide + OH - NH } return membuffer; @@ -175,7 +181,7 @@ float* get_mz_ms2pip_ch2(int peplen, unsigned short* modpeptide) } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = mz+1.007236; //b-ion + membuffer[j++] = mz+H_MASS; //b-ion } mz = 0; @@ -184,7 +190,7 @@ float* get_mz_ms2pip_ch2(int peplen, unsigned short* modpeptide) } for (i=peplen; i > 1; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = 18.0105647 + mz + 1.007236; //y-ion + membuffer[j++] = H2O_MASS + mz + H_MASS; //y-ion } mz = 0; @@ -193,7 +199,7 @@ float* get_mz_ms2pip_ch2(int peplen, unsigned short* modpeptide) } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = (mz + 1.007236 + 1.007236)/2; //b2-ion: (b-ion + H)/2 + membuffer[j++] = (mz + H_MASS + H_MASS)/2; //b2-ion: (b-ion + H)/2 } mz = 0; @@ -202,7 +208,7 @@ float* get_mz_ms2pip_ch2(int peplen, unsigned short* modpeptide) } for (i=peplen; i > 1; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j++] = (18.0105647 + mz + 1.007236 + 1.007236)/2; //y2-ion: (y-ion + H)/2 + membuffer[j++] = (H2O_MASS + mz + H_MASS + H_MASS)/2; //y2-ion: (y-ion + H)/2 } return membuffer; @@ -222,8 +228,8 @@ annotations get_t_ms2pip_all(int peplen, unsigned short* modpeptide, int numpeak //} for (i=0; i < 18*(peplen-1); i++) { // 2*9 iontypes: b: a -H2O -NH3 b c y: -H2O z y x - ions[i] = -9.96578428466; //HARD CODED!! - mzs[i] = 0; //HARD CODED!! + ions[i] = ZERO_INTENSITY; + mzs[i] = 0.0f; } //b-ions @@ -234,11 +240,11 @@ annotations get_t_ms2pip_all(int peplen, unsigned short* modpeptide, int numpeak int pos = 0; for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[pos++] = mz+1.007236-27.99492; // a - membuffer[pos++] = mz+1.007236-18.010565; // -H2O - membuffer[pos++] = mz+1.007236-17.026001; // -NH3 - membuffer[pos++] = mz+1.007236; // b - membuffer[pos++] = mz+1.007236+17.02654; // c + membuffer[pos++] = mz+H_MASS-CO_MASS; // a + membuffer[pos++] = mz+H_MASS-H2O_MASS; // -H2O + membuffer[pos++] = mz+H_MASS-NH3_MASS; // -NH3 + membuffer[pos++] = mz+H_MASS; // b + membuffer[pos++] = mz+H_MASS+NH3_MASS; // c } msms_pos = 0; @@ -288,11 +294,11 @@ annotations get_t_ms2pip_all(int peplen, unsigned short* modpeptide, int numpeak pos = 0; for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[pos++] = (mz+1.007236+1.007236-27.99492)/2.; // a - membuffer[pos++] = (mz+1.007236+1.007236-18.010565)/2.; // -H2O - membuffer[pos++] = (mz+1.007236+1.007236-17.026001)/2.; // -NH3 - membuffer[pos++] = (mz+1.007236+1.007236)/2.; // b - membuffer[pos++] = (mz+1.007236+1.007236+17.02654)/2.; // c + membuffer[pos++] = (mz+H_MASS+H_MASS-CO_MASS)/2.; // a + membuffer[pos++] = (mz+H_MASS+H_MASS-H2O_MASS)/2.; // -H2O + membuffer[pos++] = (mz+H_MASS+H_MASS-NH3_MASS)/2.; // -NH3 + membuffer[pos++] = (mz+H_MASS+H_MASS)/2.; // b + membuffer[pos++] = (mz+H_MASS+H_MASS+NH3_MASS)/2.; // c } msms_pos = 0; @@ -343,10 +349,10 @@ annotations get_t_ms2pip_all(int peplen, unsigned short* modpeptide, int numpeak pos = 0; for (i=peplen; i >= 2; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[pos++] = 18.0105647+mz+1.007236-18.010565; //-H2O - membuffer[pos++] = 18.0105647+mz+1.007236-17.02545; // z - membuffer[pos++] = 18.0105647+mz+1.007236; // y - membuffer[pos++] = 18.0105647+mz+1.007236+25.97926; // x + membuffer[pos++] = H2O_MASS+mz+H_MASS-H2O_MASS; //-H2O + membuffer[pos++] = H2O_MASS+mz+H_MASS-NH3_MASS; // z + membuffer[pos++] = H2O_MASS+mz+H_MASS; // y + membuffer[pos++] = H2O_MASS+mz+CO_MASS-H_MASS; // x } msms_pos = 0; @@ -396,10 +402,10 @@ annotations get_t_ms2pip_all(int peplen, unsigned short* modpeptide, int numpeak pos = 0; for (i=peplen; i >= 2; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[pos++] = (18.0105647+mz+1.007236+1.007236-18.010565)/2.; //-H2O - membuffer[pos++] = (18.0105647+mz+1.007236+1.007236-17.02545)/2.; // z - membuffer[pos++] = (18.0105647+mz+1.007236+1.007236)/2.; // y - membuffer[pos++] = (18.0105647+mz+1.007236+1.007236+25.97926)/2.; // x + membuffer[pos++] = (H2O_MASS+mz+H_MASS-H2O_MASS+H_MASS)/2; //-H2O + membuffer[pos++] = (H2O_MASS+mz+H_MASS-NH3_MASS+H_MASS)/2; // z + membuffer[pos++] = (H2O_MASS+mz+H_MASS+H_MASS)/2; // y + membuffer[pos++] = (H2O_MASS+mz+CO_MASS)/2; // x } msms_pos = 0; @@ -465,7 +471,7 @@ float* get_t_ms2pip_general(int peplen, unsigned short* modpeptide, int numpeaks //} for (i=0; i < 2*(peplen-1); i++) { - ions[i] = -9.96578428466; //HARD CODED!! + ions[i] = ZERO_INTENSITY; } //b-ions @@ -475,7 +481,7 @@ float* get_t_ms2pip_general(int peplen, unsigned short* modpeptide, int numpeaks } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[i-1] = mz+1.007236; + membuffer[i-1] = mz+H_MASS; } msms_pos = 0; @@ -522,7 +528,7 @@ float* get_t_ms2pip_general(int peplen, unsigned short* modpeptide, int numpeaks j=0; for (i=peplen; i >= 2; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j] = 18.0105647+mz+1.007236; + membuffer[j] = H2O_MASS+mz+H_MASS; j++; } @@ -578,7 +584,7 @@ float* get_t_ms2pip_etd(int peplen, unsigned short* modpeptide, int numpeaks, fl float max, tmp2; for (i=0; i < 4*(peplen-1); i++) { - ions[i] = -9.96578428466; //HARD CODED!! + ions[i] = ZERO_INTENSITY; } //b-ions @@ -588,7 +594,7 @@ float* get_t_ms2pip_etd(int peplen, unsigned short* modpeptide, int numpeaks, fl } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[i-1] = mz+1.007236; + membuffer[i-1] = mz+H_MASS; } msms_pos = 0; @@ -635,7 +641,7 @@ float* get_t_ms2pip_etd(int peplen, unsigned short* modpeptide, int numpeaks, fl j=0; for (i=peplen; i >= 2; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j] = 18.0105647+mz+1.007236; + membuffer[j] = H2O_MASS+mz+H_MASS; j++; } @@ -682,7 +688,7 @@ float* get_t_ms2pip_etd(int peplen, unsigned short* modpeptide, int numpeaks, fl } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[i-1] = mz + 1.007825032 + 17.026549; + membuffer[i-1] = mz + H_MASS + NH3_MASS; } msms_pos = 0; @@ -729,7 +735,7 @@ float* get_t_ms2pip_etd(int peplen, unsigned short* modpeptide, int numpeaks, fl j=0; for (i=peplen; i >= 2; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j] = mz + 17.00274 - 15.010899 + 1.007825032; + membuffer[j] = mz + OH_MASS - NH_MASS + H_MASS; j++; } @@ -787,7 +793,7 @@ float* get_t_ms2pip_ch2(int peplen, unsigned short* modpeptide, int numpeaks, fl //} for (i=0; i < 4*(peplen-1); i++) { - ions[i] = -9.96578428466; //HARD CODED!! + ions[i] = ZERO_INTENSITY; } //b-ions @@ -797,7 +803,7 @@ float* get_t_ms2pip_ch2(int peplen, unsigned short* modpeptide, int numpeaks, fl } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[i-1] = mz+1.007236; + membuffer[i-1] = mz+H_MASS; } msms_pos = 0; @@ -844,7 +850,7 @@ float* get_t_ms2pip_ch2(int peplen, unsigned short* modpeptide, int numpeaks, fl j=0; for (i=peplen; i >= 2; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j] = 18.0105647+mz+1.007236; + membuffer[j] = H2O_MASS+mz+H_MASS; j++; } @@ -891,7 +897,7 @@ float* get_t_ms2pip_ch2(int peplen, unsigned short* modpeptide, int numpeaks, fl } for (i=1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[i-1] = (mz + 1.007236 + 1.007236)/2; + membuffer[i-1] = (mz + H_MASS + H_MASS)/2; } msms_pos = 0; @@ -938,7 +944,7 @@ float* get_t_ms2pip_ch2(int peplen, unsigned short* modpeptide, int numpeaks, fl j=0; for (i=peplen; i >= 2; i--) { mz += amino_masses[modpeptide[i]]; - membuffer[j] = (18.0105647 + mz + 1.007236 + 1.007236)/2; + membuffer[j] = (H2O_MASS + mz + H_MASS + H_MASS)/2; j++; } From 1d713a850c2c3a490ccfffd3faca3d2dfbf4b167 Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 9 Feb 2024 17:20:17 +0100 Subject: [PATCH 2/6] Replace remaining C model files with .xgboost models --- ms2pip/_cython_modules/ms2pip_peaks_c.c | 44 +------------------ ms2pip/_models_c/HCD-2019.h | 4 -- .../HCD-2019/model_20190107_HCD_train_B.c | 3 -- .../HCD-2019/model_20190107_HCD_train_B2.c | 3 -- .../HCD-2019/model_20190107_HCD_train_Y.c | 3 -- .../HCD-2019/model_20190107_HCD_train_Y2.c | 3 -- ms2pip/_models_c/TMT.h | 2 - .../TMT/model_20190107_TMT_train_B.c | 3 -- .../TMT/model_20190107_TMT_train_Y.c | 3 -- ms2pip/constants.py | 21 ++++++++- 10 files changed, 21 insertions(+), 68 deletions(-) delete mode 100644 ms2pip/_models_c/HCD-2019.h delete mode 100644 ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B.c delete mode 100644 ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B2.c delete mode 100644 ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y.c delete mode 100644 ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y2.c delete mode 100644 ms2pip/_models_c/TMT.h delete mode 100644 ms2pip/_models_c/TMT/model_20190107_TMT_train_B.c delete mode 100644 ms2pip/_models_c/TMT/model_20190107_TMT_train_Y.c diff --git a/ms2pip/_cython_modules/ms2pip_peaks_c.c b/ms2pip/_cython_modules/ms2pip_peaks_c.c index 20ff6bce..8ede4a4f 100644 --- a/ms2pip/_cython_modules/ms2pip_peaks_c.c +++ b/ms2pip/_cython_modules/ms2pip_peaks_c.c @@ -28,49 +28,7 @@ typedef struct annotations annotations; //compute feature vector from peptide + predict intensities float* get_p_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge, int model_id, int ce) { - unsigned int* v = get_v_ms2pip(peplen, peptide, modpeptide, charge); - int fnum = v[0]/(peplen-1); - int i; - - // HCD - if (model_id == 1) { - for (i=0; i < peplen-1; i++) { - predictions[0*(peplen-1)+i] = score_HCD_B(v+1+(i*fnum))+0.5; - predictions[2*(peplen-1)-i-1] = score_HCD_Y(v+1+(i*fnum))+0.5; - } - } - - // TMT - else if (model_id == 3) { - for (i=0; i < peplen-1; i++) { - predictions[0*(peplen-1)+i] = score_TMT_B(v+1+(i*fnum))+0.5; - predictions[2*(peplen-1)-i-1] = score_TMT_Y(v+1+(i*fnum))+0.5; - } - } - // EThcD - // else if (model_id == 6) { - // for (i=0; i < peplen-1; i++) { - // predictions[0*(peplen-1)+i] = score_EThcD_B(v+1+(i*fnum))+0.5; - // predictions[2*(peplen-1)-i-1] = score_EThcD_Y(v+1+(i*fnum))+0.5; - // predictions[2*(peplen-1)+i] = score_EThcD_C(v+1+(i*fnum))+0.5; - // predictions[4*(peplen-1)-i-1] = score_EThcD_Z(v+1+(i*fnum))+0.5; - // } - // } - - // HCDch2 - else if (model_id == 7) { - for (i=0; i < peplen-1; i++) { - predictions[0*(peplen-1)+i] = score_HCD_B(v+1+(i*fnum))+0.5; - predictions[2*(peplen-1)-i-1] = score_HCD_Y(v+1+(i*fnum))+0.5; - predictions[2*(peplen-1)+i] = score_HCD_B2(v+1+(i*fnum))+0.5; - predictions[4*(peplen-1)-i-1] = score_HCD_Y2(v+1+(i*fnum))+0.5; - } - } - - else { - return NULL; - } - return predictions; + return NULL; } diff --git a/ms2pip/_models_c/HCD-2019.h b/ms2pip/_models_c/HCD-2019.h deleted file mode 100644 index 4887791b..00000000 --- a/ms2pip/_models_c/HCD-2019.h +++ /dev/null @@ -1,4 +0,0 @@ -float score_HCD_B(unsigned int* v); -float score_HCD_B2(unsigned int* v); -float score_HCD_Y(unsigned int* v); -float score_HCD_Y2(unsigned int* v); diff --git a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B.c b/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B.c deleted file mode 100644 index ae8fcf62..00000000 --- a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B.c +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:d0ab6fdc7c3b2324228912757d1c55bd56fdebbe066a0b5fc0305f0c7bbb9969 -size 8726156 diff --git a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B2.c b/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B2.c deleted file mode 100644 index 19a3d6a4..00000000 --- a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_B2.c +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:b2e27aab327de1a883500f7bb019ff25c9412141fb19ffe1a189eb092bdebd5b -size 1664299 diff --git a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y.c b/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y.c deleted file mode 100644 index 755926fc..00000000 --- a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y.c +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:105a9323043df6a192d300adaf3867391ef9e6e03599f6bc393289c3f62b982d -size 9116794 diff --git a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y2.c b/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y2.c deleted file mode 100644 index 4dd0a7b4..00000000 --- a/ms2pip/_models_c/HCD-2019/model_20190107_HCD_train_Y2.c +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7c1be44093ce4b3085b49142ed5a263c01a79c90b3bcaea4c0e829e3b587f2a3 -size 9063001 diff --git a/ms2pip/_models_c/TMT.h b/ms2pip/_models_c/TMT.h deleted file mode 100644 index 6ec67231..00000000 --- a/ms2pip/_models_c/TMT.h +++ /dev/null @@ -1,2 +0,0 @@ -float score_TMT_B(unsigned int* v); -float score_TMT_Y(unsigned int* v); diff --git a/ms2pip/_models_c/TMT/model_20190107_TMT_train_B.c b/ms2pip/_models_c/TMT/model_20190107_TMT_train_B.c deleted file mode 100644 index c23a7f9a..00000000 --- a/ms2pip/_models_c/TMT/model_20190107_TMT_train_B.c +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:49252e3d1b42df924b422f118870f237ec541d58db0037b4800444999277d055 -size 2348087 diff --git a/ms2pip/_models_c/TMT/model_20190107_TMT_train_Y.c b/ms2pip/_models_c/TMT/model_20190107_TMT_train_Y.c deleted file mode 100644 index f3635860..00000000 --- a/ms2pip/_models_c/TMT/model_20190107_TMT_train_Y.c +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:5429cc869363e3383ce41a78329870898ce6e6c3b5fcf9ed1ca5eaac9417df42 -size 4323734 diff --git a/ms2pip/constants.py b/ms2pip/constants.py index ba475391..b72e21d1 100644 --- a/ms2pip/constants.py +++ b/ms2pip/constants.py @@ -27,6 +27,14 @@ "ion_types": ["B", "Y"], "peaks_version": "general", "features_version": "normal", + "xgboost_model_files": { + "b": "model_20190107_HCD_train_B.xgboost", + "y": "model_20190107_HCD_train_Y.xgboost", + }, + "model_hash": { + "model_20190107_HCD_train_B.xgboost": "2503856c382806672e4b85f6b0ccc1f3093acc1b", + "model_20190107_HCD_train_Y.xgboost": "867bbc9940f75845b3f4f845d429b3780c997a02", + }, }, "TTOF5600": { "id": 2, @@ -76,12 +84,23 @@ "model_20190107_iTRAQphospho_train_Y.xgboost": "261b2e1810a299ed7ebf193ce1fb81a608c07d3b", }, }, - # ETD': {'id': 6, 'ion_types': ['B', 'Y', 'C', 'Z'], 'peaks_version': 'etd', 'features_version': 'normal'}, "HCDch2": { "id": 7, "ion_types": ["B", "Y", "B2", "Y2"], "peaks_version": "ch2", "features_version": "normal", + "xgboost_model_files": { + "b": "model_20190107_HCD_train_B.xgboost", + "y": "model_20190107_HCD_train_Y.xgboost", + "b2": "model_20190107_HCD_train_B2.xgboost", + "y2": "model_20190107_HCD_train_Y2.xgboost", + }, + "model_hash": { + "model_20190107_HCD_train_B.xgboost": "2503856c382806672e4b85f6b0ccc1f3093acc1b", + "model_20190107_HCD_train_Y.xgboost": "867bbc9940f75845b3f4f845d429b3780c997a02", + "model_20190107_HCD_train_B2.xgboost": "2df86d3576e85bfd25cc149d723f1613baf854d0", + "model_20190107_HCD_train_Y2.xgboost": "0a116ad9f14925fc70e3eceed9484b16ca8edddb", + }, }, "CIDch2": { "id": 8, From 062db9dbd3a0388a990d77063aee18365acade50 Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 9 Feb 2024 17:34:21 +0100 Subject: [PATCH 3/6] Format C and Cython code --- ms2pip/_cython_modules/ms2pip_features_c.c | 253 +-- ms2pip/_cython_modules/ms2pip_init_c.c | 178 +- ms2pip/_cython_modules/ms2pip_peaks_c.c | 1986 +++++++++++--------- ms2pip/_cython_modules/ms2pip_pyx.pyx | 384 ++-- 4 files changed, 1528 insertions(+), 1273 deletions(-) diff --git a/ms2pip/_cython_modules/ms2pip_features_c.c b/ms2pip/_cython_modules/ms2pip_features_c.c index 19749192..b8763a0c 100644 --- a/ms2pip/_cython_modules/ms2pip_features_c.c +++ b/ms2pip/_cython_modules/ms2pip_features_c.c @@ -1,128 +1,149 @@ // Compute feature vectors from peptide -unsigned int* get_v_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge) - { - int i,j,k; +unsigned int *get_v_ms2pip(int peplen, unsigned short *peptide, unsigned short *modpeptide, int charge) +{ + int i, j, k; - int fnum = 1; //first value in v is its length + int fnum = 1; // first value in v is its length - for (i=0; i < 19; i++) { - count_n[i] = 0; - count_c[i] = 0; - } + for (i = 0; i < 19; i++) + { + count_n[i] = 0; + count_c[i] = 0; + } - //I need this for Omega - //important for sptms!! - peptide_buf[0] = peptide[0]; - for (i=0; i < peplen; i++) { - if (peptide[i+1] > 18) { - peptide_buf[i+1] = sptm_mapper[peptide[i+1]]; - } - else { - peptide_buf[i+1] = peptide[i+1]; - } - count_c[peptide_buf[i+1]]++; - } + // I need this for Omega + // important for sptms!! + peptide_buf[0] = peptide[0]; + for (i = 0; i < peplen; i++) + { + if (peptide[i + 1] > 18) + { + peptide_buf[i + 1] = sptm_mapper[peptide[i + 1]]; + } + else + { + peptide_buf[i + 1] = peptide[i + 1]; + } + count_c[peptide_buf[i + 1]]++; + } - int num_shared = 0; + int num_shared = 0; - shared_features[num_shared++] = peplen; - shared_features[num_shared++] = charge; + shared_features[num_shared++] = peplen; + shared_features[num_shared++] = charge; - shared_features[num_shared] = 0; - if (charge == 1) { - shared_features[num_shared] = 1; - } - num_shared++; - shared_features[num_shared] = 0; - if (charge == 2) { - shared_features[num_shared] =1; - } - num_shared++; - shared_features[num_shared] = 0; - if (charge == 3) { - shared_features[num_shared] =1; - } - num_shared++; - shared_features[num_shared] = 0; - if (charge == 4) { - shared_features[num_shared] =1; - } - num_shared++; - shared_features[num_shared] = 0; - if (charge >= 5) { - shared_features[num_shared] =1; - } - num_shared++; + shared_features[num_shared] = 0; + if (charge == 1) + { + shared_features[num_shared] = 1; + } + num_shared++; + shared_features[num_shared] = 0; + if (charge == 2) + { + shared_features[num_shared] = 1; + } + num_shared++; + shared_features[num_shared] = 0; + if (charge == 3) + { + shared_features[num_shared] = 1; + } + num_shared++; + shared_features[num_shared] = 0; + if (charge == 4) + { + shared_features[num_shared] = 1; + } + num_shared++; + shared_features[num_shared] = 0; + if (charge >= 5) + { + shared_features[num_shared] = 1; + } + num_shared++; - for (j=0; j < num_props; j++) { - for (i=0; i < peplen; i++) { - props_buffer[i] = props[j][peptide_buf[i+1]]; - } - qsort(props_buffer,peplen,sizeof(unsigned int),cmpfunc); - shared_features[num_shared++] = props_buffer[0]; - shared_features[num_shared++] = props_buffer[(int)(0.25*(peplen-1))]; - shared_features[num_shared++] = props_buffer[(int)(0.5*(peplen-1))]; - shared_features[num_shared++] = props_buffer[(int)(0.75*(peplen-1))]; - shared_features[num_shared++] = props_buffer[peplen-1]; - } + for (j = 0; j < num_props; j++) + { + for (i = 0; i < peplen; i++) + { + props_buffer[i] = props[j][peptide_buf[i + 1]]; + } + qsort(props_buffer, peplen, sizeof(unsigned int), cmpfunc); + shared_features[num_shared++] = props_buffer[0]; + shared_features[num_shared++] = props_buffer[(int)(0.25 * (peplen - 1))]; + shared_features[num_shared++] = props_buffer[(int)(0.5 * (peplen - 1))]; + shared_features[num_shared++] = props_buffer[(int)(0.75 * (peplen - 1))]; + shared_features[num_shared++] = props_buffer[peplen - 1]; + } - for (i=0; i < peplen-1; i++) { - for (j=0; j #include - -float* amino_masses; -unsigned short* amino_F; -unsigned short* sptm_mapper; +float *amino_masses; +unsigned short *amino_F; +unsigned short *sptm_mapper; float ntermmod; - // For ms2pip_features_c.c unsigned int v[300000]; int num_props = 4; unsigned int props[5][19] = { - {37,35,59,129,94,0,210,81,191,106,101,117,115,343,49,90,60,134,104}, //basicity - {68,23,33,29,70,58,41,73,32,66,38,0,40,39,44,53,71,51,55}, //helicity - {51,75,25,35,100,16,3,94,0,82,12,0,22,22,21,39,80,98,70}, //hydrophobicity - {32,23,0,4,27,32,48,32,69,29,26,35,28,79,29,28,31,31,28}, //pI - //{71,103,115,129,147,57,137,113,128,131,114,97,128,156,87,101,99,186,163} //mass + {37, 35, 59, 129, 94, 0, 210, 81, 191, 106, 101, 117, 115, 343, 49, 90, 60, 134, 104}, // basicity + {68, 23, 33, 29, 70, 58, 41, 73, 32, 66, 38, 0, 40, 39, 44, 53, 71, 51, 55}, // helicity + {51, 75, 25, 35, 100, 16, 3, 94, 0, 82, 12, 0, 22, 22, 21, 39, 80, 98, 70}, // hydrophobicity + {32, 23, 0, 4, 27, 32, 48, 32, 69, 29, 26, 35, 28, 79, 29, 28, 31, 31, 28}, // pI + //{71,103,115,129,147,57,137,113,128,131,114,97,128,156,87,101,99,186,163} //mass }; -unsigned int props_buffer[100]; //100 is max pep length -unsigned int shared_features[100]; //100 is max num shared features +unsigned int props_buffer[100]; // 100 is max pep length +unsigned int shared_features[100]; // 100 is max num shared features unsigned int count_n[19]; unsigned int count_c[19]; -unsigned short peptide_buf[200]; //IONBOT - +unsigned short peptide_buf[200]; // IONBOT // Function required in ms2pip_features_c_general.c and ms2pip_features_c_catboost.c -int cmpfunc (const void * a, const void * b) { - return ( *(int*)a - *(int*)b ); +int cmpfunc(const void *a, const void *b) +{ + return (*(int *)a - *(int *)b); } - // This function initializes amino acid masses and PTMs from a configuration file generated by Omega -void init_ms2pip(char* amino_masses_fname, char* modifications_fname, char* modifications_fname_sptm) { - int i; - int nummods; - int nummods_sptm; - float mz; - int numptm; - int before; - int after; - - FILE* f = fopen(modifications_fname,"rt"); - fscanf(f,"%i\n",&nummods); - fclose(f); - - f = fopen(modifications_fname_sptm,"rt"); - fscanf(f,"%i\n",&nummods_sptm); - fclose(f); - - //malloc - amino_masses = (float*) malloc((38+nummods+nummods_sptm)*sizeof(float)); - amino_F = (unsigned short*) malloc((38+nummods+nummods_sptm)*sizeof(unsigned short)); - sptm_mapper = (unsigned short*) malloc((38+nummods+nummods_sptm)*sizeof(unsigned short)); - - f = fopen(amino_masses_fname,"rt"); - for (i=0; i< 19; i++) { - fscanf(f,"%f\n",&amino_masses[i]); - amino_F[i] = (unsigned short) (amino_masses[i]-57.021464); - } - fscanf(f,"%f\n",&ntermmod); - fclose(f); - - for (i=0; i< 19; i++) { - amino_masses[19+i]=amino_masses[i]; - amino_F[19+i] = amino_F[i]; - } - - f = fopen(modifications_fname_sptm,"rt"); - fscanf(f,"%i\n",&nummods_sptm); - for (i=0; i< nummods_sptm; i++) { - fscanf(f,"%f,%i,%i,%i\n",&mz,&numptm,&before,&after); - sptm_mapper[after] = before; - sptm_mapper[after] = before; - if (after > 18) { - if (before<0) { - amino_masses[after] = mz; - } - else - { - amino_masses[after] = amino_masses[before]+mz; - amino_F[after] = (unsigned short) (amino_masses[before]+mz - 57.021464); - } - } - } - fclose(f); - f = fopen(modifications_fname,"rt"); - fscanf(f,"%i\n",&nummods); - for (i=0; i< nummods; i++) { - fscanf(f,"%f,%i,%i,%i\n",&mz,&numptm,&before,&after); - if (after > 18) { - if (before<0) { - amino_masses[after] = mz; - } - else - { - amino_masses[after] = amino_masses[before]+mz; - amino_F[after] = (unsigned short) (amino_masses[before]+mz - 57.021464); - } - } - } - fclose(f); +void init_ms2pip(char *amino_masses_fname, char *modifications_fname, char *modifications_fname_sptm) +{ + int i; + int nummods; + int nummods_sptm; + float mz; + int numptm; + int before; + int after; + + FILE *f = fopen(modifications_fname, "rt"); + fscanf(f, "%i\n", &nummods); + fclose(f); + + f = fopen(modifications_fname_sptm, "rt"); + fscanf(f, "%i\n", &nummods_sptm); + fclose(f); + + // malloc + amino_masses = (float *)malloc((38 + nummods + nummods_sptm) * sizeof(float)); + amino_F = (unsigned short *)malloc((38 + nummods + nummods_sptm) * sizeof(unsigned short)); + sptm_mapper = (unsigned short *)malloc((38 + nummods + nummods_sptm) * sizeof(unsigned short)); + + f = fopen(amino_masses_fname, "rt"); + for (i = 0; i < 19; i++) + { + fscanf(f, "%f\n", &amino_masses[i]); + amino_F[i] = (unsigned short)(amino_masses[i] - 57.021464); + } + fscanf(f, "%f\n", &ntermmod); + fclose(f); + + for (i = 0; i < 19; i++) + { + amino_masses[19 + i] = amino_masses[i]; + amino_F[19 + i] = amino_F[i]; + } + + f = fopen(modifications_fname_sptm, "rt"); + fscanf(f, "%i\n", &nummods_sptm); + for (i = 0; i < nummods_sptm; i++) + { + fscanf(f, "%f,%i,%i,%i\n", &mz, &numptm, &before, &after); + sptm_mapper[after] = before; + sptm_mapper[after] = before; + if (after > 18) + { + if (before < 0) + { + amino_masses[after] = mz; + } + else + { + amino_masses[after] = amino_masses[before] + mz; + amino_F[after] = (unsigned short)(amino_masses[before] + mz - 57.021464); + } + } + } + fclose(f); + f = fopen(modifications_fname, "rt"); + fscanf(f, "%i\n", &nummods); + for (i = 0; i < nummods; i++) + { + fscanf(f, "%f,%i,%i,%i\n", &mz, &numptm, &before, &after); + if (after > 18) + { + if (before < 0) + { + amino_masses[after] = mz; + } + else + { + amino_masses[after] = amino_masses[before] + mz; + amino_F[after] = (unsigned short)(amino_masses[before] + mz - 57.021464); + } + } + } + fclose(f); } diff --git a/ms2pip/_cython_modules/ms2pip_peaks_c.c b/ms2pip/_cython_modules/ms2pip_peaks_c.c index 8ede4a4f..f0e87992 100644 --- a/ms2pip/_cython_modules/ms2pip_peaks_c.c +++ b/ms2pip/_cython_modules/ms2pip_peaks_c.c @@ -19,928 +19,1100 @@ float ions[2000]; float mzs[2000]; float predictions[1000]; -struct annotations{ - float* peaks; - float* msms; +struct annotations +{ + float *peaks; + float *msms; }; typedef struct annotations annotations; -//compute feature vector from peptide + predict intensities -float* get_p_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge, int model_id, int ce) - { - return NULL; +// compute feature vector from peptide + predict intensities +float *get_p_ms2pip(int peplen, unsigned short *peptide, unsigned short *modpeptide, int charge, int model_id, int ce) +{ + return NULL; } - -//get fragment ion mz values (b, y) -float* get_mz_ms2pip_general(int peplen, unsigned short* modpeptide) - { - int i,j; - float mz; - j=0; - - mz = 0; - if (modpeptide[0] != 0) { - mz = amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = mz+H_MASS; //b-ion - } - - mz = 0; - if (modpeptide[peplen+1] != 0) { - mz = amino_masses[modpeptide[peplen+1]]; - } - for (i=peplen; i > 1; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = H2O_MASS + mz + H_MASS; //y-ion - } - - mz = 0; - if (modpeptide[0] != 0) { - mz = amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = (mz + H_MASS + H_MASS)/2; //b2-ion: (b-ion + H)/2 - } - - mz = 0; - if (modpeptide[peplen+1] != 0) { - mz = amino_masses[modpeptide[peplen+1]]; - } - for (i=peplen; i > 1; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = (H2O_MASS + mz + H_MASS + H_MASS)/2; //y2-ion: (y-ion + H)/2 - } - - return membuffer; +// get fragment ion mz values (b, y) +float *get_mz_ms2pip_general(int peplen, unsigned short *modpeptide) +{ + int i, j; + float mz; + j = 0; + + mz = 0; + if (modpeptide[0] != 0) + { + mz = amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = mz + H_MASS; // b-ion + } + + mz = 0; + if (modpeptide[peplen + 1] != 0) + { + mz = amino_masses[modpeptide[peplen + 1]]; + } + for (i = peplen; i > 1; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = H2O_MASS + mz + H_MASS; // y-ion + } + + mz = 0; + if (modpeptide[0] != 0) + { + mz = amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = (mz + H_MASS + H_MASS) / 2; // b2-ion: (b-ion + H)/2 + } + + mz = 0; + if (modpeptide[peplen + 1] != 0) + { + mz = amino_masses[modpeptide[peplen + 1]]; + } + for (i = peplen; i > 1; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = (H2O_MASS + mz + H_MASS + H_MASS) / 2; // y2-ion: (y-ion + H)/2 + } + + return membuffer; } - -//get fragment ion mz values (b, y, c, z) -float* get_mz_ms2pip_etd(int peplen, unsigned short* modpeptide) - { - int i,j; - float mz; - j=0; - - mz = 0; - if (modpeptide[0] != 0) { - mz = amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = mz + H_MASS; //b-ion - } - - mz = 0; - if (modpeptide[peplen+1] != 0) { - mz = amino_masses[modpeptide[peplen+1]]; - } - for (i=peplen; i > 1; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = H2O_MASS + mz + H_MASS; //y-ion - } - - mz = 0; - if (modpeptide[0] != 0) { - mz = amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = mz + H_MASS + NH3_MASS; //c-ion: peptide + H + NH3 - } - - mz = 0; - if (modpeptide[peplen+1] != 0) { - mz = amino_masses[modpeptide[peplen+1]]; - } - for (i=peplen; i > 1; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = mz + OH_MASS - NH_MASS + H_MASS; //z-ion: peptide + OH - NH - } - - return membuffer; +// get fragment ion mz values (b, y, c, z) +float *get_mz_ms2pip_etd(int peplen, unsigned short *modpeptide) +{ + int i, j; + float mz; + j = 0; + + mz = 0; + if (modpeptide[0] != 0) + { + mz = amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = mz + H_MASS; // b-ion + } + + mz = 0; + if (modpeptide[peplen + 1] != 0) + { + mz = amino_masses[modpeptide[peplen + 1]]; + } + for (i = peplen; i > 1; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = H2O_MASS + mz + H_MASS; // y-ion + } + + mz = 0; + if (modpeptide[0] != 0) + { + mz = amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = mz + H_MASS + NH3_MASS; // c-ion: peptide + H + NH3 + } + + mz = 0; + if (modpeptide[peplen + 1] != 0) + { + mz = amino_masses[modpeptide[peplen + 1]]; + } + for (i = peplen; i > 1; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = mz + OH_MASS - NH_MASS + H_MASS; // z-ion: peptide + OH - NH + } + + return membuffer; } - -//get fragment ion mz values (b, y, b++, y++) -float* get_mz_ms2pip_ch2(int peplen, unsigned short* modpeptide) - { - int i,j; - float mz; - j=0; - - mz = 0; - if (modpeptide[0] != 0) { - mz = amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = mz+H_MASS; //b-ion - } - - mz = 0; - if (modpeptide[peplen+1] != 0) { - mz = amino_masses[modpeptide[peplen+1]]; - } - for (i=peplen; i > 1; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = H2O_MASS + mz + H_MASS; //y-ion - } - - mz = 0; - if (modpeptide[0] != 0) { - mz = amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = (mz + H_MASS + H_MASS)/2; //b2-ion: (b-ion + H)/2 - } - - mz = 0; - if (modpeptide[peplen+1] != 0) { - mz = amino_masses[modpeptide[peplen+1]]; - } - for (i=peplen; i > 1; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j++] = (H2O_MASS + mz + H_MASS + H_MASS)/2; //y2-ion: (y-ion + H)/2 - } - - return membuffer; +// get fragment ion mz values (b, y, b++, y++) +float *get_mz_ms2pip_ch2(int peplen, unsigned short *modpeptide) +{ + int i, j; + float mz; + j = 0; + + mz = 0; + if (modpeptide[0] != 0) + { + mz = amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = mz + H_MASS; // b-ion + } + + mz = 0; + if (modpeptide[peplen + 1] != 0) + { + mz = amino_masses[modpeptide[peplen + 1]]; + } + for (i = peplen; i > 1; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = H2O_MASS + mz + H_MASS; // y-ion + } + + mz = 0; + if (modpeptide[0] != 0) + { + mz = amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = (mz + H_MASS + H_MASS) / 2; // b2-ion: (b-ion + H)/2 + } + + mz = 0; + if (modpeptide[peplen + 1] != 0) + { + mz = amino_masses[modpeptide[peplen + 1]]; + } + for (i = peplen; i > 1; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j++] = (H2O_MASS + mz + H_MASS + H_MASS) / 2; // y2-ion: (y-ion + H)/2 + } + + return membuffer; } -//get all fragment ion peaks from spectrum -annotations get_t_ms2pip_all(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) - { - int i,tmp; - float mz; - int msms_pos; - int mem_pos; - float max, maxmz, tmp2; - - //for (i=0; i < numpeaks; i++) { - // fprintf(stderr,"m %f\n",msms[i]); - //} - - for (i=0; i < 18*(peplen-1); i++) { // 2*9 iontypes: b: a -H2O -NH3 b c y: -H2O z y x - ions[i] = ZERO_INTENSITY; - mzs[i] = 0.0f; - } - - //b-ions - mz = ntermmod; - if (modpeptide[0] != 0) { - mz += amino_masses[modpeptide[0]]; - } - int pos = 0; - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[pos++] = mz+H_MASS-CO_MASS; // a - membuffer[pos++] = mz+H_MASS-H2O_MASS; // -H2O - membuffer[pos++] = mz+H_MASS-NH3_MASS; // -NH3 - membuffer[pos++] = mz+H_MASS; // b - membuffer[pos++] = mz+H_MASS+NH3_MASS; // c - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= 5*(peplen-1)) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - maxmz = msms[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - maxmz = msms[tmp]; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[mem_pos] = max; - mzs[mem_pos] = maxmz; - mem_pos += 1; - } - } - - //b2-ions - mz = ntermmod; - if (modpeptide[0] != 0) { - mz += amino_masses[modpeptide[0]]; - } - pos = 0; - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[pos++] = (mz+H_MASS+H_MASS-CO_MASS)/2.; // a - membuffer[pos++] = (mz+H_MASS+H_MASS-H2O_MASS)/2.; // -H2O - membuffer[pos++] = (mz+H_MASS+H_MASS-NH3_MASS)/2.; // -NH3 - membuffer[pos++] = (mz+H_MASS+H_MASS)/2.; // b - membuffer[pos++] = (mz+H_MASS+H_MASS+NH3_MASS)/2.; // c - } - - msms_pos = 0; - mem_pos=0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= 5*(peplen-1)) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - maxmz = msms[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - maxmz = msms[tmp]; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[5*(peplen-1)+mem_pos] = max; - mzs[5*(peplen-1)+mem_pos] = maxmz; - mem_pos += 1; - } - } - - - // y-ions - mz = 0.; - if (modpeptide[peplen+1] != 0) { - mz += modpeptide[peplen+1]; - } - pos = 0; - for (i=peplen; i >= 2; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[pos++] = H2O_MASS+mz+H_MASS-H2O_MASS; //-H2O - membuffer[pos++] = H2O_MASS+mz+H_MASS-NH3_MASS; // z - membuffer[pos++] = H2O_MASS+mz+H_MASS; // y - membuffer[pos++] = H2O_MASS+mz+CO_MASS-H_MASS; // x - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= 4*(peplen-1)) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - maxmz = msms[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - maxmz = msms[tmp]; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[10*(peplen-1)+mem_pos] = max; - mzs[10*(peplen-1)+mem_pos] = maxmz; - mem_pos += 1; - } - } - - // y2-ions - mz = 0.; - if (modpeptide[peplen+1] != 0) { - mz += modpeptide[peplen+1]; - } - pos = 0; - for (i=peplen; i >= 2; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[pos++] = (H2O_MASS+mz+H_MASS-H2O_MASS+H_MASS)/2; //-H2O - membuffer[pos++] = (H2O_MASS+mz+H_MASS-NH3_MASS+H_MASS)/2; // z - membuffer[pos++] = (H2O_MASS+mz+H_MASS+H_MASS)/2; // y - membuffer[pos++] = (H2O_MASS+mz+CO_MASS)/2; // x - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= 4*(peplen-1)) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - maxmz = msms[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - maxmz = msms[tmp]; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[14*(peplen-1)+mem_pos] = max; - mzs[14*(peplen-1)+mem_pos] = maxmz; - mem_pos += 1; - } - } - - //for (i=0; i < 18*(peplen-1); i++) { // 2*9 iontypes: b: a -H2O -NH3 b c y: -H2O z y x - // fprintf(stderr,"%f ",ions[i]); //HARD CODED!! - //} - //fprintf(stderr,"\n"); - - struct annotations r = {ions,mzs}; - - return r; +// get all fragment ion peaks from spectrum +annotations get_t_ms2pip_all(int peplen, unsigned short *modpeptide, int numpeaks, float *msms, float *peaks, float tolmz) +{ + int i, tmp; + float mz; + int msms_pos; + int mem_pos; + float max, maxmz, tmp2; + + for (i = 0; i < 18 * (peplen - 1); i++) + { // 2*9 iontypes: b: a -H2O -NH3 b c y: -H2O z y x + ions[i] = ZERO_INTENSITY; + mzs[i] = 0.0f; + } + + // b-ions + mz = ntermmod; + if (modpeptide[0] != 0) + { + mz += amino_masses[modpeptide[0]]; + } + int pos = 0; + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[pos++] = mz + H_MASS - CO_MASS; // a + membuffer[pos++] = mz + H_MASS - H2O_MASS; // -H2O + membuffer[pos++] = mz + H_MASS - NH3_MASS; // -NH3 + membuffer[pos++] = mz + H_MASS; // b + membuffer[pos++] = mz + H_MASS + NH3_MASS; // c + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= 5 * (peplen - 1)) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + maxmz = msms[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + maxmz = msms[tmp]; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[mem_pos] = max; + mzs[mem_pos] = maxmz; + mem_pos += 1; + } + } + + // b2-ions + mz = ntermmod; + if (modpeptide[0] != 0) + { + mz += amino_masses[modpeptide[0]]; + } + pos = 0; + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[pos++] = (mz + H_MASS + H_MASS - CO_MASS) / 2.; // a + membuffer[pos++] = (mz + H_MASS + H_MASS - H2O_MASS) / 2.; // -H2O + membuffer[pos++] = (mz + H_MASS + H_MASS - NH3_MASS) / 2.; // -NH3 + membuffer[pos++] = (mz + H_MASS + H_MASS) / 2.; // b + membuffer[pos++] = (mz + H_MASS + H_MASS + NH3_MASS) / 2.; // c + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= 5 * (peplen - 1)) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + maxmz = msms[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + maxmz = msms[tmp]; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[5 * (peplen - 1) + mem_pos] = max; + mzs[5 * (peplen - 1) + mem_pos] = maxmz; + mem_pos += 1; + } + } + + // y-ions + mz = 0.; + if (modpeptide[peplen + 1] != 0) + { + mz += modpeptide[peplen + 1]; + } + pos = 0; + for (i = peplen; i >= 2; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[pos++] = H2O_MASS + mz + H_MASS - H2O_MASS; //-H2O + membuffer[pos++] = H2O_MASS + mz + H_MASS - NH3_MASS; // z + membuffer[pos++] = H2O_MASS + mz + H_MASS; // y + membuffer[pos++] = H2O_MASS + mz + CO_MASS - H_MASS; // x + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= 4 * (peplen - 1)) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + maxmz = msms[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + maxmz = msms[tmp]; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[10 * (peplen - 1) + mem_pos] = max; + mzs[10 * (peplen - 1) + mem_pos] = maxmz; + mem_pos += 1; + } + } + + // y2-ions + mz = 0.; + if (modpeptide[peplen + 1] != 0) + { + mz += modpeptide[peplen + 1]; + } + pos = 0; + for (i = peplen; i >= 2; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[pos++] = (H2O_MASS + mz + H_MASS - H2O_MASS + H_MASS) / 2; //-H2O + membuffer[pos++] = (H2O_MASS + mz + H_MASS - NH3_MASS + H_MASS) / 2; // z + membuffer[pos++] = (H2O_MASS + mz + H_MASS + H_MASS) / 2; // y + membuffer[pos++] = (H2O_MASS + mz + CO_MASS) / 2; // x + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= 4 * (peplen - 1)) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + maxmz = msms[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + maxmz = msms[tmp]; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[14 * (peplen - 1) + mem_pos] = max; + mzs[14 * (peplen - 1) + mem_pos] = maxmz; + mem_pos += 1; + } + } + + struct annotations r = {ions, mzs}; + + return r; } -//get fragment ion peaks from spectrum -float* get_t_ms2pip_general(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) - { - int i,j,tmp; - float mz; - int msms_pos; - int mem_pos; - float max, tmp2; - - //for (i=0; i < numpeaks; i++) { - // fprintf(stderr,"m %f\n",msms[i]); - //} - - for (i=0; i < 2*(peplen-1); i++) { - ions[i] = ZERO_INTENSITY; - } - - //b-ions - mz = ntermmod; - if (modpeptide[0] != 0) { - mz += amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[i-1] = mz+H_MASS; - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= peplen-1) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[mem_pos] = max; - mem_pos += 1; - } - } - - // y-ions - mz = 0.; - if (modpeptide[peplen+1] != 0) { - mz += modpeptide[peplen+1]; - } - j=0; - for (i=peplen; i >= 2; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j] = H2O_MASS+mz+H_MASS; - j++; - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= peplen-1) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[(peplen-1)+mem_pos] = max; - mem_pos += 1; - } - } - - - return ions; +// get fragment ion peaks from spectrum +float *get_t_ms2pip_general(int peplen, unsigned short *modpeptide, int numpeaks, float *msms, float *peaks, float tolmz) +{ + int i, j, tmp; + float mz; + int msms_pos; + int mem_pos; + float max, tmp2; + + for (i = 0; i < 2 * (peplen - 1); i++) + { + ions[i] = ZERO_INTENSITY; + } + + // b-ions + mz = ntermmod; + if (modpeptide[0] != 0) + { + mz += amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[i - 1] = mz + H_MASS; + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= peplen - 1) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[mem_pos] = max; + mem_pos += 1; + } + } + + // y-ions + mz = 0.; + if (modpeptide[peplen + 1] != 0) + { + mz += modpeptide[peplen + 1]; + } + j = 0; + for (i = peplen; i >= 2; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j] = H2O_MASS + mz + H_MASS; + j++; + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= peplen - 1) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[(peplen - 1) + mem_pos] = max; + mem_pos += 1; + } + } + + return ions; } - - -//get fragment ion peaks from spectrum (b, y, c, z) -float* get_t_ms2pip_etd(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) - { - int i,j,tmp; - float mz; - int msms_pos; - int mem_pos; - float max, tmp2; - - for (i=0; i < 4*(peplen-1); i++) { - ions[i] = ZERO_INTENSITY; - } - - //b-ions - mz = ntermmod; - if (modpeptide[0] != 0) { - mz += amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[i-1] = mz+H_MASS; - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= peplen-1) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[mem_pos] = max; - mem_pos += 1; - } - } - - // y-ions - mz = 0.; - if (modpeptide[peplen+1] != 0) { - mz += modpeptide[peplen+1]; - } - j=0; - for (i=peplen; i >= 2; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j] = H2O_MASS+mz+H_MASS; - j++; - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= peplen-1) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[(peplen-1)+mem_pos] = max; - mem_pos += 1; - } - } - - //c-ions - mz = ntermmod; - if (modpeptide[0] != 0) { - mz += amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[i-1] = mz + H_MASS + NH3_MASS; - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= peplen-1) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[2*(peplen-1)+mem_pos] = max; - mem_pos += 1; - } - } - - // z-ions - mz = 0.; - if (modpeptide[peplen+1] != 0) { - mz += modpeptide[peplen+1]; - } - j=0; - for (i=peplen; i >= 2; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j] = mz + OH_MASS - NH_MASS + H_MASS; - j++; - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= peplen-1) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[3*(peplen-1)+mem_pos] = max; - mem_pos += 1; - } - } - - return ions; +// get fragment ion peaks from spectrum (b, y, c, z) +float *get_t_ms2pip_etd(int peplen, unsigned short *modpeptide, int numpeaks, float *msms, float *peaks, float tolmz) +{ + int i, j, tmp; + float mz; + int msms_pos; + int mem_pos; + float max, tmp2; + + for (i = 0; i < 4 * (peplen - 1); i++) + { + ions[i] = ZERO_INTENSITY; + } + + // b-ions + mz = ntermmod; + if (modpeptide[0] != 0) + { + mz += amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[i - 1] = mz + H_MASS; + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= peplen - 1) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[mem_pos] = max; + mem_pos += 1; + } + } + + // y-ions + mz = 0.; + if (modpeptide[peplen + 1] != 0) + { + mz += modpeptide[peplen + 1]; + } + j = 0; + for (i = peplen; i >= 2; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j] = H2O_MASS + mz + H_MASS; + j++; + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= peplen - 1) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[(peplen - 1) + mem_pos] = max; + mem_pos += 1; + } + } + + // c-ions + mz = ntermmod; + if (modpeptide[0] != 0) + { + mz += amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[i - 1] = mz + H_MASS + NH3_MASS; + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= peplen - 1) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[2 * (peplen - 1) + mem_pos] = max; + mem_pos += 1; + } + } + + // z-ions + mz = 0.; + if (modpeptide[peplen + 1] != 0) + { + mz += modpeptide[peplen + 1]; + } + j = 0; + for (i = peplen; i >= 2; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j] = mz + OH_MASS - NH_MASS + H_MASS; + j++; + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= peplen - 1) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[3 * (peplen - 1) + mem_pos] = max; + mem_pos += 1; + } + } + + return ions; } - -//get fragment ion peaks from spectrum (b, y, b++, y++) -float* get_t_ms2pip_ch2(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) - { - int i,j,tmp; - float mz; - int msms_pos; - int mem_pos; - float max, tmp2; - - //for (i=0; i < numpeaks; i++) { - // fprintf(stderr,"m %f\n",msms[i]); - //} - - for (i=0; i < 4*(peplen-1); i++) { - ions[i] = ZERO_INTENSITY; - } - - //b-ions - mz = ntermmod; - if (modpeptide[0] != 0) { - mz += amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[i-1] = mz+H_MASS; - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= peplen-1) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[mem_pos] = max; - mem_pos += 1; - } - } - - // y-ions - mz = 0.; - if (modpeptide[peplen+1] != 0) { - mz += modpeptide[peplen+1]; - } - j=0; - for (i=peplen; i >= 2; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j] = H2O_MASS+mz+H_MASS; - j++; - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= peplen-1) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[(peplen-1)+mem_pos] = max; - mem_pos += 1; - } - } - - //b2-ions - mz = ntermmod; - if (modpeptide[0] != 0) { - mz += amino_masses[modpeptide[0]]; - } - for (i=1; i < peplen; i++) { - mz += amino_masses[modpeptide[i]]; - membuffer[i-1] = (mz + H_MASS + H_MASS)/2; - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= peplen-1) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[2*(peplen-1)+mem_pos] = max; - mem_pos += 1; - } - } - - // y2-ions - mz = 0.; - if (modpeptide[peplen+1] != 0) { - mz += modpeptide[peplen+1]; - } - j=0; - for (i=peplen; i >= 2; i--) { - mz += amino_masses[modpeptide[i]]; - membuffer[j] = (H2O_MASS + mz + H_MASS + H_MASS)/2; - j++; - } - - msms_pos = 0; - mem_pos = 0; - while (1) { - if (msms_pos >= numpeaks) { - break; - } - if (mem_pos >= peplen-1) { - break; - } - mz = membuffer[mem_pos]; - if (msms[msms_pos] > (mz+tolmz)) { - mem_pos += 1; - } - else if (msms[msms_pos] < (mz-tolmz)) { - msms_pos += 1; - } - else { - max = peaks[msms_pos]; - tmp = msms_pos + 1; - if (tmp < numpeaks) { - while (msms[tmp] <= (mz+tolmz)) { - tmp2 = peaks[tmp]; - if (max < tmp2) { - max = tmp2; - } - tmp += 1; - if (tmp == numpeaks) { - break; - } - } - } - ions[3*(peplen-1)+mem_pos] = max; - mem_pos += 1; - } - } - - return ions; +// get fragment ion peaks from spectrum (b, y, b++, y++) +float *get_t_ms2pip_ch2(int peplen, unsigned short *modpeptide, int numpeaks, float *msms, float *peaks, float tolmz) +{ + int i, j, tmp; + float mz; + int msms_pos; + int mem_pos; + float max, tmp2; + + for (i = 0; i < 4 * (peplen - 1); i++) + { + ions[i] = ZERO_INTENSITY; + } + + // b-ions + mz = ntermmod; + if (modpeptide[0] != 0) + { + mz += amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[i - 1] = mz + H_MASS; + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= peplen - 1) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[mem_pos] = max; + mem_pos += 1; + } + } + + // y-ions + mz = 0.; + if (modpeptide[peplen + 1] != 0) + { + mz += modpeptide[peplen + 1]; + } + j = 0; + for (i = peplen; i >= 2; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j] = H2O_MASS + mz + H_MASS; + j++; + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= peplen - 1) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[(peplen - 1) + mem_pos] = max; + mem_pos += 1; + } + } + + // b2-ions + mz = ntermmod; + if (modpeptide[0] != 0) + { + mz += amino_masses[modpeptide[0]]; + } + for (i = 1; i < peplen; i++) + { + mz += amino_masses[modpeptide[i]]; + membuffer[i - 1] = (mz + H_MASS + H_MASS) / 2; + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= peplen - 1) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[2 * (peplen - 1) + mem_pos] = max; + mem_pos += 1; + } + } + + // y2-ions + mz = 0.; + if (modpeptide[peplen + 1] != 0) + { + mz += modpeptide[peplen + 1]; + } + j = 0; + for (i = peplen; i >= 2; i--) + { + mz += amino_masses[modpeptide[i]]; + membuffer[j] = (H2O_MASS + mz + H_MASS + H_MASS) / 2; + j++; + } + + msms_pos = 0; + mem_pos = 0; + while (1) + { + if (msms_pos >= numpeaks) + { + break; + } + if (mem_pos >= peplen - 1) + { + break; + } + mz = membuffer[mem_pos]; + if (msms[msms_pos] > (mz + tolmz)) + { + mem_pos += 1; + } + else if (msms[msms_pos] < (mz - tolmz)) + { + msms_pos += 1; + } + else + { + max = peaks[msms_pos]; + tmp = msms_pos + 1; + if (tmp < numpeaks) + { + while (msms[tmp] <= (mz + tolmz)) + { + tmp2 = peaks[tmp]; + if (max < tmp2) + { + max = tmp2; + } + tmp += 1; + if (tmp == numpeaks) + { + break; + } + } + } + ions[3 * (peplen - 1) + mem_pos] = max; + mem_pos += 1; + } + } + + return ions; } diff --git a/ms2pip/_cython_modules/ms2pip_pyx.pyx b/ms2pip/_cython_modules/ms2pip_pyx.pyx index 0e980ba6..a656d67a 100644 --- a/ms2pip/_cython_modules/ms2pip_pyx.pyx +++ b/ms2pip/_cython_modules/ms2pip_pyx.pyx @@ -3,185 +3,241 @@ import sys import numpy as np cimport numpy as np - NUM_ION_TYPES_MAPPING = {'general': 2, 'etd': 4, 'ch2': 4, 'all': 18} cdef extern from "ms2pip_peaks_c.c": + ctypedef struct annotations: + float* peaks + float* msms - ctypedef struct annotations: - float* peaks - float* msms - - void init_ms2pip(char* amino_masses_fname, char* modifications_fname, char* modifications_fname_sptm) + void init_ms2pip(char* amino_masses_fname, char* modifications_fname, char* modifications_fname_sptm) - unsigned int* get_v_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge) + unsigned int* get_v_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge) - float* get_p_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge, int model_id, int ce) + float* get_p_ms2pip(int peplen, unsigned short* peptide, unsigned short* modpeptide, int charge, int model_id, int ce) - float* get_mz_ms2pip_general(int peplen, unsigned short* modpeptide) - float* get_mz_ms2pip_etd(int peplen, unsigned short* modpeptide) - float* get_mz_ms2pip_ch2(int peplen, unsigned short* modpeptide) + float* get_mz_ms2pip_general(int peplen, unsigned short* modpeptide) + float* get_mz_ms2pip_etd(int peplen, unsigned short* modpeptide) + float* get_mz_ms2pip_ch2(int peplen, unsigned short* modpeptide) - annotations get_t_ms2pip_all(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) - float* get_t_ms2pip_general(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) - float* get_t_ms2pip_etd(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) - float* get_t_ms2pip_ch2(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) + annotations get_t_ms2pip_all(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) + float* get_t_ms2pip_general(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) + float* get_t_ms2pip_etd(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) + float* get_t_ms2pip_ch2(int peplen, unsigned short* modpeptide, int numpeaks, float* msms, float* peaks, float tolmz) def ms2pip_init(amino_masses_fname, modifications_fname, modifications_fname_sptm): - if not isinstance(amino_masses_fname, bytearray): - amino_masses_fname = bytearray(amino_masses_fname.encode()) - if not isinstance(modifications_fname, bytearray): - modifications_fname = bytearray(modifications_fname.encode()) - if not isinstance(modifications_fname_sptm, bytearray): - modifications_fname_sptm = bytearray(modifications_fname_sptm.encode()) - init_ms2pip(amino_masses_fname, modifications_fname, modifications_fname_sptm) - - -def get_vector(np.ndarray[unsigned short, ndim=1, mode="c"] peptide, - np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, - charge): - - cdef unsigned int* results = get_v_ms2pip(len(peptide)-2, &peptide[0], &modpeptide[0], charge) - - r = [] - offset = 0 - fnum = int(results[0] / (len(peptide) - 3)) - for i in range(len(peptide) - 3): - v = [] - for j in range(fnum): - v.append(results[j + 1 + offset]) - offset += fnum - r.append(np.array(v, dtype=np.uint16)) - - return r - - -def get_predictions(np.ndarray[unsigned short, ndim=1, mode="c"] peptide, - np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, - charge, model_id, peaks_version, ce): - cdef float* results = get_p_ms2pip(len(peptide)-2, &peptide[0], &modpeptide[0], charge, model_id, ce) - if results is NULL: - raise NotImplementedError(model_id) - result_parsed = [] - for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): - tmp = [] - for j in range(len(modpeptide)-3): - tmp.append(results[(len(modpeptide)-3) * i + j]) - result_parsed.append(tmp) - return result_parsed + """Initialize MS2PIP C code with given masses for amino acids and modifications".""" + if not isinstance(amino_masses_fname, bytearray): + amino_masses_fname = bytearray(amino_masses_fname.encode()) + if not isinstance(modifications_fname, bytearray): + modifications_fname = bytearray(modifications_fname.encode()) + if not isinstance(modifications_fname_sptm, bytearray): + modifications_fname_sptm = bytearray(modifications_fname_sptm.encode()) + init_ms2pip(amino_masses_fname, modifications_fname, modifications_fname_sptm) + + +def get_vector( + np.ndarray[unsigned short, ndim=1, mode="c"] peptide, + np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, + charge +): + """Get feature vector for given peptide and charge.""" + cdef unsigned int* results = get_v_ms2pip(len(peptide)-2, &peptide[0], &modpeptide[0], charge) + + r = [] + offset = 0 + fnum = int(results[0] / (len(peptide) - 3)) + for i in range(len(peptide) - 3): + v = [] + for j in range(fnum): + v.append(results[j + 1 + offset]) + offset += fnum + r.append(np.array(v, dtype=np.uint16)) + + return r + + +def get_predictions( + np.ndarray[unsigned short, ndim=1, mode="c"] peptide, + np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, + charge, + model_id, + peaks_version, + ce +): + """Get intensity predictions from C-model.""" + cdef float* results = get_p_ms2pip(len(peptide)-2, &peptide[0], &modpeptide[0], charge, model_id, ce) + + if results is NULL: + raise NotImplementedError(model_id) + + result_parsed = [] + for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): + tmp = [] + for j in range(len(modpeptide)-3): + tmp.append(results[(len(modpeptide)-3) * i + j]) + result_parsed.append(tmp) + + return result_parsed def get_targets(*args): - if args[4] == 'general': - result = get_targets_general(*args) - if args[4] == 'etd': - result = get_targets_etd(*args) - if args[4] == 'ch2': - result = get_targets_ch2(*args) - if args[4] == 'all': - result = get_targets_all(*args) - return result - - -def get_targets_all(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, - np.ndarray[float, ndim=1, mode="c"] msms, - np.ndarray[float, ndim=1, mode="c"] peaks, - fragerror, peaks_version): - cdef annotations results - results = get_t_ms2pip_all(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror) - result_peaks = [] - for i in range(NUM_ION_TYPES_MAPPING[peaks_version]*(len(modpeptide)-3)): - result_peaks.append(results.peaks[i]) - result_mzs = [] - for i in range(NUM_ION_TYPES_MAPPING[peaks_version]*(len(modpeptide)-3)): - result_mzs.append(results.msms[i]) - return (result_mzs,result_peaks) - - -def get_targets_general(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, - np.ndarray[float, ndim=1, mode="c"] msms, - np.ndarray[float, ndim=1, mode="c"] peaks, - fragerror, peaks_version): - cdef float* results = get_t_ms2pip_general(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror) - result_parsed = [] - for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): #SD: HAd to change this - tmp = [] - for j in range(len(modpeptide)-3): - tmp.append(results[(len(modpeptide)-3) * i + j]) - result_parsed.append(tmp) - return result_parsed - -def get_targets_etd(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, - np.ndarray[float, ndim=1, mode="c"] msms, - np.ndarray[float, ndim=1, mode="c"] peaks, - fragerror, peaks_version): - cdef float* results = get_t_ms2pip_etd(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror) - result_parsed = [] - for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): - tmp = [] - for j in range(len(modpeptide)-3): - tmp.append(results[(len(modpeptide)-3) * i + j]) - result_parsed.append(tmp) - return result_parsed - - -def get_targets_ch2(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, - np.ndarray[float, ndim=1, mode="c"] msms, - np.ndarray[float, ndim=1, mode="c"] peaks, - fragerror, peaks_version): - cdef float* results = get_t_ms2pip_ch2(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror) - result_parsed = [] - for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): - tmp = [] - for j in range(len(modpeptide)-3): - tmp.append(results[(len(modpeptide)-3) * i + j]) - result_parsed.append(tmp) - return result_parsed + """Get target intensities.""" + if args[4] == 'general': + result = get_targets_general(*args) + elif args[4] == 'etd': + result = get_targets_etd(*args) + elif args[4] == 'ch2': + result = get_targets_ch2(*args) + elif args[4] == 'all': + result = get_targets_all(*args) + else: + raise NotImplementedError(args[4]) + return result + + +def get_targets_all( + np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, + np.ndarray[float, ndim=1, mode="c"] msms, + np.ndarray[float, ndim=1, mode="c"] peaks, + fragerror, + peaks_version +): + """Get target intensities for all supported ion types.""" + cdef annotations results + + results = get_t_ms2pip_all(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror) + + result_peaks = [] + for i in range(NUM_ION_TYPES_MAPPING[peaks_version]*(len(modpeptide)-3)): + result_peaks.append(results.peaks[i]) + + result_mzs = [] + for i in range(NUM_ION_TYPES_MAPPING[peaks_version]*(len(modpeptide)-3)): + result_mzs.append(results.msms[i]) + + return (result_mzs, result_peaks) + + +def get_targets_general( + np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, + np.ndarray[float, ndim=1, mode="c"] msms, + np.ndarray[float, ndim=1, mode="c"] peaks, + fragerror, + peaks_version +): + """Get target intensities for singly-charged b- and y-ions.""" + cdef float* results = get_t_ms2pip_general(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror) + + result_parsed = [] + for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): + tmp = [] + for j in range(len(modpeptide)-3): + tmp.append(results[(len(modpeptide)-3) * i + j]) + result_parsed.append(tmp) + + return result_parsed + + +def get_targets_etd( + np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, + np.ndarray[float, ndim=1, mode="c"] msms, + np.ndarray[float, ndim=1, mode="c"] peaks, + fragerror, + peaks_version, +): + """Get target intensities for ETD fragment ions.""" + cdef float* results = get_t_ms2pip_etd(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror) + + result_parsed = [] + for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): + tmp = [] + for j in range(len(modpeptide)-3): + tmp.append(results[(len(modpeptide)-3) * i + j]) + result_parsed.append(tmp) + + return result_parsed + + +def get_targets_ch2( + np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, + np.ndarray[float, ndim=1, mode="c"] msms, + np.ndarray[float, ndim=1, mode="c"] peaks, + fragerror, + peaks_version +): + """Get target intensities for singly- and doubly-charged b- and y-ions.""" + cdef float* results = get_t_ms2pip_ch2(len(modpeptide)-2, &modpeptide[0], len(peaks), &msms[0], &peaks[0], fragerror) + + result_parsed = [] + for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): + tmp = [] + for j in range(len(modpeptide)-3): + tmp.append(results[(len(modpeptide)-3) * i + j]) + result_parsed.append(tmp) + + return result_parsed def get_mzs(*args): - if args[1] == 'general': - result = get_mzs_general(*args) - if args[1] == 'etd': - result = get_mzs_etd(*args) - if args[1] == 'ch2': - result = get_mzs_ch2(*args) - return result - - -def get_mzs_general(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, - peaks_version): - cdef float* results = get_mz_ms2pip_general(len(modpeptide)-2, &modpeptide[0]) - result_parsed = [] - for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): - tmp = [] - for j in range(len(modpeptide)-3): - tmp.append(results[(len(modpeptide)-3) * i + j]) - result_parsed.append(tmp) - return result_parsed - - -def get_mzs_etd(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, - peaks_version): - cdef float* results = get_mz_ms2pip_etd(len(modpeptide)-2, &modpeptide[0]) - result_parsed = [] - for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): - tmp = [] - for j in range(len(modpeptide)-3): - tmp.append(results[(len(modpeptide)-3) * i + j]) - result_parsed.append(tmp) - return result_parsed - - -def get_mzs_ch2(np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, - peaks_version): - cdef float* results = get_mz_ms2pip_ch2(len(modpeptide)-2, &modpeptide[0]) - result_parsed = [] - for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): - tmp = [] - for j in range(len(modpeptide)-3): - tmp.append(results[(len(modpeptide)-3) * i + j]) - result_parsed.append(tmp) - return result_parsed + """Get theoretical m/z values.""" + if args[1] == 'general': + result = get_mzs_general(*args) + if args[1] == 'etd': + result = get_mzs_etd(*args) + if args[1] == 'ch2': + result = get_mzs_ch2(*args) + return result + + +def get_mzs_general( + np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, + peaks_version +): + """Get theoretical m/z values for singly-charged b- and y-ions.""" + cdef float* results = get_mz_ms2pip_general(len(modpeptide)-2, &modpeptide[0]) + + result_parsed = [] + for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): + tmp = [] + for j in range(len(modpeptide)-3): + tmp.append(results[(len(modpeptide)-3) * i + j]) + result_parsed.append(tmp) + + return result_parsed + + +def get_mzs_etd( + np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, + peaks_version +): + """Get theoretical m/z values for ETD fragment ions.""" + cdef float* results = get_mz_ms2pip_etd(len(modpeptide)-2, &modpeptide[0]) + + result_parsed = [] + for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): + tmp = [] + for j in range(len(modpeptide)-3): + tmp.append(results[(len(modpeptide)-3) * i + j]) + result_parsed.append(tmp) + + return result_parsed + + +def get_mzs_ch2( + np.ndarray[unsigned short, ndim=1, mode="c"] modpeptide, + peaks_version +): + """Get theoretical m/z values for singly- and doubly-charged b- and y-ions.""" + cdef float* results = get_mz_ms2pip_ch2(len(modpeptide)-2, &modpeptide[0]) + + result_parsed = [] + for i in range(NUM_ION_TYPES_MAPPING[peaks_version]): + tmp = [] + for j in range(len(modpeptide)-3): + tmp.append(results[(len(modpeptide)-3) * i + j]) + result_parsed.append(tmp) + + return result_parsed From 994cd680d0c644ec66de17f0f55e74c83029b694 Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 9 Feb 2024 17:35:29 +0100 Subject: [PATCH 4/6] Fix calculating correlations if some peptides could not be processed (e.g., invalid amino acids). Now returns nan for those spectrum correlations --- ms2pip/core.py | 4 +++- ms2pip/result.py | 14 ++++++++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/ms2pip/core.py b/ms2pip/core.py index ed458fff..7ad72b00 100644 --- a/ms2pip/core.py +++ b/ms2pip/core.py @@ -189,7 +189,9 @@ def correlate( if compute_correlations: logger.info("Computing correlations") calculate_correlations(results) - logger.info(f"Median correlation: {np.median(list(r.correlation for r in results))}") + logger.info( + f"Median correlation: {np.nanmedian(np.array([r.correlation for r in results]))}" + ) return results diff --git a/ms2pip/result.py b/ms2pip/result.py index a673a466..1b1b2d72 100644 --- a/ms2pip/result.py +++ b/ms2pip/result.py @@ -112,9 +112,15 @@ def plot_spectra(self): def calculate_correlations(results: List[ProcessingResult]) -> None: """Calculate and add Pearson correlations to list of results.""" for result in results: - pred_int = np.concatenate([i for i in result.predicted_intensity.values()]) - obs_int = np.concatenate([i for i in result.observed_intensity.values()]) - result.correlation = np.corrcoef(pred_int, obs_int)[0][1] + try: + pred = result.predicted_intensity.values() + obs = result.observed_intensity.values() + except AttributeError: + result.correlation = np.nan + else: + pred_int = np.concatenate([i for i in pred]) + obs_int = np.concatenate([i for i in obs]) + result.correlation = np.corrcoef(pred_int, obs_int)[0][1] def results_to_csv(results: List["ProcessingResult"], output_file: str) -> None: @@ -157,4 +163,4 @@ def correlations_to_csv(results: List["ProcessingResult"], output_file: str) -> writer = csv.DictWriter(f, fieldnames=fieldnames, lineterminator="\n") writer.writeheader() for result in results: - writer.writerow({"psm_index": result.psm_id, "correlation": result.correlation}) + writer.writerow({"psm_index": result.psm_index, "correlation": result.correlation}) From 00e96d6de3712018d1abb2d23315d79d712dab45 Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 9 Feb 2024 17:48:10 +0100 Subject: [PATCH 5/6] Avoid type conversions --- ms2pip/_cython_modules/ms2pip_peaks_c.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ms2pip/_cython_modules/ms2pip_peaks_c.c b/ms2pip/_cython_modules/ms2pip_peaks_c.c index f0e87992..4bb484cf 100644 --- a/ms2pip/_cython_modules/ms2pip_peaks_c.c +++ b/ms2pip/_cython_modules/ms2pip_peaks_c.c @@ -285,11 +285,11 @@ annotations get_t_ms2pip_all(int peplen, unsigned short *modpeptide, int numpeak for (i = 1; i < peplen; i++) { mz += amino_masses[modpeptide[i]]; - membuffer[pos++] = (mz + H_MASS + H_MASS - CO_MASS) / 2.; // a - membuffer[pos++] = (mz + H_MASS + H_MASS - H2O_MASS) / 2.; // -H2O - membuffer[pos++] = (mz + H_MASS + H_MASS - NH3_MASS) / 2.; // -NH3 - membuffer[pos++] = (mz + H_MASS + H_MASS) / 2.; // b - membuffer[pos++] = (mz + H_MASS + H_MASS + NH3_MASS) / 2.; // c + membuffer[pos++] = (mz + H_MASS + H_MASS - CO_MASS) / 2.f; // a + membuffer[pos++] = (mz + H_MASS + H_MASS - H2O_MASS) / 2.f; // -H2O + membuffer[pos++] = (mz + H_MASS + H_MASS - NH3_MASS) / 2.f; // -NH3 + membuffer[pos++] = (mz + H_MASS + H_MASS) / 2.f; // b + membuffer[pos++] = (mz + H_MASS + H_MASS + NH3_MASS) / 2.f; // c } msms_pos = 0; From 41198c924c8ef8d2a99986245e695999a4856e33 Mon Sep 17 00:00:00 2001 From: RalfG Date: Fri, 9 Feb 2024 17:48:27 +0100 Subject: [PATCH 6/6] Clean up setup.py --- pyproject.toml | 8 +++++--- setup.py | 49 ++++--------------------------------------------- 2 files changed, 9 insertions(+), 48 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d5907e81..59caaaec 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,6 +77,9 @@ ms2pip = "ms2pip.__main__:main" requires = ["setuptools", "cython", "oldest-supported-numpy"] build-backend = "setuptools.build_meta" +[tool.setuptools.dynamic] +version = { attr = "ms2pip.__version__" } + [tool.setuptools.packages.find] include = ["ms2pip*"] @@ -89,9 +92,8 @@ line-length = 99 target-version = 'py38' [tool.cibuildwheel] -build = "cp3*-manylinux_x86_64 cp3*-win_amd64 cp3*-macosx_x86_64" -skip = "cp36-* cp37-* cp312-*" # EOL / no Numpy wheels available yet for Python 3.12 -manylinux-x86_64-image = "manylinux2014" +build = "cp3*-manylinux_x86_64 cp3*-musllinux_x86_64 cp3*-win_amd64 cp3*-macosx_universal2" +skip = "cp36-* cp37-*" # EOL / no Numpy wheels available yet for Python 3.12 # test-command = "pytest {package}/tests" test-command = "ms2pip --help" diff --git a/setup.py b/setup.py index b389ab08..dc63a9d9 100644 --- a/setup.py +++ b/setup.py @@ -1,58 +1,17 @@ -import os -import platform -from glob import glob - -import numpy -from Cython.Distutils import build_ext +from Cython.Build import build_ext +from numpy import get_include from setuptools import setup from setuptools.extension import Extension - -def _get_version(): - with open("ms2pip/__init__.py") as f: - for line in f: - if line.startswith("__version__"): - return line.split("=")[1].strip().strip('"').strip("'") - - -to_remove = [ - "ms2pip/_cython_modules/ms2pip_pyx.c*", - "ms2pip/_cython_modules/ms2pip_pyx.so", -] -_ = [[os.remove(f) for f in glob(pat)] for pat in to_remove] - -# Large machine-written C model files require optimization to be disabled -compile_args = { - "Linux": [ - "-O0", - "-fno-var-tracking", - "-Wno-unused-result", - "-Wno-cpp", - "-Wno-unused-function", - ], - "Darwin": [ - "-O0", - ], - "Windows": [ - "/Od", - "/DEBUG", - "/GL-", - "/bigobj", - "/wd4244", - ], -} - extensions = [ Extension( "ms2pip._cython_modules.ms2pip_pyx", - sources=["ms2pip/_cython_modules/ms2pip_pyx.pyx"] + glob("ms2pip/_models_c/*/*.c"), - extra_compile_args=compile_args[platform.system()], + sources=["ms2pip/_cython_modules/ms2pip_pyx.pyx"], ) ] setup( - version=_get_version(), ext_modules=extensions, - include_dirs=[numpy.get_include()], + include_dirs=[get_include()], cmdclass={"build_ext": build_ext}, )