-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmz_calculator.py
151 lines (145 loc) · 7.98 KB
/
mz_calculator.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
"""
╔═════════════════════════════════════════════════════╗
║ mz_calculator.py ║
╠═════════════════════════════════════════════════════╣
║ Description: Utility functions for ║
║ mass-to-charge ratio calculation ║
╠═════════════════════════════════════════════════════╣
║ Author: Mingxuan Gao ║
║ Contact: [email protected] ║
╚═════════════════════════════════════════════════════╝
"""
## Modified from PECAN's peptideCalculator.py
import re
import numpy as np
mono_masses = dict()
mono_masses['H'] = 1.007825035
mono_masses['C'] = 12.00000
mono_masses['O'] = 15.99491463
mono_masses['N'] = 14.003074
mono_masses['I'] = 126.904473
mono_masses['S'] = 31.9720707
mono_masses['P'] = 30.973762
mono_masses['13C'] = 13.0033554
mono_masses['15N'] = 15.0001088
proton_mass = 1.00727646677
OH_mass = mono_masses['O'] + mono_masses['H']
H2O_mass = mono_masses['O'] + 2*(mono_masses['H'])
NH3_mass = mono_masses['N'] + 3*(mono_masses['H'])
CO_mass = mono_masses['C'] + mono_masses['O']
cam_mass = 2*mono_masses['C'] + 3*mono_masses['H'] + mono_masses['O'] + mono_masses['N']
residue_composition = dict()
residue_composition['A'] = {'H': 5, 'C': 3, 'O': 1, 'N': 1}
residue_composition['C'] = {'H': 5, 'C': 3, 'O': 1, 'N': 1, 'S': 1}
residue_composition['D'] = {'H': 5, 'C': 4, 'O': 3, 'N': 1}
residue_composition['E'] = {'H': 7, 'C': 5, 'O': 3, 'N': 1}
residue_composition['F'] = {'H': 9, 'C': 9, 'O': 1, 'N': 1}
residue_composition['G'] = {'H': 3, 'C': 2, 'O': 1, 'N': 1}
residue_composition['H'] = {'H': 7, 'C': 6, 'O': 1, 'N': 3}
residue_composition['I'] = {'H': 11, 'C': 6, 'O': 1, 'N': 1}
residue_composition['K'] = {'H': 12, 'C': 6, 'O': 1, 'N': 2}
residue_composition['L'] = {'H': 11, 'C': 6, 'O': 1, 'N': 1}
residue_composition['M'] = {'H': 9, 'C': 5, 'O': 1, 'N': 1, 'S': 1}
residue_composition['N'] = {'H': 6, 'C': 4, 'O': 2, 'N': 2}
residue_composition['P'] = {'H': 7, 'C': 5, 'O': 1, 'N': 1}
residue_composition['Q'] = {'H': 8, 'C': 5, 'O': 2, 'N': 2}
residue_composition['R'] = {'H': 12, 'C': 6, 'O': 1, 'N': 4}
residue_composition['S'] = {'H': 5, 'C': 3, 'O': 2, 'N': 1}
residue_composition['T'] = {'H': 7, 'C': 4, 'O': 2, 'N': 1}
residue_composition['U'] = {'H': 5, 'C': 3, 'O': 1, 'N': 1}
residue_composition['V'] = {'H': 9, 'C': 5, 'O': 1, 'N': 1}
residue_composition['W'] = {'H': 10, 'C': 11, 'O': 1, 'N': 2}
residue_composition['Y'] = {'H': 9, 'C': 9, 'O': 2, 'N': 1}
residue_composition['X'] = {'H': 11, 'C': 6, 'O': 1, 'N': 1}
residue_composition['Z'] = {'H':999, 'C':999, 'O':999,'N':999}
residue_composition['B'] = {'H': 12, '13C': 6, 'O': 1, '15N': 4}
residue_composition['J'] = {'H': 12, '13C': 6, 'O': 1, '15N': 2}
def precompute_fragment_mass():
all_seqchars = "ABCDEFGHIJKLMNPQRSTUVWXYZ"
fragment_mass_precomput = dict()
for aa in all_seqchars:
fragment_mass_aa = 0
for el in residue_composition[aa]:
fragment_mass_aa += residue_composition[aa][el]*mono_masses[el]
fragment_mass_precomput[aa] = fragment_mass_aa
return fragment_mass_precomput
FRAGMENT_MASS_DICT = precompute_fragment_mass()
unimod1_mass = 2 * mono_masses['C'] + mono_masses['O'] + 2 * mono_masses['H']
unimod4_mass = 57.021464
unimod5_mass = mono_masses['C'] + mono_masses['N'] + mono_masses['O']
unimod7_mass = 0.9840
unimod21_mass = mono_masses['H'] + 3 * mono_masses['O'] + mono_masses['P']
unimod26_mass = 2 * mono_masses['C'] + mono_masses['O']
unimod27_mass = -2 * mono_masses['H'] - mono_masses['O']
unimod28_mass = -3 * mono_masses['H'] - mono_masses['N']
unimod35_mass = 15.994915
unimod259_mass = mono_masses["13C"] * 6 - mono_masses["C"] * 6 + mono_masses["15N"] * 2 - mono_masses["N"] * 2
unimod267_mass = mono_masses["13C"] * 6 - mono_masses["C"] * 6 + mono_masses["15N"] * 4 - mono_masses["N"] * 4
def calc_fragment_mz(full_seq, pure_peptide_seq, charge, ion_type):
fragment_length = int(ion_type[1:])
if ion_type[0] == "b":
re_pattern = "^(\(UniMod:\d+\))*" + "(\(UniMod:\d+\))*".join(list(pure_peptide_seq[ : fragment_length])) + "(\(UniMod:\d+\))*"
else:
re_pattern = "(\(UniMod:\d+\))*".join(list(pure_peptide_seq[-fragment_length : ])) + "(\(UniMod:\d+\))*$"
if fragment_length == len(pure_peptide_seq):
re_pattern = "^(\(UniMod:\d+\))*" + re_pattern
fragment_seq = re.search(re_pattern, full_seq).group()
pure_fragment_seq = re.sub(r"\(UniMod:\d+\)", "", fragment_seq)
unimod1_count = fragment_seq.count("(UniMod:1)")
unimod4_count = fragment_seq.count("(UniMod:4)")
unimod5_count = fragment_seq.count("(UniMod:5)")
unimod7_count = fragment_seq.count("(UniMod:7)")
unimod21_count = fragment_seq.count("(UniMod:21)")
unimod26_count = fragment_seq.count("(UniMod:26)")
unimod27_count = fragment_seq.count("(UniMod:27)")
unimod28_count = fragment_seq.count("(UniMod:28)")
unimod35_count = fragment_seq.count("(UniMod:35)")
unimod259_count = fragment_seq.count("(UniMod:259)")
unimod267_count = fragment_seq.count("(UniMod:267)")
fragment_mass = sum([FRAGMENT_MASS_DICT[aa] for aa in pure_fragment_seq])
if ion_type.startswith("y"):
fragment_mass += H2O_mass
fragment_mass += (unimod1_count * unimod1_mass +
unimod4_count * unimod4_mass +
unimod5_count * unimod5_mass +
unimod7_count * unimod7_mass +
unimod35_count * unimod35_mass +
unimod28_count * unimod28_mass +
unimod21_count * unimod21_mass +
unimod259_count * unimod259_mass +
unimod267_count * unimod267_mass +
unimod26_count * unimod26_mass +
unimod27_count * unimod27_mass)
return (fragment_mass + (charge * proton_mass)) / charge
def calc_all_fragment_mzs(full_seq, precursor_charge,
fragment_mz_limit = (99, 1801), b_start = 1, y_start = 2,
return_annotations = False):
fragment_mzs = []
fragment_charges = []
fragment_series = []
pure_peptide_seq = re.sub(r"\(UniMod:\d+\)", "", full_seq)
peptide_length = len(pure_peptide_seq)
for i in range(b_start, peptide_length + 1):
if i >= y_start:
fragment_mzs.append(calc_fragment_mz(full_seq, pure_peptide_seq, 1, "y%d" % i))
fragment_charges.append(1)
fragment_series.append("y%d" % i)
fragment_mzs.append(calc_fragment_mz(full_seq, pure_peptide_seq, 1, "b%d" % i))
fragment_charges.append(1)
fragment_series.append("b%d" % i)
if precursor_charge > 2:
if i >= y_start:
fragment_mzs.append(calc_fragment_mz(full_seq, pure_peptide_seq, 2, "y%d" % i))
fragment_charges.append(2)
fragment_series.append("y%d" % i)
fragment_mzs.append(calc_fragment_mz(full_seq, pure_peptide_seq, 2, "b%d" % i))
fragment_charges.append(2)
fragment_series.append("b%d" % i)
fragment_mzs = np.array(fragment_mzs)
index = (fragment_mzs >= fragment_mz_limit[0]) & (fragment_mzs <= fragment_mz_limit[1])
fragment_mzs = fragment_mzs[index]
fragment_charges = np.array(fragment_charges)[index]
fragment_series = np.array(fragment_series)[index]
if return_annotations:
return np.array(fragment_mzs), np.array(fragment_charges), np.array(fragment_series)
return np.array(fragment_mzs)