Skip to content

Commit

Permalink
Ignore charge in both input file and output file.
Browse files Browse the repository at this point in the history
  • Loading branch information
YuanyueLi committed Mar 24, 2024
1 parent 1c56c1f commit aaf6b94
Show file tree
Hide file tree
Showing 3 changed files with 79 additions and 79 deletions.
150 changes: 75 additions & 75 deletions backend/entropy_search.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
#!/usr/bin/env python3
from pathlib import Path
import copy
import hashlib
import json
import numpy as np
# import base64
import pickle
import hashlib
from pathlib import Path

import numpy as np
from ms_entropy import FlashEntropySearch, read_one_spectrum, standardize_spectrum
# import multiprocessing as mp
import copy

__VERSION__ = "2.0.0"


def worker_search_one_spectrum(function, parameters_global, queue_input, queue_output):
Expand Down Expand Up @@ -41,7 +41,7 @@ def __init__(self, ms2_tolerance_in_da) -> None:
"ready": False, # True means ready to display results, if error found, ready will be False.
"running": False, # True means searching is running, False means searching is not running.
"error": False, # True means error found
"message": "" # Message to display
"message": "", # Message to display
}

def search_one_spectrum(self, spec, top_n, ms1_tolerance_in_da, ms2_tolerance_in_da):
Expand All @@ -53,20 +53,18 @@ def search_one_spectrum(self, spec, top_n, ms1_tolerance_in_da, ms2_tolerance_in
"charge": spec["charge"],
"rt": spec["rt"],
}
if spec["precursor_mz"] <= 0 or \
len(spec["peaks"]) == 0 or \
spec["charge"] not in self.spectral_library:
if spec["precursor_mz"] <= 0 or len(spec["peaks"]) == 0 or spec["charge"] not in self.spectral_library:
for search_type in ["identity_search", "open_search", "neutral_loss_search", "hybrid_search"]:
result[search_type] = []
result[search_type+"-score"] = 0
result[search_type + "-score"] = 0
else:
entropy_search = self.spectral_library[spec["charge"]]
entropy_search_result = entropy_search.search(
precursor_mz=spec["precursor_mz"],
peaks=spec["peaks"],
ms1_tolerance_in_da=ms1_tolerance_in_da,
ms2_tolerance_in_da=ms2_tolerance_in_da,
method="all"
method="all",
)

for search_type, score_array in entropy_search_result.items():
Expand Down Expand Up @@ -96,9 +94,9 @@ def search_one_spectrum(self, spec, top_n, ms1_tolerance_in_da, ms2_tolerance_in
result[search_type] = [[spec["scan"], i, score_array[i]] for i in top_n_idx]

if len(top_n_score) > 0:
result[search_type+"-score"] = np.max(top_n_score)
result[search_type + "-score"] = np.max(top_n_score)
else:
result[search_type+"-score"] = 0
result[search_type + "-score"] = 0
return result

def get_one_library_spectrum(self, charge, library_idx):
Expand Down Expand Up @@ -263,21 +261,17 @@ def search_file_single_core(self, file_query, top_n, ms1_tolerance_in_da, ms2_to
# Search spectra
file_query = Path(file_query)
all_results = []
self.status = {
"ready": False,
"running": True,
"error": False,
"message": f"Start reading {file_query.name}..."
}
self.status = {"ready": False, "running": True, "error": False, "message": f"Start reading {file_query.name}..."}
for spec_num, spec in enumerate(read_one_spectrum(file_query)):
try:
if spec_num % 100 == 0:
self.status["message"] = f"Reading {file_query.name}... {spec_num} spectra read"
if spec.pop("_ms_level", 2) != 2:
continue
if charge is not None:
spec["charge"] = charge
spec['peaks'] = np.array(spec['peaks']).astype(np.float32)
spec["charge"] = 0
# if charge is not None:
# spec["charge"] = charge
spec["peaks"] = np.array(spec["peaks"]).astype(np.float32)
self.all_spectra.append(spec)
self.scan_number_to_index[spec["_scan_number"]] = len(self.all_spectra) - 1

Expand Down Expand Up @@ -322,10 +316,7 @@ def load_spectral_library(self, file_library) -> None:

def _build_spectral_library(self, file_library):
# Calculate hash of file_library
index_hash = hashlib.md5(json.dumps({
"ms2_tolerance_in_da": self.ms2_tolerance_in_da,
"version": "1.2.0"
}).encode()).hexdigest()[:6]
index_hash = hashlib.md5(json.dumps({"ms2_tolerance_in_da": self.ms2_tolerance_in_da, "version": __VERSION__}).encode()).hexdigest()[:6]

# Check if the library is already indexed
if file_library.suffix == ".esi":
Expand All @@ -347,27 +338,28 @@ def _build_spectral_library(self, file_library):
except:
pass

spectral_library = {}
spectral_library = {0: []}
spectral_number = 0
# Read spectra
for spec in read_one_spectrum(file_library):
try:
spec['peaks'] = np.array(spec['peaks']).astype(np.float32)
# spec_raw = spec
spec["peaks"] = np.array(spec["peaks"]).astype(np.float32)
spec = _parse_spectrum(spec)

if spec["precursor_mz"] <= 0 or len(spec["peaks"]) == 0 or spec.get("_ms_level", 2) != 2:
continue

charge = spec["charge"]
if charge not in spectral_library:
spectral_library[charge] = []
charge = 0
# if charge not in spectral_library:
# spectral_library[charge] = []

all_spec_keys = list(spec.keys())
all_spec_keys.remove("peaks")
all_spec_keys.remove("precursor_mz")
all_spec_keys.remove("_ms_level")
for k in all_spec_keys:
spec["library-"+k] = spec.pop(k)
spec["library-" + k] = spec.pop(k)
spec["library-file_name"] = library_name

spectral_library[charge].append(spec)
Expand All @@ -382,7 +374,7 @@ def _build_spectral_library(self, file_library):
self.status["message"] = f"Building index for {library_name}, this may take up to 10 minutes depending on the size of the library..."
for charge, spectra in spectral_library.items():
entropy_search = FlashEntropySearch(max_ms2_tolerance_in_da=self.ms2_tolerance_in_da)
all_library_spectra = entropy_search.build_index(all_spectra_list=spectra, min_ms2_difference_in_da=2*self.ms2_tolerance_in_da)
all_library_spectra = entropy_search.build_index(all_spectra_list=spectra, min_ms2_difference_in_da=2 * self.ms2_tolerance_in_da)
# Generate abstract spectra information
all_library_spectra_abstract = []
for spec in all_library_spectra:
Expand All @@ -391,7 +383,7 @@ def _build_spectral_library(self, file_library):
"precursor_mz": spec["precursor_mz"],
"library-name": spec["library-name"],
"library-precursor_type": spec["library-precursor_type"],
"library-idx": len(all_library_spectra_abstract)
"library-idx": len(all_library_spectra_abstract),
}
all_library_spectra_abstract.append(spec_abstract)
entropy_search.abstract_library_spectra = all_library_spectra_abstract
Expand Down Expand Up @@ -419,69 +411,77 @@ def convert_float(x):

def convert_precursor_mz(x):
try:
return convert_float(x)
f = float(x)
if np.isnan(f):
return -1
else:
return f
except:
try:
return float(x.split()[0])
except:
return -1
spec = standardize_spectrum(spec, standardize_info={
"id": [["db#"], "", str],
"scan": [["_scan_number"], -1, int],
"name": [["title"], "", str],
"rt": [["retentiontime"], -1, convert_float],
"precursor_mz": [["precursormz", "pepmass"], -1, convert_precursor_mz],
"ion_mode": [["ionmode"], "", str],
"precursor_type": [["precursortype"], "", str],
"charge": [[], "", str],
"name": [["title"], "", str],
})

charge = 0
if spec["charge"]:
if spec["charge"][-1] in {"+", "-"}:
c = spec["charge"][-1]
try:
charge = int(spec["charge"][:-1])
if c == "-":
charge = -charge
except:
charge = 0
else:
try:
charge = int(spec["charge"])
except:
charge = 0
spec = standardize_spectrum(
spec,
standardize_info={
"id": [["db#"], "", str],
"scan": [["_scan_number"], -1, int],
"name": [["title"], "", str],
"rt": [["retentiontime"], -1, convert_float],
"precursor_mz": [["precursormz", "pepmass"], -1, convert_precursor_mz],
"ion_mode": [["ionmode"], "", str],
"precursor_type": [["precursortype"], "", str],
"charge": [[], "", str],
"name": [["title"], "", str],
},
)

# Infer precursor charge from ion mode
if (charge == 0) and (ion_mode := spec["ion_mode"]):
charge = {"n": -1, "p": 1}.get(ion_mode[0].lower(), "")

# Guess precursor charge from adduct
if (charge == 0) and (len(spec["precursor_type"]) > 0):
charge = {"+": 1, "-": -1}.get(spec["precursor_type"][-1], "")
charge = 0
# if spec["charge"]:
# if spec["charge"][-1] in {"+", "-"}:
# c = spec["charge"][-1]
# try:
# charge = int(spec["charge"][:-1])
# if c == "-":
# charge = -charge
# except:
# charge = 0
# else:
# try:
# charge = int(spec["charge"])
# except:
# charge = 0

# # Infer precursor charge from ion mode
# if (charge == 0) and (ion_mode := spec["ion_mode"]):
# charge = {"n": -1, "p": 1}.get(ion_mode[0].lower(), "")

# # Guess precursor charge from adduct
# if (charge == 0) and (len(spec["precursor_type"]) > 0):
# charge = {"+": 1, "-": -1}.get(spec["precursor_type"][-1], "")

spec["charge"] = charge
return spec


if __name__ == '__main__':
if __name__ == "__main__":
para = {
"ms1_tolerance_in_da": 0.01,
"ms2_tolerance_in_da": 0.02,
"top_n": 10,
"cores": 1,

"file_query": r"/p/github/EntropySearch/test/test.mzml",
"file_library": r"/p/github/EntropySearch/test/MoNA-export-All_Spectra.msp",
"file_query": r"/p/github/EntropySearch/test/test_2.mzML",
# "file_library": r"/p/github/EntropySearch/test/MoNA-export-All_Spectra.msp",
# "file_query": r"/p/FastEntropySearch/gui/test/input/test.mgf",
# "file_library": r"/p/FastEntropySearch/gui/test/input/test.mgf",
"file_library": r"/p/FastEntropySearch/gui/test/input/test.mgf",
"file_output": r"/p/github/EntropySearch/test/result.csv",
}
entropy_search = EntropySearch(para["ms2_tolerance_in_da"])
entropy_search.load_spectral_library(Path(para["file_library"]))
all_results = entropy_search.search_file_single_core(
Path(para["file_query"]), para["top_n"], para["ms1_tolerance_in_da"], para["ms2_tolerance_in_da"], cores=para["cores"])
Path(para["file_query"]), para["top_n"], para["ms1_tolerance_in_da"], para["ms2_tolerance_in_da"], cores=para["cores"]
)
a = 1
# test = entropy_search.get_one_spectrum_result(5, para["top_n"], para["ms1_tolerance_in_da"], para["ms2_tolerance_in_da"])
# print(test)
Expand Down
2 changes: 1 addition & 1 deletion frontend/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "entropy_search",
"version": "1.2.2",
"version": "2.0.0",
"description": "GUI for Entropy Search",
"author": {
"name": "Yuanyue Li"
Expand Down
6 changes: 3 additions & 3 deletions frontend/src/Pages/Input/InputParameters.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ const InputParameters = (showNext) => {
file_query: "",
file_library: "",
path_output: "/p/FastEntropySearch/gui/test/output/",
charge: null,
charge: 0,
ms1_tolerance_in_da: 0.01,
ms2_tolerance_in_da: 0.02,
top_n: 100,
Expand Down Expand Up @@ -92,11 +92,11 @@ const InputParameters = (showNext) => {
{/* rules={[{required: true}]}>*/}
{/* <Input/>*/}
{/*</Form.Item>*/}
<Tooltip title={"1 means all input spectra have charge +1, -1 means all input spectra have charge -1, 0 means auto-detection charge from input file."}>
{/* <Tooltip title={"1 means all input spectra have charge +1, -1 means all input spectra have charge -1, 0 means auto-detection charge from input file."}>
<Form.Item label={"Charge"} name={"charge"} {...formStyle2} rules={[{required: true}]}>
<InputNumber min={-10} step={1}/>
</Form.Item>
</Tooltip>
</Tooltip> */}
<Form.Item label={"Report top n hits"} name={"top_n"}
{...formStyle2}>
<InputNumber min={1} step={10}/>
Expand Down

0 comments on commit aaf6b94

Please sign in to comment.