diff --git a/backend/entropy_search.py b/backend/entropy_search.py index 79f45e1..cef94a4 100644 --- a/backend/entropy_search.py +++ b/backend/entropy_search.py @@ -1,14 +1,14 @@ #!/usr/bin/env python3 -from pathlib import Path +import copy +import hashlib import json -import numpy as np -# import base64 import pickle -import hashlib +from pathlib import Path +import numpy as np from ms_entropy import FlashEntropySearch, read_one_spectrum, standardize_spectrum -# import multiprocessing as mp -import copy + +__VERSION__ = "2.0.0" def worker_search_one_spectrum(function, parameters_global, queue_input, queue_output): @@ -41,7 +41,7 @@ def __init__(self, ms2_tolerance_in_da) -> None: "ready": False, # True means ready to display results, if error found, ready will be False. "running": False, # True means searching is running, False means searching is not running. "error": False, # True means error found - "message": "" # Message to display + "message": "", # Message to display } def search_one_spectrum(self, spec, top_n, ms1_tolerance_in_da, ms2_tolerance_in_da): @@ -53,12 +53,10 @@ def search_one_spectrum(self, spec, top_n, ms1_tolerance_in_da, ms2_tolerance_in "charge": spec["charge"], "rt": spec["rt"], } - if spec["precursor_mz"] <= 0 or \ - len(spec["peaks"]) == 0 or \ - spec["charge"] not in self.spectral_library: + if spec["precursor_mz"] <= 0 or len(spec["peaks"]) == 0 or spec["charge"] not in self.spectral_library: for search_type in ["identity_search", "open_search", "neutral_loss_search", "hybrid_search"]: result[search_type] = [] - result[search_type+"-score"] = 0 + result[search_type + "-score"] = 0 else: entropy_search = self.spectral_library[spec["charge"]] entropy_search_result = entropy_search.search( @@ -66,7 +64,7 @@ def search_one_spectrum(self, spec, top_n, ms1_tolerance_in_da, ms2_tolerance_in peaks=spec["peaks"], ms1_tolerance_in_da=ms1_tolerance_in_da, ms2_tolerance_in_da=ms2_tolerance_in_da, - method="all" + method="all", ) for search_type, score_array in entropy_search_result.items(): @@ -96,9 +94,9 @@ def search_one_spectrum(self, spec, top_n, ms1_tolerance_in_da, ms2_tolerance_in result[search_type] = [[spec["scan"], i, score_array[i]] for i in top_n_idx] if len(top_n_score) > 0: - result[search_type+"-score"] = np.max(top_n_score) + result[search_type + "-score"] = np.max(top_n_score) else: - result[search_type+"-score"] = 0 + result[search_type + "-score"] = 0 return result def get_one_library_spectrum(self, charge, library_idx): @@ -263,21 +261,17 @@ def search_file_single_core(self, file_query, top_n, ms1_tolerance_in_da, ms2_to # Search spectra file_query = Path(file_query) all_results = [] - self.status = { - "ready": False, - "running": True, - "error": False, - "message": f"Start reading {file_query.name}..." - } + self.status = {"ready": False, "running": True, "error": False, "message": f"Start reading {file_query.name}..."} for spec_num, spec in enumerate(read_one_spectrum(file_query)): try: if spec_num % 100 == 0: self.status["message"] = f"Reading {file_query.name}... {spec_num} spectra read" if spec.pop("_ms_level", 2) != 2: continue - if charge is not None: - spec["charge"] = charge - spec['peaks'] = np.array(spec['peaks']).astype(np.float32) + spec["charge"] = 0 + # if charge is not None: + # spec["charge"] = charge + spec["peaks"] = np.array(spec["peaks"]).astype(np.float32) self.all_spectra.append(spec) self.scan_number_to_index[spec["_scan_number"]] = len(self.all_spectra) - 1 @@ -322,10 +316,7 @@ def load_spectral_library(self, file_library) -> None: def _build_spectral_library(self, file_library): # Calculate hash of file_library - index_hash = hashlib.md5(json.dumps({ - "ms2_tolerance_in_da": self.ms2_tolerance_in_da, - "version": "1.2.0" - }).encode()).hexdigest()[:6] + index_hash = hashlib.md5(json.dumps({"ms2_tolerance_in_da": self.ms2_tolerance_in_da, "version": __VERSION__}).encode()).hexdigest()[:6] # Check if the library is already indexed if file_library.suffix == ".esi": @@ -347,27 +338,28 @@ def _build_spectral_library(self, file_library): except: pass - spectral_library = {} + spectral_library = {0: []} spectral_number = 0 # Read spectra for spec in read_one_spectrum(file_library): try: - spec['peaks'] = np.array(spec['peaks']).astype(np.float32) + # spec_raw = spec + spec["peaks"] = np.array(spec["peaks"]).astype(np.float32) spec = _parse_spectrum(spec) if spec["precursor_mz"] <= 0 or len(spec["peaks"]) == 0 or spec.get("_ms_level", 2) != 2: continue - charge = spec["charge"] - if charge not in spectral_library: - spectral_library[charge] = [] + charge = 0 + # if charge not in spectral_library: + # spectral_library[charge] = [] all_spec_keys = list(spec.keys()) all_spec_keys.remove("peaks") all_spec_keys.remove("precursor_mz") all_spec_keys.remove("_ms_level") for k in all_spec_keys: - spec["library-"+k] = spec.pop(k) + spec["library-" + k] = spec.pop(k) spec["library-file_name"] = library_name spectral_library[charge].append(spec) @@ -382,7 +374,7 @@ def _build_spectral_library(self, file_library): self.status["message"] = f"Building index for {library_name}, this may take up to 10 minutes depending on the size of the library..." for charge, spectra in spectral_library.items(): entropy_search = FlashEntropySearch(max_ms2_tolerance_in_da=self.ms2_tolerance_in_da) - all_library_spectra = entropy_search.build_index(all_spectra_list=spectra, min_ms2_difference_in_da=2*self.ms2_tolerance_in_da) + all_library_spectra = entropy_search.build_index(all_spectra_list=spectra, min_ms2_difference_in_da=2 * self.ms2_tolerance_in_da) # Generate abstract spectra information all_library_spectra_abstract = [] for spec in all_library_spectra: @@ -391,7 +383,7 @@ def _build_spectral_library(self, file_library): "precursor_mz": spec["precursor_mz"], "library-name": spec["library-name"], "library-precursor_type": spec["library-precursor_type"], - "library-idx": len(all_library_spectra_abstract) + "library-idx": len(all_library_spectra_abstract), } all_library_spectra_abstract.append(spec_abstract) entropy_search.abstract_library_spectra = all_library_spectra_abstract @@ -419,69 +411,77 @@ def convert_float(x): def convert_precursor_mz(x): try: - return convert_float(x) + f = float(x) + if np.isnan(f): + return -1 + else: + return f except: try: return float(x.split()[0]) except: return -1 - spec = standardize_spectrum(spec, standardize_info={ - "id": [["db#"], "", str], - "scan": [["_scan_number"], -1, int], - "name": [["title"], "", str], - "rt": [["retentiontime"], -1, convert_float], - "precursor_mz": [["precursormz", "pepmass"], -1, convert_precursor_mz], - "ion_mode": [["ionmode"], "", str], - "precursor_type": [["precursortype"], "", str], - "charge": [[], "", str], - "name": [["title"], "", str], - }) - charge = 0 - if spec["charge"]: - if spec["charge"][-1] in {"+", "-"}: - c = spec["charge"][-1] - try: - charge = int(spec["charge"][:-1]) - if c == "-": - charge = -charge - except: - charge = 0 - else: - try: - charge = int(spec["charge"]) - except: - charge = 0 + spec = standardize_spectrum( + spec, + standardize_info={ + "id": [["db#"], "", str], + "scan": [["_scan_number"], -1, int], + "name": [["title"], "", str], + "rt": [["retentiontime"], -1, convert_float], + "precursor_mz": [["precursormz", "pepmass"], -1, convert_precursor_mz], + "ion_mode": [["ionmode"], "", str], + "precursor_type": [["precursortype"], "", str], + "charge": [[], "", str], + "name": [["title"], "", str], + }, + ) - # Infer precursor charge from ion mode - if (charge == 0) and (ion_mode := spec["ion_mode"]): - charge = {"n": -1, "p": 1}.get(ion_mode[0].lower(), "") - - # Guess precursor charge from adduct - if (charge == 0) and (len(spec["precursor_type"]) > 0): - charge = {"+": 1, "-": -1}.get(spec["precursor_type"][-1], "") + charge = 0 + # if spec["charge"]: + # if spec["charge"][-1] in {"+", "-"}: + # c = spec["charge"][-1] + # try: + # charge = int(spec["charge"][:-1]) + # if c == "-": + # charge = -charge + # except: + # charge = 0 + # else: + # try: + # charge = int(spec["charge"]) + # except: + # charge = 0 + + # # Infer precursor charge from ion mode + # if (charge == 0) and (ion_mode := spec["ion_mode"]): + # charge = {"n": -1, "p": 1}.get(ion_mode[0].lower(), "") + + # # Guess precursor charge from adduct + # if (charge == 0) and (len(spec["precursor_type"]) > 0): + # charge = {"+": 1, "-": -1}.get(spec["precursor_type"][-1], "") spec["charge"] = charge return spec -if __name__ == '__main__': +if __name__ == "__main__": para = { "ms1_tolerance_in_da": 0.01, "ms2_tolerance_in_da": 0.02, "top_n": 10, "cores": 1, - - "file_query": r"/p/github/EntropySearch/test/test.mzml", - "file_library": r"/p/github/EntropySearch/test/MoNA-export-All_Spectra.msp", + "file_query": r"/p/github/EntropySearch/test/test_2.mzML", + # "file_library": r"/p/github/EntropySearch/test/MoNA-export-All_Spectra.msp", # "file_query": r"/p/FastEntropySearch/gui/test/input/test.mgf", - # "file_library": r"/p/FastEntropySearch/gui/test/input/test.mgf", + "file_library": r"/p/FastEntropySearch/gui/test/input/test.mgf", "file_output": r"/p/github/EntropySearch/test/result.csv", } entropy_search = EntropySearch(para["ms2_tolerance_in_da"]) entropy_search.load_spectral_library(Path(para["file_library"])) all_results = entropy_search.search_file_single_core( - Path(para["file_query"]), para["top_n"], para["ms1_tolerance_in_da"], para["ms2_tolerance_in_da"], cores=para["cores"]) + Path(para["file_query"]), para["top_n"], para["ms1_tolerance_in_da"], para["ms2_tolerance_in_da"], cores=para["cores"] + ) a = 1 # test = entropy_search.get_one_spectrum_result(5, para["top_n"], para["ms1_tolerance_in_da"], para["ms2_tolerance_in_da"]) # print(test) diff --git a/frontend/package.json b/frontend/package.json index cff7a26..052cde1 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -1,6 +1,6 @@ { "name": "entropy_search", - "version": "1.2.2", + "version": "2.0.0", "description": "GUI for Entropy Search", "author": { "name": "Yuanyue Li" diff --git a/frontend/src/Pages/Input/InputParameters.jsx b/frontend/src/Pages/Input/InputParameters.jsx index 9ba8133..e39675f 100644 --- a/frontend/src/Pages/Input/InputParameters.jsx +++ b/frontend/src/Pages/Input/InputParameters.jsx @@ -15,7 +15,7 @@ const InputParameters = (showNext) => { file_query: "", file_library: "", path_output: "/p/FastEntropySearch/gui/test/output/", - charge: null, + charge: 0, ms1_tolerance_in_da: 0.01, ms2_tolerance_in_da: 0.02, top_n: 100, @@ -92,11 +92,11 @@ const InputParameters = (showNext) => { {/* rules={[{required: true}]}>*/} {/* */} {/**/} - + {/* - + */}