diff --git a/environment.yml b/environment.yml index 45dea5a1..3f1c337c 100644 --- a/environment.yml +++ b/environment.yml @@ -12,7 +12,7 @@ dependencies: - pyarrow=12.0.1 - tensorflow=2.12.1 - scikit-learn=1.3.2 - - ms2deepscore=0.5.0 + - ms2deepscore=2.0.0 - pandas=2.0.3 - matplotlib=3.7.3 - skl2onnx=1.16.0 diff --git a/ms2query/__init__.py b/ms2query/__init__.py index 46c08ec3..3e38ca16 100644 --- a/ms2query/__init__.py +++ b/ms2query/__init__.py @@ -1,10 +1,10 @@ import os - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # pylint: disable=wrong-import-position import argparse import logging + from .__version__ import __version__ from .ms2library import MS2Library, create_library_object_from_one_dir from .results_table import ResultsTable @@ -12,7 +12,6 @@ run_ms2query_single_file) from .utils import SettingsRunMS2Query - logging.getLogger(__name__).addHandler(logging.NullHandler()) __author__ = "Netherlands eScience Center" diff --git a/ms2query/benchmarking/collect_test_data_results.py b/ms2query/benchmarking/collect_test_data_results.py index 1f12f986..188d76e4 100644 --- a/ms2query/benchmarking/collect_test_data_results.py +++ b/ms2query/benchmarking/collect_test_data_results.py @@ -8,6 +8,7 @@ import sqlite3 import tempfile from typing import List, Tuple, Union + import pandas as pd from matchms import Spectrum from matchms.calculate_scores import calculate_scores @@ -16,6 +17,7 @@ from ms2deepscore.models import SiameseSpectralModel, compute_embedding_array from spec2vec.vector_operations import cosine_similarity_matrix from tqdm import tqdm + from ms2query.create_new_library.calculate_tanimoto_scores import ( calculate_highest_tanimoto_score, calculate_single_tanimoto_score) from ms2query.ms2library import MS2Library diff --git a/ms2query/benchmarking/create_accuracy_vs_recall_plot.py b/ms2query/benchmarking/create_accuracy_vs_recall_plot.py index f7bdc3ec..18e177e4 100644 --- a/ms2query/benchmarking/create_accuracy_vs_recall_plot.py +++ b/ms2query/benchmarking/create_accuracy_vs_recall_plot.py @@ -5,9 +5,11 @@ import os import random from typing import Dict, List, Tuple + import numpy as np from matplotlib import pyplot as plt from tqdm import tqdm + from ms2query.utils import (load_df_from_parquet_file, load_json_file, save_df_as_parquet_file) diff --git a/ms2query/benchmarking/k_fold_cross_validation.py b/ms2query/benchmarking/k_fold_cross_validation.py index 4213dcd3..3f94aeef 100644 --- a/ms2query/benchmarking/k_fold_cross_validation.py +++ b/ms2query/benchmarking/k_fold_cross_validation.py @@ -5,8 +5,10 @@ import os import random from typing import List + from matchms import Spectrum from matchms.exporting.save_as_mgf import save_as_mgf + from ms2query.benchmarking.collect_test_data_results import ( generate_exact_matches_test_results, generate_test_results) from ms2query.clean_and_filter_spectra import \ diff --git a/ms2query/benchmarking/visualize_mass_distribution.py b/ms2query/benchmarking/visualize_mass_distribution.py index a6926b2e..b03af485 100644 --- a/ms2query/benchmarking/visualize_mass_distribution.py +++ b/ms2query/benchmarking/visualize_mass_distribution.py @@ -1,9 +1,11 @@ import os from typing import Dict, List, Tuple + from create_accuracy_vs_recall_plot import ( calculate_means_and_standard_deviation, load_results_from_folder) from matchms import Spectrum from matplotlib import pyplot as plt + from ms2query.utils import (load_df_from_parquet_file, load_matchms_spectrum_objects_from_file) diff --git a/ms2query/benchmarking/visualize_tanimoto_score_distribution.py b/ms2query/benchmarking/visualize_tanimoto_score_distribution.py index 5e06934c..b8979366 100644 --- a/ms2query/benchmarking/visualize_tanimoto_score_distribution.py +++ b/ms2query/benchmarking/visualize_tanimoto_score_distribution.py @@ -1,4 +1,5 @@ from typing import Dict, List, Tuple + import numpy as np from matplotlib import pyplot as plt diff --git a/ms2query/clean_and_filter_spectra.py b/ms2query/clean_and_filter_spectra.py index 2adfeb96..c92baef6 100644 --- a/ms2query/clean_and_filter_spectra.py +++ b/ms2query/clean_and_filter_spectra.py @@ -1,4 +1,5 @@ from typing import List, Optional, Tuple + import matchms.filtering as msfilters from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import ( is_valid_inchi, is_valid_inchikey, is_valid_smiles) diff --git a/ms2query/create_new_library/add_classifire_classifications.py b/ms2query/create_new_library/add_classifire_classifications.py index e28f3ef8..26ab3a6a 100644 --- a/ms2query/create_new_library/add_classifire_classifications.py +++ b/ms2query/create_new_library/add_classifire_classifications.py @@ -2,6 +2,7 @@ import urllib from http.client import InvalidURL from typing import List, Optional + import pandas as pd from tqdm import tqdm diff --git a/ms2query/create_new_library/calculate_tanimoto_scores.py b/ms2query/create_new_library/calculate_tanimoto_scores.py index 6b417a55..6a2ee943 100644 --- a/ms2query/create_new_library/calculate_tanimoto_scores.py +++ b/ms2query/create_new_library/calculate_tanimoto_scores.py @@ -4,6 +4,7 @@ """ from collections import Counter from typing import List + import numpy as np import pandas as pd from matchms import Spectrum diff --git a/ms2query/create_new_library/create_sqlite_database.py b/ms2query/create_new_library/create_sqlite_database.py index 722a6b3e..5308e61c 100644 --- a/ms2query/create_new_library/create_sqlite_database.py +++ b/ms2query/create_new_library/create_sqlite_database.py @@ -5,9 +5,11 @@ import sqlite3 from typing import Dict, List + import pandas as pd from matchms import Spectrum from tqdm import tqdm + from ms2query.create_new_library.calculate_tanimoto_scores import \ calculate_highest_tanimoto_score from ms2query.utils import return_non_existing_file_name diff --git a/ms2query/create_new_library/library_files_creator.py b/ms2query/create_new_library/library_files_creator.py index 791d6ce5..aae12bed 100644 --- a/ms2query/create_new_library/library_files_creator.py +++ b/ms2query/create_new_library/library_files_creator.py @@ -6,6 +6,7 @@ import os from pathlib import Path from typing import List, Union + import matchms.filtering as msfilters import numpy as np import pandas as pd @@ -15,6 +16,7 @@ from ms2deepscore.models.SiameseSpectralModel import compute_embedding_array from spec2vec.vector_operations import calc_vector from tqdm import tqdm + from ms2query.clean_and_filter_spectra import create_spectrum_documents from ms2query.create_new_library.add_classifire_classifications import ( convert_to_dataframe, select_compound_classes) diff --git a/ms2query/create_new_library/split_data_for_training.py b/ms2query/create_new_library/split_data_for_training.py index 8acb1e1a..e6d4044f 100644 --- a/ms2query/create_new_library/split_data_for_training.py +++ b/ms2query/create_new_library/split_data_for_training.py @@ -5,6 +5,7 @@ import random from typing import Dict, List + from matchms import Spectrum diff --git a/ms2query/create_new_library/train_models.py b/ms2query/create_new_library/train_models.py index 3009a740..7988079c 100644 --- a/ms2query/create_new_library/train_models.py +++ b/ms2query/create_new_library/train_models.py @@ -6,16 +6,18 @@ import os from ms2deepscore import SettingsMS2Deepscore +from ms2deepscore.train_new_model.train_ms2deepscore import train_ms2ds_model from spec2vec.model_building import train_new_word2vec_model + from ms2query.clean_and_filter_spectra import ( clean_normalize_and_split_annotated_spectra, create_spectrum_documents) from ms2query.create_new_library.library_files_creator import \ LibraryFilesCreator -from ms2query.create_new_library.split_data_for_training import split_spectra_on_inchikeys +from ms2query.create_new_library.split_data_for_training import \ + split_spectra_on_inchikeys from ms2query.create_new_library.train_ms2query_model import ( convert_to_onnx_model, train_ms2query_model) from ms2query.utils import load_matchms_spectrum_objects_from_file -from ms2deepscore.train_new_model.train_ms2deepscore import train_ms2ds_model class SettingsTrainingModels: diff --git a/ms2query/create_new_library/train_ms2query_model.py b/ms2query/create_new_library/train_ms2query_model.py index 53f6e99d..d591cb66 100644 --- a/ms2query/create_new_library/train_ms2query_model.py +++ b/ms2query/create_new_library/train_ms2query_model.py @@ -5,6 +5,7 @@ import os from typing import List + import pandas as pd from matchms import Spectrum from onnxconverter_common import FloatTensorType @@ -12,6 +13,7 @@ from sklearn.ensemble import RandomForestRegressor from sklearn.metrics import mean_squared_error from tqdm import tqdm + from ms2query import MS2Library from ms2query.create_new_library.calculate_tanimoto_scores import \ calculate_tanimoto_scores_from_smiles diff --git a/ms2query/old_query_from_sqlite_functions.py b/ms2query/old_query_from_sqlite_functions.py index c16268f8..3547e1ec 100644 --- a/ms2query/old_query_from_sqlite_functions.py +++ b/ms2query/old_query_from_sqlite_functions.py @@ -3,10 +3,12 @@ import os import sqlite3 from typing import Dict, List + import numpy as np import pandas as pd from matchms.Spectrum import Spectrum from tqdm import tqdm + from ms2query.utils import load_pickled_file diff --git a/ms2query/query_from_sqlite_database.py b/ms2query/query_from_sqlite_database.py index 83a6773d..f36e0726 100644 --- a/ms2query/query_from_sqlite_database.py +++ b/ms2query/query_from_sqlite_database.py @@ -6,7 +6,9 @@ import os.path import sqlite3 from typing import Dict, List, Tuple + import pandas as pd + from ms2query.utils import column_names_for_output diff --git a/ms2query/results_table.py b/ms2query/results_table.py index 88e52d10..fc4f9352 100644 --- a/ms2query/results_table.py +++ b/ms2query/results_table.py @@ -1,7 +1,9 @@ from typing import Tuple, Union + import numpy as np import pandas as pd from matchms.Spectrum import Spectrum + from ms2query.query_from_sqlite_database import SqliteLibrary from ms2query.utils import column_names_for_output diff --git a/ms2query/run_ms2query.py b/ms2query/run_ms2query.py index fbe26a04..7e7cad44 100644 --- a/ms2query/run_ms2query.py +++ b/ms2query/run_ms2query.py @@ -2,6 +2,7 @@ import os from typing import Union from urllib.request import urlopen, urlretrieve + from ms2query.ms2library import MS2Library from ms2query.utils import (SettingsRunMS2Query, load_matchms_spectrum_objects_from_file, diff --git a/ms2query/utils.py b/ms2query/utils.py index 895d14fb..2f8fe2a8 100644 --- a/ms2query/utils.py +++ b/ms2query/utils.py @@ -1,6 +1,7 @@ import json import os from typing import List, Optional, Tuple, Union + import numpy as np import pandas as pd from matchms import importing diff --git a/notebooks/GNPS_15_12_2021/benchmarking/benchmark_speed_ms2query.py b/notebooks/GNPS_15_12_2021/benchmarking/benchmark_speed_ms2query.py index 0bb2ef8f..992d2f66 100644 --- a/notebooks/GNPS_15_12_2021/benchmarking/benchmark_speed_ms2query.py +++ b/notebooks/GNPS_15_12_2021/benchmarking/benchmark_speed_ms2query.py @@ -6,10 +6,10 @@ import os import time + from ms2query.ms2library import MS2Library from ms2query.run_ms2query import run_complete_folder - start_time = time.time() path_root = os.path.dirname(os.getcwd()) diff --git a/notebooks/GNPS_15_12_2021/data_processing_and_training_models/5_train_ms2deepscore_model.py b/notebooks/GNPS_15_12_2021/data_processing_and_training_models/5_train_ms2deepscore_model.py index 09fa5d86..0b83e80e 100644 --- a/notebooks/GNPS_15_12_2021/data_processing_and_training_models/5_train_ms2deepscore_model.py +++ b/notebooks/GNPS_15_12_2021/data_processing_and_training_models/5_train_ms2deepscore_model.py @@ -1,5 +1,6 @@ import os import pickle + import numpy as np import tensorflow as tf from ms2deepscore import SpectrumBinner @@ -9,7 +10,6 @@ EarlyStopping, ModelCheckpoint) from tensorflow.keras.optimizers import Adam # pylint: disable=import-error - path_data = "C:\\HSD\\OneDrive - Hochschule Düsseldorf\\Data\\ms2query" outfile = os.path.join(path_data, "GNPS_15_12_2021_pos_train.pickle") diff --git a/notebooks/GNPS_15_12_2021/data_processing_and_training_models/6_train_spec2vec_model.py b/notebooks/GNPS_15_12_2021/data_processing_and_training_models/6_train_spec2vec_model.py index 454e5395..085a606e 100644 --- a/notebooks/GNPS_15_12_2021/data_processing_and_training_models/6_train_spec2vec_model.py +++ b/notebooks/GNPS_15_12_2021/data_processing_and_training_models/6_train_spec2vec_model.py @@ -1,10 +1,12 @@ import os + from matchms.filtering import (add_losses, add_precursor_mz, default_filters, normalize_intensities, reduce_to_number_of_peaks, require_minimum_number_of_peaks, select_by_mz) from spec2vec import SpectrumDocument from spec2vec.model_building import train_new_word2vec_model + from ms2query.utils import load_pickled_file diff --git a/notebooks/GNPS_15_12_2021/data_processing_and_training_models/negative_mode/5_neg_train_ms2deepscore_model.py b/notebooks/GNPS_15_12_2021/data_processing_and_training_models/negative_mode/5_neg_train_ms2deepscore_model.py index b65e0077..9d17052e 100644 --- a/notebooks/GNPS_15_12_2021/data_processing_and_training_models/negative_mode/5_neg_train_ms2deepscore_model.py +++ b/notebooks/GNPS_15_12_2021/data_processing_and_training_models/negative_mode/5_neg_train_ms2deepscore_model.py @@ -1,5 +1,6 @@ import os import pickle + import numpy as np import tensorflow as tf from ms2deepscore import SpectrumBinner @@ -9,7 +10,6 @@ EarlyStopping, ModelCheckpoint) from tensorflow.keras.optimizers import Adam # pylint: disable=import-error - path_root = os.path.dirname(os.getcwd()) path_data = os.path.join(path_root, "../../../data/libraries_and_models/gnps_15_12_2021/in_between_files") diff --git a/notebooks/GNPS_15_12_2021/data_processing_and_training_models/negative_mode/6_neg_train_spec2vec_model.py b/notebooks/GNPS_15_12_2021/data_processing_and_training_models/negative_mode/6_neg_train_spec2vec_model.py index 28c0225a..979179c2 100644 --- a/notebooks/GNPS_15_12_2021/data_processing_and_training_models/negative_mode/6_neg_train_spec2vec_model.py +++ b/notebooks/GNPS_15_12_2021/data_processing_and_training_models/negative_mode/6_neg_train_spec2vec_model.py @@ -1,10 +1,12 @@ import os + from matchms.filtering import (add_losses, add_precursor_mz, default_filters, normalize_intensities, reduce_to_number_of_peaks, require_minimum_number_of_peaks, select_by_mz) from spec2vec import SpectrumDocument from spec2vec.model_building import train_new_word2vec_model + from ms2query.utils import load_pickled_file diff --git a/setup.py b/setup.py index 0d2c4f3a..3b8fc23f 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ #!/usr/bin/env python import os -from setuptools import find_packages, setup +from setuptools import find_packages, setup here = os.path.abspath(os.path.dirname(__file__)) diff --git a/tests/conftest.py b/tests/conftest.py index 8d28c2ba..8fd716c5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,9 +1,11 @@ import os + import numpy as np import pandas as pd import pytest from matchms import Spectrum from matchms.importing.load_from_mgf import load_from_mgf + from ms2query.ms2library import MS2Library from ms2query.query_from_sqlite_database import SqliteLibrary from ms2query.utils import load_df_from_parquet_file diff --git a/tests/test_add_classifier_annotations.py b/tests/test_add_classifier_annotations.py index f6852cdf..c42fafac 100644 --- a/tests/test_add_classifier_annotations.py +++ b/tests/test_add_classifier_annotations.py @@ -1,6 +1,7 @@ import numpy as np import pytest from matchms import Spectrum + from ms2query.create_new_library.add_classifire_classifications import \ select_compound_classes diff --git a/tests/test_calculate_tanimoto_scores.py b/tests/test_calculate_tanimoto_scores.py index 2a98f7d9..7c658632 100644 --- a/tests/test_calculate_tanimoto_scores.py +++ b/tests/test_calculate_tanimoto_scores.py @@ -1,4 +1,5 @@ import pandas as pd + from ms2query.clean_and_filter_spectra import \ normalize_and_filter_peaks_multiple_spectra from ms2query.create_new_library.calculate_tanimoto_scores import ( diff --git a/tests/test_clean_and_filter_spectra.py b/tests/test_clean_and_filter_spectra.py index 02e006e8..5596c537 100644 --- a/tests/test_clean_and_filter_spectra.py +++ b/tests/test_clean_and_filter_spectra.py @@ -1,6 +1,7 @@ import numpy as np from matchms import Spectrum from spec2vec import SpectrumDocument + from ms2query.clean_and_filter_spectra import ( clean_normalize_and_split_annotated_spectra, create_spectrum_documents, harmonize_annotation, normalize_and_filter_peaks, diff --git a/tests/test_collect_test_data_results.py b/tests/test_collect_test_data_results.py index d0e64011..807949d0 100644 --- a/tests/test_collect_test_data_results.py +++ b/tests/test_collect_test_data_results.py @@ -1,8 +1,10 @@ import os + import numpy as np import pandas as pd import pytest from matchms import Spectrum + from ms2query.benchmarking.collect_test_data_results import ( create_optimal_results, create_random_results, generate_test_results, generate_test_results_ms2query, get_all_ms2ds_scores, diff --git a/tests/test_library_files_creator.py b/tests/test_library_files_creator.py index e839f471..bdd668a5 100644 --- a/tests/test_library_files_creator.py +++ b/tests/test_library_files_creator.py @@ -1,6 +1,8 @@ import os + import pandas as pd import pytest + from ms2query.clean_and_filter_spectra import normalize_and_filter_peaks from ms2query.create_new_library.library_files_creator import \ LibraryFilesCreator diff --git a/tests/test_ms2library.py b/tests/test_ms2library.py index 5e4a89c8..8a74a943 100644 --- a/tests/test_ms2library.py +++ b/tests/test_ms2library.py @@ -1,10 +1,12 @@ import math import os + import numpy as np import pandas as pd +from tests.test_utils import check_expected_headers + from ms2query.ms2library import MS2Library, create_library_object_from_one_dir from ms2query.utils import SettingsRunMS2Query, column_names_for_output -from tests.test_utils import check_expected_headers def test_get_all_ms2ds_scores(ms2library, test_spectra): diff --git a/tests/test_results_table.py b/tests/test_results_table.py index ed155d95..7bcd0d3a 100644 --- a/tests/test_results_table.py +++ b/tests/test_results_table.py @@ -2,6 +2,7 @@ import pandas as pd import pytest from matchms import Spectrum + from ms2query import ResultsTable diff --git a/tests/test_run_ms2query.py b/tests/test_run_ms2query.py index 715818ea..4b3cd5dc 100644 --- a/tests/test_run_ms2query.py +++ b/tests/test_run_ms2query.py @@ -1,6 +1,10 @@ import os + import pandas as pd from matchms.exporting.save_as_json import save_as_json +from tests.test_ms2library import MS2Library +from tests.test_utils import check_expected_headers + from ms2query.ms2library import (create_library_object_from_one_dir, select_files_for_ms2query) from ms2query.run_ms2query import (available_zenodo_files, @@ -8,8 +12,6 @@ zenodo_dois) from ms2query.utils import (SettingsRunMS2Query, load_matchms_spectrum_objects_from_file) -from tests.test_ms2library import MS2Library -from tests.test_utils import check_expected_headers def test_download_zenodo(): diff --git a/tests/test_split_data_for_training.py b/tests/test_split_data_for_training.py index e4b5712e..3feed8b4 100644 --- a/tests/test_split_data_for_training.py +++ b/tests/test_split_data_for_training.py @@ -1,4 +1,5 @@ import os + from ms2query.create_new_library.split_data_for_training import ( select_unique_inchikeys, split_spectra_in_random_inchikey_sets, split_spectra_on_inchikeys) diff --git a/tests/test_sqlite.py b/tests/test_sqlite.py index 810446d0..d9c44619 100644 --- a/tests/test_sqlite.py +++ b/tests/test_sqlite.py @@ -7,8 +7,10 @@ import os import sqlite3 + import numpy as np import pandas as pd + from ms2query.clean_and_filter_spectra import \ normalize_and_filter_peaks_multiple_spectra from ms2query.create_new_library.add_classifire_classifications import \ diff --git a/tests/test_train_models.py b/tests/test_train_models.py index 62cb8902..139efe56 100644 --- a/tests/test_train_models.py +++ b/tests/test_train_models.py @@ -1,10 +1,13 @@ import os +import string + import numpy as np import pytest -from ms2deepscore import SettingsMS2Deepscore -import string from matchms.Spectrum import Spectrum -from ms2query.create_new_library.train_models import train_all_models, SettingsTrainingModels +from ms2deepscore import SettingsMS2Deepscore + +from ms2query.create_new_library.train_models import (SettingsTrainingModels, + train_all_models) from ms2query.ms2library import MS2Library, create_library_object_from_one_dir diff --git a/tests/test_train_ms2query_model.py b/tests/test_train_ms2query_model.py index 30e7e646..5649321a 100644 --- a/tests/test_train_ms2query_model.py +++ b/tests/test_train_ms2query_model.py @@ -1,17 +1,18 @@ import os import sys + import numpy as np import pandas as pd import pytest from matchms import Spectrum from onnxruntime import InferenceSession + from ms2query.create_new_library.train_ms2query_model import ( DataCollectorForTraining, calculate_tanimoto_scores_with_library, convert_to_onnx_model, train_ms2query_model, train_random_forest) from ms2query.ms2library import MS2Library from ms2query.utils import predict_onnx_model - if sys.version_info < (3, 8): pass else: diff --git a/tests/test_utils.py b/tests/test_utils.py index 8f4dddf5..844f0bc9 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,9 +1,11 @@ import os from io import StringIO from typing import List + import pandas as pd import pytest from matchms import Spectrum + from ms2query.utils import (add_unknown_charges_to_spectra, load_matchms_spectrum_objects_from_file)