Skip to content

Commit

Permalink
Update yml file
Browse files Browse the repository at this point in the history
  • Loading branch information
niekdejonge committed Jun 24, 2024
1 parent 46a2098 commit 4e491df
Show file tree
Hide file tree
Showing 40 changed files with 71 additions and 16 deletions.
2 changes: 1 addition & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ dependencies:
- pyarrow=12.0.1
- tensorflow=2.12.1
- scikit-learn=1.3.2
- - ms2deepscore=0.5.0
+ - ms2deepscore=2.0.0
- pandas=2.0.3
- matplotlib=3.7.3
- skl2onnx=1.16.0
Expand Down
3 changes: 1 addition & 2 deletions ms2query/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,17 @@
import os


os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
# pylint: disable=wrong-import-position
import argparse
import logging

from .__version__ import __version__
from .ms2library import MS2Library, create_library_object_from_one_dir
from .results_table import ResultsTable
from .run_ms2query import (download_zenodo_files, run_complete_folder,
run_ms2query_single_file)
from .utils import SettingsRunMS2Query


logging.getLogger(__name__).addHandler(logging.NullHandler())

__author__ = "Netherlands eScience Center"
Expand Down
2 changes: 2 additions & 0 deletions ms2query/benchmarking/collect_test_data_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import sqlite3
import tempfile
from typing import List, Tuple, Union

import pandas as pd
from matchms import Spectrum
from matchms.calculate_scores import calculate_scores
Expand All @@ -16,6 +17,7 @@
from ms2deepscore.models import SiameseSpectralModel, compute_embedding_array
from spec2vec.vector_operations import cosine_similarity_matrix
from tqdm import tqdm

from ms2query.create_new_library.calculate_tanimoto_scores import (
calculate_highest_tanimoto_score, calculate_single_tanimoto_score)
from ms2query.ms2library import MS2Library
Expand Down
2 changes: 2 additions & 0 deletions ms2query/benchmarking/create_accuracy_vs_recall_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
import os
import random
from typing import Dict, List, Tuple

import numpy as np
from matplotlib import pyplot as plt
from tqdm import tqdm

from ms2query.utils import (load_df_from_parquet_file, load_json_file,
save_df_as_parquet_file)

Expand Down
2 changes: 2 additions & 0 deletions ms2query/benchmarking/k_fold_cross_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
import os
import random
from typing import List

from matchms import Spectrum
from matchms.exporting.save_as_mgf import save_as_mgf

from ms2query.benchmarking.collect_test_data_results import (
generate_exact_matches_test_results, generate_test_results)
from ms2query.clean_and_filter_spectra import \
Expand Down
2 changes: 2 additions & 0 deletions ms2query/benchmarking/visualize_mass_distribution.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import os
from typing import Dict, List, Tuple

from create_accuracy_vs_recall_plot import (
calculate_means_and_standard_deviation, load_results_from_folder)
from matchms import Spectrum
from matplotlib import pyplot as plt

from ms2query.utils import (load_df_from_parquet_file,
load_matchms_spectrum_objects_from_file)

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Dict, List, Tuple

import numpy as np
from matplotlib import pyplot as plt

Expand Down
1 change: 1 addition & 0 deletions ms2query/clean_and_filter_spectra.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import List, Optional, Tuple

import matchms.filtering as msfilters
from matchms.filtering.filter_utils.smile_inchi_inchikey_conversions import (
is_valid_inchi, is_valid_inchikey, is_valid_smiles)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import urllib
from http.client import InvalidURL
from typing import List, Optional

import pandas as pd
from tqdm import tqdm

Expand Down
1 change: 1 addition & 0 deletions ms2query/create_new_library/calculate_tanimoto_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"""
from collections import Counter
from typing import List

import numpy as np
import pandas as pd
from matchms import Spectrum
Expand Down
2 changes: 2 additions & 0 deletions ms2query/create_new_library/create_sqlite_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@

import sqlite3
from typing import Dict, List

import pandas as pd
from matchms import Spectrum
from tqdm import tqdm

from ms2query.create_new_library.calculate_tanimoto_scores import \
calculate_highest_tanimoto_score
from ms2query.utils import return_non_existing_file_name
Expand Down
2 changes: 2 additions & 0 deletions ms2query/create_new_library/library_files_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
from pathlib import Path
from typing import List, Union

import matchms.filtering as msfilters
import numpy as np
import pandas as pd
Expand All @@ -15,6 +16,7 @@
from ms2deepscore.models.SiameseSpectralModel import compute_embedding_array
from spec2vec.vector_operations import calc_vector
from tqdm import tqdm

from ms2query.clean_and_filter_spectra import create_spectrum_documents
from ms2query.create_new_library.add_classifire_classifications import (
convert_to_dataframe, select_compound_classes)
Expand Down
1 change: 1 addition & 0 deletions ms2query/create_new_library/split_data_for_training.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import random
from typing import Dict, List

from matchms import Spectrum


Expand Down
6 changes: 4 additions & 2 deletions ms2query/create_new_library/train_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,18 @@
import os

from ms2deepscore import SettingsMS2Deepscore
+ from ms2deepscore.train_new_model.train_ms2deepscore import train_ms2ds_model
from spec2vec.model_building import train_new_word2vec_model
+
from ms2query.clean_and_filter_spectra import (
clean_normalize_and_split_annotated_spectra, create_spectrum_documents)
from ms2query.create_new_library.library_files_creator import \
LibraryFilesCreator
- from ms2query.create_new_library.split_data_for_training import split_spectra_on_inchikeys
+ from ms2query.create_new_library.split_data_for_training import \
+ split_spectra_on_inchikeys
from ms2query.create_new_library.train_ms2query_model import (
convert_to_onnx_model, train_ms2query_model)
from ms2query.utils import load_matchms_spectrum_objects_from_file
- from ms2deepscore.train_new_model.train_ms2deepscore import train_ms2ds_model


class SettingsTrainingModels:
Expand Down
2 changes: 2 additions & 0 deletions ms2query/create_new_library/train_ms2query_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,15 @@

import os
from typing import List

import pandas as pd
from matchms import Spectrum
from onnxconverter_common import FloatTensorType
from skl2onnx import convert_sklearn
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

from ms2query import MS2Library
from ms2query.create_new_library.calculate_tanimoto_scores import \
calculate_tanimoto_scores_from_smiles
Expand Down
2 changes: 2 additions & 0 deletions ms2query/old_query_from_sqlite_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
import os
import sqlite3
from typing import Dict, List

import numpy as np
import pandas as pd
from matchms.Spectrum import Spectrum
from tqdm import tqdm

from ms2query.utils import load_pickled_file


Expand Down
2 changes: 2 additions & 0 deletions ms2query/query_from_sqlite_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@
import os.path
import sqlite3
from typing import Dict, List, Tuple

import pandas as pd

from ms2query.utils import column_names_for_output


Expand Down
2 changes: 2 additions & 0 deletions ms2query/results_table.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from typing import Tuple, Union

import numpy as np
import pandas as pd
from matchms.Spectrum import Spectrum

from ms2query.query_from_sqlite_database import SqliteLibrary
from ms2query.utils import column_names_for_output

Expand Down
1 change: 1 addition & 0 deletions ms2query/run_ms2query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
from typing import Union
from urllib.request import urlopen, urlretrieve

from ms2query.ms2library import MS2Library
from ms2query.utils import (SettingsRunMS2Query,
load_matchms_spectrum_objects_from_file,
Expand Down
1 change: 1 addition & 0 deletions ms2query/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
import os
from typing import List, Optional, Tuple, Union

import numpy as np
import pandas as pd
from matchms import importing
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@

import os
import time

from ms2query.ms2library import MS2Library
from ms2query.run_ms2query import run_complete_folder


start_time = time.time()

path_root = os.path.dirname(os.getcwd())
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import pickle

import numpy as np
import tensorflow as tf
from ms2deepscore import SpectrumBinner
Expand All @@ -9,7 +10,6 @@
EarlyStopping, ModelCheckpoint)
from tensorflow.keras.optimizers import Adam # pylint: disable=import-error


path_data = "C:\\HSD\\OneDrive - Hochschule Düsseldorf\\Data\\ms2query"

outfile = os.path.join(path_data, "GNPS_15_12_2021_pos_train.pickle")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os

from matchms.filtering import (add_losses, add_precursor_mz, default_filters,
normalize_intensities,
reduce_to_number_of_peaks,
require_minimum_number_of_peaks, select_by_mz)
from spec2vec import SpectrumDocument
from spec2vec.model_building import train_new_word2vec_model

from ms2query.utils import load_pickled_file


Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
import pickle

import numpy as np
import tensorflow as tf
from ms2deepscore import SpectrumBinner
Expand All @@ -9,7 +10,6 @@
EarlyStopping, ModelCheckpoint)
from tensorflow.keras.optimizers import Adam # pylint: disable=import-error


path_root = os.path.dirname(os.getcwd())

path_data = os.path.join(path_root, "../../../data/libraries_and_models/gnps_15_12_2021/in_between_files")
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import os

from matchms.filtering import (add_losses, add_precursor_mz, default_filters,
normalize_intensities,
reduce_to_number_of_peaks,
require_minimum_number_of_peaks, select_by_mz)
from spec2vec import SpectrumDocument
from spec2vec.model_building import train_new_word2vec_model

from ms2query.utils import load_pickled_file


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/usr/bin/env python
import os
- from setuptools import find_packages, setup

+ from setuptools import find_packages, setup

here = os.path.abspath(os.path.dirname(__file__))

Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import os

import numpy as np
import pandas as pd
import pytest
from matchms import Spectrum
from matchms.importing.load_from_mgf import load_from_mgf

from ms2query.ms2library import MS2Library
from ms2query.query_from_sqlite_database import SqliteLibrary
from ms2query.utils import load_df_from_parquet_file
Expand Down
1 change: 1 addition & 0 deletions tests/test_add_classifier_annotations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
import pytest
from matchms import Spectrum

from ms2query.create_new_library.add_classifire_classifications import \
select_compound_classes

Expand Down
1 change: 1 addition & 0 deletions tests/test_calculate_tanimoto_scores.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pandas as pd

from ms2query.clean_and_filter_spectra import \
normalize_and_filter_peaks_multiple_spectra
from ms2query.create_new_library.calculate_tanimoto_scores import (
Expand Down
1 change: 1 addition & 0 deletions tests/test_clean_and_filter_spectra.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import numpy as np
from matchms import Spectrum
from spec2vec import SpectrumDocument

from ms2query.clean_and_filter_spectra import (
clean_normalize_and_split_annotated_spectra, create_spectrum_documents,
harmonize_annotation, normalize_and_filter_peaks,
Expand Down
2 changes: 2 additions & 0 deletions tests/test_collect_test_data_results.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os

import numpy as np
import pandas as pd
import pytest
from matchms import Spectrum

from ms2query.benchmarking.collect_test_data_results import (
create_optimal_results, create_random_results, generate_test_results,
generate_test_results_ms2query, get_all_ms2ds_scores,
Expand Down
2 changes: 2 additions & 0 deletions tests/test_library_files_creator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import os

import pandas as pd
import pytest

from ms2query.clean_and_filter_spectra import normalize_and_filter_peaks
from ms2query.create_new_library.library_files_creator import \
LibraryFilesCreator
Expand Down
4 changes: 3 additions & 1 deletion tests/test_ms2library.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import math
import os
+
import numpy as np
import pandas as pd
+ from tests.test_utils import check_expected_headers
+
from ms2query.ms2library import MS2Library, create_library_object_from_one_dir
from ms2query.utils import SettingsRunMS2Query, column_names_for_output
- from tests.test_utils import check_expected_headers


def test_get_all_ms2ds_scores(ms2library, test_spectra):
Expand Down
1 change: 1 addition & 0 deletions tests/test_results_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pandas as pd
import pytest
from matchms import Spectrum

from ms2query import ResultsTable


Expand Down
Loading

0 comments on commit 4e491df

Please sign in to comment.