Skip to content

Commit

Permalink
use compute embedding array in library_files_creator.py
Browse files Browse the repository at this point in the history
  • Loading branch information
niekdejonge committed Jun 24, 2024
1 parent 501c86a commit d2fc616
Showing 1 changed file with 2 additions and 4 deletions.
6 changes: 2 additions & 4 deletions ms2query/create_new_library/library_files_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
import pandas as pd
from gensim.models import Word2Vec
from matchms.Spectrum import Spectrum
- from ms2deepscore import MS2DeepScore
from ms2deepscore.models import load_model as load_ms2ds_model
+ from ms2deepscore.models.SiameseSpectralModel import compute_embedding_array
from spec2vec.vector_operations import calc_vector
from tqdm import tqdm
from ms2query.clean_and_filter_spectra import create_spectrum_documents
Expand Down Expand Up @@ -141,11 +141,9 @@ def store_ms2ds_embeddings(self):
assert not os.path.exists(self.ms2ds_embeddings_file_name), \
"Given ms2ds_embeddings_file_name already exists"
assert self.ms2ds_model is not None, "No MS2deepscore model was provided"
- ms2ds = MS2DeepScore(self.ms2ds_model,
- progress_bar=self.progress_bars)

# Compute spectral embeddings
- embeddings = ms2ds.calculate_vectors(self.list_of_spectra)
+ embeddings = compute_embedding_array(self.ms2ds_model, self.list_of_spectra)
spectrum_ids = np.arange(0, len(self.list_of_spectra))
all_embeddings_df = pd.DataFrame(embeddings, index=spectrum_ids)
save_df_as_parquet_file(all_embeddings_df, self.ms2ds_embeddings_file_name)
Expand Down

0 comments on commit d2fc616

Please sign in to comment.