From 3f84465f14a4378a5fce3e01e47d8506b4e589ab Mon Sep 17 00:00:00 2001 From: Joshua Charkow Date: Sun, 7 Jan 2024 20:37:40 -0500 Subject: [PATCH 01/19] [FIX] error in conformer model conformer model expected properties of transition group that were not present. Require a spectral library to get the library intensities --- massdash/loaders/SpectralLibraryLoader.py | 21 ++++++++++++++++++++ massdash/loaders/SqMassLoader.py | 2 +- massdash/peakPickers/ConformerPeakPicker.py | 13 ++++++------ massdash/preprocess/ConformerPreprocessor.py | 7 ++++--- massdash/structs/TransitionGroup.py | 4 +++- 5 files changed, 36 insertions(+), 11 deletions(-) diff --git a/massdash/loaders/SpectralLibraryLoader.py b/massdash/loaders/SpectralLibraryLoader.py index dbc2221f..9b974799 100644 --- a/massdash/loaders/SpectralLibraryLoader.py +++ b/massdash/loaders/SpectralLibraryLoader.py @@ -235,6 +235,27 @@ def get_peptide_fragment_annotation_list(self, peptide: str, charge: int) -> Lis """ return self.data[(self.data['ModifiedPeptideSequence'] == peptide) & (self.data['PrecursorCharge'] == charge)]['Annotation'].tolist() + def get_fragment_library_intensity(self, peptide: str, charge: int, annotation: str) -> float: + """ + Retrieves a list of fragment annotations for a given peptide and charge. + + Args: + peptide (str): The peptide sequence. + charge (int): The precursor charge. + annotation (str): The fragment annotation. + + Returns: + float: The library intensity for the specified fragment annotation. + """ + out = self.data[(self.data['ModifiedPeptideSequence'] == peptide) & (self.data['PrecursorCharge'] == charge) & (self.data['Annotation'] == annotation)]['LibraryIntensity'] + if out.empty: + raise ValueError(f"Annotation {annotation} not found for peptide {peptide} charge {charge}") + elif len(out) > 1: + LOGGER.warning(f"Multiple annotations found for peptide {peptide} charge {charge}. 
Returning first.") + return out.iloc[0] + else: # len(out) == 1 + return out.iloc[0] + def filter_for_target_transition_list(self, protein: str, peptide: str, charge: int) -> pd.DataFrame: """ Filters the data for a specific target transition list based on the given protein, peptide, and charge. diff --git a/massdash/loaders/SqMassLoader.py b/massdash/loaders/SqMassLoader.py index 7ad8c7ba..4e297458 100644 --- a/massdash/loaders/SqMassLoader.py +++ b/massdash/loaders/SqMassLoader.py @@ -83,7 +83,7 @@ def loadTransitionGroups(self, pep_id: str, charge: int) -> Dict[str, Transition prec_chrom_ids = t.getPrecursorChromIDs(precursor_id) precursor_chroms = t.getDataForChromatograms(prec_chrom_ids['chrom_ids'], prec_chrom_ids['native_ids']) - out[t] = TransitionGroup(precursor_chroms, transition_chroms) + out[t] = TransitionGroup(precursor_chroms, transition_chroms, pep_id, charge) return out def loadTransitionGroupFeaturesDf(self, pep_id: str, charge: int) -> pd.DataFrame: diff --git a/massdash/peakPickers/ConformerPeakPicker.py b/massdash/peakPickers/ConformerPeakPicker.py index e962b766..6e45c4d7 100644 --- a/massdash/peakPickers/ConformerPeakPicker.py +++ b/massdash/peakPickers/ConformerPeakPicker.py @@ -11,6 +11,7 @@ # Structs from ..structs.TransitionGroup import TransitionGroup from ..structs.TransitionGroupFeature import TransitionGroupFeature +from ..loaders.SpectralLibraryLoader import SpectralLibraryLoader # Utils from ..util import check_package @@ -35,7 +36,7 @@ class ConformerPeakPicker: _convertConformerFeatureToTransitionGroupFeatures: Convert conformer predicted feature to TransitionGroupFeatures. 
""" - def __init__(self, transition_group: TransitionGroup, pretrained_model_file: str, window_size: int = 175, prediction_threshold: float = 0.5, prediction_type: str = "logits"): + def __init__(self, library_file: str, pretrained_model_file: str, window_size: int = 175, prediction_threshold: float = 0.5, prediction_type: str = "logits"): """ Initialize the ConformerPeakPicker class. @@ -46,12 +47,12 @@ def __init__(self, transition_group: TransitionGroup, pretrained_model_file: str prediction_threshold (float, optional): The prediction threshold for peak picking. Defaults to 0.5. prediction_type (str, optional): The prediction type for peak picking. Defaults to "logits". """ - self.transition_group = transition_group self.pretrained_model_file = pretrained_model_file self.window_size = window_size self.prediction_threshold = prediction_threshold self.prediction_type = prediction_type self.onnx_session = None + self.library = SpectralLibraryLoader(library_file) self._validate_model() @@ -74,7 +75,7 @@ def load_model(self): # Load pretrained model self.onnx_session = onnxruntime.InferenceSession(self.pretrained_model_file) - def pick(self, max_int_transition: int=1000) -> List[TransitionGroupFeature]: + def pick(self, transition_group, max_int_transition: int=1000) -> List[TransitionGroupFeature]: """ Perform peak picking. 
@@ -86,8 +87,8 @@ def pick(self, max_int_transition: int=1000) -> List[TransitionGroupFeature]: """ # Transform data into required input print("Preprocessing data...") - conformer_preprocessor = ConformerPreprocessor(self.transition_group) - input_data = conformer_preprocessor.preprocess() + conformer_preprocessor = ConformerPreprocessor(transition_group) + input_data = conformer_preprocessor.preprocess(self.library) print("Loading model...") self.load_model() print("Predicting...") @@ -96,7 +97,7 @@ def pick(self, max_int_transition: int=1000) -> List[TransitionGroupFeature]: print("Getting predicted boundaries...") peak_info = conformer_preprocessor.find_top_peaks(ort_output[0], ["precursor"], self.prediction_threshold, self.prediction_type) # Get actual peak boundaries - peak_info = conformer_preprocessor.get_peak_boundaries(peak_info, self.transition_group, self.window_size) + peak_info = conformer_preprocessor.get_peak_boundaries(peak_info, transition_group, self.window_size) print(f"Peak info: {peak_info}") return self._convertConformerFeatureToTransitionGroupFeatures(peak_info, max_int_transition) diff --git a/massdash/preprocess/ConformerPreprocessor.py b/massdash/preprocess/ConformerPreprocessor.py index ea29f1ff..4f80387e 100644 --- a/massdash/preprocess/ConformerPreprocessor.py +++ b/massdash/preprocess/ConformerPreprocessor.py @@ -10,6 +10,7 @@ from .GenericPreprocessor import GenericPreprocessor # Structs from ..structs.TransitionGroup import TransitionGroup +from ..loaders.SpectralLibraryLoader import SpectralLibraryLoader # Utils from ..util import check_package @@ -101,7 +102,7 @@ def sigmoid(x: np.ndarray) -> np.ndarray: """ return 1 / (1 + np.exp(-x)) - def preprocess(self, window_size: int=175) -> np.ndarray: + def preprocess(self, library: SpectralLibraryLoader, window_size: int=175) -> np.ndarray: """ Preprocesses the data by scaling and transforming it into a numpy array. 
@@ -131,7 +132,7 @@ def preprocess(self, window_size: int=175) -> np.ndarray: # append ms2 intensity data to data data = np.append(data, [chrom.intensity], axis=0) - lib_int = self.transition_group.targeted_transition_list[self.transition_group.targeted_transition_list.Annotation==chrom.label]['LibraryIntensity'].values + lib_int = library.get_fragment_library_intensity(self.transition_group.sequence, self.transition_group.precursor_charge, chrom.label) lib_int = np.repeat(lib_int, len(chrom.intensity)) lib_int_data = np.append(lib_int_data, [lib_int], axis=0) @@ -190,7 +191,7 @@ def preprocess(self, window_size: int=175) -> np.ndarray: new_data[19] = tmp_arr ## Add charge state - new_data[20] = self.transition_group.targeted_transition_list.PrecursorCharge.values[0] * np.ones(len(data[0])) + new_data[20] = self.transition_group.precursor_charge * np.ones(len(data[0])) ## Convert to float32 new_data = new_data.astype(np.float32) diff --git a/massdash/structs/TransitionGroup.py b/massdash/structs/TransitionGroup.py index c82877c3..a0f2bafd 100644 --- a/massdash/structs/TransitionGroup.py +++ b/massdash/structs/TransitionGroup.py @@ -18,7 +18,7 @@ class TransitionGroup: Class for Storing a transition group ''' def __init__(self, precursorData: Union[List[Chromatogram], List[Mobilogram], List[Spectrum]], - transitionData: Union[List[Chromatogram], List[Mobilogram], List[Spectrum]]): + transitionData: Union[List[Chromatogram], List[Mobilogram], List[Spectrum]], sequence: str = None, precursor_charge: int = None): self.precursorData = precursorData self.transitionData = transitionData self.type = type(precursorData[0]) @@ -30,6 +30,8 @@ def __init__(self, precursorData: Union[List[Chromatogram], List[Mobilogram], Li raise ValueError("Precursor and transition data cannot both be empty") if len(precursorData) > 0 and len(transitionData) > 0: assert(self.dataType == type(transitionData[0])) + self.sequence = sequence + self.precursor_charge = precursor_charge def 
to_pyopenms(self, includePrecursors=True): From cd9c172849f5c98a55d1e24dd6b163f3d0af33af Mon Sep 17 00:00:00 2001 From: Joshua Charkow Date: Wed, 17 Jan 2024 15:12:25 -0500 Subject: [PATCH 02/19] [FIX] get Conformer peak picker working with GUI --- massdash/server/ExtractedIonChromatogramAnalysisServer.py | 4 ++-- massdash/server/PeakPickingServer.py | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/massdash/server/ExtractedIonChromatogramAnalysisServer.py b/massdash/server/ExtractedIonChromatogramAnalysisServer.py index 43949e65..e65babe1 100644 --- a/massdash/server/ExtractedIonChromatogramAnalysisServer.py +++ b/massdash/server/ExtractedIonChromatogramAnalysisServer.py @@ -163,10 +163,10 @@ def main(self): tr_group.targeted_transition_list = transition_list_ui.target_transition_list print(f"Pretrained model file: {peak_picking_settings.peak_picker_algo_settings.pretrained_model_file}") - peak_picker = ConformerPeakPicker(tr_group, peak_picking_settings.peak_picker_algo_settings.pretrained_model_file, window_size=peak_picking_settings.peak_picker_algo_settings.conformer_window_size, prediction_threshold=peak_picking_settings.peak_picker_algo_settings.conformer_prediction_threshold, prediction_type=peak_picking_settings.peak_picker_algo_settings.conformer_prediction_type) + peak_picker = ConformerPeakPicker(self.massdash_gui.file_input_settings.osw_file_path, peak_picking_settings.peak_picker_algo_settings.pretrained_model_file, window_size=peak_picking_settings.peak_picker_algo_settings.conformer_window_size, prediction_threshold=peak_picking_settings.peak_picker_algo_settings.conformer_prediction_threshold, prediction_type=peak_picking_settings.peak_picker_algo_settings.conformer_prediction_type) # get the trantition in tr_group with the max intensity max_int_transition = np.max([transition.intensity for transition in tr_group.transitionData]) - peak_features = peak_picker.pick(max_int_transition) + peak_features = peak_picker.pick(tr_group, 
max_int_transition) tr_group_feature_data[file.filename] = peak_features st.write(f"Performing Conformer Peak Picking... Elapsed time: {elapsed_time()}") else: diff --git a/massdash/server/PeakPickingServer.py b/massdash/server/PeakPickingServer.py index a603764b..6aa58ff6 100644 --- a/massdash/server/PeakPickingServer.py +++ b/massdash/server/PeakPickingServer.py @@ -103,6 +103,7 @@ def perform_mrmtransitiongrouppicker_peak_picking(self, tr_group_data: Transitio st.write(f"Performing MRMTransitionGroupPicker Peak Picking... Elapsed time: {elapsed_time()}") return tr_group_feature_data + #TODO Add Conformer Peak Picker for 2D def perform_peak_picking(self, tr_group_data: TransitionGroup=None, xic_data: SqMassLoader=None, transition_list_ui: Literal['ExtractedIonChromatogramAnalysisUI', 'RawTargetedExtractionAnalysisUI']=None): """ Performs peak picking based on the selected method. From aed6da6ee30b2ba7e51c49cb49797677d9ab6196 Mon Sep 17 00:00:00 2001 From: Joshua Charkow Date: Tue, 23 Jan 2024 09:56:26 -0500 Subject: [PATCH 03/19] [FIX] conformer peak picker with streamlit --- massdash/server/OneDimensionPlotterServer.py | 5 ++- massdash/server/PeakPickingServer.py | 41 +++++++++++++++++-- .../RawTargetedExtractionAnalysisServer.py | 3 +- 3 files changed, 42 insertions(+), 7 deletions(-) diff --git a/massdash/server/OneDimensionPlotterServer.py b/massdash/server/OneDimensionPlotterServer.py index 11e7570a..59e75d67 100644 --- a/massdash/server/OneDimensionPlotterServer.py +++ b/massdash/server/OneDimensionPlotterServer.py @@ -46,12 +46,13 @@ class OneDimensionPlotterServer: def __init__(self, feature_map_dict: Dict[str, FeatureMap], transition_list_ui: TransitionListUISettings, chrom_plot_settings: ChromatogramPlotUISettings, - peak_picking_settings: PeakPickingUISettings, + peak_picking_settings: PeakPickingUISettings, spectral_library_path: str=None, verbose: bool=False): self.feature_map_dict = feature_map_dict self.transition_list_ui = transition_list_ui 
self.chrom_plot_settings = chrom_plot_settings self.peak_picking_settings = peak_picking_settings + self.spectral_library_path = spectral_library_path self.plot_obj_dict = {} self.verbose = verbose @@ -74,7 +75,7 @@ def generate_chromatogram_plots(self): tr_group = feature_map.to_chromatograms() # Perform peak picking if enabled peak_picker = PeakPickingServer(self.peak_picking_settings, self.chrom_plot_settings) - tr_group_feature_data = peak_picker.perform_peak_picking(tr_group_data={'tmp':tr_group}, transition_list_ui=self.transition_list_ui) + tr_group_feature_data = peak_picker.perform_peak_picking(tr_group_data={'tmp':tr_group}, transition_list_ui=self.transition_list_ui, spec_lib=self.spectral_library_path) plot_settings_dict = self._get_plot_settings('Retention Time (s)', 'Intensity', file, 'chromatogram') plot_obj = self._generate_plot(tr_group, plot_settings_dict, tr_group_feature_data['tmp']) run_plots_list.append(plot_obj) diff --git a/massdash/server/PeakPickingServer.py b/massdash/server/PeakPickingServer.py index 6aa58ff6..f201537f 100644 --- a/massdash/server/PeakPickingServer.py +++ b/massdash/server/PeakPickingServer.py @@ -3,6 +3,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ +import numpy as np import streamlit as st from typing import Literal @@ -14,8 +15,7 @@ # Structs from ..structs.TransitionGroup import TransitionGroup # Peak Picking -from ..peakPickers.pyMRMTransitionGroupPicker import pyMRMTransitionGroupPicker -from ..peakPickers.MRMTransitionGroupPicker import MRMTransitionGroupPicker +from ..peakPickers import pyMRMTransitionGroupPicker, MRMTransitionGroupPicker, ConformerPeakPicker # Util from ..util import time_block from .util import get_string_mslevels_from_bool @@ -103,8 +103,38 @@ def perform_mrmtransitiongrouppicker_peak_picking(self, tr_group_data: Transitio st.write(f"Performing MRMTransitionGroupPicker Peak Picking... 
Elapsed time: {elapsed_time()}") return tr_group_feature_data - #TODO Add Conformer Peak Picker for 2D - def perform_peak_picking(self, tr_group_data: TransitionGroup=None, xic_data: SqMassLoader=None, transition_list_ui: Literal['ExtractedIonChromatogramAnalysisUI', 'RawTargetedExtractionAnalysisUI']=None): + def perform_conformer_peak_picking(self, spec_lib_path: str, tr_group_data: TransitionGroup): + """ + Performs peak picking using ConformerPeakPicker algorithm. + + Args: + tr_group_data (dict): The transition group data. + spec_lib_path (str): The path to the spectral library. + + Returns: + dict: The transition group feature data. + """ + with time_block() as elapsed_time: + # Peak picking using Conformer + tr_group_feature_data = {} + + for file, tr_group in tr_group_data.items(): + #tr_group.targeted_transition_list = self.transition_list_ui.target_transition_list + print(f"Pretrained model file: {self.peak_picking_settings.peak_picker_algo_settings.pretrained_model_file}") + + peak_picker = ConformerPeakPicker(spec_lib_path, self.peak_picking_settings.peak_picker_algo_settings.pretrained_model_file, + window_size=self.peak_picking_settings.peak_picker_algo_settings.conformer_window_size, + prediction_threshold=self.peak_picking_settings.peak_picker_algo_settings.conformer_prediction_threshold, + prediction_type=self.peak_picking_settings.peak_picker_algo_settings.conformer_prediction_type) + + # get the trantition in tr_group with the max intensity + max_int_transition = np.max([transition.intensity for transition in tr_group.transitionData]) + peak_features = peak_picker.pick(tr_group, max_int_transition=max_int_transition) + tr_group_feature_data[file.filename] = peak_features + st.write(f"Performing Conformer Peak Picking... 
Elapsed time: {elapsed_time()}") + return tr_group_feature_data + + def perform_peak_picking(self, tr_group_data: TransitionGroup=None, xic_data: SqMassLoader=None, transition_list_ui: Literal['ExtractedIonChromatogramAnalysisUI', 'RawTargetedExtractionAnalysisUI']=None, spec_lib: str=None): """ Performs peak picking based on the selected method. @@ -112,6 +142,7 @@ def perform_peak_picking(self, tr_group_data: TransitionGroup=None, xic_data: Sq tr_group_data (dict, optional): The transition group data. Defaults to None. xic_data (object, optional): The XIC data. Defaults to None. transition_list_ui (object, optional): The transition list UI. Defaults to None. + spec_lib (object, optional): The spectral library. Defaults to None. Mandatory if peak picking using ConformerPeakPicker. Returns: dict: The transition group feature data. @@ -125,6 +156,8 @@ def perform_peak_picking(self, tr_group_data: TransitionGroup=None, xic_data: Sq tr_group_feature_data = self.perform_pypeakpicker_mrm_peak_picking(tr_group_data) elif self.peak_picking_settings.do_peak_picking == 'MRMTransitionGroupPicker': tr_group_feature_data = self.perform_mrmtransitiongrouppicker_peak_picking(tr_group_data) + elif self.peak_picking_settings.do_peak_picking == 'Conformer': + tr_group_feature_data = self.perform_conformer_peak_picking(spec_lib, tr_group_data) else: tr_group_feature_data = {file: None for file in tr_group_data.keys()} diff --git a/massdash/server/RawTargetedExtractionAnalysisServer.py b/massdash/server/RawTargetedExtractionAnalysisServer.py index 21546301..dbcb51b4 100644 --- a/massdash/server/RawTargetedExtractionAnalysisServer.py +++ b/massdash/server/RawTargetedExtractionAnalysisServer.py @@ -193,6 +193,7 @@ def main(self): if clear_caches: self.targeted_extraction.clear() featureMaps = self.targeted_extraction(transition_list_ui) + st.write(list(featureMaps.values())[0].feature_df) st_log_writer.write(f"Extracting spectra complete! 
Elapsed time: {timedelta(seconds=perf_metrics.execution_time)}") transition_list_ui.validate_extraction(featureMaps, plot_container) @@ -202,7 +203,7 @@ def main(self): # Initialize plot object dictionary plot_obj_dict = {} if chrom_plot_settings.display_plot_dimension_type == "1D": - plot_obj_dict = OneDimensionPlotterServer(featureMaps, transition_list_ui, chrom_plot_settings, peak_picking_settings, self.massdash_gui.verbose).generate_chromatogram_plots().plot_obj_dict + plot_obj_dict = OneDimensionPlotterServer(featureMaps, transition_list_ui, chrom_plot_settings, peak_picking_settings, self.massdash_gui.file_input_settings.transition_list_file_path, self.massdash_gui.verbose).generate_chromatogram_plots().plot_obj_dict elif chrom_plot_settings.display_plot_dimension_type == "2D": plot_obj_dict = TwoDimensionPlotterServer(featureMaps, transition_list_ui, chrom_plot_settings).generate_two_dimensional_plots().plot_obj_dict elif chrom_plot_settings.display_plot_dimension_type == "3D": From 12d4882b2e4669aa8ae5271a4bea88e3ee2728e2 Mon Sep 17 00:00:00 2001 From: Joshua Charkow Date: Tue, 23 Jan 2024 13:11:13 -0500 Subject: [PATCH 04/19] [FIX] conformer of multiple lengths update pad/slicing interface for chromatograms that are smaller than window to be the window size --- massdash/loaders/access/MzMLDataAccess.py | 2 +- massdash/peakPickers/ConformerPeakPicker.py | 2 +- massdash/preprocess/ConformerPreprocessor.py | 39 ++++------------ massdash/server/PeakPickingServer.py | 5 +- massdash/structs/Chromatogram.py | 15 +++++- massdash/structs/Data1D.py | 48 ++++++++++++++++++++ massdash/structs/FeatureMap.py | 21 +++++++-- massdash/structs/Mobilogram.py | 15 +++++- massdash/structs/Spectrum.py | 15 +++++- massdash/structs/TransitionGroup.py | 16 +++++++ 10 files changed, 137 insertions(+), 41 deletions(-) diff --git a/massdash/loaders/access/MzMLDataAccess.py b/massdash/loaders/access/MzMLDataAccess.py index 330655b2..f3fcb985 100644 --- 
a/massdash/loaders/access/MzMLDataAccess.py +++ b/massdash/loaders/access/MzMLDataAccess.py @@ -420,7 +420,7 @@ def msExperimentToFeatureMap(self, msExperiment: po.MSExperiment, feature: Trans else: LOGGER.warn(f"No spectra found for peptide: {feature.sequence}{feature.precursor_charge}. Try adjusting the extraction parameters") - return FeatureMap(results_df, config) + return FeatureMap(results_df, feature.sequence, feature.precursor_charge, config) def _find_closest_reference_mz(self, given_mz: np.array, reference_mz_values: np.array, peptide_product_annotation_list: np.array) -> np.array: """ diff --git a/massdash/peakPickers/ConformerPeakPicker.py b/massdash/peakPickers/ConformerPeakPicker.py index 6e45c4d7..3f13ef27 100644 --- a/massdash/peakPickers/ConformerPeakPicker.py +++ b/massdash/peakPickers/ConformerPeakPicker.py @@ -97,7 +97,7 @@ def pick(self, transition_group, max_int_transition: int=1000) -> List[Transitio print("Getting predicted boundaries...") peak_info = conformer_preprocessor.find_top_peaks(ort_output[0], ["precursor"], self.prediction_threshold, self.prediction_type) # Get actual peak boundaries - peak_info = conformer_preprocessor.get_peak_boundaries(peak_info, transition_group, self.window_size) + peak_info = conformer_preprocessor.get_peak_boundaries(peak_info, self.window_size) print(f"Peak info: {peak_info}") return self._convertConformerFeatureToTransitionGroupFeatures(peak_info, max_int_transition) diff --git a/massdash/preprocess/ConformerPreprocessor.py b/massdash/preprocess/ConformerPreprocessor.py index 4f80387e..62d350db 100644 --- a/massdash/preprocess/ConformerPreprocessor.py +++ b/massdash/preprocess/ConformerPreprocessor.py @@ -123,17 +123,19 @@ def preprocess(self, library: SpectralLibraryLoader, window_size: int=175) -> np # Row index 19: library retention time diff # Row index 20: precursor charge - # initialize empty numpy array - data = np.empty((0, len(self.transition_group.transitionData[0].intensity)), float) + # pad 
the transition group to the window size + self.transition_group = self.transition_group.pad(window_size) - lib_int_data = np.empty((0, len(self.transition_group.transitionData[0].intensity)), float) + # initialize empty numpy array + data = np.empty((0, window_size), float) + lib_int_data = np.empty((0, window_size), float) for chrom in self.transition_group.transitionData: # append ms2 intensity data to data data = np.append(data, [chrom.intensity], axis=0) lib_int = library.get_fragment_library_intensity(self.transition_group.sequence, self.transition_group.precursor_charge, chrom.label) - lib_int = np.repeat(lib_int, len(chrom.intensity)) + lib_int = np.repeat(lib_int, window_size) lib_int_data = np.append(lib_int_data, [lib_int], axis=0) # initialize empty numpy array to store scaled data @@ -149,20 +151,7 @@ def preprocess(self, library: SpectralLibraryLoader, window_size: int=175) -> np ) ## MS1 trace data - # padd precursor intensity data with zeros to match ms2 intensity data - len_trans = len(self.transition_group.transitionData[0].intensity) - len_prec = len(self.transition_group.precursorData[0].intensity) - if len_prec!=len_trans: - if len_prec < len_trans: - prec_int = np.pad(self.transition_group.precursorData[0].intensity, (0, len_trans-len_prec), 'constant', constant_values=(0, 0)) - if len_prec > len_trans: - prec_int = self.transition_group.precursorData[0].intensity - # compute number of points to trim from either side of the middle point - remove_n_points = len_prec - len_trans - # trim precursor intensity data - prec_int = prec_int[remove_n_points//2:-remove_n_points//2] - else: - prec_int = self.transition_group.precursorData[0].intensity + prec_int = self.transition_group.precursorData[0].intensity # append ms1 intensity data to data new_data[12] = self.min_max_scale(prec_int) @@ -196,13 +185,6 @@ def preprocess(self, library: SpectralLibraryLoader, window_size: int=175) -> np ## Convert to float32 new_data = new_data.astype(np.float32) - ## 
trim data if does not match window size starting at the centre - if len(new_data[0]) > window_size: - middle_index = len(data[0]) // 2 - trim_start = middle_index - (window_size // 2) - trim_end = middle_index + (window_size // 2) + 1 - new_data = new_data[:, trim_start:trim_end] - # cnvert the shape to be (1, 21, len(data[0])) new_data = np.expand_dims(new_data, axis=0) @@ -298,20 +280,19 @@ def find_top_peaks(self, preds, seq_classes: List[str]='input_precursor', thresh return peak_info - def get_peak_boundaries(self, peak_info: dict, tr_group: TransitionGroup, window_size: int=175): + def get_peak_boundaries(self, peak_info: dict, window_size: int=175): """ Adjusts the peak boundaries in the peak_info dictionary based on the window size and the dimensions of the input rt_array. Calculates the actual RT values from the rt_array and appends them to the peak_info dictionary. Args: peak_info (dict): A dictionary containing information about the peaks. - tr_group (TransitionGroup): The transition group containing the data. window_size (int, optional): The size of the window used for trimming the rt_array. Defaults to 175. Returns: dict: The updated peak_info dictionary with adjusted peak boundaries and RT values. 
""" - rt_array = tr_group.transitionData[0].data + rt_array = self.transition_group.transitionData[0].data if rt_array.shape[0] != window_size: print(f"input_data {rt_array.shape[0]} was trimmed to {window_size}, adjusting peak_info indexes to map to the original datas dimensions") for key in peak_info.keys(): @@ -335,6 +316,6 @@ def get_peak_boundaries(self, peak_info: dict, tr_group: TransitionGroup, window peak_info[key][i]['rt_apex'] = rt_array[peak_info[key][i]['max_idx']] peak_info[key][i]['rt_start'] = rt_array[peak_info[key][i]['start_idx']] peak_info[key][i]['rt_end'] = rt_array[peak_info[key][i]['end_idx']] - peak_info[key][i]['int_apex'] = np.max([tg.intensity[peak_info[key][i]['max_idx']] for tg in tr_group.transitionData]) + peak_info[key][i]['int_apex'] = np.max([tg.intensity[peak_info[key][i]['max_idx']] for tg in self.transition_group.transitionData]) return peak_info \ No newline at end of file diff --git a/massdash/server/PeakPickingServer.py b/massdash/server/PeakPickingServer.py index f201537f..4a83cb94 100644 --- a/massdash/server/PeakPickingServer.py +++ b/massdash/server/PeakPickingServer.py @@ -120,7 +120,7 @@ def perform_conformer_peak_picking(self, spec_lib_path: str, tr_group_data: Tran for file, tr_group in tr_group_data.items(): #tr_group.targeted_transition_list = self.transition_list_ui.target_transition_list - print(f"Pretrained model file: {self.peak_picking_settings.peak_picker_algo_settings.pretrained_model_file}") + st.write(f"Pretrained model file: {self.peak_picking_settings.peak_picker_algo_settings.pretrained_model_file}") peak_picker = ConformerPeakPicker(spec_lib_path, self.peak_picking_settings.peak_picker_algo_settings.pretrained_model_file, window_size=self.peak_picking_settings.peak_picker_algo_settings.conformer_window_size, @@ -130,7 +130,7 @@ def perform_conformer_peak_picking(self, spec_lib_path: str, tr_group_data: Tran # get the trantition in tr_group with the max intensity max_int_transition = 
np.max([transition.intensity for transition in tr_group.transitionData]) peak_features = peak_picker.pick(tr_group, max_int_transition=max_int_transition) - tr_group_feature_data[file.filename] = peak_features + tr_group_feature_data[file] = peak_features st.write(f"Performing Conformer Peak Picking... Elapsed time: {elapsed_time()}") return tr_group_feature_data @@ -148,6 +148,7 @@ def perform_peak_picking(self, tr_group_data: TransitionGroup=None, xic_data: Sq dict: The transition group feature data. """ tr_group_feature_data = {} + st.write("Performing Peak Picking for", tr_group_data) # Perform peak picking based on the selected method if self.peak_picking_settings.do_peak_picking == 'OSW-PyProphet': diff --git a/massdash/structs/Chromatogram.py b/massdash/structs/Chromatogram.py index 94c20262..08fae0ab 100644 --- a/massdash/structs/Chromatogram.py +++ b/massdash/structs/Chromatogram.py @@ -29,4 +29,17 @@ def to_pyopenms(self, id: Optional[str] = None): return chrom def toPandasDf(self) -> pd.DataFrame: - return super().toPandasDfHelper_('rt') \ No newline at end of file + return super().toPandasDfHelper_('rt') + + def pad(self, length: int) -> 'Chromatogram': + """ + Pad the chromatogram with zeros on both sides. + + Args: + pad (int): The number of zeros to pad on both sides. + + Returns: + Chromatogram: A new chromatogram object with padded data and intensity. + """ + new_data, new_intensity = super().pad(length) + return Chromatogram(new_data, new_intensity, self.label) \ No newline at end of file diff --git a/massdash/structs/Data1D.py b/massdash/structs/Data1D.py index 353527f8..d3f2667e 100644 --- a/massdash/structs/Data1D.py +++ b/massdash/structs/Data1D.py @@ -100,6 +100,54 @@ def median(self, boundary: Optional[Tuple[float, float]] = None) -> float: else: return np.median(self.intensity) + def pad(self, length): + """ + Pad the data and intensity arrays with zeros to a given length. Modifies the object in place. 
+ + Args: + length (int): The length of the output array + + Returns: + (new_data, new_intensity) : tuple of padded data and intensity + + """ + + #### need to slice the array + if length == len(self.data): + new_data = self.data + new_intensity = self.intensity + elif length < len(self.data): + if length % 2 == 0: + slice_left = slice_right = length // 2 + else: # length % 2 == 1 + slice_left = length // 2 + 1 + slice_right = length // 2 + new_data = self.data[slice_left:-slice_right] + new_intensity = self.intensity[slice_left:-slice_right] + else: # length > len(self.data): + ### infer the chromatogram step size + step = self.data[1] - self.data[0] + + both_even_or_odd = length % 2 == len(self.data) % 2 + if both_even_or_odd: + pad_left = pad_right = (length - len(self.data)) // 2 + + new_intensity = np.copy(self.intensity) + new_intensity = np.pad(new_intensity, (pad_left, pad_right), 'constant', constant_values=0) + else: + pad_left = (length - len(self.data)) // 2 + 1 + pad_right = (length - len(self.data)) // 2 + #### length is odd, unequal paddings ##### + + #### Pad the data to left and right #### + data_right = np.linspace(self.data[-1] + step, self.data[-1] + step * pad_right, num=pad_right) + data_left = np.linspace(self.data[0] - step * pad_left, self.data[0] - step, num=pad_left) + new_data = np.concatenate((data_left, self.data, data_right)) + new_intensity = np.copy(self.intensity) + new_intensity = np.pad(new_intensity, (pad_left, pad_right), 'constant', constant_values=0) + return (new_data, new_intensity) + + @abstractmethod def toPandasDf(self) -> pd.DataFrame: pass diff --git a/massdash/structs/FeatureMap.py b/massdash/structs/FeatureMap.py index f43305f2..fdaf8d3d 100644 --- a/massdash/structs/FeatureMap.py +++ b/massdash/structs/FeatureMap.py @@ -33,9 +33,11 @@ class FeatureMap: has_im (bool): A boolean indicating if the feature map has ion mobility data ''' - def __init__(self, feature_df: pd.DataFrame, config: TargetedDIAConfig=None, 
verbose: bool=False): + def __init__(self, feature_df: pd.DataFrame, sequence: str, precursor_charge: int, config: TargetedDIAConfig=None, verbose: bool=False): self.feature_df = feature_df self.has_im = 'im' in feature_df.columns and feature_df['im'].notnull().all() + self.sequence = sequence + self.precursor_charge = precursor_charge if not self.has_im and not self.feature_df.empty: self.feature_df.drop(columns=['im'], inplace=True) self.config = config @@ -100,7 +102,10 @@ def to_chromatograms(self) -> TransitionGroup: Returns: TransitionGroup: A TransitionGroup object storing chromatograms ''' - return TransitionGroup(self.get_precursor_chromatograms(), self.get_transition_chromatograms()) + tg = TransitionGroup(self.get_precursor_chromatograms(), self.get_transition_chromatograms()) + tg.sequence = self.sequence + tg.precursor_charge = self.precursor_charge + return tg def to_mobilograms(self) -> TransitionGroup: ''' @@ -109,7 +114,10 @@ def to_mobilograms(self) -> TransitionGroup: Returns: TransitionGroup: A TransitionGroup object storing mobilograms ''' - return TransitionGroup(self.get_precursor_mobilograms(), self.get_transition_mobilograms()) + tg = TransitionGroup(self.get_precursor_mobilograms(), self.get_transition_mobilograms()) + tg.sequence = self.sequence + tg.precursor_charge = self.precursor_charge + return tg def to_spectra(self) -> TransitionGroup: ''' @@ -118,8 +126,11 @@ def to_spectra(self) -> TransitionGroup: Returns: TransitionGroup: A TransitionGroup object storing spectra ''' - return TransitionGroup(self.get_precursor_spectra(), self.get_transition_spectra()) - + tg = TransitionGroup(self.get_precursor_spectra(), self.get_transition_spectra()) + tg.sequence = self.sequence + tg.precursor_charge = self.precursor_charge + return tg + def get_precursor_chromatograms(self) -> List[Chromatogram]: ''' Get a list of precursor chromatograms from the feature map diff --git a/massdash/structs/Mobilogram.py b/massdash/structs/Mobilogram.py index 
889bb57c..cb64e1e0 100644 --- a/massdash/structs/Mobilogram.py +++ b/massdash/structs/Mobilogram.py @@ -17,4 +17,17 @@ def __init__(self, im, intensity, label): super().__init__(im, intensity, label) def toPandasDf(self) -> pd.DataFrame: - return super().toPandasDfHelper_(self, 'im') \ No newline at end of file + return super().toPandasDfHelper_(self, 'im') + + def pad(self, length: int) -> 'Mobilogram': + """ + Pad the chromatogram with zeros on both sides. + + Args: + pad (int): The number of zeros to pad on both sides. + + Returns: + Chromatogram: A new chromatogram object with padded data and intensity. + """ + new_data, new_intensity = super().pad(length) + return Mobilogram(new_data, new_intensity, self.label) \ No newline at end of file diff --git a/massdash/structs/Spectrum.py b/massdash/structs/Spectrum.py index 109e2600..e6e4eb85 100644 --- a/massdash/structs/Spectrum.py +++ b/massdash/structs/Spectrum.py @@ -17,4 +17,17 @@ def __init__(self, mz, intensity, label): super().__init__(mz, intensity, label) def toPandasDf(self) -> pd.DataFrame: - return super().toPandasDfHelper_(self, 'mz') \ No newline at end of file + return super().toPandasDfHelper_(self, 'mz') + + def pad(self, length: int) -> 'Spectrum': + """ + Pad the chromatogram with zeros on both sides. + + Args: + pad (int): The number of zeros to pad on both sides. + + Returns: + Chromatogram: A new chromatogram object with padded data and intensity. 
+ """ + new_data, new_intensity = super().pad(length) + return Spectrum(new_data, new_intensity, self.label) \ No newline at end of file diff --git a/massdash/structs/TransitionGroup.py b/massdash/structs/TransitionGroup.py index 426b514f..0d6d749c 100644 --- a/massdash/structs/TransitionGroup.py +++ b/massdash/structs/TransitionGroup.py @@ -151,6 +151,22 @@ def empty(self) -> bool: """ return not any(p.empty() for p in self.precursorData) and any(t.empty() for t in self.transitionData) + def pad(self, length: int) -> None: + """ + Pad the data and intensity arrays with zeros to a given length. Modifies the object in place. + + Args: + length (int): The length of the output array + """ + new_precursorData = [] + new_transitionData = [] + for c in self.precursorData: + new_precursorData.append(c.pad(length)) + for c in self.transitionData: + new_transitionData.append(c.pad(length)) + + return TransitionGroup(new_precursorData, new_transitionData, self.sequence, self.precursor_charge) + def plot(self, transitionGroupFeatures: Optional[List[TransitionGroupFeature]] = None, smoothing: Optional[Literal['none', 'sgolay', 'gaussian']] = 'none', From 02e18622a532d6672f7a42f28214d26d767e6f65 Mon Sep 17 00:00:00 2001 From: Justin Sing <32938975+singjc@users.noreply.github.com> Date: Tue, 23 Jan 2024 19:01:35 -0500 Subject: [PATCH 05/19] Update auto-pr-desc.yml --- .github/workflows/auto-pr-desc.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/auto-pr-desc.yml b/.github/workflows/auto-pr-desc.yml index 983c555d..bc80cb57 100644 --- a/.github/workflows/auto-pr-desc.yml +++ b/.github/workflows/auto-pr-desc.yml @@ -6,25 +6,25 @@ name: generate-pull-request-description -on: - workflow_call: - secrets: - token: - required: true +on: [pull_request, push] jobs: update: if: ${{ !contains(github.event.pull_request.body, '') }} runs-on: ubuntu-latest steps: - - uses: octue/generate-pull-request-description@1.0.0.beta-2 + # Step 1: Use 
an action to generate the pull request description + - name: Generate PR Description + uses: octue/generate-pull-request-description@1.0.0.beta-2 id: pr-description with: pull_request_url: ${{ github.event.pull_request.url }} api_token: ${{ secrets.token }} + # Step 2: Update the pull request body using the generated description - name: Update pull request body uses: riskledger/update-pr-description@v2 with: body: ${{ steps.pr-description.outputs.pull_request_description }} token: ${{ secrets.token }} + From e552d21ea27b89b284707873516d14fc92818b5a Mon Sep 17 00:00:00 2001 From: Justin Sing <32938975+singjc@users.noreply.github.com> Date: Wed, 24 Jan 2024 00:53:34 -0500 Subject: [PATCH 06/19] Update auto-pr-desc.yml --- .github/workflows/auto-pr-desc.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/auto-pr-desc.yml b/.github/workflows/auto-pr-desc.yml index bc80cb57..edbf7311 100644 --- a/.github/workflows/auto-pr-desc.yml +++ b/.github/workflows/auto-pr-desc.yml @@ -15,16 +15,16 @@ jobs: steps: # Step 1: Use an action to generate the pull request description - name: Generate PR Description - uses: octue/generate-pull-request-description@1.0.0.beta-2 + uses: octue/generate-pull-request-description@main id: pr-description with: pull_request_url: ${{ github.event.pull_request.url }} - api_token: ${{ secrets.token }} + api_token: ${{ secrets.GITHUB_TOKEN }} # Step 2: Update the pull request body using the generated description - name: Update pull request body uses: riskledger/update-pr-description@v2 with: body: ${{ steps.pr-description.outputs.pull_request_description }} - token: ${{ secrets.token }} + token: ${{ secrets.GITHUB_TOKEN }} From 5cca7056a20b5a323ccf45ff9fd89e4c047c42b9 Mon Sep 17 00:00:00 2001 From: Justin Sing <32938975+singjc@users.noreply.github.com> Date: Wed, 24 Jan 2024 01:06:22 -0500 Subject: [PATCH 07/19] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 38 +++++++++++++++++++++++ 
.github/ISSUE_TEMPLATE/feature_request.md | 20 ++++++++++++ 2 files changed, 58 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/ISSUE_TEMPLATE/feature_request.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 00000000..dd84ea78 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,38 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: '' + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1. Go to '...' +2. Click on '....' +3. Scroll down to '....' +4. See error + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. iOS] + - Browser [e.g. chrome, safari] + - Version [e.g. 22] + +**Smartphone (please complete the following information):** + - Device: [e.g. iPhone6] + - OS: [e.g. iOS8.1] + - Browser [e.g. stock browser, safari] + - Version [e.g. 22] + +**Additional context** +Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md new file mode 100644 index 00000000..bbcbbe7d --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -0,0 +1,20 @@ +--- +name: Feature request +about: Suggest an idea for this project +title: '' +labels: '' +assignees: '' + +--- + +**Is your feature request related to a problem? Please describe.** +A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] + +**Describe the solution you'd like** +A clear and concise description of what you want to happen. 
+ +**Describe alternatives you've considered** +A clear and concise description of any alternative solutions or features you've considered. + +**Additional context** +Add any other context or screenshots about the feature request here. From da4ed715858fd19fb67e93afd930e3d871555eb1 Mon Sep 17 00:00:00 2001 From: Justin Sing <32938975+singjc@users.noreply.github.com> Date: Wed, 24 Jan 2024 01:07:34 -0500 Subject: [PATCH 08/19] Update bug_report.md --- .github/ISSUE_TEMPLATE/bug_report.md | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index dd84ea78..2fcf1c6d 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -28,11 +28,5 @@ If applicable, add screenshots to help explain your problem. - Browser [e.g. chrome, safari] - Version [e.g. 22] -**Smartphone (please complete the following information):** - - Device: [e.g. iPhone6] - - OS: [e.g. iOS8.1] - - Browser [e.g. stock browser, safari] - - Version [e.g. 22] - **Additional context** Add any other context about the problem here. From ef3156cf4ec495e6cdd978157dafb65cea526cce Mon Sep 17 00:00:00 2001 From: Justin Date: Wed, 24 Jan 2024 01:16:28 -0500 Subject: [PATCH 09/19] [ADD] PR template --- .../pull_request_template.md | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/PULL_REQUEST_TEMPLATE/pull_request_template.md diff --git a/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md new file mode 100644 index 00000000..fe25e66f --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE/pull_request_template.md @@ -0,0 +1,36 @@ +# Description + +Please include a summary of the changes and the related issue. Please also include relevant motivation and context. List any dependencies that are required for this change. + +Fixes # (issue) + +## Type of change + +Please delete options that are not relevant. 
+ +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] This change requires a documentation update + +# How Has This Been Tested? + +Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration + +- [ ] Test A +- [ ] Test B + +**Test Configuration**: +* Firmware version: +* Hardware: + +# Checklist: + +- [ ] My code follows the style guidelines of this project +- [ ] I have performed a self-review of my code +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] I have made corresponding changes to the documentation +- [ ] My changes generate no new warnings +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] New and existing unit tests pass locally with my changes +- [ ] Any dependent changes have been merged and published in downstream modules \ No newline at end of file From 6941e32b6e2695f03da54093d9e902d64d7a28e0 Mon Sep 17 00:00:00 2001 From: Justin Sing <32938975+singjc@users.noreply.github.com> Date: Wed, 24 Jan 2024 02:11:35 -0500 Subject: [PATCH 10/19] Update auto-pr-desc.yml --- .github/workflows/auto-pr-desc.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/auto-pr-desc.yml b/.github/workflows/auto-pr-desc.yml index edbf7311..a2d6abd0 100644 --- a/.github/workflows/auto-pr-desc.yml +++ b/.github/workflows/auto-pr-desc.yml @@ -6,7 +6,7 @@ name: generate-pull-request-description -on: [pull_request, push] +on: [pull_request] jobs: update: From db3f36d0b6299092a18b69ba6517f9c8f29ae75c Mon Sep 17 00:00:00 2001 From: Justin Date: Wed, 24 Jan 2024 02:23:51 -0500 Subject: [PATCH 11/19] [ADD] PR template to main .github dir --- .github/pull_request_template.md | 36 
++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/pull_request_template.md diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md new file mode 100644 index 00000000..fe25e66f --- /dev/null +++ b/.github/pull_request_template.md @@ -0,0 +1,36 @@ +# Description + +Please include a summary of the changes and the related issue. Please also include relevant motivation and context. List any dependencies that are required for this change. + +Fixes # (issue) + +## Type of change + +Please delete options that are not relevant. + +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] This change requires a documentation update + +# How Has This Been Tested? + +Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. 
Please also list any relevant details for your test configuration + +- [ ] Test A +- [ ] Test B + +**Test Configuration**: +* Firmware version: +* Hardware: + +# Checklist: + +- [ ] My code follows the style guidelines of this project +- [ ] I have performed a self-review of my code +- [ ] I have commented my code, particularly in hard-to-understand areas +- [ ] I have made corresponding changes to the documentation +- [ ] My changes generate no new warnings +- [ ] I have added tests that prove my fix is effective or that my feature works +- [ ] New and existing unit tests pass locally with my changes +- [ ] Any dependent changes have been merged and published in downstream modules \ No newline at end of file From 8d48c049a1022eafb8ba12943749100474f71859 Mon Sep 17 00:00:00 2001 From: Joshua Charkow Date: Wed, 24 Jan 2024 09:35:07 -0500 Subject: [PATCH 12/19] [MINOR] refactor --- massdash/peakPickers/ConformerPeakPicker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/massdash/peakPickers/ConformerPeakPicker.py b/massdash/peakPickers/ConformerPeakPicker.py index 3f13ef27..e5602022 100644 --- a/massdash/peakPickers/ConformerPeakPicker.py +++ b/massdash/peakPickers/ConformerPeakPicker.py @@ -87,7 +87,7 @@ def pick(self, transition_group, max_int_transition: int=1000) -> List[Transitio """ # Transform data into required input print("Preprocessing data...") - conformer_preprocessor = ConformerPreprocessor(transition_group) + conformer_preprocessor = ConformerPreprocessor(transition_group, self.window_size) input_data = conformer_preprocessor.preprocess(self.library) print("Loading model...") self.load_model() @@ -97,7 +97,7 @@ def pick(self, transition_group, max_int_transition: int=1000) -> List[Transitio print("Getting predicted boundaries...") peak_info = conformer_preprocessor.find_top_peaks(ort_output[0], ["precursor"], self.prediction_threshold, self.prediction_type) # Get actual peak boundaries - peak_info = 
conformer_preprocessor.get_peak_boundaries(peak_info, self.window_size) + peak_info = conformer_preprocessor.get_peak_boundaries(peak_info) print(f"Peak info: {peak_info}") return self._convertConformerFeatureToTransitionGroupFeatures(peak_info, max_int_transition) From b90ef3884bff421ec6c69d18cdedda76ea772ef0 Mon Sep 17 00:00:00 2001 From: Joshua Charkow Date: Wed, 24 Jan 2024 09:48:15 -0500 Subject: [PATCH 13/19] [FEATURE] auto window size determining automatically set window size based on the supplied model --- massdash/peakPickers/ConformerPeakPicker.py | 13 ++++---- massdash/preprocess/ConformerPreprocessor.py | 30 +++++++++++-------- massdash/server/PeakPickingServer.py | 2 -- .../RawTargetedExtractionAnalysisServer.py | 1 - massdash/ui/ConformerPickerUISettings.py | 3 +- 5 files changed, 26 insertions(+), 23 deletions(-) diff --git a/massdash/peakPickers/ConformerPeakPicker.py b/massdash/peakPickers/ConformerPeakPicker.py index e5602022..503a0407 100644 --- a/massdash/peakPickers/ConformerPeakPicker.py +++ b/massdash/peakPickers/ConformerPeakPicker.py @@ -36,7 +36,7 @@ class ConformerPeakPicker: _convertConformerFeatureToTransitionGroupFeatures: Convert conformer predicted feature to TransitionGroupFeatures. """ - def __init__(self, library_file: str, pretrained_model_file: str, window_size: int = 175, prediction_threshold: float = 0.5, prediction_type: str = "logits"): + def __init__(self, library_file: str, pretrained_model_file: str, prediction_threshold: float = 0.5, prediction_type: str = "logits"): """ Initialize the ConformerPeakPicker class. @@ -48,13 +48,15 @@ def __init__(self, library_file: str, pretrained_model_file: str, window_size: i prediction_type (str, optional): The prediction type for peak picking. Defaults to "logits". 
""" self.pretrained_model_file = pretrained_model_file - self.window_size = window_size self.prediction_threshold = prediction_threshold self.prediction_type = prediction_type - self.onnx_session = None self.library = SpectralLibraryLoader(library_file) self._validate_model() + + ## set in load_model + self.onnx_session = None + self.window_size = None def _validate_model(self): """ @@ -74,6 +76,7 @@ def load_model(self): raise ImportError("onnxruntime is required for loading the pretrained Conformer model, but not installed.") # Load pretrained model self.onnx_session = onnxruntime.InferenceSession(self.pretrained_model_file) + self.window_size = self.onnx_session.get_inputs()[0].shape[2] def pick(self, transition_group, max_int_transition: int=1000) -> List[TransitionGroupFeature]: """ @@ -86,11 +89,11 @@ def pick(self, transition_group, max_int_transition: int=1000) -> List[Transitio List[TransitionGroupFeature]: The list of transition group features. """ # Transform data into required input + print("Loading model...") + self.load_model() print("Preprocessing data...") conformer_preprocessor = ConformerPreprocessor(transition_group, self.window_size) input_data = conformer_preprocessor.preprocess(self.library) - print("Loading model...") - self.load_model() print("Predicting...") ort_input = {self.onnx_session.get_inputs()[0].name: input_data} ort_output = self.onnx_session.run(None, ort_input) diff --git a/massdash/preprocess/ConformerPreprocessor.py b/massdash/preprocess/ConformerPreprocessor.py index 62d350db..0738ccf5 100644 --- a/massdash/preprocess/ConformerPreprocessor.py +++ b/massdash/preprocess/ConformerPreprocessor.py @@ -37,9 +37,13 @@ class ConformerPreprocessor(GenericPreprocessor): """ - def __init__(self, transition_group: TransitionGroup): + def __init__(self, transition_group: TransitionGroup, window_size: int=175): super().__init__(transition_group) + ## pad the transition group to the window size + self.transition_group = 
self.transition_group.pad(window_size) + self.window_size = window_size + @staticmethod def min_max_scale(data, min: float=None, max: float=None) -> np.ndarray: """ @@ -102,7 +106,7 @@ def sigmoid(x: np.ndarray) -> np.ndarray: """ return 1 / (1 + np.exp(-x)) - def preprocess(self, library: SpectralLibraryLoader, window_size: int=175) -> np.ndarray: + def preprocess(self, library: SpectralLibraryLoader) -> np.ndarray: """ Preprocesses the data by scaling and transforming it into a numpy array. @@ -123,19 +127,19 @@ def preprocess(self, library: SpectralLibraryLoader, window_size: int=175) -> np # Row index 19: library retention time diff # Row index 20: precursor charge - # pad the transition group to the window size - self.transition_group = self.transition_group.pad(window_size) + if len(self.transition_group.transitionData) != 6: + raise ValueError(f"Transition group must have 6 transitions, but has {len(self.transition_group.transitionData)}.") # initialize empty numpy array - data = np.empty((0, window_size), float) - lib_int_data = np.empty((0, window_size), float) + data = np.empty((0, self.window_size), float) + lib_int_data = np.empty((0, self.window_size), float) for chrom in self.transition_group.transitionData: # append ms2 intensity data to data data = np.append(data, [chrom.intensity], axis=0) lib_int = library.get_fragment_library_intensity(self.transition_group.sequence, self.transition_group.precursor_charge, chrom.label) - lib_int = np.repeat(lib_int, window_size) + lib_int = np.repeat(lib_int, self.window_size) lib_int_data = np.append(lib_int_data, [lib_int], axis=0) # initialize empty numpy array to store scaled data @@ -280,7 +284,7 @@ def find_top_peaks(self, preds, seq_classes: List[str]='input_precursor', thresh return peak_info - def get_peak_boundaries(self, peak_info: dict, window_size: int=175): + def get_peak_boundaries(self, peak_info: dict): """ Adjusts the peak boundaries in the peak_info dictionary based on the window size and the 
dimensions of the input rt_array. Calculates the actual RT values from the rt_array and appends them to the peak_info dictionary. @@ -293,22 +297,22 @@ def get_peak_boundaries(self, peak_info: dict, window_size: int=175): dict: The updated peak_info dictionary with adjusted peak boundaries and RT values. """ rt_array = self.transition_group.transitionData[0].data - if rt_array.shape[0] != window_size: - print(f"input_data {rt_array.shape[0]} was trimmed to {window_size}, adjusting peak_info indexes to map to the original datas dimensions") + if rt_array.shape[0] != self.window_size: + print(f"input_data {rt_array.shape[0]} was trimmed to {self.window_size}, adjusting peak_info indexes to map to the original datas dimensions") for key in peak_info.keys(): for i in range(len(peak_info[key])): peak_info[key][i]['max_idx_org'] = peak_info[key][i]['max_idx'] peak_info[key][i]['start_idx_org'] = peak_info[key][i]['start_idx'] peak_info[key][i]['end_idx_org'] = peak_info[key][i]['end_idx'] - new_max_idx = peak_info[key][i]['max_idx'] + (window_size // 2) - (rt_array.shape[0] // 2) + new_max_idx = peak_info[key][i]['max_idx'] + (self.window_size // 2) - (rt_array.shape[0] // 2) if not new_max_idx < 0: peak_info[key][i]['max_idx'] = new_max_idx - new_start_idx = peak_info[key][i]['start_idx'] + (window_size // 2) - (rt_array.shape[0] // 2) + new_start_idx = peak_info[key][i]['start_idx'] + (self.window_size // 2) - (rt_array.shape[0] // 2) if not new_start_idx < 0: peak_info[key][i]['start_idx'] = new_start_idx - peak_info[key][i]['end_idx'] = peak_info[key][i]['end_idx'] + (window_size // 2) - (rt_array.shape[0] // 2) + peak_info[key][i]['end_idx'] = peak_info[key][i]['end_idx'] + (self.window_size // 2) - (rt_array.shape[0] // 2) # get actual RT value from RT array and append to peak_info for key in peak_info.keys(): diff --git a/massdash/server/PeakPickingServer.py b/massdash/server/PeakPickingServer.py index 4a83cb94..2725b0f8 100644 --- 
a/massdash/server/PeakPickingServer.py +++ b/massdash/server/PeakPickingServer.py @@ -123,7 +123,6 @@ def perform_conformer_peak_picking(self, spec_lib_path: str, tr_group_data: Tran st.write(f"Pretrained model file: {self.peak_picking_settings.peak_picker_algo_settings.pretrained_model_file}") peak_picker = ConformerPeakPicker(spec_lib_path, self.peak_picking_settings.peak_picker_algo_settings.pretrained_model_file, - window_size=self.peak_picking_settings.peak_picker_algo_settings.conformer_window_size, prediction_threshold=self.peak_picking_settings.peak_picker_algo_settings.conformer_prediction_threshold, prediction_type=self.peak_picking_settings.peak_picker_algo_settings.conformer_prediction_type) @@ -148,7 +147,6 @@ def perform_peak_picking(self, tr_group_data: TransitionGroup=None, xic_data: Sq dict: The transition group feature data. """ tr_group_feature_data = {} - st.write("Performing Peak Picking for", tr_group_data) # Perform peak picking based on the selected method if self.peak_picking_settings.do_peak_picking == 'OSW-PyProphet': diff --git a/massdash/server/RawTargetedExtractionAnalysisServer.py b/massdash/server/RawTargetedExtractionAnalysisServer.py index dbcb51b4..6495f2b5 100644 --- a/massdash/server/RawTargetedExtractionAnalysisServer.py +++ b/massdash/server/RawTargetedExtractionAnalysisServer.py @@ -193,7 +193,6 @@ def main(self): if clear_caches: self.targeted_extraction.clear() featureMaps = self.targeted_extraction(transition_list_ui) - st.write(list(featureMaps.values())[0].feature_df) st_log_writer.write(f"Extracting spectra complete! 
Elapsed time: {timedelta(seconds=perf_metrics.execution_time)}") transition_list_ui.validate_extraction(featureMaps, plot_container) diff --git a/massdash/ui/ConformerPickerUISettings.py b/massdash/ui/ConformerPickerUISettings.py index c8ec5f99..88ea4cca 100644 --- a/massdash/ui/ConformerPickerUISettings.py +++ b/massdash/ui/ConformerPickerUISettings.py @@ -39,7 +39,7 @@ def create_ui(self, plot_settings: ChromatogramPlotUISettings): plot_settings : ChromatogramPlotUISettings The plot settings for the chromatogram. """ - self.shipped_model = st.sidebar.checkbox("Use shipped model", value=True, help="Use the shipped model.") + self.shipped_model = st.sidebar.checkbox("Use shipped model", value=True, help="Use the shipped model which picks peaks across 175 points") if self.shipped_model: self.pretrained_model_file = os.path.join(DIRNAME, '..', 'assets', 'models', 'conformer', 'base_cape.onnx') # Check if the model file exists @@ -52,6 +52,5 @@ def create_ui(self, plot_settings: ChromatogramPlotUISettings): self.pretrained_model_file = st.sidebar.text_input("Pretrained model file", value="", help="The pretrained model file to use.") with st.sidebar.expander("Advanced settings"): - self.conformer_window_size = st.number_input("window size", value=175, help="The window size for the conformer model, i.e the number of points of the chromatogram.") self.conformer_prediction_threshold = st.number_input("prediction score threshold", value=0.2, help="The threshold for the conformer models prediction scores to find the top peak boundary.") self.conformer_prediction_type = st.selectbox("prediction type", options=["logits", "sigmoided", "binarized"], help="The type of prediction to use for finding the top peak.") \ No newline at end of file From f4c9578f0315eb00cb7ead39047328def42433e8 Mon Sep 17 00:00:00 2001 From: Ira Horecka Date: Wed, 24 Jan 2024 12:42:11 -0500 Subject: [PATCH 14/19] add module docstring headers --- docs/conf.py | 5 +++++ massdash/__init__.py | 1 - 
massdash/constants.py | 2 +- massdash/loaders/GenericChromatogramLoader.py | 5 +++++ massdash/loaders/GenericSpectrumLoader.py | 3 +++ massdash/loaders/__init__.py | 3 +++ massdash/peakPickers/__init__.py | 3 +++ massdash/plotting/__init__.py | 3 +++ massdash/server/__init__.py | 3 +++ massdash/structs/__init__.py | 3 +++ massdash/ui/__init__.py | 3 +++ test/__init__.py | 1 - 12 files changed, 32 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 99cfa2db..2cf3055b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,3 +1,8 @@ +""" +docs/conf +~~~~~~~~~ +""" + # Configuration file for the Sphinx documentation builder. # # This file only contains a selection of the most common options. For a full diff --git a/massdash/__init__.py b/massdash/__init__.py index 2891f5d8..8f76ef6a 100644 --- a/massdash/__init__.py +++ b/massdash/__init__.py @@ -1,5 +1,4 @@ """ -======= massdash ~~~~~~~~ TODO: ADD general documentation about how to use the auto API here diff --git a/massdash/constants.py b/massdash/constants.py index 7f44dcc4..c52b3f68 100644 --- a/massdash/constants.py +++ b/massdash/constants.py @@ -1,6 +1,6 @@ """ massdash/constants -~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~ """ import os diff --git a/massdash/loaders/GenericChromatogramLoader.py b/massdash/loaders/GenericChromatogramLoader.py index f5f41e3f..a0d98db4 100644 --- a/massdash/loaders/GenericChromatogramLoader.py +++ b/massdash/loaders/GenericChromatogramLoader.py @@ -1,3 +1,8 @@ +""" +massdash/loaders/GenericChromatogramLoader +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +""" + from abc import abstractmethod import pandas as pd from typing import Dict, List, Union, Literal diff --git a/massdash/loaders/GenericSpectrumLoader.py b/massdash/loaders/GenericSpectrumLoader.py index 713937b6..5b52062b 100644 --- a/massdash/loaders/GenericSpectrumLoader.py +++ b/massdash/loaders/GenericSpectrumLoader.py @@ -1,4 +1,7 @@ ''' +massdash/loaders/GenericSpectrumLoader 
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + This is an abstract class for loading spectra from a file. ''' diff --git a/massdash/loaders/__init__.py b/massdash/loaders/__init__.py index 0c008659..c5df530c 100644 --- a/massdash/loaders/__init__.py +++ b/massdash/loaders/__init__.py @@ -1,4 +1,7 @@ """ +massdash/loaders +~~~~~~~~~~~~~~~~ + The :mod:`massseer.loaders` subpackage contains the structures for loading data into MassSeer """ diff --git a/massdash/peakPickers/__init__.py b/massdash/peakPickers/__init__.py index 4dea463e..f1f2767e 100644 --- a/massdash/peakPickers/__init__.py +++ b/massdash/peakPickers/__init__.py @@ -1,4 +1,7 @@ """ +massdash/peakPickers +~~~~~~~~~~~~~~~~~~~~ + This subpackage contains classes for performing peak picking """ from .ConformerPeakPicker import ConformerPeakPicker diff --git a/massdash/plotting/__init__.py b/massdash/plotting/__init__.py index 6b7251a7..31550c3b 100644 --- a/massdash/plotting/__init__.py +++ b/massdash/plotting/__init__.py @@ -1,4 +1,7 @@ """ +massdash/plotting +~~~~~~~~~~~~~~~~~ + This subpackage contains classes for plotting """ diff --git a/massdash/server/__init__.py b/massdash/server/__init__.py index 1a50314d..aa278888 100644 --- a/massdash/server/__init__.py +++ b/massdash/server/__init__.py @@ -1,4 +1,7 @@ """ +massdash/server +~~~~~~~~~~~~~~~ + This subpackage contains "server side" classes for GUI """ from .ExtractedIonChromatogramAnalysisServer import ExtractedIonChromatogramAnalysisServer diff --git a/massdash/structs/__init__.py b/massdash/structs/__init__.py index c46f6218..be91d918 100644 --- a/massdash/structs/__init__.py +++ b/massdash/structs/__init__.py @@ -1,4 +1,7 @@ """ +massdash/structs +~~~~~~~~~~~~~~~~ + This subpackage contains the structures for storing MassSeer data """ diff --git a/massdash/ui/__init__.py b/massdash/ui/__init__.py index 5c4059d7..4f54033f 100644 --- a/massdash/ui/__init__.py +++ b/massdash/ui/__init__.py @@ -1,4 +1,7 @@ """ +massdash/ui +~~~~~~~~~~~ + This subpackage 
contains the structures for storing MassSeer data """ from .BaseUISettings import BaseUISettings diff --git a/test/__init__.py b/test/__init__.py index 620daacb..1adb4b3d 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -2,4 +2,3 @@ test ~~~~ """ - From cd9a05f60592072602709e6e35535d884ad73709 Mon Sep 17 00:00:00 2001 From: Ira Horecka Date: Wed, 24 Jan 2024 12:43:28 -0500 Subject: [PATCH 15/19] change instances of 'massseer' to 'massdash' --- docs/plottingGallery/Heatmap Per Transition.ipynb | 2 +- docs/python_docs/Advanced Plotting.ipynb | 6 +++--- docs/python_docs/Loading Data.rst | 4 ++-- docs/python_docs/Quick Start.ipynb | 2 +- .../Figure-5-Peak-Picking-Demonstration.ipynb | 2 +- massdash/loaders/__init__.py | 2 +- massdash/server/SearchResultsAnalysisServer.py | 10 +++++----- massdash/structs/__init__.py | 2 +- massdash/ui/ExtractedIonChromatogramAnalysisUI.py | 2 +- massdash/ui/__init__.py | 2 +- 10 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/plottingGallery/Heatmap Per Transition.ipynb b/docs/plottingGallery/Heatmap Per Transition.ipynb index fa043b5b..dfa52fac 100644 --- a/docs/plottingGallery/Heatmap Per Transition.ipynb +++ b/docs/plottingGallery/Heatmap Per Transition.ipynb @@ -41,7 +41,7 @@ "tags": [] }, "source": [ - "By changing the ``type_of_heatmap`` in the `:py:class:~massseer.plotting.PlotConfig`, we can plot different axis against one another." + "By changing the ``type_of_heatmap`` in the `:py:class:~massdash.plotting.PlotConfig`, we can plot different axis against one another." ] }, { diff --git a/docs/python_docs/Advanced Plotting.ipynb b/docs/python_docs/Advanced Plotting.ipynb index d17b2d92..486ec6e0 100644 --- a/docs/python_docs/Advanced Plotting.ipynb +++ b/docs/python_docs/Advanced Plotting.ipynb @@ -90,7 +90,7 @@ "tags": [] }, "source": [ - "MassSeer is also capable of generating many different other plot types. Since these plots are more complex, plotting is more involved. 
" + "MassDash is also capable of generating many different other plot types. Since these plots are more complex, plotting is more involved. " ] }, { @@ -317,7 +317,7 @@ }, "outputs": [], "source": [ - "from massseer.plotting import PlotConfig\n", + "from massdash.plotting import PlotConfig\n", "plotConfig = PlotConfig()\n", "plotConfig.include_ms1 = True\n", "plotConfig.include_ms2 = True\n", @@ -353,7 +353,7 @@ }, "outputs": [], "source": [ - "from massseer.plotting import InteractivePlotter\n", + "from massdash.plotting import InteractivePlotter\n", "plotter = InteractivePlotter(plotConfig)" ] }, diff --git a/docs/python_docs/Loading Data.rst b/docs/python_docs/Loading Data.rst index 7a0538c4..79c342d3 100644 --- a/docs/python_docs/Loading Data.rst +++ b/docs/python_docs/Loading Data.rst @@ -3,7 +3,7 @@ Loading Data .. currentmodule:: massdash.loaders -To load raw data into MassSeer, a loader object must be initiated. There are two types of loader classes +To load raw data into MassDash, a loader object must be initiated. There are two types of loader classes 1. :py:class:`Chromatogram Loaders`: Chromatogram Loaders: Raw data stores chromatograms, this allows for faster loading however since extraction has already been performed by the upstream analysis tool. This includes :py:class:`SqMassLoader` @@ -25,5 +25,5 @@ Since each loader type is linked with a results file each loader can be used to Loading Spectrum Data Loading Feature Information -Have an idea for a loader you want to see? Create a issue `here `_. +Have an idea for a loader you want to see? Create a issue `here `_. 
diff --git a/docs/python_docs/Quick Start.ipynb b/docs/python_docs/Quick Start.ipynb index b6e0cd92..b7a4e68a 100644 --- a/docs/python_docs/Quick Start.ipynb +++ b/docs/python_docs/Quick Start.ipynb @@ -133,7 +133,7 @@ "tags": [] }, "source": [ - "Since the .sqMass file does not contain any metadata, to link chromatograms with their corresponding peptide sequence we must link an ``.osw`` file with the ``.sqMass`` file. In MassSeer this is done by initiating a :py:class:`~loaders.SqMassLoader` as shown below. " + "Since the .sqMass file does not contain any metadata, to link chromatograms with their corresponding peptide sequence we must link an ``.osw`` file with the ``.sqMass`` file. In MassDash this is done by initiating a :py:class:`~loaders.SqMassLoader` as shown below. " ] }, { diff --git a/manuscript/main_figures/Figure-5-Peak-Picking-Demonstration.ipynb b/manuscript/main_figures/Figure-5-Peak-Picking-Demonstration.ipynb index d0712622..3ec1d3e9 100644 --- a/manuscript/main_figures/Figure-5-Peak-Picking-Demonstration.ipynb +++ b/manuscript/main_figures/Figure-5-Peak-Picking-Demonstration.ipynb @@ -424,7 +424,7 @@ "metadata": {}, "outputs": [], "source": [ - "from massseer.plotting import InteractivePlotter, PlotConfig\n", + "from massdash.plotting import InteractivePlotter, PlotConfig\n", "\n", "plotConfig = PlotConfig()\n", "plotConfig.smoothing_dict = dict(type='none')\n", diff --git a/massdash/loaders/__init__.py b/massdash/loaders/__init__.py index c5df530c..66774bc7 100644 --- a/massdash/loaders/__init__.py +++ b/massdash/loaders/__init__.py @@ -2,7 +2,7 @@ massdash/loaders ~~~~~~~~~~~~~~~~ -The :mod:`massseer.loaders` subpackage contains the structures for loading data into MassSeer +The :mod:`massdash.loaders` subpackage contains the structures for loading data into MassDash """ from .GenericChromatogramLoader import GenericChromatogramLoader diff --git a/massdash/server/SearchResultsAnalysisServer.py b/massdash/server/SearchResultsAnalysisServer.py 
index 29fb2d8f..ffcc2b09 100644 --- a/massdash/server/SearchResultsAnalysisServer.py +++ b/massdash/server/SearchResultsAnalysisServer.py @@ -27,15 +27,15 @@ class SearchResultsAnalysisServer: A class representing the server-side functionality for search results analysis. """ - def __init__(self, massseer_gui) -> None: + def __init__(self, massdash_gui) -> None: """ Initializes the SearchResultsAnalysisServer object. Args: - massseer_gui : object - An object representing the MassSeer GUI. + massdash_gui : object + An object representing the MassDash GUI. """ - self.massseer_gui = massseer_gui + self.massdash_gui = massdash_gui self.file_input_settings = None self.analysis_settings = None self.analysis = None @@ -108,7 +108,7 @@ def main(self) -> None: self.analysis_type.analysis_type() # self.load_search_result_entries.clear() - search_results_access_dict = self.load_search_result_entries(self.massseer_gui.file_input_settings.feature_file_entries) + search_results_access_dict = self.load_search_result_entries(self.massdash_gui.file_input_settings.feature_file_entries) # Create a UI for the analysis if self.analysis_type.analysis == "Results": diff --git a/massdash/structs/__init__.py b/massdash/structs/__init__.py index be91d918..ab52de73 100644 --- a/massdash/structs/__init__.py +++ b/massdash/structs/__init__.py @@ -2,7 +2,7 @@ massdash/structs ~~~~~~~~~~~~~~~~ -This subpackage contains the structures for storing MassSeer data +This subpackage contains the structures for storing MassDash data """ from .TransitionGroupFeature import TransitionGroupFeature diff --git a/massdash/ui/ExtractedIonChromatogramAnalysisUI.py b/massdash/ui/ExtractedIonChromatogramAnalysisUI.py index badb6817..73ea0d2f 100644 --- a/massdash/ui/ExtractedIonChromatogramAnalysisUI.py +++ b/massdash/ui/ExtractedIonChromatogramAnalysisUI.py @@ -30,7 +30,7 @@ def __init__(self, transition_list: SpectralLibraryLoader) -> None: Initializes the ExtractedIonChromatogramAnalysisServer object. 
Args: - massseer_gui : object + massdash_gui : object An object representing the MassDash GUI. transition_list : object An object representing the transition list. diff --git a/massdash/ui/__init__.py b/massdash/ui/__init__.py index 4f54033f..e1e755c4 100644 --- a/massdash/ui/__init__.py +++ b/massdash/ui/__init__.py @@ -2,7 +2,7 @@ massdash/ui ~~~~~~~~~~~ -This subpackage contains the structures for storing MassSeer data +This subpackage contains the structures for storing MassDash data """ from .BaseUISettings import BaseUISettings from .ChromatogramPlotUISettings import ChromatogramPlotUISettings From 0cd217c142ac869d8c38ee31e3a278e74de65139 Mon Sep 17 00:00:00 2001 From: Joshua Charkow Date: Wed, 24 Jan 2024 12:45:49 -0500 Subject: [PATCH 16/19] fix: address Justin's comments --- massdash/constants.py | 3 ++- massdash/peakPickers/ConformerPeakPicker.py | 20 ++++++++++++++------ massdash/structs/Mobilogram.py | 2 +- massdash/structs/Spectrum.py | 2 +- massdash/structs/TransitionGroup.py | 8 +++++++- massdash/ui/ConformerPickerUISettings.py | 4 ++-- 6 files changed, 27 insertions(+), 12 deletions(-) diff --git a/massdash/constants.py b/massdash/constants.py index 7f44dcc4..5eba91ef 100644 --- a/massdash/constants.py +++ b/massdash/constants.py @@ -23,4 +23,5 @@ URL_TEST_OSW = "https://github.com/Roestlab/massdash/raw/dev/test/test_data/example_dia/openswath/osw/test.osw" URL_TEST_PQP = "https://github.com/Roestlab/massdash/raw/dev/test/test_data/example_dia/openswath/lib/test.pqp" URL_TEST_RAW_MZML = "https://github.com/Roestlab/massdash/raw/dev/test/test_data/example_dia/raw/test_raw_1.mzML" -URL_TEST_DREAMDIA_REPORT = "https://github.com/Roestlab/massdash/raw/dev/test/test_data/example_dia/dreamdia/test_dreamdia_report.tsv" \ No newline at end of file +URL_TEST_DREAMDIA_REPORT = "https://github.com/Roestlab/massdash/raw/dev/test/test_data/example_dia/dreamdia/test_dreamdia_report.tsv" +URL_PRETRAINED_CONFORMER = 
"https://github.com/Roestlab/massdash/releases/download/v0.0.1-alpha/base_cape.onnx" \ No newline at end of file diff --git a/massdash/peakPickers/ConformerPeakPicker.py b/massdash/peakPickers/ConformerPeakPicker.py index 503a0407..a5387d6a 100644 --- a/massdash/peakPickers/ConformerPeakPicker.py +++ b/massdash/peakPickers/ConformerPeakPicker.py @@ -14,6 +14,7 @@ from ..loaders.SpectralLibraryLoader import SpectralLibraryLoader # Utils from ..util import check_package +from ..util import LOGGER onnxruntime, ONNXRUNTIME_AVAILABLE = check_package("onnxruntime") @@ -57,6 +58,8 @@ def __init__(self, library_file: str, pretrained_model_file: str, prediction_thr ## set in load_model self.onnx_session = None self.window_size = None + + LOGGER.name = __class__.__name__ def _validate_model(self): """ @@ -76,7 +79,12 @@ def load_model(self): raise ImportError("onnxruntime is required for loading the pretrained Conformer model, but not installed.") # Load pretrained model self.onnx_session = onnxruntime.InferenceSession(self.pretrained_model_file) - self.window_size = self.onnx_session.get_inputs()[0].shape[2] + if len(self.onnx_session.get_inputs()) == 0: + raise ValueError("Pretrained model does not have any inputs.") + elif len(self.onnx_session.get_inputs()[0].shape) != 3: + raise ValueError("First input to model must be a 3D numpy array, current shape: {}".format(len(self.onnx_session.get_inputs()[0].shape))) + else: + self.window_size = self.onnx_session.get_inputs()[0].shape[2] def pick(self, transition_group, max_int_transition: int=1000) -> List[TransitionGroupFeature]: """ @@ -89,19 +97,19 @@ def pick(self, transition_group, max_int_transition: int=1000) -> List[Transitio List[TransitionGroupFeature]: The list of transition group features. 
""" # Transform data into required input - print("Loading model...") + LOGGER.info("Loading model...") self.load_model() - print("Preprocessing data...") + LOGGER.info("Preprocessing data...") conformer_preprocessor = ConformerPreprocessor(transition_group, self.window_size) input_data = conformer_preprocessor.preprocess(self.library) - print("Predicting...") + LOGGER.info("Predicting...") ort_input = {self.onnx_session.get_inputs()[0].name: input_data} ort_output = self.onnx_session.run(None, ort_input) - print("Getting predicted boundaries...") + LOGGER.info("Getting predicted boundaries...") peak_info = conformer_preprocessor.find_top_peaks(ort_output[0], ["precursor"], self.prediction_threshold, self.prediction_type) # Get actual peak boundaries peak_info = conformer_preprocessor.get_peak_boundaries(peak_info) - print(f"Peak info: {peak_info}") + LOGGER.info(f"Peak info: {peak_info}") return self._convertConformerFeatureToTransitionGroupFeatures(peak_info, max_int_transition) def _convertConformerFeatureToTransitionGroupFeatures(self, peak_info: dict, max_int_transition: int=1000) -> List[TransitionGroupFeature]: diff --git a/massdash/structs/Mobilogram.py b/massdash/structs/Mobilogram.py index cb64e1e0..002147c0 100644 --- a/massdash/structs/Mobilogram.py +++ b/massdash/structs/Mobilogram.py @@ -21,7 +21,7 @@ def toPandasDf(self) -> pd.DataFrame: def pad(self, length: int) -> 'Mobilogram': """ - Pad the chromatogram with zeros on both sides. + Pad the mobilogram with zeros on both sides. Args: pad (int): The number of zeros to pad on both sides. diff --git a/massdash/structs/Spectrum.py b/massdash/structs/Spectrum.py index e6e4eb85..14c5d056 100644 --- a/massdash/structs/Spectrum.py +++ b/massdash/structs/Spectrum.py @@ -21,7 +21,7 @@ def toPandasDf(self) -> pd.DataFrame: def pad(self, length: int) -> 'Spectrum': """ - Pad the chromatogram with zeros on both sides. + Pad the spectrum with zeros on both sides. 
Args: pad (int): The number of zeros to pad on both sides. diff --git a/massdash/structs/TransitionGroup.py b/massdash/structs/TransitionGroup.py index 6ce5859d..98c75d8b 100644 --- a/massdash/structs/TransitionGroup.py +++ b/massdash/structs/TransitionGroup.py @@ -153,10 +153,16 @@ def empty(self) -> bool: def pad(self, length: int) -> None: """ - Pad the data and intensity arrays with zeros to a given length. Modifies the object in place. + Pad the data and intensity arrays with zeros to a given length on both sides. Or slices to the given length if the length is smaller than the current length. + + E.g. if the data array is [1, 2, 3] and the desired length is 7, + the padded data array will be [0, 0, 1, 2, 3, 0, 0]. Args: length (int): The length of the output array + + Returns: + TransitionGroup: A new TransitionGroup object with padded data and intensity. """ new_precursorData = [] new_transitionData = [] diff --git a/massdash/ui/ConformerPickerUISettings.py b/massdash/ui/ConformerPickerUISettings.py index 88ea4cca..1cb906c7 100644 --- a/massdash/ui/ConformerPickerUISettings.py +++ b/massdash/ui/ConformerPickerUISettings.py @@ -9,6 +9,7 @@ # UI from .ChromatogramPlotUISettings import ChromatogramPlotUISettings # Utils +from ..constants import URL_PRETRAINED_CONFORMER from ..util import download_file DIRNAME = os.path.dirname(__file__) @@ -46,8 +47,7 @@ def create_ui(self, plot_settings: ChromatogramPlotUISettings): if not os.path.exists(self.pretrained_model_file): with st.spinner(f"Downloading pretrained model: {self.pretrained_model_file}..."): tmp_download_folder = os.path.join(DIRNAME, '..', 'assets', 'models', 'conformer') - url_pretrained_conformer = "https://github.com/Roestlab/massdash/releases/download/v0.0.1-alpha/base_cape.onnx" - download_file(url_pretrained_conformer, tmp_download_folder) + download_file(URL_PRETRAINED_CONFORMER, tmp_download_folder) else: self.pretrained_model_file = st.sidebar.text_input("Pretrained model file", value="", 
help="The pretrained model file to use.") From 5fcbd5e89dba999385de0391388ce647f269e851 Mon Sep 17 00:00:00 2001 From: Joshua Charkow Date: Wed, 24 Jan 2024 12:59:45 -0500 Subject: [PATCH 17/19] doc: rename method pad-->adjust_length rename pad to adjust_length because the length can either be padded or truncated --- massdash/preprocess/ConformerPreprocessor.py | 4 ++-- massdash/structs/Chromatogram.py | 10 +++++----- massdash/structs/Data1D.py | 15 +++++++++++++-- massdash/structs/Mobilogram.py | 10 +++++----- massdash/structs/Spectrum.py | 10 +++++----- massdash/structs/TransitionGroup.py | 16 +++++++++++----- 6 files changed, 41 insertions(+), 24 deletions(-) diff --git a/massdash/preprocess/ConformerPreprocessor.py b/massdash/preprocess/ConformerPreprocessor.py index 0738ccf5..320f7e2c 100644 --- a/massdash/preprocess/ConformerPreprocessor.py +++ b/massdash/preprocess/ConformerPreprocessor.py @@ -41,7 +41,7 @@ def __init__(self, transition_group: TransitionGroup, window_size: int=175): super().__init__(transition_group) ## pad the transition group to the window size - self.transition_group = self.transition_group.pad(window_size) + self.transition_group = self.transition_group.adjust_length(window_size) self.window_size = window_size @staticmethod @@ -113,7 +113,7 @@ def preprocess(self, library: SpectralLibraryLoader) -> np.ndarray: Code adapted from CAPE Args: - window_size (int): The desired window size for trimming the data. Default is 175. + SpectralLibraryLoader (SpectralLibraryLoader): The spectral library loader. Returns: np.ndarray: The preprocessed data as a numpy array with shape (1, 21, len(data[0])). 
diff --git a/massdash/structs/Chromatogram.py b/massdash/structs/Chromatogram.py index 08fae0ab..dec98cd9 100644 --- a/massdash/structs/Chromatogram.py +++ b/massdash/structs/Chromatogram.py @@ -31,15 +31,15 @@ def to_pyopenms(self, id: Optional[str] = None): def toPandasDf(self) -> pd.DataFrame: return super().toPandasDfHelper_('rt') - def pad(self, length: int) -> 'Chromatogram': + def adjust_length(self, length: int) -> 'Chromatogram': """ - Pad the chromatogram with zeros on both sides. + Adjust the length of the chromatogram to a given length, this involved either padding or truncating the chromatogram Args: - pad (int): The number of zeros to pad on both sides. + length (int): The desired output length. Returns: - Chromatogram: A new chromatogram object with padded data and intensity. + Chromatogram: A new chromatogram object with padded/truncated rt and intensity. """ - new_data, new_intensity = super().pad(length) + new_data, new_intensity = super().adjust_length(length) return Chromatogram(new_data, new_intensity, self.label) \ No newline at end of file diff --git a/massdash/structs/Data1D.py b/massdash/structs/Data1D.py index d3f2667e..42f4fac1 100644 --- a/massdash/structs/Data1D.py +++ b/massdash/structs/Data1D.py @@ -100,15 +100,26 @@ def median(self, boundary: Optional[Tuple[float, float]] = None) -> float: else: return np.median(self.intensity) - def pad(self, length): + def adjust_length(self, length): """ + Adjusts the length of the Data1D object. + + If the length is smaller than the current length, the data will be sliced to the given length. + If the length is larger than the current length, the data will be padded with zeros on both sides. + + E.g. if the data array is [1, 2, 3] and the desired length is 7, + the returned array will be [0, 0, 1, 2, 3, 0, 0]. + + E.g. if the data array is [1, 2, 3] and the desired length is 1, + the returned data array will be [1]. + Pad the data and intensity arrays with zeros to a given length. 
Modifies the object in place. Args: length (int): The length of the output array Returns: - (new_data, new_intensity) : tuple of padded data and intensity + (new_data, new_intensity) : tuple of padded/truncated data and intensity """ diff --git a/massdash/structs/Mobilogram.py b/massdash/structs/Mobilogram.py index 002147c0..06149fd9 100644 --- a/massdash/structs/Mobilogram.py +++ b/massdash/structs/Mobilogram.py @@ -19,15 +19,15 @@ def __init__(self, im, intensity, label): def toPandasDf(self) -> pd.DataFrame: return super().toPandasDfHelper_(self, 'im') - def pad(self, length: int) -> 'Mobilogram': + def adjust_length(self, length: int) -> 'Mobilogram': """ - Pad the mobilogram with zeros on both sides. + Adjust the length of the mobilogram to a given length, this involved either padding or truncating the mobilogram Args: - pad (int): The number of zeros to pad on both sides. + length (int): The desired output length. Returns: - Chromatogram: A new chromatogram object with padded data and intensity. + Mobilogram: A new Mobilogram object with padded/truncated driftTime and intensity. """ - new_data, new_intensity = super().pad(length) + new_data, new_intensity = super().adjust_length(length) return Mobilogram(new_data, new_intensity, self.label) \ No newline at end of file diff --git a/massdash/structs/Spectrum.py b/massdash/structs/Spectrum.py index 14c5d056..6c73b809 100644 --- a/massdash/structs/Spectrum.py +++ b/massdash/structs/Spectrum.py @@ -19,15 +19,15 @@ def __init__(self, mz, intensity, label): def toPandasDf(self) -> pd.DataFrame: return super().toPandasDfHelper_(self, 'mz') - def pad(self, length: int) -> 'Spectrum': + def adjust_length(self, length: int) -> 'Spectrum': """ - Pad the spectrum with zeros on both sides. + Adjust the length of the mobilogram to a given length, this involved either padding or truncating the chromatogram Args: - pad (int): The number of zeros to pad on both sides. + length (int): The desired output length. 
Returns: - Chromatogram: A new chromatogram object with padded data and intensity. + Spectrum: A new Spectrum object with padded/truncated length of mz and intensity. """ - new_data, new_intensity = super().pad(length) + new_data, new_intensity = super().adjust_length(length) return Spectrum(new_data, new_intensity, self.label) \ No newline at end of file diff --git a/massdash/structs/TransitionGroup.py b/massdash/structs/TransitionGroup.py index 98c75d8b..59559898 100644 --- a/massdash/structs/TransitionGroup.py +++ b/massdash/structs/TransitionGroup.py @@ -151,12 +151,18 @@ def empty(self) -> bool: """ return not any(p.empty() for p in self.precursorData) and any(t.empty() for t in self.transitionData) - def pad(self, length: int) -> None: + def adjust_length(self, length: int) -> None: """ - Pad the data and intensity arrays with zeros to a given length on both sides. Or slices to the given length if the length is smaller than the current length. + Adjusts the length size of the chromatograms, mobilograms, and spectra. + + If the length is smaller than the current length, the data will be sliced to the given length. + If the length is larger than the current length, the data will be padded with zeros on both sides. E.g. if the data array is [1, 2, 3] and the desired length is 7, - the padded data array will be [0, 0, 1, 2, 3, 0, 0]. + the returned array will be [0, 0, 1, 2, 3, 0, 0]. + + E.g. if the data array is [1, 2, 3] and the desired length is 1, + the returned data array will be [1]. 
Args: length (int): The length of the output array @@ -167,9 +173,9 @@ def pad(self, length: int) -> None: new_precursorData = [] new_transitionData = [] for c in self.precursorData: - new_precursorData.append(c.pad(length)) + new_precursorData.append(c.adjust_length(length)) for c in self.transitionData: - new_transitionData.append(c.pad(length)) + new_transitionData.append(c.adjust_length(length)) return TransitionGroup(new_precursorData, new_transitionData, self.sequence, self.precursor_charge) From 2431bb533a54d37e5abcbdb3ca4f223949c98796 Mon Sep 17 00:00:00 2001 From: Joshua Charkow Date: Wed, 24 Jan 2024 15:15:43 -0500 Subject: [PATCH 18/19] doc: change mob/chrom to spectrum --- massdash/structs/Spectrum.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/massdash/structs/Spectrum.py b/massdash/structs/Spectrum.py index 6c73b809..000e40b8 100644 --- a/massdash/structs/Spectrum.py +++ b/massdash/structs/Spectrum.py @@ -21,7 +21,7 @@ def toPandasDf(self) -> pd.DataFrame: def adjust_length(self, length: int) -> 'Spectrum': """ - Adjust the length of the mobilogram to a given length, this involved either padding or truncating the chromatogram + Adjust the length of the spectrum to a given length, this involved either padding or truncating the spectrum Args: length (int): The desired output length. 
From 03ccb2c5a787906ccb5a07fc30e95c484b96e9fb Mon Sep 17 00:00:00 2001 From: Joshua Charkow Date: Wed, 24 Jan 2024 15:17:49 -0500 Subject: [PATCH 19/19] minor: remove unnecessary imports --- massdash/structs/Chromatogram.py | 2 +- massdash/structs/Mobilogram.py | 1 - massdash/structs/Spectrum.py | 1 - 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/massdash/structs/Chromatogram.py b/massdash/structs/Chromatogram.py index dec98cd9..951ceb60 100644 --- a/massdash/structs/Chromatogram.py +++ b/massdash/structs/Chromatogram.py @@ -4,7 +4,7 @@ """ import pyopenms as po -from typing import Optional, Tuple, List +from typing import Optional import pandas as pd # Structs diff --git a/massdash/structs/Mobilogram.py b/massdash/structs/Mobilogram.py index 06149fd9..b3622c30 100644 --- a/massdash/structs/Mobilogram.py +++ b/massdash/structs/Mobilogram.py @@ -3,7 +3,6 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from typing import Tuple import pandas as pd # Structs diff --git a/massdash/structs/Spectrum.py b/massdash/structs/Spectrum.py index 000e40b8..88e2dd93 100644 --- a/massdash/structs/Spectrum.py +++ b/massdash/structs/Spectrum.py @@ -3,7 +3,6 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~ """ -from typing import Tuple import pandas as pd # Structs