From ea19c54054c97cd282a0ff235d30ccb9614f6113 Mon Sep 17 00:00:00 2001 From: amnona Date: Mon, 19 Aug 2024 13:00:23 +0300 Subject: [PATCH 01/10] Get the heatmap click info from Experiment --- calour/amplicon_experiment.py | 18 ++++++++++++++++++ calour/experiment.py | 18 ++++++++++++++++++ calour/heatmap/plotgui.py | 2 +- calour/heatmap/plotgui_qt5.py | 2 +- 4 files changed, 38 insertions(+), 2 deletions(-) diff --git a/calour/amplicon_experiment.py b/calour/amplicon_experiment.py index 17de670f..41c3f475 100644 --- a/calour/amplicon_experiment.py +++ b/calour/amplicon_experiment.py @@ -84,6 +84,24 @@ class AmpliconExperiment(Experiment): def __init__(self, *args, databases=('dbbact',), **kwargs): super().__init__(*args, databases=databases, **kwargs) + def _get_abundance_info(self, row:int , col:int): + '''Get a string with the abundance information for display in the interactive heatmap + For amplicon experiment (that is based on normalized discrete reads), we show the abundance in float format (with 2 decimal points). + + Parameters + ---------- + row : int + The row index + col : int + The column index + + Returns + ------- + str + The string with the abundance information + ''' + return '{:.2f}'.format(self.data[row, col]) + def heatmap(self, *args, **kwargs): '''Plot a heatmap for the amplicon experiment. 
diff --git a/calour/experiment.py b/calour/experiment.py index b0b2f52a..ac8c19c0 100644 --- a/calour/experiment.py +++ b/calour/experiment.py @@ -237,6 +237,24 @@ def __getitem__(self, pos): dat = self.get_data() return dat[sample_pos, feature_pos] + def _get_abundance_info(self, row:int , col:int): + '''Get a string with the abundance information for display in the interactive heatmap + Can be overwritten with different classes to show additional row/col information + + Parameters + ---------- + row : int + The row index + col : int + The column index + + Returns + ------- + str + The string with the abundance information + ''' + return '{:.2E}'.format(self.data[row, col]) + def copy(self): '''Copy the object (deeply). diff --git a/calour/heatmap/plotgui.py b/calour/heatmap/plotgui.py index 9f58dd42..2297c595 100644 --- a/calour/heatmap/plotgui.py +++ b/calour/heatmap/plotgui.py @@ -187,7 +187,7 @@ def get_selection_info(self): row, col = self.current_select fid = self.exp.feature_metadata.index[col] sid = self.exp.sample_metadata.index[row] - abd = self.exp.data[row, col] + abd = self.exp._get_abundance_info(row, col) return sid, fid, abd def get_database_annotations(self, feature): diff --git a/calour/heatmap/plotgui_qt5.py b/calour/heatmap/plotgui_qt5.py index 921081a3..4f4b6369 100644 --- a/calour/heatmap/plotgui_qt5.py +++ b/calour/heatmap/plotgui_qt5.py @@ -77,7 +77,7 @@ def show_info(self): self._display_annotation_in_qlistwidget(annt) def _update_info_labels(self, sid, fid, abd): - self.app_window.w_abund.setText('{:.01f}'.format(abd)) + self.app_window.w_abund.setText(abd) self.app_window.w_fid.setText(str(fid)) self.app_window.w_sid.setText(str(sid)) sample_field = str(self.app_window.w_sfield.currentText()) From b0c7eb1de6b164c55d871dafd0909624055c9835 Mon Sep 17 00:00:00 2001 From: amnona Date: Mon, 19 Aug 2024 13:03:11 +0300 Subject: [PATCH 02/10] add CorrelationExperiment --- calour/__init__.py | 6 +- calour/correlation_experiment.py | 315 
+++++++++++++++++++++++++++++++ calour/io.py | 35 +++- 3 files changed, 353 insertions(+), 3 deletions(-) create mode 100644 calour/correlation_experiment.py diff --git a/calour/__init__.py b/calour/__init__.py index 5379192a..2474942f 100644 --- a/calour/__init__.py +++ b/calour/__init__.py @@ -12,6 +12,7 @@ from .experiment import Experiment from .amplicon_experiment import AmpliconExperiment +from .correlation_experiment import CorrelationExperiment from .ms1_experiment import MS1Experiment from .mrna_experiment import mRNAExperiment from .io import read, read_amplicon, read_ms, read_qiime2 @@ -21,13 +22,14 @@ __credits__ = "https://github.com/biocore/calour/graphs/contributors" __version__ = "2024.5.30" -__all__ = ['read', 'read_amplicon', 'read_ms', 'read_qiime2', +__all__ = ['read', 'read_amplicon', 'read_ms', 'read_qiime2', 'read_correlation', 'Experiment', 'AmpliconExperiment', 'MS1Experiment','mRNAExperiment', 'set_log_level'] # add member functions to the class -register_functions((Experiment, AmpliconExperiment, MS1Experiment, mRNAExperiment)) +register_functions((Experiment, AmpliconExperiment, MS1Experiment, mRNAExperiment, CorrelationExperiment)) +# register_functions((Experiment, AmpliconExperiment, MS1Experiment, mRNAExperiment)) # setting False allows other logger to print log. diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py new file mode 100644 index 00000000..bac7cc56 --- /dev/null +++ b/calour/correlation_experiment.py @@ -0,0 +1,315 @@ +''' +correlation experiment (:mod:`calour.correlation_experiment`) +======================================================= + +.. currentmodule:: calour.correlation_experiment + +Classes +^^^^^^^ +.. autosummary:: + :toctree: generated + + CorrelationExperiment +''' + +# ---------------------------------------------------------------------------- +# Copyright (c) 2016--, Calour development team. +# +# Distributed under the terms of the Modified BSD License. 
+# +# The full license is in the file COPYING.txt, distributed with this software. +# ---------------------------------------------------------------------------- + +from logging import getLogger + +import numpy as np +import pandas as pd +import scipy.stats +from statsmodels.stats.multitest import multipletests + +from .experiment import Experiment +from .util import _to_list +from .analysis import _new_experiment_from_pvals, _CALOUR_DIRECTION, _CALOUR_STAT + +logger = getLogger(__name__) + + +class CorrelationExperiment(Experiment): + '''This class stores a correlation matrix data and corresponding analysis methods. + Besides the main data matrix (which is the correlation values) it also stores an additional Experiment (in self.qvals) that contains a matrix containing the q-values for each correlation. + These can be plotted on top of the correlation matrix to show the significance of each correlation. + + This is a child class of :class:`.Experiment`. + + Parameters + ---------- + data : numpy.ndarray or scipy.sparse.csr_matrix + The Correlation values (between -1 and 1) + sample_metadata : pandas.DataFrame + The metadata on the samples (rows in the matrix, shown in columns in the heatmap) + feature_metadata : pandas.DataFrame + The metadata on the features (columns in the matrix, shown in rows in the heatmap) + qvals : numpy.ndarray or scipy.sparse.csr_matrix or None + The q-values for the correlation values + description : str + name of experiment + sparse : bool + store the data array in :class:`scipy.sparse.csr_matrix` + or :class:`numpy.ndarray` + databases: iterable of str, optional + database interface names to show by default in heatmap() function + by default use None (no databases) + For ASV correlations, can use 'dbbact' + For gene correlations, can use 'mrna' + + Attributes + ---------- + data : numpy.ndarray or scipy.sparse.csr_matrix + The log ratio table for OTUs or ASVs. + Samples are in row and features in column. 
values are float (can be negative) + with np.nan indicating ratio for the specific feature does not exist. + sample_metadata : pandas.DataFrame + The metadata on the samples + feature_metadata : pandas.DataFrame + The metadata on the features + qvals: numpy.ndarray or scipy.sparse.csr_matrix or None + The q-values for the correlation values + shape : tuple of (int, int) + the dimension of data + sparse : bool + store the data as sparse matrix (scipy.sparse.csr_matrix) or dense numpy array. + info : dict + information about the experiment (data md5, filenames, etc.) + description : str + name of the experiment + databases : dict + keys are the database names (i.e. 'dbbact' / 'gnps') + values are the database specific data for the experiment (i.e. annotations for dbbact) + + See Also + -------- + Experiment + ''' + def __init__(self, *args, qvals=None, **kwargs): + super().__init__(*args, **kwargs) + if qvals is not None: + if self.data.shape != qvals.shape: + raise ValueError('qvals shape %s does not match data shape %s' % (qvals.shape, self.data.shape)) + self.qvals = Experiment(data=qvals, sample_metadata=self.sample_metadata, feature_metadata=self.feature_metadata, sparse=self.sparse) + + def _sync_qvals(self): + '''Sync the q-values experiment with the main experiment + Used to make sure the q-values are in the same order as the data matrix. + ''' + self.qvals = self.qvals.filter_ids(self.feature_metadata.index, axis='f') + self.qvals = self.qvals.filter_ids(self.sample_metadata.index, axis='s') + + def _get_abundance_info(self, row:int , col:int): + '''Get a string with the abundance information for display in the interactive heatmap + Also returns the qvalue if it exists. 
+ + Parameters + ---------- + row : int + The row index + col : int + The column index + + Returns + ------- + str + The string with the abundance information + ''' + if self.qvals is None: + qval = 'NA' + else: + qval = self.qvals.data[row, col] + return '{:.2E}, qval: {:.2f}'.format(self.data[row, col], qval) + + def heatmap(self, show_significance=True, significance_threshold=0.05, significance_plot_params={'color': 'red'},*args, **kwargs): + '''Plot a heatmap for the ratio experiment. + + This method accepts the same parameters as input with + its parent class method. + In addition, it accepts the following parameters: + show_significance : bool, optional + If True, the q-values will be plotted on top of the heatmap. + significance_threshold : float, optional + The threshold for the q-values to be considered significant. + significance_plot_params : dict, optional + The parameters to be passed to the plot function for the significance values. + + See Also + -------- + Experiment.heatmap + + ''' + if 'clim' not in kwargs: + min_val = np.min(self.get_data()[:]) + max_val = np.max(self.get_data()[:]) + range_val = np.max([np.abs(min_val), np.abs(max_val)]) + kwargs['clim'] = (-range_val, range_val) + if 'cmap' not in kwargs: + kwargs['cmap'] = 'coolwarm' + + ax = super().heatmap(*args, **kwargs) + if show_significance: + if self.qvals is not None: + self._sync_qvals() + qv = self.qvals.get_data(sparse=False) + show_pos = np.where(qv < significance_threshold) + for i, j in zip(*show_pos): + ax.plot([i-0.5, i+0.5], [j-0.5, j+0.5], **significance_plot_params) + ax.plot([i-0.5, i+0.5], [j+0.5, j-0.5], **significance_plot_params) + + return ax + + def save(self, prefix, **kwargs): + '''Save the correlation experiment to a file + overwrites the save function in Experiment to also save the q-values (as a new experiment named prefix+"_qvals"). + + Parameters + ---------- + prefix : str + file path (suffixes auto added for the 3 files) to save to. 
+ **kwargs : dict + Additional arguments to pass to the Experiment.save() function + ''' + super().save(prefix, **kwargs) + if self.qvals is not None: + self.qvals.save_biom(prefix+'_qvals.biom') + logger.debug('Saved qvals experiment to %s_qvals.biom' % prefix) + else: + logger.warning('No qvals attached to experiment. qvals experiment not saved') + + def _calculate_corr_matrix(df1, df2): + '''Calculate the spearman correlation matrix between all columns of two DataFrames + Ignores non-numeric values + + Parameters + ---------- + df : pandas.DataFrame + The DataFrame to calculate the correlation matrix for + + Returns + ------- + corrs : numpy.ndarray + The correlation matrix + pvals : numpy.ndarray + The p-values for the correlation matrix + ''' + pvals=np.ones([len(df1.columns),len(df2.columns)]) + corrs=np.zeros([len(df1.columns),len(df2.columns)]) + for idx1,r in enumerate(df1.columns): + for idx2,c in enumerate(df2.columns): + c1=df1[r].values + c2=df2[c].values + try: + ccor = scipy.stats.spearmanr(c1,c2,nan_policy='omit') + pvals[idx1][idx2] = ccor.pvalue + corrs[idx1][idx2] = ccor.correlation + if np.isnan(ccor.correlation): + pvals[idx1][idx2] = 1 + corrs[idx1][idx2] = 0 + except: + pvals[idx1][idx2] = 1 + corrs[idx1][idx2] = 0 + return corrs,pvals + + + # def save(self, filename, **kwargs): + # '''Save the correlation experiment to a file + + # Parameters + # ---------- + # filename : str + # The file to save the experiment to + # **kwargs : dict + # Additional arguments to pass to the save + # ''' + # super().save(filename, **kwargs) + # if self.qvals is not None: + # self.qvals(filename+'.qvals', **kwargs) + + + @classmethod + def read_correlation(self, filename, **kwargs): + '''Read the correlation experiment from a file + + Parameters + ---------- + filename : str + The file to read the experiment from + **kwargs : dict + Additional arguments to pass to the read + ''' + from .io import read + + if 'normalize' not in kwargs: + kwargs['normalize'] = 
None + + exp = read(filename+'.biom', sample_metadata_file=filename+'_sample.txt', feature_metadata_file=filename+'_feature.txt', cls=CorrelationExperiment, **kwargs) + + exp.qvals = read(filename+'_qvals.biom', sample_metadata_file=filename+'_qvals_sample.txt', feature_metadata_file=filename+'_qvals_feature.txt', **kwargs) + return exp + + # @classmethod + # def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None): + # '''Create a CorrelationExperiment from a pandas DataFrame (such as the experiment sample_metadata) + # Calculates the correlations between all dataframe columns + + # Parameters + # ---------- + # df1 : pandas.DataFrame + # The first DataFrame to calculate the correlation matrix for + # df2 : pandas.DataFrame + # The second DataFrame to calculate the correlation matrix for + # If None, will use df1 + + # Returns + # ------- + # CorrelationExperiment + # The correlation experiment + # ''' + # if df2 is None: + # df2=df1 + # corrs,pvals = self._calculate_corr_matrix(df1, df2) + # new_smd = pd.DataFrame(index=df1.columns) + # new_fmd = pd.DataFrame(index=df2.columns) + # new_smd['SampleID']=new_smd.index.values + # new_fmd['_feature_id']=new_fmd.index.values + # exp=CorrelationExperiment(data=corrs, sample_metadata=new_smd, feature_metadata=new_fmd, qvals=pvals, sparse=False) + # exp=exp.cluster_data(axis='f') + # exp=exp.cluster_data(axis='s') + # return exp + + # @classmethod + # def from_data(self, corr, samples, features, qvals): + # '''Create a CorrelationExperiment from a numpy array and metadata + + # Parameters + # ---------- + # corr : numpy.ndarray + # The correlation matrix + # samples : list or pandas.DataFrame + # The sample metadata + # features : list or pandas.DataFrame + # The feature metadata + # qvals : numpy.ndarray + # The q-value matrix for the correlations + + # Returns + # ------- + # CorrelationExperiment + # The correlation experiment + # ''' + # if isinstance(samples, list): + # 
samples=pd.DataFrame(index=samples) + # if isinstance(features, list): + # features=pd.DataFrame(index=features) + # if 'SampleID' not in samples.columns: + # samples['SampleID']=samples.index.values + # if '_feature_id' not in features.columns: + # features['_feature_id']=features.index.values + + # return CorrelationExperiment(data=corr, sample_metadata=samples, feature_metadata=features, qvals=qvals, sparse=False) diff --git a/calour/io.py b/calour/io.py index 9604a6c5..575c6955 100644 --- a/calour/io.py +++ b/calour/io.py @@ -35,7 +35,7 @@ import numpy as np import biom -from . import Experiment, AmpliconExperiment, MS1Experiment +from . import Experiment, AmpliconExperiment, MS1Experiment, CorrelationExperiment from .util import get_file_md5, get_data_md5, _get_taxonomy_string from ._doc import ds from .database import _get_database_class @@ -531,6 +531,39 @@ def read_amplicon(data_file, sample_metadata_file=None, return exp +@ds.with_indent(4) +def read_correlation(prefix, **kwargs) -> CorrelationExperiment: + '''Read a saved correlation experiment. + Loads both the original correlation data experiment and the q-values experiment. 
+ + Parameters + ---------- + prefix : str + The file to read the experiment from (the names passed to CorrelationExperiment.save) + **kwargs : dict + Additional arguments to pass to the read + ''' + # store the function parameters for call history + fparams = locals() + + # by default, don't normalize the data since it is correlation data + if 'normalize' not in kwargs: + kwargs['normalize'] = None + + # load the main correlation experiment + logger.debug('Reading correlation experiment from %s' % prefix) + exp = read(prefix+'.biom', sample_metadata_file=prefix+'_sample.txt', feature_metadata_file=prefix+'_feature.txt', cls=CorrelationExperiment, **kwargs) + # and load the q-values table + logger.debug('Reading correlation matrix %s_qvals.biom' % prefix) + exp.qvals = read(prefix+'_qvals.biom', normalize=None) + + # initialize the call history + param = ['{0!s}={1!r}'.format(k, v) for k, v in fparams.items()] + exp._call_history = ['{0}({1})'.format('read_correlation', ','.join(param))] + + return exp + + @ds.with_indent(4) def read_ms(data_file, sample_metadata_file=None, feature_metadata_file=None, gnps_file=None, data_file_type='mzmine2', sample_in_row=None, direct_ids=None, get_mz_rt_from_feature_id=None, From 217dc6277457bfc56c2fa325f018e136ebacd2c7 Mon Sep 17 00:00:00 2001 From: amnona Date: Mon, 19 Aug 2024 13:11:35 +0300 Subject: [PATCH 03/10] fix CorrelationExperiment save/read to store qvals metadata --- calour/correlation_experiment.py | 44 +++----------------------------- calour/io.py | 2 +- 2 files changed, 5 insertions(+), 41 deletions(-) diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py index bac7cc56..f65b5d53 100644 --- a/calour/correlation_experiment.py +++ b/calour/correlation_experiment.py @@ -174,11 +174,12 @@ def save(self, prefix, **kwargs): file path (suffixes auto added for the 3 files) to save to.
**kwargs : dict Additional arguments to pass to the Experiment.save() function - ''' + ''' + self._sync_qvals() super().save(prefix, **kwargs) if self.qvals is not None: - self.qvals.save_biom(prefix+'_qvals.biom') - logger.debug('Saved qvals experiment to %s_qvals.biom' % prefix) + self.qvals.save(prefix+'_qvals', **kwargs) + logger.debug('Saved qvals experiment to %s_qvals' % prefix) else: logger.warning('No qvals attached to experiment. qvals experiment not saved') @@ -216,43 +217,6 @@ def _calculate_corr_matrix(df1, df2): corrs[idx1][idx2] = 0 return corrs,pvals - - # def save(self, filename, **kwargs): - # '''Save the correlation experiment to a file - - # Parameters - # ---------- - # filename : str - # The file to save the experiment to - # **kwargs : dict - # Additional arguments to pass to the save - # ''' - # super().save(filename, **kwargs) - # if self.qvals is not None: - # self.qvals(filename+'.qvals', **kwargs) - - - @classmethod - def read_correlation(self, filename, **kwargs): - '''Read the correlation experiment from a file - - Parameters - ---------- - filename : str - The file to read the experiment from - **kwargs : dict - Additional arguments to pass to the read - ''' - from .io import read - - if 'normalize' not in kwargs: - kwargs['normalize'] = None - - exp = read(filename+'.biom', sample_metadata_file=filename+'_sample.txt', feature_metadata_file=filename+'_feature.txt', cls=CorrelationExperiment, **kwargs) - - exp.qvals = read(filename+'_qvals.biom', sample_metadata_file=filename+'_qvals_sample.txt', feature_metadata_file=filename+'_qvals_feature.txt', **kwargs) - return exp - # @classmethod # def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None): # '''Create a CorrelationExperiment from a pandas DataFrame (such as the experiment sample_metadata) diff --git a/calour/io.py b/calour/io.py index 575c6955..d7763aa8 100644 --- a/calour/io.py +++ b/calour/io.py @@ -555,7 +555,7 @@ def read_correlation(prefix, **kwargs) -> 
CorrelationExperiment: exp = read(prefix+'.biom', sample_metadata_file=prefix+'_sample.txt', feature_metadata_file=prefix+'_feature.txt', cls=CorrelationExperiment, **kwargs) # and load the q-values table logger.debug('Reading correlation matrix %s_qvals.biom' % prefix) - exp.qvals = read(prefix+'_qvals.biom', normalize=None) + exp.qvals = read(prefix+'_qvals.biom', sample_metadata_file=prefix+'_qvals_sample.txt', feature_metadata_file=prefix+'_qvals_feature.txt', normalize=None) # initialize the call history param = ['{0!s}={1!r}'.format(k, v) for k, v in fparams.items()] From a9dc463a07e836fffc5c5d52e00d6b2602f8ccad Mon Sep 17 00:00:00 2001 From: amnona Date: Mon, 19 Aug 2024 13:13:31 +0300 Subject: [PATCH 04/10] add CorrelationExperiment from dataframe/data --- calour/correlation_experiment.py | 120 +++++++++++++++---------------- 1 file changed, 60 insertions(+), 60 deletions(-) diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py index f65b5d53..bb5a3e25 100644 --- a/calour/correlation_experiment.py +++ b/calour/correlation_experiment.py @@ -217,63 +217,63 @@ def _calculate_corr_matrix(df1, df2): corrs[idx1][idx2] = 0 return corrs,pvals - # @classmethod - # def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None): - # '''Create a CorrelationExperiment from a pandas DataFrame (such as the experiment sample_metadata) - # Calculates the correlations between all dataframe columns - - # Parameters - # ---------- - # df1 : pandas.DataFrame - # The first DataFrame to calculate the correlation matrix for - # df2 : pandas.DataFrame - # The second DataFrame to calculate the correlation matrix for - # If None, will use df1 - - # Returns - # ------- - # CorrelationExperiment - # The correlation experiment - # ''' - # if df2 is None: - # df2=df1 - # corrs,pvals = self._calculate_corr_matrix(df1, df2) - # new_smd = pd.DataFrame(index=df1.columns) - # new_fmd = pd.DataFrame(index=df2.columns) - # 
new_smd['SampleID']=new_smd.index.values - # new_fmd['_feature_id']=new_fmd.index.values - # exp=CorrelationExperiment(data=corrs, sample_metadata=new_smd, feature_metadata=new_fmd, qvals=pvals, sparse=False) - # exp=exp.cluster_data(axis='f') - # exp=exp.cluster_data(axis='s') - # return exp - - # @classmethod - # def from_data(self, corr, samples, features, qvals): - # '''Create a CorrelationExperiment from a numpy array and metadata - - # Parameters - # ---------- - # corr : numpy.ndarray - # The correlation matrix - # samples : list or pandas.DataFrame - # The sample metadata - # features : list or pandas.DataFrame - # The feature metadata - # qvals : numpy.ndarray - # The q-value matrix for the correlations - - # Returns - # ------- - # CorrelationExperiment - # The correlation experiment - # ''' - # if isinstance(samples, list): - # samples=pd.DataFrame(index=samples) - # if isinstance(features, list): - # features=pd.DataFrame(index=features) - # if 'SampleID' not in samples.columns: - # samples['SampleID']=samples.index.values - # if '_feature_id' not in features.columns: - # features['_feature_id']=features.index.values - - # return CorrelationExperiment(data=corr, sample_metadata=samples, feature_metadata=features, qvals=qvals, sparse=False) + @classmethod + def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None): + '''Create a CorrelationExperiment from a pandas DataFrame (such as the experiment sample_metadata) + Calculates the correlations between all dataframe columns + + Parameters + ---------- + df1 : pandas.DataFrame + The first DataFrame to calculate the correlation matrix for + df2 : pandas.DataFrame + The second DataFrame to calculate the correlation matrix for + If None, will use df1 + + Returns + ------- + CorrelationExperiment + The correlation experiment + ''' + if df2 is None: + df2=df1 + corrs,pvals = self._calculate_corr_matrix(df1, df2) + new_smd = pd.DataFrame(index=df1.columns) + new_fmd = pd.DataFrame(index=df2.columns) 
+ new_smd['SampleID']=new_smd.index.values + new_fmd['_feature_id']=new_fmd.index.values + exp=CorrelationExperiment(data=corrs, sample_metadata=new_smd, feature_metadata=new_fmd, qvals=pvals, sparse=False) + exp=exp.cluster_data(axis='f') + exp=exp.cluster_data(axis='s') + return exp + + @classmethod + def from_data(self, corr, samples, features, qvals): + '''Create a CorrelationExperiment from a numpy array and metadata + + Parameters + ---------- + corr : numpy.ndarray + The correlation matrix + samples : list or pandas.DataFrame + The sample metadata + features : list or pandas.DataFrame + The feature metadata + qvals : numpy.ndarray + The q-value matrix for the correlations + + Returns + ------- + CorrelationExperiment + The correlation experiment + ''' + if isinstance(samples, list): + samples=pd.DataFrame(index=samples) + if isinstance(features, list): + features=pd.DataFrame(index=features) + if 'SampleID' not in samples.columns: + samples['SampleID']=samples.index.values + if '_feature_id' not in features.columns: + features['_feature_id']=features.index.values + + return CorrelationExperiment(data=corr, sample_metadata=samples, feature_metadata=features, qvals=qvals, sparse=False) From b72dc5afb98b0c593b564a33091883879384b7dd Mon Sep 17 00:00:00 2001 From: amnona Date: Mon, 19 Aug 2024 13:15:34 +0300 Subject: [PATCH 05/10] add type hints --- calour/correlation_experiment.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py index bb5a3e25..dcaef8f4 100644 --- a/calour/correlation_experiment.py +++ b/calour/correlation_experiment.py @@ -248,7 +248,7 @@ def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None): return exp @classmethod - def from_data(self, corr, samples, features, qvals): + def from_data(self, corr: np.array, samples: pd.DataFrame, features: pd.DataFrame, qvals: np.array) -> 'CorrelationExperiment': '''Create a CorrelationExperiment from a numpy 
array and metadata Parameters From 04c99e492472c89655911a4eb3e61d955fc6fdf0 Mon Sep 17 00:00:00 2001 From: amnona Date: Sun, 25 Aug 2024 16:35:58 +0300 Subject: [PATCH 06/10] update heatmap options --- calour/correlation_experiment.py | 75 +++++++++++++++++++++++++++----- 1 file changed, 65 insertions(+), 10 deletions(-) diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py index dcaef8f4..84d2dcc9 100644 --- a/calour/correlation_experiment.py +++ b/calour/correlation_experiment.py @@ -51,6 +51,7 @@ class CorrelationExperiment(Experiment): The metadata on the features (columns in the matrix, shown in rows in the heatmap) qvals : numpy.ndarray or scipy.sparse.csr_matrix or None The q-values for the correlation values + NOTE: This is not guaranteed to be in the same order as the data matrix (unless _sync_qvals() is called) description : str name of experiment sparse : bool @@ -91,6 +92,11 @@ class CorrelationExperiment(Experiment): Experiment ''' def __init__(self, *args, qvals=None, **kwargs): + '''Init the CorrelationExperiment class + By default we set sparse=False (as we usually have a dense matrix) + ''' + if 'sparse' not in kwargs: + kwargs['sparse'] = False super().__init__(*args, **kwargs) if qvals is not None: if self.data.shape != qvals.shape: @@ -125,37 +131,85 @@ def _get_abundance_info(self, row:int , col:int): else: qval = self.qvals.data[row, col] return '{:.2E}, qval: {:.2f}'.format(self.data[row, col], qval) - - def heatmap(self, show_significance=True, significance_threshold=0.05, significance_plot_params={'color': 'red'},*args, **kwargs): + + def heatmap(self, significance_plot=['cmap'],significance_threshold=0.05, significance_plot_params={'color': 'red'}, cmap='bwr', *args, **kwargs): '''Plot a heatmap for the ratio experiment. This method accepts the same parameters as input with its parent class method. 
In addition, it accepts the following parameters: - show_significance : bool, optional - If True, the q-values will be plotted on top of the heatmap. + significance_plot : list of str, optional + The type of significance plot to show. Can be 'cmap' and/or 'x' significance_threshold : float, optional The threshold for the q-values to be considered significant. significance_plot_params : dict, optional The parameters to be passed to the plot function for the significance values. + If 'cmap' is in the list, use the 'cmap' parameter in significance_plot_params to set the colormap for the significant values. + If 'x' is in the list, use the 'significance_plot_params' parameter to set the plot parameters for the significance values. See Also -------- Experiment.heatmap ''' + import matplotlib.pyplot as plt + from matplotlib.colors import LinearSegmentedColormap + if 'clim' not in kwargs: min_val = np.min(self.get_data()[:]) max_val = np.max(self.get_data()[:]) range_val = np.max([np.abs(min_val), np.abs(max_val)]) kwargs['clim'] = (-range_val, range_val) - if 'cmap' not in kwargs: - kwargs['cmap'] = 'coolwarm' + if significance_plot is None or significance_plot == []: + if self.qvals is None: + raise ValueError('No qvals attached to experiment. 
Please provide a qvals matrix to plot the significance values or use significance_plot=[] to not plot significance values.') + else: + self._sync_qvals() + + data_changed = False + if 'cmap' in significance_plot: + # copy the data + old_data = self.get_data(copy=True) + data_changed = True + + # eps is added to the data to avoid overlap in the colormaps for significant/non-significant values + eps = 1e-7 + + max_val = kwargs['clim'][1] + min_val = kwargs['clim'][0] + self.data[self.data>max_val]=max_val + self.data[self.data 'CorrelationExperiment': - '''Create a CorrelationExperiment from a numpy array and metadata + '''Create a CorrelationExperiment from a numpy array (effect size), numpy array (qvals) and corresponding metadata + Similar to the __init__ function, but can take lists as input for the metadata instead of DataFrames Parameters ---------- From 177180e60ed3c1d01c9510139cac3c3357aa9c8a Mon Sep 17 00:00:00 2001 From: amnona Date: Sun, 25 Aug 2024 16:56:54 +0300 Subject: [PATCH 07/10] include CorrelationExperiment in __init__ and update changelog and version --- CHANGELOG.md | 6 ++++++ calour/__init__.py | 7 ++++--- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6beffa6a..77635cb4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,10 @@ # calour changelog +## Version 2024.8.25 + +New features: +* Add CorrelationExperiment class for working with correlation matrices and showing significance in heatmap +Other changes: +* Update experiment classes to provide the _get_abundance_info() method for the interactive heatmap (instead of being produced by the heatmap() method). This allows experiment class specific information to be shown in the heatmap abundance field when clicking on a feature/sample. ## Version 2024.5.30 add mRNAExperiment class for handling rna-seq data. 
interactive heatmap gene information is via the rna_calour module using Harmonizome server (https://maayanlab.cloud/Harmonizome) diff --git a/calour/__init__.py b/calour/__init__.py index 2474942f..8f4de74b 100644 --- a/calour/__init__.py +++ b/calour/__init__.py @@ -15,15 +15,16 @@ from .correlation_experiment import CorrelationExperiment from .ms1_experiment import MS1Experiment from .mrna_experiment import mRNAExperiment -from .io import read, read_amplicon, read_ms, read_qiime2 +from .io import read, read_amplicon, read_ms, read_qiime2, read_correlation from .util import set_log_level, register_functions __credits__ = "https://github.com/biocore/calour/graphs/contributors" -__version__ = "2024.5.30" +__version__ = "2024.8.25" __all__ = ['read', 'read_amplicon', 'read_ms', 'read_qiime2', 'read_correlation', 'Experiment', 'AmpliconExperiment', 'MS1Experiment','mRNAExperiment', + 'CorrelationExperiment', 'set_log_level'] @@ -33,4 +34,4 @@ # setting False allows other logger to print log. -fileConfig(resource_filename(__package__, 'log.cfg'), disable_existing_loggers=False) +fileConfig(resource_filename(__package__, 'log.cfg'), disable_existing_loggers=False) \ No newline at end of file From e6e75425c585d855a8c0053d96fc77d92f46c8c3 Mon Sep 17 00:00:00 2001 From: amnona Date: Thu, 29 Aug 2024 15:05:26 +0300 Subject: [PATCH 08/10] better documentation --- calour/correlation_experiment.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py index 84d2dcc9..ffcd9417 100644 --- a/calour/correlation_experiment.py +++ b/calour/correlation_experiment.py @@ -134,9 +134,9 @@ def _get_abundance_info(self, row:int , col:int): def heatmap(self, significance_plot=['cmap'],significance_threshold=0.05, significance_plot_params={'color': 'red'}, cmap='bwr', *args, **kwargs): '''Plot a heatmap for the ratio experiment. + The heatmap includes indication for significant correlations. 
This can be as a different set of colors for the significant correlations or by plotting a marker for the significant correlations. - This method accepts the same parameters as input with - its parent class method. + This method accepts the same parameters as input with its parent class method. In addition, it accepts the following parameters: significance_plot : list of str, optional The type of significance plot to show. Can be 'cmap' and/or 'x' From 98d9709c37cd2b9688f023a46bb302391232e5c9 Mon Sep 17 00:00:00 2001 From: amnona Date: Thu, 29 Aug 2024 17:13:54 +0300 Subject: [PATCH 09/10] remove from_data since will add to Experiment.__init__ to accept lists instead of DataFrames for metadata --- calour/correlation_experiment.py | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py index ffcd9417..66c5fdbd 100644 --- a/calour/correlation_experiment.py +++ b/calour/correlation_experiment.py @@ -300,35 +300,3 @@ def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame|None = None): exp=exp.cluster_data(axis='f') exp=exp.cluster_data(axis='s') return exp - - @classmethod - def from_data(self, corr: np.array, samples: pd.DataFrame, features: pd.DataFrame, qvals: np.array) -> 'CorrelationExperiment': - '''Create a CorrelationExperiment from a numpy array (effect size), numpy array (qvals) and corresponding metadata - Similar to the __init__ function, but can take lists as input for the metadata instead of DataFrames - - Parameters - ---------- - corr : numpy.ndarray - The correlation matrix - samples : list or pandas.DataFrame - The sample metadata - features : list or pandas.DataFrame - The feature metadata - qvals : numpy.ndarray - The q-value matrix for the correlations - - Returns - ------- - CorrelationExperiment - The correlation experiment - ''' - if isinstance(samples, list): - samples=pd.DataFrame(index=samples) - if isinstance(features, list): - 
features=pd.DataFrame(index=features) - if 'SampleID' not in samples.columns: - samples['SampleID']=samples.index.values - if '_feature_id' not in features.columns: - features['_feature_id']=features.index.values - - return CorrelationExperiment(data=corr, sample_metadata=samples, feature_metadata=features, qvals=qvals, sparse=False) From 9e89ca3e4c1e6d648cd44bebf4f62ba0fe464e98 Mon Sep 17 00:00:00 2001 From: amnona Date: Thu, 29 Aug 2024 17:18:05 +0300 Subject: [PATCH 10/10] remove commented code --- calour/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/calour/__init__.py b/calour/__init__.py index 8f4de74b..c2109b98 100644 --- a/calour/__init__.py +++ b/calour/__init__.py @@ -30,7 +30,6 @@ # add member functions to the class register_functions((Experiment, AmpliconExperiment, MS1Experiment, mRNAExperiment, CorrelationExperiment)) -# register_functions((Experiment, AmpliconExperiment, MS1Experiment, mRNAExperiment)) # setting False allows other logger to print log.