From ea19c54054c97cd282a0ff235d30ccb9614f6113 Mon Sep 17 00:00:00 2001
From: amnona <amnonim@gmail.com>
Date: Mon, 19 Aug 2024 13:00:23 +0300
Subject: [PATCH 01/10] Get the heatmap click info from Experiment

---
 calour/amplicon_experiment.py | 18 ++++++++++++++++++
 calour/experiment.py          | 18 ++++++++++++++++++
 calour/heatmap/plotgui.py     |  2 +-
 calour/heatmap/plotgui_qt5.py |  2 +-
 4 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/calour/amplicon_experiment.py b/calour/amplicon_experiment.py
index 17de670f..41c3f475 100644
--- a/calour/amplicon_experiment.py
+++ b/calour/amplicon_experiment.py
@@ -84,6 +84,24 @@ class AmpliconExperiment(Experiment):
     def __init__(self, *args, databases=('dbbact',), **kwargs):
         super().__init__(*args, databases=databases, **kwargs)
 
+    def _get_abundance_info(self, row:int , col:int):
+        '''Get a string with the abundance information for display in the interactive heatmap
+        For amplicon experiment (that is based on normalized discrete reads), we show the abundance in float format (with 2 decimal points).
+
+        Parameters
+        ----------
+        row : int
+            The row index
+        col : int
+            The column index
+
+        Returns
+        -------
+        str
+            The string with the abundance information
+        '''
+        return '{:.2f}'.format(self.data[row, col])
+
     def heatmap(self, *args, **kwargs):
         '''Plot a heatmap for the amplicon experiment.
 
diff --git a/calour/experiment.py b/calour/experiment.py
index b0b2f52a..ac8c19c0 100644
--- a/calour/experiment.py
+++ b/calour/experiment.py
@@ -237,6 +237,24 @@ def __getitem__(self, pos):
             dat = self.get_data()
         return dat[sample_pos, feature_pos]
 
+    def _get_abundance_info(self, row:int , col:int):
+        '''Get a string with the abundance information for display in the interactive heatmap
+        Can be overridden by subclasses to show additional row/col information
+
+        Parameters
+        ----------
+        row : int
+            The row index
+        col : int
+            The column index
+
+        Returns
+        -------
+        str
+            The string with the abundance information
+        '''
+        return '{:.2E}'.format(self.data[row, col])
+
     def copy(self):
         '''Copy the object (deeply).
 
diff --git a/calour/heatmap/plotgui.py b/calour/heatmap/plotgui.py
index 9f58dd42..2297c595 100644
--- a/calour/heatmap/plotgui.py
+++ b/calour/heatmap/plotgui.py
@@ -187,7 +187,7 @@ def get_selection_info(self):
         row, col = self.current_select
         fid = self.exp.feature_metadata.index[col]
         sid = self.exp.sample_metadata.index[row]
-        abd = self.exp.data[row, col]
+        abd = self.exp._get_abundance_info(row, col)
         return sid, fid, abd
 
     def get_database_annotations(self, feature):
diff --git a/calour/heatmap/plotgui_qt5.py b/calour/heatmap/plotgui_qt5.py
index 921081a3..4f4b6369 100644
--- a/calour/heatmap/plotgui_qt5.py
+++ b/calour/heatmap/plotgui_qt5.py
@@ -77,7 +77,7 @@ def show_info(self):
         self._display_annotation_in_qlistwidget(annt)
 
     def _update_info_labels(self, sid, fid, abd):
-        self.app_window.w_abund.setText('{:.01f}'.format(abd))
+        self.app_window.w_abund.setText(abd)
         self.app_window.w_fid.setText(str(fid))
         self.app_window.w_sid.setText(str(sid))
         sample_field = str(self.app_window.w_sfield.currentText())

From b0c7eb1de6b164c55d871dafd0909624055c9835 Mon Sep 17 00:00:00 2001
From: amnona <amnonim@gmail.com>
Date: Mon, 19 Aug 2024 13:03:11 +0300
Subject: [PATCH 02/10] add CorrelationExperiment

---
 calour/__init__.py               |   6 +-
 calour/correlation_experiment.py | 315 +++++++++++++++++++++++++++++++
 calour/io.py                     |  35 +++-
 3 files changed, 353 insertions(+), 3 deletions(-)
 create mode 100644 calour/correlation_experiment.py

diff --git a/calour/__init__.py b/calour/__init__.py
index 5379192a..2474942f 100644
--- a/calour/__init__.py
+++ b/calour/__init__.py
@@ -12,6 +12,7 @@
 
 from .experiment import Experiment
 from .amplicon_experiment import AmpliconExperiment
+from .correlation_experiment import CorrelationExperiment
 from .ms1_experiment import MS1Experiment
 from .mrna_experiment import mRNAExperiment
 from .io import read, read_amplicon, read_ms, read_qiime2
@@ -21,13 +22,14 @@
 __credits__ = "https://github.com/biocore/calour/graphs/contributors"
 __version__ = "2024.5.30"
 
-__all__ = ['read', 'read_amplicon', 'read_ms', 'read_qiime2',
+__all__ = ['read', 'read_amplicon', 'read_ms', 'read_qiime2', 'read_correlation',
            'Experiment', 'AmpliconExperiment', 'MS1Experiment','mRNAExperiment',
            'set_log_level']
 
 
 # add member functions to the class
-register_functions((Experiment, AmpliconExperiment, MS1Experiment, mRNAExperiment))
+register_functions((Experiment, AmpliconExperiment, MS1Experiment, mRNAExperiment, CorrelationExperiment))
+# register_functions((Experiment, AmpliconExperiment, MS1Experiment, mRNAExperiment))
 
 
 # setting False allows other logger to print log.
diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py
new file mode 100644
index 00000000..bac7cc56
--- /dev/null
+++ b/calour/correlation_experiment.py
@@ -0,0 +1,315 @@
+'''
+correlation experiment (:mod:`calour.correlation_experiment`)
+=============================================================
+
+.. currentmodule:: calour.correlation_experiment
+
+Classes
+^^^^^^^
+.. autosummary::
+   :toctree: generated
+
+   CorrelationExperiment
+'''
+
+# ----------------------------------------------------------------------------
+# Copyright (c) 2016--,  Calour development team.
+#
+# Distributed under the terms of the Modified BSD License.
+#
+# The full license is in the file COPYING.txt, distributed with this software.
+# ----------------------------------------------------------------------------
+
+from logging import getLogger
+
+import numpy as np
+import pandas as pd
+import scipy.stats
+from statsmodels.stats.multitest import multipletests
+
+from .experiment import Experiment
+from .util import _to_list
+from .analysis import _new_experiment_from_pvals, _CALOUR_DIRECTION, _CALOUR_STAT
+
+logger = getLogger(__name__)
+
+
+class CorrelationExperiment(Experiment):
+    '''This class stores correlation matrix data and the corresponding analysis methods.
+    Besides the main data matrix (the correlation values), it also stores an additional Experiment (in self.qvals) that contains the q-value of each correlation.
+    These can be plotted on top of the correlation matrix to show the significance of each correlation.
+
+    This is a child class of :class:`.Experiment`.
+
+    Parameters
+    ----------
+    data : numpy.ndarray or scipy.sparse.csr_matrix
+        The Correlation values (between -1 and 1)
+    sample_metadata : pandas.DataFrame
+        The metadata on the samples (rows in the matrix, shown in columns in the heatmap)
+    feature_metadata : pandas.DataFrame
+        The metadata on the features (columns in the matrix, shown in rows in the heatmap)
+    qvals : numpy.ndarray or scipy.sparse.csr_matrix or None
+        The q-values for the correlation values
+    description : str
+        name of experiment
+    sparse : bool
+        store the data array in :class:`scipy.sparse.csr_matrix`
+        or :class:`numpy.ndarray`
+    databases: iterable of str, optional
+        database interface names to show by default in heatmap() function
+        by default use None (no databases)
+        For ASV correlations, can use 'dbbact'
+        For gene correlations, can use 'mrna'
+
+    Attributes
+    ----------
+    data : numpy.ndarray or scipy.sparse.csr_matrix
+        The correlation values (between -1 and 1).
+        Samples are in row and features in column. Values are float (can be negative).
+        _calculate_corr_matrix() stores 0 where a correlation cannot be calculated.
+    sample_metadata : pandas.DataFrame
+        The metadata on the samples
+    feature_metadata : pandas.DataFrame
+        The metadata on the features
+    qvals : Experiment
+        An Experiment holding the q-values for the correlation values (__init__ sets it only when the qvals parameter is not None)
+    shape : tuple of (int, int)
+        the dimension of data
+    sparse : bool
+        store the data as sparse matrix (scipy.sparse.csr_matrix) or dense numpy array.
+    info : dict
+        information about the experiment (data md5, filenames, etc.)
+    description : str
+        name of the experiment
+    databases : dict
+        keys are the database names (i.e. 'dbbact' / 'gnps')
+        values are the database specific data for the experiment (i.e. annotations for dbbact)
+
+    See Also
+    --------
+    Experiment
+    '''
+    def __init__(self, *args, qvals=None, **kwargs):
+        super().__init__(*args, **kwargs)
+        if qvals is not None:
+            if self.data.shape != qvals.shape:
+                raise ValueError('qvals shape %s does not match data shape %s' % (qvals.shape, self.data.shape))
+            self.qvals = Experiment(data=qvals, sample_metadata=self.sample_metadata, feature_metadata=self.feature_metadata, sparse=self.sparse)
+
+    def _sync_qvals(self):
+        '''Sync the q-values experiment with the main experiment
+        Used to make sure the q-values are in the same order as the data matrix.
+        '''
+        self.qvals = self.qvals.filter_ids(self.feature_metadata.index, axis='f')
+        self.qvals = self.qvals.filter_ids(self.sample_metadata.index, axis='s')
+
+    def _get_abundance_info(self, row:int , col:int):
+        '''Get a string with the abundance information for display in the interactive heatmap
+        Also returns the qvalue if it exists.
+
+        Parameters
+        ----------
+        row : int
+            The row index
+        col : int
+            The column index
+
+        Returns
+        -------
+        str
+            The string with the abundance information
+        '''
+        if self.qvals is None:
+            qval = 'NA'
+        else:
+            qval = self.qvals.data[row, col]
+        return '{:.2E}, qval: {:.2f}'.format(self.data[row, col], qval)
+
+    def heatmap(self, show_significance=True, significance_threshold=0.05, significance_plot_params={'color': 'red'},*args, **kwargs):
+        '''Plot a heatmap for the ratio experiment.
+
+        This method accepts the same parameters as input with
+        its parent class method.
+        In addition, it accepts the following parameters:
+        show_significance : bool, optional
+            If True, the q-values will be plotted on top of the heatmap.
+        significance_threshold : float, optional
+            The threshold for the q-values to be considered significant.
+        significance_plot_params : dict, optional
+            The parameters to be passed to the plot function for the significance values.
+
+        See Also
+        --------
+        Experiment.heatmap
+
+        '''
+        if 'clim' not in kwargs:
+            min_val = np.min(self.get_data()[:])
+            max_val = np.max(self.get_data()[:])
+            range_val = np.max([np.abs(min_val), np.abs(max_val)])
+            kwargs['clim'] = (-range_val, range_val)
+        if 'cmap' not in kwargs:
+            kwargs['cmap'] = 'coolwarm'
+
+        ax = super().heatmap(*args, **kwargs)
+        if show_significance:
+            if self.qvals is not None:
+                self._sync_qvals()
+                qv = self.qvals.get_data(sparse=False)
+                show_pos = np.where(qv < significance_threshold)
+                for i, j in zip(*show_pos):
+                    ax.plot([i-0.5, i+0.5], [j-0.5, j+0.5], **significance_plot_params)
+                    ax.plot([i-0.5, i+0.5], [j+0.5, j-0.5], **significance_plot_params)
+
+        return ax
+    
+    def save(self, prefix, **kwargs):
+        '''Save the correlation experiment to a file
+        Overrides the save function in Experiment to also save the q-values (as a new experiment named prefix+"_qvals").
+
+        Parameters
+        ----------
+        prefix : str
+            file path (suffixes auto added for the 3 files) to save to.
+        **kwargs : dict
+            Additional arguments to pass to the Experiment.save() function
+        ''' 
+        super().save(prefix, **kwargs)
+        if self.qvals is not None:
+            self.qvals.save_biom(prefix+'_qvals.biom')
+            logger.debug('Saved qvals experiment to %s_qvals.biom' % prefix)
+        else:
+            logger.warning('No qvals attached to experiment. qvals experiment not saved')
+
+    def _calculate_corr_matrix(df1, df2):
+        '''Calculate the Spearman correlation matrix between all columns of two DataFrames
+        NaN values are omitted; a column pair whose correlation cannot be calculated gets correlation 0 and p-value 1
+
+        Parameters
+        ----------
+        df1, df2 : pandas.DataFrame
+            The DataFrames to correlate: each column of df1 is correlated with each column of df2
+            
+        Returns
+        -------
+        corrs : numpy.ndarray
+            The correlation matrix (rows are df1 columns, columns are df2 columns)
+        pvals : numpy.ndarray
+            The p-values for the correlation matrix
+        '''
+        pvals=np.ones([len(df1.columns),len(df2.columns)])
+        corrs=np.zeros([len(df1.columns),len(df2.columns)])
+        for idx1,r in enumerate(df1.columns):
+            for idx2,c in enumerate(df2.columns):
+                c1=df1[r].values
+                c2=df2[c].values
+                try:
+                    ccor = scipy.stats.spearmanr(c1,c2,nan_policy='omit')
+                    pvals[idx1][idx2] = ccor.pvalue
+                    corrs[idx1][idx2] = ccor.correlation
+                    if np.isnan(ccor.correlation):
+                        pvals[idx1][idx2] = 1
+                        corrs[idx1][idx2] = 0
+                except:
+                    pvals[idx1][idx2] = 1
+                    corrs[idx1][idx2] = 0
+        return corrs,pvals
+
+
+    # def save(self, filename, **kwargs):
+    #     '''Save the correlation experiment to a file
+
+    #     Parameters
+    #     ----------
+    #     filename : str
+    #         The file to save the experiment to
+    #     **kwargs : dict
+    #         Additional arguments to pass to the save
+    #     '''
+    #     super().save(filename, **kwargs)
+    #     if self.qvals is not None:
+    #         self.qvals(filename+'.qvals', **kwargs)
+
+
+    @classmethod
+    def read_correlation(self, filename, **kwargs):
+        '''Read the correlation experiment from a file
+
+        Parameters
+        ----------
+        filename : str
+            The file to read the experiment from
+        **kwargs : dict
+            Additional arguments to pass to the read
+        '''
+        from .io import read
+
+        if 'normalize' not in kwargs:
+            kwargs['normalize'] = None
+
+        exp = read(filename+'.biom', sample_metadata_file=filename+'_sample.txt', feature_metadata_file=filename+'_feature.txt', cls=CorrelationExperiment, **kwargs)
+
+        exp.qvals = read(filename+'_qvals.biom', sample_metadata_file=filename+'_qvals_sample.txt', feature_metadata_file=filename+'_qvals_feature.txt', **kwargs)
+        return exp
+
+    # @classmethod
+    # def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None):
+    #     '''Create a CorrelationExperiment from a pandas DataFrame (such as the experiment sample_metadata)
+    #     Calculates the correlations between all dataframe columns
+
+    #     Parameters
+    #     ----------
+    #     df1 : pandas.DataFrame
+    #         The first DataFrame to calculate the correlation matrix for
+    #     df2 : pandas.DataFrame
+    #         The second DataFrame to calculate the correlation matrix for
+    #         If None, will use df1
+
+    #     Returns
+    #     -------
+    #     CorrelationExperiment
+    #         The correlation experiment
+    #     '''
+    #     if df2 is None:
+    #         df2=df1
+    #     corrs,pvals = self._calculate_corr_matrix(df1, df2)
+    #     new_smd = pd.DataFrame(index=df1.columns)
+    #     new_fmd = pd.DataFrame(index=df2.columns)
+    #     new_smd['SampleID']=new_smd.index.values
+    #     new_fmd['_feature_id']=new_fmd.index.values
+    #     exp=CorrelationExperiment(data=corrs, sample_metadata=new_smd, feature_metadata=new_fmd, qvals=pvals, sparse=False)
+    #     exp=exp.cluster_data(axis='f')
+    #     exp=exp.cluster_data(axis='s')
+    #     return exp
+
+    # @classmethod
+    # def from_data(self, corr, samples, features, qvals):
+    #     '''Create a CorrelationExperiment from a numpy array and metadata
+
+    #     Parameters
+    #     ----------
+    #     corr : numpy.ndarray
+    #         The correlation matrix
+    #     samples : list or pandas.DataFrame
+    #         The sample metadata
+    #     features : list or pandas.DataFrame
+    #         The feature metadata
+    #     qvals : numpy.ndarray
+    #         The q-value matrix for the correlations
+
+    #     Returns
+    #     -------
+    #     CorrelationExperiment
+    #         The correlation experiment
+    #     '''
+    #     if isinstance(samples, list):
+    #         samples=pd.DataFrame(index=samples)
+    #     if isinstance(features, list):
+    #         features=pd.DataFrame(index=features)
+    #     if 'SampleID' not in samples.columns:
+    #         samples['SampleID']=samples.index.values
+    #     if '_feature_id' not in features.columns:
+    #         features['_feature_id']=features.index.values
+
+    #     return CorrelationExperiment(data=corr, sample_metadata=samples, feature_metadata=features, qvals=qvals, sparse=False)
diff --git a/calour/io.py b/calour/io.py
index 9604a6c5..575c6955 100644
--- a/calour/io.py
+++ b/calour/io.py
@@ -35,7 +35,7 @@
 import numpy as np
 import biom
 
-from . import Experiment, AmpliconExperiment, MS1Experiment
+from . import Experiment, AmpliconExperiment, MS1Experiment, CorrelationExperiment
 from .util import get_file_md5, get_data_md5, _get_taxonomy_string
 from ._doc import ds
 from .database import _get_database_class
@@ -531,6 +531,39 @@ def read_amplicon(data_file, sample_metadata_file=None,
     return exp
 
 
+@ds.with_indent(4)
+def read_correlation(prefix, **kwargs) -> CorrelationExperiment:
+    '''Read a saved correlation experiment.
+    Loads both the original correlation data experiment and the q-values experiment.
+
+    Parameters
+    ----------
+    prefix : str
+        The file prefix to read the experiment from (the prefix passed to CorrelationExperiment.save)
+    **kwargs : dict
+        Additional arguments to pass to the read
+    '''
+    # store the function parameters for call history
+    fparams = locals()
+
+    # by default, don't normalize the data since it is correlation data
+    if 'normalize' not in kwargs:
+        kwargs['normalize'] = None
+
+    # load the main correlation experiment
+    logger.debug('Reading correlation experiment from %s' % prefix)
+    exp = read(prefix+'.biom', sample_metadata_file=prefix+'_sample.txt', feature_metadata_file=prefix+'_feature.txt', cls=CorrelationExperiment, **kwargs)
+    # and load the q-values table
+    logger.debug('Reading correlation matrix %s_qvals.biom' % prefix)
+    exp.qvals = read(prefix+'_qvals.biom', normalize=None)
+
+    # initialize the call history
+    param = ['{0!s}={1!r}'.format(k, v) for k, v in fparams.items()]
+    exp._call_history = ['{0}({1})'.format('read_correlation', ','.join(param))]
+
+    return exp
+
+
 @ds.with_indent(4)
 def read_ms(data_file, sample_metadata_file=None, feature_metadata_file=None, gnps_file=None,
             data_file_type='mzmine2', sample_in_row=None, direct_ids=None, get_mz_rt_from_feature_id=None,

From 217dc6277457bfc56c2fa325f018e136ebacd2c7 Mon Sep 17 00:00:00 2001
From: amnona <amnonim@gmail.com>
Date: Mon, 19 Aug 2024 13:11:35 +0300
Subject: [PATCH 03/10] fix CorrelationExperiment save/read to store qvals
 metadata

---
 calour/correlation_experiment.py | 44 +++-----------------------------
 calour/io.py                     |  2 +-
 2 files changed, 5 insertions(+), 41 deletions(-)

diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py
index bac7cc56..f65b5d53 100644
--- a/calour/correlation_experiment.py
+++ b/calour/correlation_experiment.py
@@ -174,11 +174,12 @@ def save(self, prefix, **kwargs):
             file path (suffixes auto added for the 3 files) to save to.
         **kwargs : dict
             Additional arguments to pass to the Experiment.save() function
-        ''' 
+        '''
+        self._sync_qvals()
         super().save(prefix, **kwargs)
         if self.qvals is not None:
-            self.qvals.save_biom(prefix+'_qvals.biom')
-            logger.debug('Saved qvals experiment to %s_qvals.biom' % prefix)
+            self.qvals.save(prefix+'_qvals', **kwargs)
+            logger.debug('Saved qvals experiment to %s_qvals' % prefix)
         else:
             logger.warning('No qvals attached to experiment. qvals experiment not saved')
 
@@ -216,43 +217,6 @@ def _calculate_corr_matrix(df1, df2):
                     corrs[idx1][idx2] = 0
         return corrs,pvals
 
-
-    # def save(self, filename, **kwargs):
-    #     '''Save the correlation experiment to a file
-
-    #     Parameters
-    #     ----------
-    #     filename : str
-    #         The file to save the experiment to
-    #     **kwargs : dict
-    #         Additional arguments to pass to the save
-    #     '''
-    #     super().save(filename, **kwargs)
-    #     if self.qvals is not None:
-    #         self.qvals(filename+'.qvals', **kwargs)
-
-
-    @classmethod
-    def read_correlation(self, filename, **kwargs):
-        '''Read the correlation experiment from a file
-
-        Parameters
-        ----------
-        filename : str
-            The file to read the experiment from
-        **kwargs : dict
-            Additional arguments to pass to the read
-        '''
-        from .io import read
-
-        if 'normalize' not in kwargs:
-            kwargs['normalize'] = None
-
-        exp = read(filename+'.biom', sample_metadata_file=filename+'_sample.txt', feature_metadata_file=filename+'_feature.txt', cls=CorrelationExperiment, **kwargs)
-
-        exp.qvals = read(filename+'_qvals.biom', sample_metadata_file=filename+'_qvals_sample.txt', feature_metadata_file=filename+'_qvals_feature.txt', **kwargs)
-        return exp
-
     # @classmethod
     # def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None):
     #     '''Create a CorrelationExperiment from a pandas DataFrame (such as the experiment sample_metadata)
diff --git a/calour/io.py b/calour/io.py
index 575c6955..d7763aa8 100644
--- a/calour/io.py
+++ b/calour/io.py
@@ -555,7 +555,7 @@ def read_correlation(prefix, **kwargs) -> CorrelationExperiment:
     exp = read(prefix+'.biom', sample_metadata_file=prefix+'_sample.txt', feature_metadata_file=prefix+'_feature.txt', cls=CorrelationExperiment, **kwargs)
     # and load the q-values table
     logger.debug('Reading correlation matrix %s_qvals.biom' % prefix)
-    exp.qvals = read(prefix+'_qvals.biom', normalize=None)
+    exp.qvals = read(prefix+'_qvals.biom', sample_metadata_file=prefix+'_qvals_sample.txt', feature_metadata_file=prefix+'_qvals_feature.txt', normalize=None)
 
     # initialize the call history
     param = ['{0!s}={1!r}'.format(k, v) for k, v in fparams.items()]

From a9dc463a07e836fffc5c5d52e00d6b2602f8ccad Mon Sep 17 00:00:00 2001
From: amnona <amnonim@gmail.com>
Date: Mon, 19 Aug 2024 13:13:31 +0300
Subject: [PATCH 04/10] add CorrelationExperiment from dataframe/data

---
 calour/correlation_experiment.py | 120 +++++++++++++++----------------
 1 file changed, 60 insertions(+), 60 deletions(-)

diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py
index f65b5d53..bb5a3e25 100644
--- a/calour/correlation_experiment.py
+++ b/calour/correlation_experiment.py
@@ -217,63 +217,63 @@ def _calculate_corr_matrix(df1, df2):
                     corrs[idx1][idx2] = 0
         return corrs,pvals
 
-    # @classmethod
-    # def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None):
-    #     '''Create a CorrelationExperiment from a pandas DataFrame (such as the experiment sample_metadata)
-    #     Calculates the correlations between all dataframe columns
-
-    #     Parameters
-    #     ----------
-    #     df1 : pandas.DataFrame
-    #         The first DataFrame to calculate the correlation matrix for
-    #     df2 : pandas.DataFrame
-    #         The second DataFrame to calculate the correlation matrix for
-    #         If None, will use df1
-
-    #     Returns
-    #     -------
-    #     CorrelationExperiment
-    #         The correlation experiment
-    #     '''
-    #     if df2 is None:
-    #         df2=df1
-    #     corrs,pvals = self._calculate_corr_matrix(df1, df2)
-    #     new_smd = pd.DataFrame(index=df1.columns)
-    #     new_fmd = pd.DataFrame(index=df2.columns)
-    #     new_smd['SampleID']=new_smd.index.values
-    #     new_fmd['_feature_id']=new_fmd.index.values
-    #     exp=CorrelationExperiment(data=corrs, sample_metadata=new_smd, feature_metadata=new_fmd, qvals=pvals, sparse=False)
-    #     exp=exp.cluster_data(axis='f')
-    #     exp=exp.cluster_data(axis='s')
-    #     return exp
-
-    # @classmethod
-    # def from_data(self, corr, samples, features, qvals):
-    #     '''Create a CorrelationExperiment from a numpy array and metadata
-
-    #     Parameters
-    #     ----------
-    #     corr : numpy.ndarray
-    #         The correlation matrix
-    #     samples : list or pandas.DataFrame
-    #         The sample metadata
-    #     features : list or pandas.DataFrame
-    #         The feature metadata
-    #     qvals : numpy.ndarray
-    #         The q-value matrix for the correlations
-
-    #     Returns
-    #     -------
-    #     CorrelationExperiment
-    #         The correlation experiment
-    #     '''
-    #     if isinstance(samples, list):
-    #         samples=pd.DataFrame(index=samples)
-    #     if isinstance(features, list):
-    #         features=pd.DataFrame(index=features)
-    #     if 'SampleID' not in samples.columns:
-    #         samples['SampleID']=samples.index.values
-    #     if '_feature_id' not in features.columns:
-    #         features['_feature_id']=features.index.values
-
-    #     return CorrelationExperiment(data=corr, sample_metadata=samples, feature_metadata=features, qvals=qvals, sparse=False)
+    @classmethod
+    def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None):
+        '''Create a CorrelationExperiment from a pandas DataFrame (such as the experiment sample_metadata)
+        Calculates the correlations between all dataframe columns
+
+        Parameters
+        ----------
+        df1 : pandas.DataFrame
+            The first DataFrame to calculate the correlation matrix for
+        df2 : pandas.DataFrame
+            The second DataFrame to calculate the correlation matrix for
+            If None, will use df1
+
+        Returns
+        -------
+        CorrelationExperiment
+            The correlation experiment
+        '''
+        if df2 is None:
+            df2=df1
+        corrs,pvals = self._calculate_corr_matrix(df1, df2)
+        new_smd = pd.DataFrame(index=df1.columns)
+        new_fmd = pd.DataFrame(index=df2.columns)
+        new_smd['SampleID']=new_smd.index.values
+        new_fmd['_feature_id']=new_fmd.index.values
+        exp=CorrelationExperiment(data=corrs, sample_metadata=new_smd, feature_metadata=new_fmd, qvals=pvals, sparse=False)
+        exp=exp.cluster_data(axis='f')
+        exp=exp.cluster_data(axis='s')
+        return exp
+
+    @classmethod
+    def from_data(self, corr, samples, features, qvals):
+        '''Create a CorrelationExperiment from a numpy array and metadata
+
+        Parameters
+        ----------
+        corr : numpy.ndarray
+            The correlation matrix
+        samples : list or pandas.DataFrame
+            The sample metadata
+        features : list or pandas.DataFrame
+            The feature metadata
+        qvals : numpy.ndarray
+            The q-value matrix for the correlations
+
+        Returns
+        -------
+        CorrelationExperiment
+            The correlation experiment
+        '''
+        if isinstance(samples, list):
+            samples=pd.DataFrame(index=samples)
+        if isinstance(features, list):
+            features=pd.DataFrame(index=features)
+        if 'SampleID' not in samples.columns:
+            samples['SampleID']=samples.index.values
+        if '_feature_id' not in features.columns:
+            features['_feature_id']=features.index.values
+
+        return CorrelationExperiment(data=corr, sample_metadata=samples, feature_metadata=features, qvals=qvals, sparse=False)

From b72dc5afb98b0c593b564a33091883879384b7dd Mon Sep 17 00:00:00 2001
From: amnona <amnonim@gmail.com>
Date: Mon, 19 Aug 2024 13:15:34 +0300
Subject: [PATCH 05/10] add type hints

---
 calour/correlation_experiment.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py
index bb5a3e25..dcaef8f4 100644
--- a/calour/correlation_experiment.py
+++ b/calour/correlation_experiment.py
@@ -248,7 +248,7 @@ def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None):
         return exp
 
     @classmethod
-    def from_data(self, corr, samples, features, qvals):
+    def from_data(self, corr: np.array, samples: pd.DataFrame, features: pd.DataFrame, qvals: np.array) -> 'CorrelationExperiment':
         '''Create a CorrelationExperiment from a numpy array and metadata
 
         Parameters

From 04c99e492472c89655911a4eb3e61d955fc6fdf0 Mon Sep 17 00:00:00 2001
From: amnona <amnonim@gmail.com>
Date: Sun, 25 Aug 2024 16:35:58 +0300
Subject: [PATCH 06/10] update heatmap options

---
 calour/correlation_experiment.py | 75 +++++++++++++++++++++++++++-----
 1 file changed, 65 insertions(+), 10 deletions(-)

diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py
index dcaef8f4..84d2dcc9 100644
--- a/calour/correlation_experiment.py
+++ b/calour/correlation_experiment.py
@@ -51,6 +51,7 @@ class CorrelationExperiment(Experiment):
         The metadata on the features (columns in the matrix, shown in rows in the heatmap)
     qvals : numpy.ndarray or scipy.sparse.csr_matrix or None
         The q-values for the correlation values
+        NOTE: This is not guaranteed to be in the same order as the data matrix (unless _sync_qvals() is called)
     description : str
         name of experiment
     sparse : bool
@@ -91,6 +92,11 @@ class CorrelationExperiment(Experiment):
     Experiment
     '''
     def __init__(self, *args, qvals=None, **kwargs):
+        '''Init the CorrelationExperiment class
+        By default we set sparse=False (as we usually have a dense matrix)
+        '''
+        if 'sparse' not in kwargs:
+            kwargs['sparse'] = False
         super().__init__(*args, **kwargs)
         if qvals is not None:
             if self.data.shape != qvals.shape:
@@ -125,37 +131,85 @@ def _get_abundance_info(self, row:int , col:int):
         else:
             qval = self.qvals.data[row, col]
         return '{:.2E}, qval: {:.2f}'.format(self.data[row, col], qval)
-
-    def heatmap(self, show_significance=True, significance_threshold=0.05, significance_plot_params={'color': 'red'},*args, **kwargs):
+    
+    def heatmap(self, significance_plot=['cmap'],significance_threshold=0.05, significance_plot_params={'color': 'red'}, cmap='bwr', *args, **kwargs):
         '''Plot a heatmap for the ratio experiment.
 
         This method accepts the same parameters as input with
         its parent class method.
         In addition, it accepts the following parameters:
-        show_significance : bool, optional
-            If True, the q-values will be plotted on top of the heatmap.
+        significance_plot : list of str, optional
+            The type of significance plot to show. Can be 'cmap' and/or 'x'
         significance_threshold : float, optional
             The threshold for the q-values to be considered significant.
         significance_plot_params : dict, optional
             The parameters to be passed to the plot function for the significance values.
+            If 'cmap' is in significance_plot, the 'cmap' entry of significance_plot_params (default: 'PiYG') sets the colormap used for the significant values.
+            If 'x' is in significance_plot, significance_plot_params sets the plot parameters for the significance markers.
 
         See Also
         --------
         Experiment.heatmap
 
         '''
+        import matplotlib.pyplot as plt
+        from matplotlib.colors import LinearSegmentedColormap
+
         if 'clim' not in kwargs:
             min_val = np.min(self.get_data()[:])
             max_val = np.max(self.get_data()[:])
             range_val = np.max([np.abs(min_val), np.abs(max_val)])
             kwargs['clim'] = (-range_val, range_val)
-        if 'cmap' not in kwargs:
-            kwargs['cmap'] = 'coolwarm'
 
+        if significance_plot is None or significance_plot == []:
+            if self.qvals is None:
+                raise ValueError('No qvals attached to experiment. Please provide a qvals matrix to plot the significance values or use significance_plot=[] to not plot significance values.')
+        else:
+            self._sync_qvals()
+
+        data_changed = False
+        if 'cmap' in significance_plot:
+            # copy the data
+            old_data = self.get_data(copy=True)
+            data_changed = True
+
+            # eps is added to the data to avoid overlap in the colormaps for significant/non-significant values
+            eps = 1e-7
+            
+            max_val = kwargs['clim'][1]
+            min_val = kwargs['clim'][0]
+            self.data[self.data>max_val]=max_val
+            self.data[self.data<min_val]=min_val
+            self.data = self.data - (max_val + eps)
+
+            qv = self.qvals.get_data(sparse=False)
+            sig_pos = qv < significance_threshold
+            self.data[sig_pos]+= (2*max_val)+eps
+            if 'cmap' in significance_plot_params:
+                cmap_sig = significance_plot_params['cmap']
+                del significance_plot_params['cmap']
+            else:
+                cmap_sig = 'PiYG'
+
+            # create the colormap which is a concatenation of the original colormap and the significant colormap
+            colors_nonsig = plt.get_cmap(cmap)(np.linspace(0, 1, 128))
+            colors_sig = plt.get_cmap(cmap_sig)(np.linspace(0, 1, 128))
+            colors = np.vstack((colors_nonsig, colors_sig))
+            concatenated_cmap = LinearSegmentedColormap.from_list('concatenated_cmap', colors)
+            kwargs['cmap'] = concatenated_cmap
+            # adjust the clim to account for the added values (negative values are for the non-significant values, positive values are for the significant values)
+            kwargs['clim'] = (2*kwargs['clim'][0], 2*kwargs['clim'][1])
+
+        # call the heatmap function of the parent class (self.data may hold the temporarily shifted values from above)
         ax = super().heatmap(*args, **kwargs)
-        if show_significance:
+
+        # if the data was changed (for the significance plot), revert it back to the original data
+        if data_changed:
+            self.data = old_data
+
+        # add the significant correlations plot
+        if 'x' in significance_plot:
             if self.qvals is not None:
-                self._sync_qvals()
                 qv = self.qvals.get_data(sparse=False)
                 show_pos = np.where(qv < significance_threshold)
                 for i, j in zip(*show_pos):
@@ -218,7 +272,7 @@ def _calculate_corr_matrix(df1, df2):
         return corrs,pvals
 
     @classmethod
-    def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None):
+    def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame|None = None):
         '''Create a CorrelationExperiment from a pandas DataFrame (such as the experiment sample_metadata)
         Calculates the correlations between all dataframe columns
 
@@ -249,7 +303,8 @@ def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame = None):
 
     @classmethod
     def from_data(self, corr: np.array, samples: pd.DataFrame, features: pd.DataFrame, qvals: np.array) -> 'CorrelationExperiment':
-        '''Create a CorrelationExperiment from a numpy array and metadata
+        '''Create a CorrelationExperiment from a numpy array (effect size), numpy array (qvals) and corresponding metadata
+        Similar to the __init__ function, but can take lists as input for the metadata instead of DataFrames
 
         Parameters
         ----------

From 177180e60ed3c1d01c9510139cac3c3357aa9c8a Mon Sep 17 00:00:00 2001
From: amnona <amnonim@gmail.com>
Date: Sun, 25 Aug 2024 16:56:54 +0300
Subject: [PATCH 07/10] include CorrelationExperiment in __init__ and update
 changelog and version

---
 CHANGELOG.md       | 6 ++++++
 calour/__init__.py | 7 ++++---
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6beffa6a..77635cb4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,4 +1,10 @@
 # calour changelog
+## Version 2024.8.25
+New features:
+* Add CorrelationExperiment class for working with correlation matrices and showing significance in heatmap
+
+Other changes:
+* Update experiment classes to provide the _get_abundance_info() method for the interactive heatmap (instead of the information being produced by the heatmap() method). This allows experiment-class-specific information to be shown in the heatmap abundance field when clicking on a feature/sample.
 
 ## Version 2024.5.30
 add mRNAExperiment class for handling rna-seq data. interactive heatmap gene information is via the rna_calour module using Harmonizome server (https://maayanlab.cloud/Harmonizome)
diff --git a/calour/__init__.py b/calour/__init__.py
index 2474942f..8f4de74b 100644
--- a/calour/__init__.py
+++ b/calour/__init__.py
@@ -15,15 +15,16 @@
 from .correlation_experiment import CorrelationExperiment
 from .ms1_experiment import MS1Experiment
 from .mrna_experiment import mRNAExperiment
-from .io import read, read_amplicon, read_ms, read_qiime2
+from .io import read, read_amplicon, read_ms, read_qiime2, read_correlation
 from .util import set_log_level, register_functions
 
 
 __credits__ = "https://github.com/biocore/calour/graphs/contributors"
-__version__ = "2024.5.30"
+__version__ = "2024.8.25"
 
 __all__ = ['read', 'read_amplicon', 'read_ms', 'read_qiime2', 'read_correlation',
            'Experiment', 'AmpliconExperiment', 'MS1Experiment','mRNAExperiment',
+           'CorrelationExperiment',
            'set_log_level']
 
 
@@ -33,4 +34,4 @@
 
 
 # setting False allows other logger to print log.
-fileConfig(resource_filename(__package__, 'log.cfg'), disable_existing_loggers=False)
+fileConfig(resource_filename(__package__, 'log.cfg'), disable_existing_loggers=False)
\ No newline at end of file

From e6e75425c585d855a8c0053d96fc77d92f46c8c3 Mon Sep 17 00:00:00 2001
From: amnona <amnonim@gmail.com>
Date: Thu, 29 Aug 2024 15:05:26 +0300
Subject: [PATCH 08/10] better documentation

---
 calour/correlation_experiment.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py
index 84d2dcc9..ffcd9417 100644
--- a/calour/correlation_experiment.py
+++ b/calour/correlation_experiment.py
@@ -134,9 +134,9 @@ def _get_abundance_info(self, row:int , col:int):
     
     def heatmap(self, significance_plot=['cmap'],significance_threshold=0.05, significance_plot_params={'color': 'red'}, cmap='bwr', *args, **kwargs):
         '''Plot a heatmap for the ratio experiment.
+        The heatmap indicates significant correlations, either by drawing them with a different colormap or by plotting a marker on them.
 
-        This method accepts the same parameters as input with
-        its parent class method.
+        This method accepts the same parameters as its parent class method.
         In addition, it accepts the following parameters:
         significance_plot : list of str, optional
             The type of significance plot to show. Can be 'cmap' and/or 'x'

From 98d9709c37cd2b9688f023a46bb302391232e5c9 Mon Sep 17 00:00:00 2001
From: amnona <amnonim@gmail.com>
Date: Thu, 29 Aug 2024 17:13:54 +0300
Subject: [PATCH 09/10] remove from_data; Experiment.__init__ will be extended
 to accept lists instead of DataFrames for metadata

---
 calour/correlation_experiment.py | 32 --------------------------------
 1 file changed, 32 deletions(-)

diff --git a/calour/correlation_experiment.py b/calour/correlation_experiment.py
index ffcd9417..66c5fdbd 100644
--- a/calour/correlation_experiment.py
+++ b/calour/correlation_experiment.py
@@ -300,35 +300,3 @@ def from_dataframes(self, df1: pd.DataFrame, df2: pd.DataFrame|None = None):
         exp=exp.cluster_data(axis='f')
         exp=exp.cluster_data(axis='s')
         return exp
-
-    @classmethod
-    def from_data(self, corr: np.array, samples: pd.DataFrame, features: pd.DataFrame, qvals: np.array) -> 'CorrelationExperiment':
-        '''Create a CorrelationExperiment from a numpy array (effect size), numpy array (qvals) and corresponding metadata
-        Similar to the __init__ function, but can take lists as input for the metadata instead of DataFrames
-
-        Parameters
-        ----------
-        corr : numpy.ndarray
-            The correlation matrix
-        samples : list or pandas.DataFrame
-            The sample metadata
-        features : list or pandas.DataFrame
-            The feature metadata
-        qvals : numpy.ndarray
-            The q-value matrix for the correlations
-
-        Returns
-        -------
-        CorrelationExperiment
-            The correlation experiment
-        '''
-        if isinstance(samples, list):
-            samples=pd.DataFrame(index=samples)
-        if isinstance(features, list):
-            features=pd.DataFrame(index=features)
-        if 'SampleID' not in samples.columns:
-            samples['SampleID']=samples.index.values
-        if '_feature_id' not in features.columns:
-            features['_feature_id']=features.index.values
-
-        return CorrelationExperiment(data=corr, sample_metadata=samples, feature_metadata=features, qvals=qvals, sparse=False)

From 9e89ca3e4c1e6d648cd44bebf4f62ba0fe464e98 Mon Sep 17 00:00:00 2001
From: amnona <amnonim@gmail.com>
Date: Thu, 29 Aug 2024 17:18:05 +0300
Subject: [PATCH 10/10] remove commented code

---
 calour/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/calour/__init__.py b/calour/__init__.py
index 8f4de74b..c2109b98 100644
--- a/calour/__init__.py
+++ b/calour/__init__.py
@@ -30,7 +30,6 @@
 
 # add member functions to the class
 register_functions((Experiment, AmpliconExperiment, MS1Experiment, mRNAExperiment, CorrelationExperiment))
-# register_functions((Experiment, AmpliconExperiment, MS1Experiment, mRNAExperiment))
 
 
 # setting False allows other logger to print log.