From 9bd72d1ac513ad939279ea0ae5d4452192c11698 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Thu, 21 Nov 2019 17:54:53 -0500
Subject: [PATCH 01/15] graphtools estimator

---
 graphtools/estimator.py | 386 ++++++++++++++++++++++++++++++++++++++++
 graphtools/utils.py     | 131 ++++++++++++++
 2 files changed, 517 insertions(+)
 create mode 100644 graphtools/estimator.py

diff --git a/graphtools/estimator.py b/graphtools/estimator.py
new file mode 100644
index 0000000..5979cf0
--- /dev/null
+++ b/graphtools/estimator.py
@@ -0,0 +1,386 @@
+import numpy as np
+import tasklogger
+
+try:
+    import anndata
+except ImportError:
+    # anndata not installed
+    pass
+
+try:
+    import pygsp
+except ImportError:
+    # anndata not installed
+    pass
+
+from functools import partial
+from scipy import sparse
+
+from . import api, graphs, base, utils
+
+_logger = tasklogger.get_tasklogger("graphtools")
+
+
+class GraphEstimator(object):
+    """Estimator which builds a graphtools Graph
+
+    Parameters
+    ----------
+
+    knn : int, optional, default: 5
+        number of nearest neighbors on which to build kernel
+
+    decay : int, optional, default: 40
+        sets decay rate of kernel tails.
+        If None, alpha decaying kernel is not used
+
+    n_landmark : int, optional, default: 2000
+        number of landmarks to use in fast PHATE
+
+    n_pca : int, optional, default: 100
+        Number of principal components to use for calculating
+        neighborhoods. For extremely large datasets, using
+        n_pca < 20 allows neighborhoods to be calculated in
+        roughly log(n_samples) time.
+
+    distance : string, optional, default: 'euclidean'
+        recommended values: 'euclidean', 'cosine', 'precomputed'
+        Any metric from `scipy.spatial.distance` can be used
+        distance metric for building kNN graph. Custom distance
+        functions of form `f(x, y) = d` are also accepted. If 'precomputed',
+        `data` should be an n_samples x n_samples distance or
+        affinity matrix. Distance matrices are assumed to have zeros
+        down the diagonal, while affinity matrices are assumed to have
+        non-zero values down the diagonal. This is detected automatically using
+        `data[0,0]`. You can override this detection with
+        `distance='precomputed_distance'` or `distance='precomputed_affinity'`.
+
+    n_jobs : integer, optional, default: 1
+        The number of jobs to use for the computation.
+        If -1 all CPUs are used. If 1 is given, no parallel computing code is
+        used at all, which is useful for debugging.
+        For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for
+        n_jobs = -2, all CPUs but one are used
+
+    random_state : integer or numpy.random.RandomState, optional, default: None
+        If an integer is given, it fixes the seed
+        Defaults to the global `numpy` random number generator
+
+    verbose : `int` or `boolean`, optional (default: 1)
+        If `True` or `> 0`, print status messages
+        
+    n_svd : (default: 100)
+    
+    thresh : (default: 1e-4)
+    
+    kwargs : additional arguments for graphtools.Graph
+    """
+
+    X = utils.attribute("X", doc="Stored input data")
+    graph = utils.attribute("graph", doc="graphtools Graph object")
+
+    @graph.setter
+    def graph(self, G):
+        self._graph = G
+        if G is None:
+            self._reset_graph()
+
+    n_pca = utils.attribute(
+        "n_pca",
+        default=100,
+        on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int),
+    )
+    random_state = utils.attribute("random_state")
+
+    knn = utils.attribute(
+        "knn", default=5, on_set=[utils.check_positive, utils.check_int]
+    )
+    decay = utils.attribute("decay", default=40, on_set=utils.check_positive)
+    distance = utils.attribute(
+        "distance",
+        default="euclidean",
+        on_set=partial(
+            utils.check_in,
+            [
+                "euclidean",
+                "precomputed",
+                "cosine",
+                "correlation",
+                "cityblock",
+                "l1",
+                "l2",
+                "manhattan",
+                "braycurtis",
+                "canberra",
+                "chebyshev",
+                "dice",
+                "hamming",
+                "jaccard",
+                "kulsinski",
+                "mahalanobis",
+                "matching",
+                "minkowski",
+                "rogerstanimoto",
+                "russellrao",
+                "seuclidean",
+                "sokalmichener",
+                "sokalsneath",
+                "sqeuclidean",
+                "yule",
+                "precomputed_affinity",
+                "precomputed_distance",
+            ],
+        ),
+    )
+    n_svd = utils.attribute(
+        "n_svd",
+        default=100,
+        on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int),
+    )
+    n_jobs = utils.attribute(
+        "n_jobs", on_set=partial(utils.check_if_not, None, utils.check_int)
+    )
+    verbose = utils.attribute("verbose", default=0)
+    thresh = utils.attribute(
+        "thresh",
+        default=1e-4,
+        on_set=partial(utils.check_if_not, 0, utils.check_positive),
+    )
+
+    n_landmark = utils.attribute("n_landmark")
+
+    @n_landmark.setter
+    def n_landmark(self, n_landmark):
+        self._n_landmark = n_landmark
+        utils.check_if_not(
+            None, utils.check_positive, utils.check_int, n_landmark=n_landmark
+        )
+        if self.graph is not None:
+            if n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph):
+                self.graph = None
+            elif n_landmark is not None and not isinstance(
+                self.graph, graphs.LandmarkGraph
+            ):
+                self.graph = None
+
+    def __init__(
+        self,
+        knn=5,
+        decay=40,
+        n_pca=100,
+        n_landmark=None,
+        random_state=None,
+        distance="euclidean",
+        n_svd=100,
+        n_jobs=1,
+        verbose=1,
+        thresh=1e-4,
+        **kwargs
+    ):
+
+        if verbose is True:
+            verbose = 1
+        elif verbose is False:
+            verbose = 0
+
+        self.n_pca = n_pca
+        self.n_landmark = n_landmark
+        self.random_state = random_state
+        self.knn = knn
+        self.decay = decay
+        self.distance = distance
+        self.n_svd = n_svd
+        self.n_jobs = n_jobs
+        self.verbose = verbose
+        self.thresh = thresh
+        self.kwargs = kwargs
+        _logger.set_level(self.verbose)
+
+    def set_params(self, **params):
+        for p in params:
+            setattr(self, p, params[p])
+        self._set_graph_params(**params)
+
+    def _set_graph_params(self, **params):
+        if self.graph is not None:
+            try:
+                self.graph.set_params(**params)
+            except ValueError as e:
+                _logger.debug("Reset graph due to {}".format(str(e)))
+                self.graph = None
+
+    def _reset_graph(self):
+        pass
+
+    def _detect_precomputed_matrix_type(self, X):
+        if isinstance(X, sparse.coo_matrix):
+            X = X.tocsr()
+        if X[0, 0] == 0:
+            return "distance"
+        else:
+            return "affinity"
+
+    def _parse_n_landmark(self, X):
+        if self.n_landmark is not None and self.n_landmark >= X.shape[0]:
+            return None
+        else:
+            return self.n_landmark
+
+    def _parse_input(self, X):
+        # passing graphs as input
+        if isinstance(X, base.BaseGraph):
+            if isinstance(X, graphs.LandmarkGraph) or (
+                isinstance(X, base.BaseGraph) and self.n_landmark is None
+            ):
+                # we can keep this graph
+                self.graph = X
+                X = X.data
+                n_pca = self.graph.n_pca
+                update_graph = False
+                if isinstance(self.graph, graphs.TraditionalGraph):
+                    precomputed = self.graph.precomputed
+                else:
+                    precomputed = None
+                return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph
+            else:
+                # n_landmark is set, but this is not a landmark graph
+                self.graph = None
+                X = X.kernel
+                precomputed = "affinity"
+                n_pca = None
+                update_graph = False
+                return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph
+        else:
+            try:
+                if isinstance(X, pygsp.graphs.Graph):
+                    self.graph = None
+                    X = X.W
+                    precomputed = "adjacency"
+                    update_graph = False
+                    n_pca = None
+                    return (
+                        X,
+                        n_pca,
+                        self._parse_n_landmark(X),
+                        precomputed,
+                        update_graph,
+                    )
+            except NameError:
+                # pygsp not installed
+                pass
+
+        # checks on regular data
+        update_graph = True
+        try:
+            if isinstance(X, anndata.AnnData):
+                X = X.X
+        except NameError:
+            # anndata not installed
+            pass
+        if not callable(self.distance) and self.distance.startswith("precomputed"):
+            if self.distance == "precomputed":
+                # automatic detection
+                precomputed = self._detect_precomputed_matrix_type(X)
+            elif self.distance in ["precomputed_affinity", "precomputed_distance"]:
+                precomputed = self.distance.split("_")[1]
+            else:
+                raise ValueError(
+                    "distance {} not recognized. Did you mean "
+                    "'precomputed_distance', "
+                    "'precomputed_affinity', or 'precomputed' "
+                    "(automatically detects distance or affinity)?".format(
+                        self.distance
+                    )
+                )
+            n_pca = None
+        else:
+            precomputed = None
+            if self.n_pca is None or self.n_pca >= np.min(X.shape):
+                n_pca = None
+            else:
+                n_pca = self.n_pca
+        return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph
+
+    def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs):
+        if self.X is not None and not utils.matrix_is_equivalent(X, self.X):
+            """
+            If the same data is used, we can reuse existing kernel and
+            diffusion matrices. Otherwise we have to recompute.
+            """
+            self.graph = None
+        else:
+            try:
+                self.graph.set_params(
+                    n_pca=n_pca,
+                    precomputed=precomputed,
+                    n_landmark=n_landmark,
+                    random_state=self.random_state,
+                    knn=self.knn,
+                    decay=self.decay,
+                    distance=self.distance,
+                    n_svd=self.n_svd,
+                    n_jobs=self.n_jobs,
+                    thresh=self.thresh,
+                    verbose=self.verbose,
+                    **(self.kwargs)
+                )
+                _logger.info("Using precomputed graph and diffusion operator...")
+            except ValueError as e:
+                # something changed that should have invalidated the graph
+                _logger.debug("Reset graph due to {}".format(str(e)))
+                self.graph = None
+
+    def fit(self, X):
+        """Computes the graph
+
+        Parameters
+        ----------
+        X : array, shape=[n_samples, n_features]
+            input data with `n_samples` samples and `n_features`
+            features. Accepted data types: `numpy.ndarray`,
+            `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. If
+            `knn_dist` is 'precomputed', `data` should be a n_samples x
+            n_samples distance or affinity matrix
+
+        Returns
+        -------
+        self : graphtools.estimator.GraphEstimator
+        """
+        X, n_pca, n_landmark, precomputed, update_graph = self._parse_input(X)
+
+        if precomputed is None:
+            _logger.info(
+                "Building graph on {} samples and {} features.".format(
+                    X.shape[0], X.shape[1]
+                )
+            )
+        else:
+            _logger.info(
+                "Building graph on precomputed {} matrix with {} cells.".format(
+                    precomputed, X.shape[0]
+                )
+            )
+
+        if self.graph is not None and update_graph:
+            self._update_graph(X, precomputed, n_pca, n_landmark)
+
+        self.X = X
+
+        if self.graph is None:
+            with _logger.task("graph and diffusion operator"):
+                self.graph = api.Graph(
+                    X,
+                    n_pca=n_pca,
+                    precomputed=precomputed,
+                    n_landmark=n_landmark,
+                    random_state=self.random_state,
+                    knn=self.knn,
+                    decay=self.decay,
+                    distance=self.distance,
+                    n_svd=self.n_svd,
+                    n_jobs=self.n_jobs,
+                    thresh=self.thresh,
+                    verbose=self.verbose,
+                    **(self.kwargs)
+                )
+        return self
diff --git a/graphtools/utils.py b/graphtools/utils.py
index 64d1b08..1710a06 100644
--- a/graphtools/utils.py
+++ b/graphtools/utils.py
@@ -1,6 +1,7 @@
 import numpy as np
 from scipy import sparse
 import numbers
+from functools import partial
 
 
 def if_sparse(sparse_func, dense_func, *args, **kwargs):
@@ -79,3 +80,133 @@ def to_array(X):
     elif isinstance(X, np.matrix):
         X = X.A
     return X
+
+
+def matrix_is_equivalent(X, Y):
+    """
+    Checks matrix equivalence with numpy, scipy and pandas
+    """
+    return X is Y or (
+        isinstance(X, Y.__class__)
+        and X.shape == Y.shape
+        and np.sum((X != Y).sum()) == 0
+    )
+
+
+def check_positive(**params):
+    """Check that parameters are positive as expected
+
+    Raises
+    ------
+    ValueError : unacceptable choice of parameters
+    """
+    for p in params:
+        if not isinstance(params[p], numbers.Number) or params[p] <= 0:
+            raise ValueError("Expected {} > 0, got {}".format(p, params[p]))
+
+
+def check_int(**params):
+    """Check that parameters are integers as expected
+
+    Raises
+    ------
+    ValueError : unacceptable choice of parameters
+    """
+    for p in params:
+        if not isinstance(params[p], numbers.Integral):
+            raise ValueError("Expected {} integer, got {}".format(p, params[p]))
+
+
+def check_if_not(x, *checks, **params):
+    """Run checks only if parameters are not equal to a specified value
+
+    Parameters
+    ----------
+
+    x : excepted value
+        Checks not run if parameters equal x
+
+    checks : function
+        Unnamed arguments, check functions to be run
+
+    params : object
+        Named arguments, parameters to be checked
+
+    Raises
+    ------
+    ValueError : unacceptable choice of parameters
+    """
+    for p in params:
+        if params[p] is not x and params[p] != x:
+            [check(**{p: params[p]}) for check in checks]
+
+
+def check_in(choices, **params):
+    """Checks parameters are in a list of allowed parameters
+
+    Parameters
+    ----------
+
+    choices : array-like, accepted values
+
+    params : object
+        Named arguments, parameters to be checked
+
+    Raises
+    ------
+    ValueError : unacceptable choice of parameters
+    """
+    for p in params:
+        if params[p] not in choices:
+            raise ValueError(
+                "{} value {} not recognized. Choose from {}".format(
+                    p, params[p], choices
+                )
+            )
+
+
+def check_between(v_min, v_max, **params):
+    """Checks parameters are in a specified range
+
+    Parameters
+    ----------
+
+    v_min : float, minimum allowed value (inclusive)
+
+    v_max : float, maximum allowed value (inclusive)
+
+    params : object
+        Named arguments, parameters to be checked
+
+    Raises
+    ------
+    ValueError : unacceptable choice of parameters
+    """
+    for p in params:
+        if params[p] < v_min or params[p] > v_max:
+            raise ValueError(
+                "Expected {} between {} and {}, "
+                "got {}".format(p, v_min, v_max, params[p])
+            )
+
+
+def attribute(attr, default=None, doc=None, on_set=None):
+    def getter(self, attr):
+        try:
+            return getattr(self, "_" + attr)
+        except AttributeError:
+            return default
+
+    def setter(self, value, attr, on_set=None):
+        if on_set is not None:
+            if callable(on_set):
+                on_set = [on_set]
+            for fn in on_set:
+                fn(**{attr: value})
+        setattr(self, "_" + attr, value)
+
+    return property(
+        fget=partial(getter, attr=attr),
+        fset=partial(setter, attr=attr, on_set=on_set),
+        doc=doc,
+    )

From fe43da1f353c570101c4e2bdec19c99575b2c2d4 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Tue, 4 Feb 2020 18:03:38 -0500
Subject: [PATCH 02/15] bump version

---
 graphtools/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphtools/version.py b/graphtools/version.py
index daa50c7..5a6e033 100644
--- a/graphtools/version.py
+++ b/graphtools/version.py
@@ -1 +1 @@
-__version__ = "1.4.2"
+__version__ = "1.4.3a0"

From b367813e6bbfebffeb0fb71bba49363ec881d87a Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 15:16:31 -0500
Subject: [PATCH 03/15] migrate matrix functions out of utils, test estimator

---
 graphtools/base.py      |  16 +--
 graphtools/estimator.py | 115 +++++++++++++--------
 graphtools/graphs.py    |  10 +-
 graphtools/utils.py     | 221 ++++++++++++++++++----------------------
 requirements.txt        |   1 +
 setup.py                |   1 +
 test/test_estimator.py  |  35 +++++++
 test/test_matrix.py     | 166 ++++++++++++++++++++++++++++++
 test/test_utils.py      |  40 --------
 9 files changed, 388 insertions(+), 217 deletions(-)
 create mode 100644 test/test_estimator.py
 create mode 100644 test/test_matrix.py
 delete mode 100644 test/test_utils.py

diff --git a/graphtools/base.py b/graphtools/base.py
index 4fabcb0..838bf90 100644
--- a/graphtools/base.py
+++ b/graphtools/base.py
@@ -27,7 +27,7 @@
     # anndata not installed
     pass
 
-from . import utils
+from . import matrix, utils
 
 _logger = tasklogger.get_tasklogger("graphtools")
 
@@ -584,9 +584,9 @@ def symmetrize_kernel(self, K):
             K = K.multiply(K.T)
         elif self.kernel_symm == "mnn":
             _logger.debug("Using mnn symmetrization (theta = {}).".format(self.theta))
-            K = self.theta * utils.elementwise_minimum(K, K.T) + (
+            K = self.theta * matrix.elementwise_minimum(K, K.T) + (
                 1 - self.theta
-            ) * utils.elementwise_maximum(K, K.T)
+            ) * matrix.elementwise_maximum(K, K.T)
         elif self.kernel_symm is None:
             _logger.debug("Using no symmetrization.")
             pass
@@ -683,7 +683,9 @@ def kernel_degree(self):
         try:
             return self._kernel_degree
         except AttributeError:
-            self._kernel_degree = utils.to_array(self.kernel.sum(axis=1)).reshape(-1, 1)
+            self._kernel_degree = matrix.to_array(self.kernel.sum(axis=1)).reshape(
+                -1, 1
+            )
             return self._kernel_degree
 
     @property
@@ -823,12 +825,12 @@ def to_igraph(self, attribute="weight", **kwargs):
         except AttributeError:
             # not a pygsp graph
             W = self.K.copy()
-            W = utils.set_diagonal(W, 0)
+            W = matrix.set_diagonal(W, 0)
         sources, targets = W.nonzero()
         edgelist = list(zip(sources, targets))
         g = ig.Graph(W.shape[0], edgelist, **kwargs)
         weights = W[W.nonzero()]
-        weights = utils.to_array(weights)
+        weights = matrix.to_array(weights)
         g.es[attribute] = weights.flatten().tolist()
         return g
 
@@ -987,7 +989,7 @@ def _build_weight_from_kernel(self, kernel):
 
         weight = kernel.copy()
         self._diagonal = weight.diagonal().copy()
-        weight = utils.set_diagonal(weight, 0)
+        weight = matrix.set_diagonal(weight, 0)
         return weight
 
 
diff --git a/graphtools/estimator.py b/graphtools/estimator.py
index 5979cf0..8d708ae 100644
--- a/graphtools/estimator.py
+++ b/graphtools/estimator.py
@@ -1,27 +1,40 @@
 import numpy as np
 import tasklogger
-
-try:
-    import anndata
-except ImportError:
-    # anndata not installed
-    pass
-
-try:
-    import pygsp
-except ImportError:
-    # anndata not installed
-    pass
+import pygsp
+import abc
 
 from functools import partial
 from scipy import sparse
 
 from . import api, graphs, base, utils
 
+
+def attribute(attr, default=None, doc=None, on_set=None):
+    def getter(self, attr):
+        try:
+            return getattr(self, "_" + attr)
+        except AttributeError:
+            return default
+
+    def setter(self, value, attr, on_set=None):
+        if on_set is not None:
+            if callable(on_set):
+                on_set = [on_set]
+            for fn in on_set:
+                fn(**{attr: value})
+        setattr(self, "_" + attr, value)
+
+    return property(
+        fget=partial(getter, attr=attr),
+        fset=partial(setter, attr=attr, on_set=on_set),
+        doc=doc,
+    )
+
+
 _logger = tasklogger.get_tasklogger("graphtools")
 
 
-class GraphEstimator(object):
+class GraphEstimator(object, metaclass=abc.ABCMeta):
     """Estimator which builds a graphtools Graph
 
     Parameters
@@ -35,7 +48,7 @@ class GraphEstimator(object):
         If None, alpha decaying kernel is not used
 
     n_landmark : int, optional, default: 2000
-        number of landmarks to use in fast PHATE
+        number of landmarks to use in graph construction
 
     n_pca : int, optional, default: 100
         Number of principal components to use for calculating
@@ -69,15 +82,22 @@ class GraphEstimator(object):
     verbose : `int` or `boolean`, optional (default: 1)
         If `True` or `> 0`, print status messages
         
-    n_svd : (default: 100)
+    n_svd : int, optional (default: 100)
+        number of singular vectors to compute for landmarking
     
-    thresh : (default: 1e-4)
+    thresh : float, optional (default: 1e-4)
+        threshold below which to truncate kernel
     
     kwargs : additional arguments for graphtools.Graph
+    
+    Attributes
+    ----------
+    
+    graph : graphtools.Graph
     """
 
-    X = utils.attribute("X", doc="Stored input data")
-    graph = utils.attribute("graph", doc="graphtools Graph object")
+    X = attribute("X", doc="Stored input data")
+    graph = attribute("graph", doc="graphtools Graph object")
 
     @graph.setter
     def graph(self, G):
@@ -85,18 +105,16 @@ def graph(self, G):
         if G is None:
             self._reset_graph()
 
-    n_pca = utils.attribute(
+    n_pca = attribute(
         "n_pca",
         default=100,
         on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int),
     )
-    random_state = utils.attribute("random_state")
+    random_state = attribute("random_state")
 
-    knn = utils.attribute(
-        "knn", default=5, on_set=[utils.check_positive, utils.check_int]
-    )
-    decay = utils.attribute("decay", default=40, on_set=utils.check_positive)
-    distance = utils.attribute(
+    knn = attribute("knn", default=5, on_set=[utils.check_positive, utils.check_int])
+    decay = attribute("decay", default=40, on_set=utils.check_positive)
+    distance = attribute(
         "distance",
         default="euclidean",
         on_set=partial(
@@ -132,22 +150,22 @@ def graph(self, G):
             ],
         ),
     )
-    n_svd = utils.attribute(
+    n_svd = attribute(
         "n_svd",
         default=100,
         on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int),
     )
-    n_jobs = utils.attribute(
+    n_jobs = attribute(
         "n_jobs", on_set=partial(utils.check_if_not, None, utils.check_int)
     )
-    verbose = utils.attribute("verbose", default=0)
-    thresh = utils.attribute(
+    verbose = attribute("verbose", default=0)
+    thresh = attribute(
         "thresh",
         default=1e-4,
         on_set=partial(utils.check_if_not, 0, utils.check_positive),
     )
 
-    n_landmark = utils.attribute("n_landmark")
+    n_landmark = attribute("n_landmark")
 
     @n_landmark.setter
     def n_landmark(self, n_landmark):
@@ -156,12 +174,18 @@ def n_landmark(self, n_landmark):
             None, utils.check_positive, utils.check_int, n_landmark=n_landmark
         )
         if self.graph is not None:
-            if n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph):
-                self.graph = None
-            elif n_landmark is not None and not isinstance(
-                self.graph, graphs.LandmarkGraph
+            if (
+                n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph)
+            ) or (
+                n_landmark is not None
+                and not isinstance(self.graph, graphs.LandmarkGraph)
             ):
+                # new graph but the same kernel
+                # there may be a better way to do this
+                kernel = self.graph.kernel
                 self.graph = None
+                self.fit(self.X, initialize=False)
+                self.graph._kernel = kernel
 
     def __init__(
         self,
@@ -194,6 +218,7 @@ def __init__(
         self.verbose = verbose
         self.thresh = thresh
         self.kwargs = kwargs
+        self.logger = _logger
         _logger.set_level(self.verbose)
 
     def set_params(self, **params):
@@ -209,7 +234,12 @@ def _set_graph_params(self, **params):
                 _logger.debug("Reset graph due to {}".format(str(e)))
                 self.graph = None
 
+    @abc.abstractmethod
     def _reset_graph(self):
+        """Trigger a reset of self.graph
+        
+        Subclasses should override this to handle downstream effects of a graph reset
+        """
         pass
 
     def _detect_precomputed_matrix_type(self, X):
@@ -271,12 +301,8 @@ def _parse_input(self, X):
 
         # checks on regular data
         update_graph = True
-        try:
-            if isinstance(X, anndata.AnnData):
-                X = X.X
-        except NameError:
-            # anndata not installed
-            pass
+        if utils.is_Anndata(X):
+            X = X.X
         if not callable(self.distance) and self.distance.startswith("precomputed"):
             if self.distance == "precomputed":
                 # automatic detection
@@ -330,7 +356,7 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs):
                 _logger.debug("Reset graph due to {}".format(str(e)))
                 self.graph = None
 
-    def fit(self, X):
+    def fit(self, X, **kwargs):
         """Computes the graph
 
         Parameters
@@ -342,6 +368,8 @@ def fit(self, X):
             `knn_dist` is 'precomputed', `data` should be a n_samples x
             n_samples distance or affinity matrix
 
+        kwargs : additional arguments for graphtools.Graph
+
         Returns
         -------
         self : graphtools.estimator.GraphEstimator
@@ -356,7 +384,7 @@ def fit(self, X):
             )
         else:
             _logger.info(
-                "Building graph on precomputed {} matrix with {} cells.".format(
+                "Building graph on precomputed {} matrix with {} samples.".format(
                     precomputed, X.shape[0]
                 )
             )
@@ -381,6 +409,7 @@ def fit(self, X):
                     n_jobs=self.n_jobs,
                     thresh=self.thresh,
                     verbose=self.verbose,
-                    **(self.kwargs)
+                    **(self.kwargs),
+                    **kwargs
                 )
         return self
diff --git a/graphtools/graphs.py b/graphtools/graphs.py
index c51851c..3d6fbe7 100644
--- a/graphtools/graphs.py
+++ b/graphtools/graphs.py
@@ -12,7 +12,7 @@
 import warnings
 import tasklogger
 
-from . import utils
+from . import matrix, utils
 from .base import DataGraph, PyGSPGraph
 
 _logger = tasklogger.get_tasklogger("graphtools")
@@ -983,7 +983,7 @@ def build_kernel(self):
                 isinstance(K, sparse.dok_matrix) or isinstance(K, sparse.lil_matrix)
             ):
                 K = K.tolil()
-            K = utils.set_diagonal(K, 1)
+            K = matrix.set_diagonal(K, 1)
         else:
             with _logger.task("affinities"):
                 if sparse.issparse(self.data_nu):
@@ -1110,7 +1110,7 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None, bandwidth_scale=None
     @property
     def weighted(self):
         if self.precomputed is not None:
-            return not utils.nonzero_discrete(self.K, [0.5, 1])
+            return not matrix.nonzero_discrete(self.K, [0.5, 1])
         else:
             return super().weighted
 
@@ -1333,7 +1333,7 @@ def build_kernel(self):
             else:
                 K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]])
             for i, X in enumerate(self.subgraphs):
-                K = utils.set_submatrix(
+                K = matrix.set_submatrix(
                     K,
                     self.sample_idx == self.samples[i],
                     self.sample_idx == self.samples[i],
@@ -1358,7 +1358,7 @@ def build_kernel(self):
                             Kij = Kij.multiply(scale[:, None])
                         else:
                             Kij = Kij * scale[:, None]
-                        K = utils.set_submatrix(
+                        K = matrix.set_submatrix(
                             K,
                             self.sample_idx == self.samples[i],
                             self.sample_idx == self.samples[j],
diff --git a/graphtools/utils.py b/graphtools/utils.py
index ad9c3b0..8482d2a 100644
--- a/graphtools/utils.py
+++ b/graphtools/utils.py
@@ -1,9 +1,7 @@
-import numpy as np
 import numbers
 import warnings
-
-from scipy import sparse
-from functools import partial
+from deprecated import deprecated
+from . import matrix
 
 try:
     import pandas as pd
@@ -11,94 +9,37 @@
     # pandas not installed
     pass
 
-
-def if_sparse(sparse_func, dense_func, *args, **kwargs):
-    if sparse.issparse(args[0]):
-        for arg in args[1:]:
-            assert sparse.issparse(arg)
-        return sparse_func(*args, **kwargs)
-    else:
-        return dense_func(*args, **kwargs)
-
-
-def sparse_minimum(X, Y):
-    return X.minimum(Y)
-
-
-def sparse_maximum(X, Y):
-    return X.maximum(Y)
-
-
-def elementwise_minimum(X, Y):
-    return if_sparse(sparse_minimum, np.minimum, X, Y)
-
-
-def elementwise_maximum(X, Y):
-    return if_sparse(sparse_maximum, np.maximum, X, Y)
-
-
-def dense_set_diagonal(X, diag):
-    X[np.diag_indices(X.shape[0])] = diag
-    return X
-
-
-def sparse_set_diagonal(X, diag):
-    cls = type(X)
-    if not isinstance(X, (sparse.lil_matrix, sparse.dia_matrix)):
-        X = X.tocoo()
-    X.setdiag(diag)
-    return cls(X)
-
-
-def set_diagonal(X, diag):
-    return if_sparse(sparse_set_diagonal, dense_set_diagonal, X, diag=diag)
-
-
-def set_submatrix(X, i, j, values):
-    X[np.ix_(i, j)] = values
-    return X
-
-
-def sparse_nonzero_discrete(X, values):
-    if isinstance(
-        X, (sparse.bsr_matrix, sparse.dia_matrix, sparse.dok_matrix, sparse.lil_matrix)
-    ):
-        X = X.tocsr()
-    return dense_nonzero_discrete(X.data, values)
-
-
-def dense_nonzero_discrete(X, values):
-    result = np.full_like(X, False, dtype=bool)
-    for value in values:
-        result = np.logical_or(result, X == value)
-    return np.all(result)
-
-
-def nonzero_discrete(X, values):
-    if isinstance(values, numbers.Number):
-        values = [values]
-    if 0 not in values:
-        values.append(0)
-    return if_sparse(sparse_nonzero_discrete, dense_nonzero_discrete, X, values=values)
+try:
+    import anndata
+except ImportError:
+    # anndata not installed
+    pass
 
 
-def to_array(X):
-    if sparse.issparse(X):
-        X = X.toarray()
-    elif isinstance(X, np.matrix):
-        X = X.A
-    return X
+def is_SparseDataFrame(X):
+    try:
+        pd
+    except NameError:
+        # pandas not installed
+        return False
+    with warnings.catch_warnings():
+        warnings.filterwarnings(
+            "ignore",
+            "The SparseDataFrame class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version",
+            FutureWarning,
+        )
+        try:
+            return isinstance(X, pd.SparseDataFrame)
+        except AttributeError:
+            return False
 
 
-def matrix_is_equivalent(X, Y):
-    """
-    Checks matrix equivalence with numpy, scipy and pandas
-    """
-    return X is Y or (
-        isinstance(X, Y.__class__)
-        and X.shape == Y.shape
-        and np.sum((X != Y).sum()) == 0
-    )
+def is_Anndata(X):
+    try:
+        return isinstance(X, anndata.AnnData)
+    except NameError:
+        # anndata not installed
+        return False
 
 
 def check_positive(**params):
@@ -198,41 +139,77 @@ def check_between(v_min, v_max, **params):
             )
 
 
-def attribute(attr, default=None, doc=None, on_set=None):
-    def getter(self, attr):
-        try:
-            return getattr(self, "_" + attr)
-        except AttributeError:
-            return default
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.if_sparse instead")
+def if_sparse(*args, **kwargs):
+    return matrix.if_sparse(*args, **kwargs)
 
-    def setter(self, value, attr, on_set=None):
-        if on_set is not None:
-            if callable(on_set):
-                on_set = [on_set]
-            for fn in on_set:
-                fn(**{attr: value})
-        setattr(self, "_" + attr, value)
 
-    return property(
-        fget=partial(getter, attr=attr),
-        fset=partial(setter, attr=attr, on_set=on_set),
-        doc=doc,
-    )
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.sparse_minimum instead")
+def sparse_minimum(*args, **kwargs):
+    return matrix.sparse_minimum(*args, **kwargs)
 
 
-def is_SparseDataFrame(X):
-    try:
-        pd
-    except NameError:
-        # pandas not installed
-        return False
-    with warnings.catch_warnings():
-        warnings.filterwarnings(
-            "ignore",
-            "The SparseDataFrame class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version",
-            FutureWarning,
-        )
-        try:
-            return isinstance(X, pd.SparseDataFrame)
-        except AttributeError:
-            return False
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.sparse_maximum instead")
+def sparse_maximum(*args, **kwargs):
+    return matrix.sparse_maximum(*args, **kwargs)
+
+
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.elementwise_minimum instead")
+def elementwise_minimum(*args, **kwargs):
+    return matrix.elementwise_minimum(*args, **kwargs)
+
+
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.elementwise_maximum instead")
+def elementwise_maximum(*args, **kwargs):
+    return matrix.elementwise_maximum(*args, **kwargs)
+
+
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.dense_set_diagonal instead")
+def dense_set_diagonal(*args, **kwargs):
+    return matrix.dense_set_diagonal(*args, **kwargs)
+
+
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.sparse_set_diagonal instead")
+def sparse_set_diagonal(*args, **kwargs):
+    return matrix.sparse_set_diagonal(*args, **kwargs)
+
+
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.set_diagonal instead")
+def set_diagonal(*args, **kwargs):
+    return matrix.set_diagonal(*args, **kwargs)
+
+
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.set_submatrix instead")
+def set_submatrix(*args, **kwargs):
+    return matrix.set_submatrix(*args, **kwargs)
+
+
+@deprecated(
+    version="1.5.0", reason="Use graphtools.matrix.sparse_nonzero_discrete instead"
+)
+def sparse_nonzero_discrete(*args, **kwargs):
+    return matrix.sparse_nonzero_discrete(*args, **kwargs)
+
+
+@deprecated(
+    version="1.5.0", reason="Use graphtools.matrix.dense_nonzero_discrete instead"
+)
+def dense_nonzero_discrete(*args, **kwargs):
+    return matrix.dense_nonzero_discrete(*args, **kwargs)
+
+
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.nonzero_discrete instead")
+def nonzero_discrete(*args, **kwargs):
+    return matrix.nonzero_discrete(*args, **kwargs)
+
+
+@deprecated(version="1.5.0", reason="Use graphtools.matrix.to_array instead")
+def to_array(*args, **kwargs):
+    return matrix.to_array(*args, **kwargs)
+
+
+@deprecated(
+    version="1.5.0", reason="Use graphtools.matrix.matrix_is_equivalent instead"
+)
+def matrix_is_equivalent(*args, **kwargs):
+    return matrix.matrix_is_equivalent(*args, **kwargs)
diff --git a/requirements.txt b/requirements.txt
index 08fcd1b..c31163d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ pygsp>=>=0.5.1
 scikit-learn>=0.20.0
 future
 tasklogger>=1.0
+Deprecated
diff --git a/setup.py b/setup.py
index 4d7be50..4384517 100644
--- a/setup.py
+++ b/setup.py
@@ -9,6 +9,7 @@
     "scikit-learn>=0.20.0",
     "future",
     "tasklogger>=1.0",
+    "Deprecated",
 ]
 
 test_requires = [
diff --git a/test/test_estimator.py b/test/test_estimator.py
new file mode 100644
index 0000000..4f6fc15
--- /dev/null
+++ b/test/test_estimator.py
@@ -0,0 +1,35 @@
+import graphtools
+import graphtools.estimator
+import numpy as np
+from load_tests import data
+
+
+class Estimator(graphtools.estimator.GraphEstimator):
+    def _reset_graph(self):
+        self.reset = True
+
+
+def test_estimator():
+    E = Estimator(verbose=True)
+    assert E.verbose == 1
+    E = Estimator(verbose=False, n_landmark=None)
+    assert E.verbose == 0
+    E.fit(data)
+    assert np.all(E.X == data)
+    assert isinstance(E.graph, graphtools.graphs.kNNGraph)
+    assert not isinstance(E.graph, graphtools.graphs.LandmarkGraph)
+    assert not hasattr(E, "reset")
+    # convert non landmark to landmark
+    E.set_params(n_landmark=data.shape[0] // 2)
+    assert E.reset
+    assert isinstance(E.graph, graphtools.graphs.LandmarkGraph)
+    del E.reset
+    # convert landmark to non landmark
+    E.set_params(n_landmark=None)
+    assert E.reset
+    assert not isinstance(E.graph, graphtools.graphs.LandmarkGraph)
+    del E.reset
+    # change parameters that force reset
+    E.set_params(knn=E.knn * 2)
+    assert E.reset
+    assert E.graph is None
diff --git a/test/test_matrix.py b/test/test_matrix.py
new file mode 100644
index 0000000..aac45a7
--- /dev/null
+++ b/test/test_matrix.py
@@ -0,0 +1,166 @@
+import graphtools.matrix
+import graphtools.utils
+from parameterized import parameterized
+from scipy import sparse
+import numpy as np
+import graphtools
+from load_tests import data
+from load_tests import assert_warns_message
+
+
+@parameterized(
+    [
+        (np.array,),
+        (sparse.csr_matrix,),
+        (sparse.csc_matrix,),
+        (sparse.bsr_matrix,),
+        (sparse.lil_matrix,),
+        (sparse.coo_matrix,),
+    ]
+)
+def test_nonzero_discrete(matrix_class):
+    X = np.random.choice([0, 1, 2], p=[0.95, 0.025, 0.025], size=(100, 100))
+    X = matrix_class(X)
+    assert graphtools.matrix.nonzero_discrete(X, [1, 2])
+    assert not graphtools.matrix.nonzero_discrete(X, [1, 3])
+
+
+@parameterized([(0,), (1e-4,)])
+def test_nonzero_discrete_knngraph(thresh):
+    G = graphtools.Graph(data, n_pca=10, knn=5, decay=None, thresh=thresh)
+    assert graphtools.matrix.nonzero_discrete(G.K, [0.5, 1])
+
+
+@parameterized([(0,), (1e-4,)])
+def test_nonzero_discrete_decay_graph(thresh):
+    G = graphtools.Graph(data, n_pca=10, knn=5, decay=15, thresh=thresh)
+    assert not graphtools.matrix.nonzero_discrete(G.K, [0.5, 1])
+
+
+def test_nonzero_discrete_constant():
+    assert graphtools.matrix.nonzero_discrete(2, [1, 2])
+    assert not graphtools.matrix.nonzero_discrete(2, [1, 3])
+
+
+def test_if_sparse_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) if_sparse. (Use graphtools.matrix.if_sparse instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.if_sparse(lambda x: x, lambda x: x, np.zeros((4, 4)))
+
+
+def test_sparse_minimum_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) sparse_minimum. (Use graphtools.matrix.sparse_minimum instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.sparse_minimum(
+            sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4))
+        )
+
+
+def test_sparse_maximum_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) sparse_maximum. (Use graphtools.matrix.sparse_maximum instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.sparse_maximum(
+            sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4))
+        )
+
+
+def test_elementwise_minimum_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) elementwise_minimum. (Use graphtools.matrix.elementwise_minimum instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.elementwise_minimum(
+            sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4))
+        )
+
+
+def test_elementwise_maximum_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) elementwise_maximum. (Use graphtools.matrix.elementwise_maximum instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.elementwise_maximum(
+            sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4))
+        )
+
+
+def test_dense_set_diagonal_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) dense_set_diagonal. (Use graphtools.matrix.dense_set_diagonal instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.dense_set_diagonal(np.zeros((4, 4)), 1)
+
+
+def test_sparse_set_diagonal_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) sparse_set_diagonal. (Use graphtools.matrix.sparse_set_diagonal instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.sparse_set_diagonal(sparse.csr_matrix((4, 4)), 1)
+
+
+def test_set_diagonal_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) set_diagonal. (Use graphtools.matrix.set_diagonal instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.set_diagonal(np.zeros((4, 4)), 1)
+
+
+def test_set_submatrix_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) set_submatrix. (Use graphtools.matrix.set_submatrix instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.set_submatrix(
+            sparse.lil_matrix((4, 4)), [1, 2], [0, 1], np.array([[1, 2], [3, 4]])
+        )
+
+
+def test_sparse_nonzero_discrete_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) sparse_nonzero_discrete. (Use graphtools.matrix.sparse_nonzero_discrete instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.sparse_nonzero_discrete(sparse.csr_matrix((4, 4)), [1])
+
+
+def test_dense_nonzero_discrete_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) dense_nonzero_discrete. (Use graphtools.matrix.dense_nonzero_discrete instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.dense_nonzero_discrete(np.zeros((4, 4)), [1])
+
+
+def test_nonzero_discrete_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) nonzero_discrete. (Use graphtools.matrix.nonzero_discrete instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.nonzero_discrete(np.zeros((4, 4)), [1])
+
+
+def test_to_array_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) to_array. (Use graphtools.matrix.to_array instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.to_array([1])
+
+
+def test_matrix_is_equivalent_deprecated():
+    with assert_warns_message(
+        DeprecationWarning,
+        "Call to deprecated function (or staticmethod) matrix_is_equivalent. (Use graphtools.matrix.matrix_is_equivalent instead) -- Deprecated since version 1.5.0.",
+    ):
+        graphtools.utils.matrix_is_equivalent(
+            sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4))
+        )
diff --git a/test/test_utils.py b/test/test_utils.py
deleted file mode 100644
index 0d72174..0000000
--- a/test/test_utils.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import graphtools.utils
-from parameterized import parameterized
-from scipy import sparse
-import numpy as np
-import graphtools
-from load_tests import data
-
-
-@parameterized(
-    [
-        (np.array,),
-        (sparse.csr_matrix,),
-        (sparse.csc_matrix,),
-        (sparse.bsr_matrix,),
-        (sparse.lil_matrix,),
-        (sparse.coo_matrix,),
-    ]
-)
-def test_nonzero_discrete(matrix_class):
-    X = np.random.choice([0, 1, 2], p=[0.95, 0.025, 0.025], size=(100, 100))
-    X = matrix_class(X)
-    assert graphtools.utils.nonzero_discrete(X, [1, 2])
-    assert not graphtools.utils.nonzero_discrete(X, [1, 3])
-
-
-@parameterized([(0,), (1e-4,)])
-def test_nonzero_discrete_knngraph(thresh):
-    G = graphtools.Graph(data, n_pca=10, knn=5, decay=None, thresh=thresh)
-    assert graphtools.utils.nonzero_discrete(G.K, [0.5, 1])
-
-
-@parameterized([(0,), (1e-4,)])
-def test_nonzero_discrete_decay_graph(thresh):
-    G = graphtools.Graph(data, n_pca=10, knn=5, decay=15, thresh=thresh)
-    assert not graphtools.utils.nonzero_discrete(G.K, [0.5, 1])
-
-
-def test_nonzero_discrete_constant():
-    assert graphtools.utils.nonzero_discrete(2, [1, 2])
-    assert not graphtools.utils.nonzero_discrete(2, [1, 3])

From 37fc848d7d6d529b2f56799891a8aab73730f95d Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 15:19:51 -0500
Subject: [PATCH 04/15] bump version

---
 graphtools/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphtools/version.py b/graphtools/version.py
index 5a6e033..61a6a00 100644
--- a/graphtools/version.py
+++ b/graphtools/version.py
@@ -1 +1 @@
-__version__ = "1.4.3a0"
+__version__ = "1.5.0a0"

From afe46f11fd7bfb9222ac50ad5216e6a98cd59c05 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 16:02:35 -0500
Subject: [PATCH 05/15] fix deprecated function call

---
 graphtools/estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphtools/estimator.py b/graphtools/estimator.py
index 8d708ae..8b1b24d 100644
--- a/graphtools/estimator.py
+++ b/graphtools/estimator.py
@@ -328,7 +328,7 @@ def _parse_input(self, X):
         return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph
 
     def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs):
-        if self.X is not None and not utils.matrix_is_equivalent(X, self.X):
+        if self.X is not None and not matrix.matrix_is_equivalent(X, self.X):
             """
             If the same data is used, we can reuse existing kernel and
             diffusion matrices. Otherwise we have to recompute.

From b65897dbda85afc369721f2ea75ecd79da887654 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 16:05:57 -0500
Subject: [PATCH 06/15] add missing matrix submodule

---
 graphtools/matrix.py | 93 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 93 insertions(+)
 create mode 100644 graphtools/matrix.py

diff --git a/graphtools/matrix.py b/graphtools/matrix.py
new file mode 100644
index 0000000..8c818f2
--- /dev/null
+++ b/graphtools/matrix.py
@@ -0,0 +1,93 @@
+import numpy as np
+import numbers
+
+from scipy import sparse
+
+
+def if_sparse(sparse_func, dense_func, *args, **kwargs):
+    if sparse.issparse(args[0]):
+        for arg in args[1:]:
+            assert sparse.issparse(arg)
+        return sparse_func(*args, **kwargs)
+    else:
+        return dense_func(*args, **kwargs)
+
+
+def sparse_minimum(X, Y):
+    return X.minimum(Y)
+
+
+def sparse_maximum(X, Y):
+    return X.maximum(Y)
+
+
+def elementwise_minimum(X, Y):
+    return if_sparse(sparse_minimum, np.minimum, X, Y)
+
+
+def elementwise_maximum(X, Y):
+    return if_sparse(sparse_maximum, np.maximum, X, Y)
+
+
+def dense_set_diagonal(X, diag):
+    X[np.diag_indices(X.shape[0])] = diag
+    return X
+
+
+def sparse_set_diagonal(X, diag):
+    cls = type(X)
+    if not isinstance(X, (sparse.lil_matrix, sparse.dia_matrix)):
+        X = X.tocoo()
+    X.setdiag(diag)
+    return cls(X)
+
+
+def set_diagonal(X, diag):
+    return if_sparse(sparse_set_diagonal, dense_set_diagonal, X, diag=diag)
+
+
+def set_submatrix(X, i, j, values):
+    X[np.ix_(i, j)] = values
+    return X
+
+
+def sparse_nonzero_discrete(X, values):
+    if isinstance(
+        X, (sparse.bsr_matrix, sparse.dia_matrix, sparse.dok_matrix, sparse.lil_matrix)
+    ):
+        X = X.tocsr()
+    return dense_nonzero_discrete(X.data, values)
+
+
+def dense_nonzero_discrete(X, values):
+    result = np.full_like(X, False, dtype=bool)
+    for value in values:
+        result = np.logical_or(result, X == value)
+    return np.all(result)
+
+
+def nonzero_discrete(X, values):
+    if isinstance(values, numbers.Number):
+        values = [values]
+    if 0 not in values:
+        values.append(0)
+    return if_sparse(sparse_nonzero_discrete, dense_nonzero_discrete, X, values=values)
+
+
+def to_array(X):
+    if sparse.issparse(X):
+        X = X.toarray()
+    elif isinstance(X, np.matrix):
+        X = X.A
+    return X
+
+
+def matrix_is_equivalent(X, Y):
+    """
+    Checks matrix equivalence with numpy, scipy and pandas
+    """
+    return X is Y or (
+        isinstance(X, Y.__class__)
+        and X.shape == Y.shape
+        and np.sum((X != Y).sum()) == 0
+    )

From e9ea8bf946a6c6179440541ff16096369d87bfa0 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 16:08:42 -0500
Subject: [PATCH 07/15] add missing matrix import

---
 graphtools/estimator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphtools/estimator.py b/graphtools/estimator.py
index 8b1b24d..9196a08 100644
--- a/graphtools/estimator.py
+++ b/graphtools/estimator.py
@@ -6,7 +6,7 @@
 from functools import partial
 from scipy import sparse
 
-from . import api, graphs, base, utils
+from . import api, graphs, base, utils, matrix
 
 
 def attribute(attr, default=None, doc=None, on_set=None):

From 1511193a401ed1f6a64a348f814be996862f3d8c Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 17:22:52 -0500
Subject: [PATCH 08/15] test utils

---
 graphtools/utils.py | 31 ++++++++++++++++++++++++-------
 test/test_utils.py  | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+), 7 deletions(-)
 create mode 100644 test/test_utils.py

diff --git a/graphtools/utils.py b/graphtools/utils.py
index 8482d2a..1ef9c3e 100644
--- a/graphtools/utils.py
+++ b/graphtools/utils.py
@@ -5,13 +5,13 @@
 
 try:
     import pandas as pd
-except ImportError:
+except ImportError:  # pragma: no cover
     # pandas not installed
     pass
 
 try:
     import anndata
-except ImportError:
+except ImportError:  # pragma: no cover
     # anndata not installed
     pass
 
@@ -19,7 +19,7 @@
 def is_SparseDataFrame(X):
     try:
         pd
-    except NameError:
+    except NameError:  # pragma: no cover
         # pandas not installed
         return False
     with warnings.catch_warnings():
@@ -37,11 +37,29 @@ def is_SparseDataFrame(X):
 def is_Anndata(X):
     try:
         return isinstance(X, anndata.AnnData)
-    except NameError:
+    except NameError:  # pragma: no cover
         # anndata not installed
         return False
 
 
+def check_greater(x, **params):
+    """Check that parameters are greater than x as expected
+
+    Parameters
+    ----------
+
+    x : number
+        Exclusive lower bound; every parameter must be strictly greater than x
+
+    Raises
+    ------
+    ValueError : unacceptable choice of parameters
+    """
+    for p in params:
+        if not isinstance(params[p], numbers.Number) or params[p] <= x:
+            raise ValueError("Expected {} > {}, got {}".format(p, x, params[p]))
+
+
 def check_positive(**params):
     """Check that parameters are positive as expected
 
@@ -49,9 +67,7 @@ def check_positive(**params):
     ------
     ValueError : unacceptable choice of parameters
     """
-    for p in params:
-        if not isinstance(params[p], numbers.Number) or params[p] <= 0:
-            raise ValueError("Expected {} > 0, got {}".format(p, params[p]))
+    return check_greater(0, **params)
 
 
 def check_int(**params):
@@ -131,6 +147,7 @@ def check_between(v_min, v_max, **params):
     ------
     ValueError : unacceptable choice of parameters
     """
+    check_greater(v_min, v_max=v_max)
     for p in params:
         if params[p] < v_min or params[p] > v_max:
             raise ValueError(
diff --git a/test/test_utils.py b/test/test_utils.py
new file mode 100644
index 0000000..1aadd82
--- /dev/null
+++ b/test/test_utils.py
@@ -0,0 +1,39 @@
+import graphtools
+from load_tests import assert_raises_message
+
+
+def test_check_in():
+    graphtools.utils.check_in(["hello", "world"], foo="hello")
+    with assert_raises_message(
+        ValueError, "foo value bar not recognized. Choose from ['hello', 'world']"
+    ):
+        graphtools.utils.check_in(["hello", "world"], foo="bar")
+
+
+def test_check_int():
+    graphtools.utils.check_int(foo=5)
+    graphtools.utils.check_int(foo=-5)
+    with assert_raises_message(ValueError, "Expected foo integer, got 5.3"):
+        graphtools.utils.check_int(foo=5.3)
+
+
+def test_check_positive():
+    graphtools.utils.check_positive(foo=5)
+    with assert_raises_message(ValueError, "Expected foo > 0, got -5"):
+        graphtools.utils.check_positive(foo=-5)
+    with assert_raises_message(ValueError, "Expected foo > 0, got 0"):
+        graphtools.utils.check_positive(foo=0)
+
+
+def test_check_if_not():
+    graphtools.utils.check_if_not(-5, graphtools.utils.check_positive, foo=-5)
+    with assert_raises_message(ValueError, "Expected foo > 0, got -5"):
+        graphtools.utils.check_if_not(-4, graphtools.utils.check_positive, foo=-5)
+
+
+def test_check_between():
+    graphtools.utils.check_between(-5, -3, foo=-4)
+    with assert_raises_message(ValueError, "Expected foo between -5 and -3, got -6"):
+        graphtools.utils.check_between(-5, -3, foo=-6)
+    with assert_raises_message(ValueError, "Expected v_max > -3, got -5"):
+        graphtools.utils.check_between(-3, -5, foo=-6)

From 418cf7286b5033f135f850a59e3e025a6cfdf8ff Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 17:26:43 -0500
Subject: [PATCH 09/15] migrate pandas to utils

---
 graphtools/base.py  | 45 ++++++++++++---------------------------------
 graphtools/utils.py |  8 ++++++++
 2 files changed, 20 insertions(+), 33 deletions(-)

diff --git a/graphtools/base.py b/graphtools/base.py
index 838bf90..a8671d2 100644
--- a/graphtools/base.py
+++ b/graphtools/base.py
@@ -15,18 +15,6 @@
 import sys
 import tasklogger
 
-try:
-    import pandas as pd
-except ImportError:
-    # pandas not installed
-    pass
-
-try:
-    import anndata
-except (ImportError, SyntaxError):
-    # anndata not installed
-    pass
-
 from . import matrix, utils
 
 _logger = tasklogger.get_tasklogger("graphtools")
@@ -126,28 +114,19 @@ def __init__(
 
         self._check_data(data)
         n_pca, rank_threshold = self._parse_n_pca_threshold(data, n_pca, rank_threshold)
-        try:
-            pd
-        except NameError:
-            # pandas not installed
-            pass
-        else:
-            if utils.is_SparseDataFrame(data):
-                data = data.to_coo()
-            elif isinstance(data, pd.DataFrame):
-                try:
-                    data = data.sparse.to_coo()
-                except AttributeError:
-                    data = np.array(data)
 
-        try:
-            anndata
-        except NameError:
-            # anndata not installed
-            pass
-        else:
-            if isinstance(data, anndata.AnnData):
-                data = data.X
+        if utils.is_SparseDataFrame(data):
+            data = data.to_coo()
+        elif utils.is_DataFrame(data):
+            try:
+                # sparse data
+                data = data.sparse.to_coo()
+            except AttributeError:
+                # dense data
+                data = np.array(data)
+        elif utils.is_Anndata(data):
+            data = data.X
+
         self.data = data
         self.n_pca = n_pca
         self.rank_threshold = rank_threshold
diff --git a/graphtools/utils.py b/graphtools/utils.py
index 1ef9c3e..55e2bd3 100644
--- a/graphtools/utils.py
+++ b/graphtools/utils.py
@@ -16,6 +16,14 @@
     pass
 
 
+def is_DataFrame(X):
+    try:
+        return isinstance(X, pd.DataFrame)
+    except NameError:  # pragma: no cover
+        # pandas not installed
+        return False
+
+
 def is_SparseDataFrame(X):
     try:
         pd

From 02664d27a979193cfa0243fdd6bcb16b414ee295 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 17:28:52 -0500
Subject: [PATCH 10/15] clean up untestables

---
 graphtools/base.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/graphtools/base.py b/graphtools/base.py
index a8671d2..4789b8a 100644
--- a/graphtools/base.py
+++ b/graphtools/base.py
@@ -570,11 +570,7 @@ def symmetrize_kernel(self, K):
             _logger.debug("Using no symmetrization.")
             pass
         else:
-            # this should never happen
-            raise ValueError(
-                "Expected kernel_symm in ['+', '*', 'mnn' or None]. "
-                "Got {}".format(self.theta)
-            )
+            raise NotImplementedError
         return K
 
     def apply_anisotropy(self, K):
@@ -795,7 +791,7 @@ def to_igraph(self, attribute="weight", **kwargs):
         """
         try:
             import igraph as ig
-        except ImportError:
+        except ImportError:  # pragma: no cover
             raise ImportError(
                 "Please install igraph with " "`pip install --user python-igraph`."
             )

From f802646b83493de2333edb2fd4774d27388729ad Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 17:44:04 -0500
Subject: [PATCH 11/15] clean up logic

---
 graphtools/estimator.py | 152 ++++++++++++++++++----------------------
 1 file changed, 70 insertions(+), 82 deletions(-)

diff --git a/graphtools/estimator.py b/graphtools/estimator.py
index 9196a08..f68db58 100644
--- a/graphtools/estimator.py
+++ b/graphtools/estimator.py
@@ -223,16 +223,26 @@ def __init__(
 
     def set_params(self, **params):
         for p in params:
-            setattr(self, p, params[p])
+            if not getattr(self, p) == params[p]:
+                setattr(self, p, params[p])
         self._set_graph_params(**params)
 
     def _set_graph_params(self, **params):
         if self.graph is not None:
+            if "n_landmark" in params:
+                n_landmark = params["n_landmark"]
+                del params["n_landmark"]
             try:
                 self.graph.set_params(**params)
             except ValueError as e:
                 _logger.debug("Reset graph due to {}".format(str(e)))
                 self.graph = None
+            else:
+                try:
+                    # n_landmark bypasses graph.set_params; set on self instead
+                    self.n_landmark = n_landmark
+                except NameError:
+                    pass
 
     @abc.abstractmethod
     def _reset_graph(self):
@@ -240,7 +250,7 @@ def _reset_graph(self):
         
         Any downstream effects of resetting the graph should override this function
         """
-        pass
+        raise NotImplementedError
 
     def _detect_precomputed_matrix_type(self, X):
         if isinstance(X, sparse.coo_matrix):
@@ -259,72 +269,54 @@ def _parse_n_landmark(self, X):
     def _parse_input(self, X):
         # passing graphs as input
         if isinstance(X, base.BaseGraph):
-            if isinstance(X, graphs.LandmarkGraph) or (
-                isinstance(X, base.BaseGraph) and self.n_landmark is None
-            ):
-                # we can keep this graph
-                self.graph = X
-                X = X.data
-                n_pca = self.graph.n_pca
-                update_graph = False
-                if isinstance(self.graph, graphs.TraditionalGraph):
-                    precomputed = self.graph.precomputed
-                else:
-                    precomputed = None
-                return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph
+            # we can keep this graph
+            self.graph = X
+            X = X.data
+            # immutable graph properties override operator
+            n_pca = self.graph.n_pca
+            self.knn = X.knn
+            self.decay = X.decay
+            self.distance = X.distance
+            self.thresh = X.thresh
+            update_graph = False
+            if isinstance(self.graph, graphs.TraditionalGraph):
+                precomputed = self.graph.precomputed
             else:
-                # n_landmark is set, but this is not a landmark graph
-                self.graph = None
-                X = X.kernel
-                precomputed = "affinity"
-                n_pca = None
-                update_graph = False
-                return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph
-        else:
-            try:
-                if isinstance(X, pygsp.graphs.Graph):
-                    self.graph = None
-                    X = X.W
-                    precomputed = "adjacency"
-                    update_graph = False
-                    n_pca = None
-                    return (
-                        X,
-                        n_pca,
-                        self._parse_n_landmark(X),
-                        precomputed,
-                        update_graph,
-                    )
-            except NameError:
-                # pygsp not installed
-                pass
-
-        # checks on regular data
-        update_graph = True
-        if utils.is_Anndata(X):
-            X = X.X
-        if not callable(self.distance) and self.distance.startswith("precomputed"):
-            if self.distance == "precomputed":
-                # automatic detection
-                precomputed = self._detect_precomputed_matrix_type(X)
-            elif self.distance in ["precomputed_affinity", "precomputed_distance"]:
-                precomputed = self.distance.split("_")[1]
-            else:
-                raise ValueError(
-                    "distance {} not recognized. Did you mean "
-                    "'precomputed_distance', "
-                    "'precomputed_affinity', or 'precomputed' "
-                    "(automatically detects distance or affinity)?".format(
-                        self.distance
-                    )
-                )
+                precomputed = None
+        elif isinstance(X, pygsp.graphs.Graph):
+            # convert pygsp to graphtools
+            self.graph = None
+            X = X.W
+            precomputed = "adjacency"
+            update_graph = False
             n_pca = None
         else:
-            precomputed = None
-            if self.n_pca is None or self.n_pca >= np.min(X.shape):
+            # data matrix
+            update_graph = True
+            if utils.is_Anndata(X):
+                X = X.X
+            if not callable(self.distance) and self.distance.startswith("precomputed"):
+                if self.distance == "precomputed":
+                    # automatic detection
+                    precomputed = self._detect_precomputed_matrix_type(X)
+                elif self.distance in ["precomputed_affinity", "precomputed_distance"]:
+                    precomputed = self.distance.split("_")[1]
+                else:
+                    raise ValueError(
+                        "distance {} not recognized. Did you mean "
+                        "'precomputed_distance', "
+                        "'precomputed_affinity', or 'precomputed' "
+                        "(automatically detects distance or affinity)?".format(
+                            self.distance
+                        )
+                    )
                 n_pca = None
             else:
-                n_pca = self.n_pca
+                precomputed = None
+                if self.n_pca is None or self.n_pca >= np.min(X.shape):
+                    n_pca = None
+                else:
+                    n_pca = self.n_pca
         return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph
 
     def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs):
@@ -335,26 +327,22 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs):
             """
             self.graph = None
         else:
-            try:
-                self.graph.set_params(
-                    n_pca=n_pca,
-                    precomputed=precomputed,
-                    n_landmark=n_landmark,
-                    random_state=self.random_state,
-                    knn=self.knn,
-                    decay=self.decay,
-                    distance=self.distance,
-                    n_svd=self.n_svd,
-                    n_jobs=self.n_jobs,
-                    thresh=self.thresh,
-                    verbose=self.verbose,
-                    **(self.kwargs)
-                )
+            self._set_graph_params(
+                n_pca=n_pca,
+                precomputed=precomputed,
+                n_landmark=n_landmark,
+                random_state=self.random_state,
+                knn=self.knn,
+                decay=self.decay,
+                distance=self.distance,
+                n_svd=self.n_svd,
+                n_jobs=self.n_jobs,
+                thresh=self.thresh,
+                verbose=self.verbose,
+                **(self.kwargs)
+            )
+            if self.graph is not None:
                 _logger.info("Using precomputed graph and diffusion operator...")
-            except ValueError as e:
-                # something changed that should have invalidated the graph
-                _logger.debug("Reset graph due to {}".format(str(e)))
-                self.graph = None
 
     def fit(self, X, **kwargs):
         """Computes the graph

From 57f366fc09b5ed8bc82c70e203c9a16cca2b536e Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 18:02:07 -0500
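Subject: [PATCH 12/15] test precomputed and graph input

Add test_precomputed, which fits dense and sparse precomputed distance
and affinity matrices, and test_graph_input, which passes graphtools
and pygsp graphs to fit.

Also:
- _parse_input read knn, decay, distance and thresh from X after X had
  been rebound to the graph's data; read them from self.graph instead.
- _detect_precomputed_matrix_type converts dia_matrix as well as
  coo_matrix input to CSR before indexing X[0, 0].
- The n_landmark docstring now gives its default as None.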

---
 graphtools/estimator.py | 12 ++++++------
 test/test_estimator.py  | 40 +++++++++++++++++++++++++++++++++++++++-
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/graphtools/estimator.py b/graphtools/estimator.py
index f68db58..66bd0c9 100644
--- a/graphtools/estimator.py
+++ b/graphtools/estimator.py
@@ -47,7 +47,7 @@ class GraphEstimator(object, metaclass=abc.ABCMeta):
         sets decay rate of kernel tails.
         If None, alpha decaying kernel is not used
 
-    n_landmark : int, optional, default: 2000
+    n_landmark : int, optional, default: None
         number of landmarks to use in graph construction
 
     n_pca : int, optional, default: 100
@@ -253,7 +253,7 @@ def _reset_graph(self):
         raise NotImplementedError
 
     def _detect_precomputed_matrix_type(self, X):
-        if isinstance(X, sparse.coo_matrix):
+        if isinstance(X, (sparse.coo_matrix, sparse.dia_matrix)):
             X = X.tocsr()
         if X[0, 0] == 0:
             return "distance"
@@ -274,10 +274,10 @@ def _parse_input(self, X):
             X = X.data
             # immutable graph properties override operator
             n_pca = self.graph.n_pca
-            self.knn = X.knn
-            self.decay = X.decay
-            self.distance = X.distance
-            self.thresh = X.thresh
+            self.knn = self.graph.knn
+            self.decay = self.graph.decay
+            self.distance = self.graph.distance
+            self.thresh = self.graph.thresh
             update_graph = False
             if isinstance(self.graph, graphs.TraditionalGraph):
                 precomputed = self.graph.precomputed
diff --git a/test/test_estimator.py b/test/test_estimator.py
index 4f6fc15..997a25b 100644
--- a/test/test_estimator.py
+++ b/test/test_estimator.py
@@ -1,7 +1,10 @@
 import graphtools
 import graphtools.estimator
+import pygsp
 import numpy as np
 from load_tests import data
+from scipy import sparse
+from parameterized import parameterized
 
 
 class Estimator(graphtools.estimator.GraphEstimator):
@@ -12,7 +15,7 @@ def _reset_graph(self):
 def test_estimator():
     E = Estimator(verbose=True)
     assert E.verbose == 1
-    E = Estimator(verbose=False, n_landmark=None)
+    E = Estimator(verbose=False)
     assert E.verbose == 0
     E.fit(data)
     assert np.all(E.X == data)
@@ -33,3 +36,38 @@ def test_estimator():
     E.set_params(knn=E.knn * 2)
     assert E.reset
     assert E.graph is None
+
+
+@parameterized(
+    [
+        (1 - np.eye(10), "distance"),
+        (np.eye(10), "affinity"),
+        (sparse.coo_matrix(1 - np.eye(10)), "distance"),
+        (sparse.eye(10), "affinity"),
+    ]
+)
+def test_precomputed(X, precomputed):
+    E = Estimator(verbose=False, distance="precomputed")
+    assert E._detect_precomputed_matrix_type(X) == precomputed
+    E.fit(X)
+    assert isinstance(E.graph, graphtools.graphs.TraditionalGraph)
+    assert E.graph.precomputed == precomputed
+
+
+def test_graph_input():
+    X = np.random.normal(0, 1, (10, 2))
+    E = Estimator(verbose=0)
+    G = graphtools.Graph(X)
+    E.fit(G)
+    assert E.graph == G
+    G = graphtools.Graph(X, knn=2, decay=5, distance="cosine", thresh=0)
+    E.fit(G)
+    assert E.graph == G
+    assert E.knn == G.knn
+    assert E.decay == G.decay
+    assert E.distance == G.distance
+    assert E.thresh == G.thresh
+    W = G.K - np.eye(X.shape[0])
+    G = pygsp.graphs.Graph(W)
+    E.fit(G, use_pygsp=True)
+    assert np.all(E.graph.W.toarray() == W)

From a352cecf5ea7c3e0830ea36f12517157d43112a8 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 18:26:52 -0500
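Subject: [PATCH 13/15] test api more

Move the graph check from the n_landmark setter into a new
_update_n_landmark method and call it from _update_graph as well;
_set_graph_params no longer special-cases n_landmark.

Add _parse_n_svd, which caps n_svd at X.shape[0] - 1, and use it for
the n_svd argument in _update_graph and fit.

A distance that starts with "precomputed" but is not one of the
recognized values now raises a bare NotImplementedError in
_parse_input instead of a descriptive ValueError.

Add anndata to a dependency list in setup.py. Parameterize
test_precomputed over the distance argument and add
test_anndata_input and test_new_input.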

---
 graphtools/estimator.py | 33 +++++++++++--------------
 setup.py                |  1 +
 test/test_estimator.py  | 55 ++++++++++++++++++++++++++++++++++-------
 3 files changed, 61 insertions(+), 28 deletions(-)

diff --git a/graphtools/estimator.py b/graphtools/estimator.py
index 66bd0c9..f494b40 100644
--- a/graphtools/estimator.py
+++ b/graphtools/estimator.py
@@ -173,7 +173,11 @@ def n_landmark(self, n_landmark):
         utils.check_if_not(
             None, utils.check_positive, utils.check_int, n_landmark=n_landmark
         )
+        self._update_n_landmark(n_landmark)
+
+    def _update_n_landmark(self, n_landmark):
         if self.graph is not None:
+            n_landmark = self._parse_n_landmark(self.graph.data_nu)
             if (
                 n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph)
             ) or (
@@ -229,20 +233,11 @@ def set_params(self, **params):
 
     def _set_graph_params(self, **params):
         if self.graph is not None:
-            if "n_landmark" in params:
-                n_landmark = params["n_landmark"]
-                del params["n_landmark"]
             try:
                 self.graph.set_params(**params)
             except ValueError as e:
                 _logger.debug("Reset graph due to {}".format(str(e)))
                 self.graph = None
-            else:
-                try:
-                    # special way to reset the graph here
-                    self.n_landmark = n_landmark
-                except NameError:
-                    pass
 
     @abc.abstractmethod
     def _reset_graph(self):
@@ -266,6 +261,12 @@ def _parse_n_landmark(self, X):
         else:
             return self.n_landmark
 
+    def _parse_n_svd(self, X):
+        if self.n_svd >= X.shape[0]:
+            return X.shape[0] - 1
+        else:
+            return self.n_svd
+
     def _parse_input(self, X):
         # passing graphs as input
         if isinstance(X, base.BaseGraph):
@@ -302,14 +303,7 @@ def _parse_input(self, X):
                 elif self.distance in ["precomputed_affinity", "precomputed_distance"]:
                     precomputed = self.distance.split("_")[1]
                 else:
-                    raise ValueError(
-                        "distance {} not recognized. Did you mean "
-                        "'precomputed_distance', "
-                        "'precomputed_affinity', or 'precomputed' "
-                        "(automatically detects distance or affinity)?".format(
-                            self.distance
-                        )
-                    )
+                    raise NotImplementedError
                 n_pca = None
             else:
                 precomputed = None
@@ -327,6 +321,7 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs):
             """
             self.graph = None
         else:
+            self._update_n_landmark(n_landmark)
             self._set_graph_params(
                 n_pca=n_pca,
                 precomputed=precomputed,
@@ -335,7 +330,7 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs):
                 knn=self.knn,
                 decay=self.decay,
                 distance=self.distance,
-                n_svd=self.n_svd,
+                n_svd=self._parse_n_svd(self.X),
                 n_jobs=self.n_jobs,
                 thresh=self.thresh,
                 verbose=self.verbose,
@@ -393,7 +388,7 @@ def fit(self, X, **kwargs):
                     knn=self.knn,
                     decay=self.decay,
                     distance=self.distance,
-                    n_svd=self.n_svd,
+                    n_svd=self._parse_n_svd(self.X),
                     n_jobs=self.n_jobs,
                     thresh=self.thresh,
                     verbose=self.verbose,
diff --git a/setup.py b/setup.py
index 4384517..0586273 100644
--- a/setup.py
+++ b/setup.py
@@ -20,6 +20,7 @@
     "coveralls",
     "python-igraph",
     "parameterized",
+    "anndata",
 ]
 
 if sys.version_info[0] == 3:
diff --git a/test/test_estimator.py b/test/test_estimator.py
index 997a25b..062a332 100644
--- a/test/test_estimator.py
+++ b/test/test_estimator.py
@@ -1,8 +1,10 @@
 import graphtools
 import graphtools.estimator
 import pygsp
+import anndata
+import warnings
 import numpy as np
-from load_tests import data
+from load_tests import data, assert_raises_message
 from scipy import sparse
 from parameterized import parameterized
 
@@ -40,16 +42,19 @@ def test_estimator():
 
 @parameterized(
     [
-        (1 - np.eye(10), "distance"),
-        (np.eye(10), "affinity"),
-        (sparse.coo_matrix(1 - np.eye(10)), "distance"),
-        (sparse.eye(10), "affinity"),
+        ("precomputed", 1 - np.eye(10), "distance"),
+        ("precomputed", np.eye(10), "affinity"),
+        ("precomputed", sparse.coo_matrix(1 - np.eye(10)), "distance"),
+        ("precomputed", sparse.eye(10), "affinity"),
+        ("precomputed_affinity", 1 - np.eye(10), "affinity"),
+        ("precomputed_distance", np.ones((10, 10)), "distance"),
     ]
 )
-def test_precomputed(X, precomputed):
-    E = Estimator(verbose=False, distance="precomputed")
-    assert E._detect_precomputed_matrix_type(X) == precomputed
-    E.fit(X)
+def test_precomputed(distance, X, precomputed):
+    E = Estimator(verbose=False, distance=distance)
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", message="K should have a non-zero diagonal")
+        E.fit(X)
     assert isinstance(E.graph, graphtools.graphs.TraditionalGraph)
     assert E.graph.precomputed == precomputed
 
@@ -71,3 +76,35 @@ def test_graph_input():
     G = pygsp.graphs.Graph(W)
     E.fit(G, use_pygsp=True)
     assert np.all(E.graph.W.toarray() == W)
+
+
+def test_anndata_input():
+    X = np.random.normal(0, 1, (10, 2))
+    E = Estimator(verbose=0)
+    E.fit(X)
+    E2 = Estimator(verbose=0)
+    E2.fit(anndata.AnnData(X))
+    np.testing.assert_allclose(
+        E.graph.K.toarray(), E2.graph.K.toarray(), rtol=1e-6, atol=2e-7
+    )
+
+
+def test_new_input():
+    X = np.random.normal(0, 1, (10, 2))
+    X2 = np.random.normal(0, 1, (10, 2))
+    E = Estimator(verbose=0)
+    E.fit(X)
+    G = E.graph
+    E.fit(X)
+    assert E.graph is G
+    E.fit(X.copy())
+    assert E.graph is G
+    E.n_landmark = 500
+    E.fit(X)
+    assert E.graph is G
+    E.n_landmark = 5
+    E.fit(X)
+    assert np.all(E.graph.K.toarray() == G.K.toarray())
+    G = E.graph
+    E.fit(X2)
+    assert E.graph is not G

From 31e46eb6b885bcdea1b7f751c57d16b69fcd10a0 Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Mon, 24 Feb 2020 18:43:00 -0500
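Subject: [PATCH 14/15] test pca

Turn _parse_n_landmark and _parse_n_svd into static methods that take
the value to check as an argument, and add _parse_n_pca, which returns
None when n_pca >= min(X.shape).

_set_graph_params now passes n_pca, n_svd and n_landmark through these
helpers, using self.graph.data_nu, before calling graph.set_params.

Add test_pca, which checks that on 10 x 6 data setting n_pca=100 keeps
the existing graph while setting n_pca=3 builds a new one.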

---
 graphtools/estimator.py | 52 ++++++++++++++++++++++++++++++-----------
 test/test_estimator.py  | 14 +++++++++++
 2 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/graphtools/estimator.py b/graphtools/estimator.py
index f494b40..5cb130f 100644
--- a/graphtools/estimator.py
+++ b/graphtools/estimator.py
@@ -177,7 +177,7 @@ def n_landmark(self, n_landmark):
 
     def _update_n_landmark(self, n_landmark):
         if self.graph is not None:
-            n_landmark = self._parse_n_landmark(self.graph.data_nu)
+            n_landmark = self._parse_n_landmark(self.graph.data_nu, n_landmark)
             if (
                 n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph)
             ) or (
@@ -234,6 +234,18 @@ def set_params(self, **params):
     def _set_graph_params(self, **params):
         if self.graph is not None:
             try:
+                if "n_pca" in params:
+                    params["n_pca"] = self._parse_n_pca(
+                        self.graph.data_nu, params["n_pca"]
+                    )
+                if "n_svd" in params:
+                    params["n_svd"] = self._parse_n_svd(
+                        self.graph.data_nu, params["n_svd"]
+                    )
+                if "n_landmark" in params:
+                    params["n_landmark"] = self._parse_n_landmark(
+                        self.graph.data_nu, params["n_landmark"]
+                    )
                 self.graph.set_params(**params)
             except ValueError as e:
                 _logger.debug("Reset graph due to {}".format(str(e)))
@@ -255,17 +267,26 @@ def _detect_precomputed_matrix_type(self, X):
         else:
             return "affinity"
 
-    def _parse_n_landmark(self, X):
-        if self.n_landmark is not None and self.n_landmark >= X.shape[0]:
+    @staticmethod
+    def _parse_n_landmark(X, n_landmark):
+        if n_landmark is not None and n_landmark >= X.shape[0]:
             return None
         else:
-            return self.n_landmark
+            return n_landmark
 
-    def _parse_n_svd(self, X):
-        if self.n_svd >= X.shape[0]:
+    @staticmethod
+    def _parse_n_pca(X, n_pca):
+        if n_pca is not None and n_pca >= min(X.shape):
+            return None
+        else:
+            return n_pca
+
+    @staticmethod
+    def _parse_n_svd(X, n_svd):
+        if n_svd is not None and n_svd >= X.shape[0]:
             return X.shape[0] - 1
         else:
-            return self.n_svd
+            return n_svd
 
     def _parse_input(self, X):
         # passing graphs as input
@@ -307,11 +328,14 @@ def _parse_input(self, X):
                 n_pca = None
             else:
                 precomputed = None
-                if self.n_pca is None or self.n_pca >= np.min(X.shape):
-                    n_pca = None
-                else:
-                    n_pca = self.n_pca
-        return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph
+                n_pca = self._parse_n_pca(X, self.n_pca)
+        return (
+            X,
+            n_pca,
+            self._parse_n_landmark(X, self.n_landmark),
+            precomputed,
+            update_graph,
+        )
 
     def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs):
         if self.X is not None and not matrix.matrix_is_equivalent(X, self.X):
@@ -330,7 +354,7 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs):
                 knn=self.knn,
                 decay=self.decay,
                 distance=self.distance,
-                n_svd=self._parse_n_svd(self.X),
+                n_svd=self._parse_n_svd(self.X, self.n_svd),
                 n_jobs=self.n_jobs,
                 thresh=self.thresh,
                 verbose=self.verbose,
@@ -388,7 +412,7 @@ def fit(self, X, **kwargs):
                     knn=self.knn,
                     decay=self.decay,
                     distance=self.distance,
-                    n_svd=self._parse_n_svd(self.X),
+                    n_svd=self._parse_n_svd(self.X, self.n_svd),
                     n_jobs=self.n_jobs,
                     thresh=self.thresh,
                     verbose=self.verbose,
diff --git a/test/test_estimator.py b/test/test_estimator.py
index 062a332..179abda 100644
--- a/test/test_estimator.py
+++ b/test/test_estimator.py
@@ -78,6 +78,20 @@ def test_graph_input():
     assert np.all(E.graph.W.toarray() == W)
 
 
+def test_pca():
+    X = np.random.normal(0, 1, (10, 6))
+    E = Estimator(verbose=0)
+    E.fit(X)
+    G = E.graph
+    E.set_params(n_pca=100)
+    E.fit(X)
+    assert E.graph is G
+    E.set_params(n_pca=3)
+    E.fit(X)
+    assert E.graph is not G
+    assert E.graph.n_pca == 3
+
+
 def test_anndata_input():
     X = np.random.normal(0, 1, (10, 2))
     E = Estimator(verbose=0)

From ee61f1a4ffd1d6f83a387f3ebcfd6879fe094afa Mon Sep 17 00:00:00 2001
From: Scott Gigante <scottgigante@gmail.com>
Date: Tue, 25 Feb 2020 11:05:02 -0500
Subject: [PATCH 15/15] bump version

---
 graphtools/version.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphtools/version.py b/graphtools/version.py
index 61a6a00..5b60188 100644
--- a/graphtools/version.py
+++ b/graphtools/version.py
@@ -1 +1 @@
-__version__ = "1.5.0a0"
+__version__ = "1.5.0"