From 9bd72d1ac513ad939279ea0ae5d4452192c11698 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 21 Nov 2019 17:54:53 -0500 Subject: [PATCH 01/15] graphtools estimator --- graphtools/estimator.py | 386 ++++++++++++++++++++++++++++++++++++++++ graphtools/utils.py | 131 ++++++++++++++ 2 files changed, 517 insertions(+) create mode 100644 graphtools/estimator.py diff --git a/graphtools/estimator.py b/graphtools/estimator.py new file mode 100644 index 0000000..5979cf0 --- /dev/null +++ b/graphtools/estimator.py @@ -0,0 +1,386 @@ +import numpy as np +import tasklogger + +try: + import anndata +except ImportError: + # anndata not installed + pass + +try: + import pygsp +except ImportError: + # anndata not installed + pass + +from functools import partial +from scipy import sparse + +from . import api, graphs, base, utils + +_logger = tasklogger.get_tasklogger("graphtools") + + +class GraphEstimator(object): + """Estimator which builds a graphtools Graph + + Parameters + ---------- + + knn : int, optional, default: 5 + number of nearest neighbors on which to build kernel + + decay : int, optional, default: 40 + sets decay rate of kernel tails. + If None, alpha decaying kernel is not used + + n_landmark : int, optional, default: 2000 + number of landmarks to use in fast PHATE + + n_pca : int, optional, default: 100 + Number of principal components to use for calculating + neighborhoods. For extremely large datasets, using + n_pca < 20 allows neighborhoods to be calculated in + roughly log(n_samples) time. + + distance : string, optional, default: 'euclidean' + recommended values: 'euclidean', 'cosine', 'precomputed' + Any metric from `scipy.spatial.distance` can be used + distance metric for building kNN graph. Custom distance + functions of form `f(x, y) = d` are also accepted. If 'precomputed', + `data` should be an n_samples x n_samples distance or + affinity matrix. Distance matrices are assumed to have zeros + down the diagonal, while affinity matrices are assumed to have + non-zero values down the diagonal. This is detected automatically using + `data[0,0]`. You can override this detection with + `distance='precomputed_distance'` or `distance='precomputed_affinity'`. + + n_jobs : integer, optional, default: 1 + The number of jobs to use for the computation. + If -1 all CPUs are used. If 1 is given, no parallel computing code is + used at all, which is useful for debugging. + For n_jobs below -1, (n_cpus + 1 + n_jobs) are used. Thus for + n_jobs = -2, all CPUs but one are used + + random_state : integer or numpy.RandomState, optional, default: None + If an integer is given, it fixes the seed + Defaults to the global `numpy` random number generator + + verbose : `int` or `boolean`, optional (default: 1) + If `True` or `> 0`, print status messages + + n_svd : (default: 100) + + thresh : (default: 1e-4) + + kwargs : additional arguments for graphtools.Graph + """ + + X = utils.attribute("X", doc="Stored input data") + graph = utils.attribute("graph", doc="graphtools Graph object") + + @graph.setter + def graph(self, G): + self._graph = G + if G is None: + self._reset_graph() + + n_pca = utils.attribute( + "n_pca", + default=100, + on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int), + ) + random_state = utils.attribute("random_state") + + knn = utils.attribute( + "knn", default=5, on_set=[utils.check_positive, utils.check_int] + ) + decay = utils.attribute("decay", default=40, on_set=utils.check_positive) + distance = utils.attribute( + "distance", + default="euclidean", + on_set=partial( + utils.check_in, + [ + "euclidean", + "precomputed", + "cosine", + "correlation", + "cityblock", + "l1", + "l2", + "manhattan", + "braycurtis", + "canberra", + "chebyshev", + "dice", + "hamming", + "jaccard", + "kulsinski", + "mahalanobis", + "matching", + "minkowski", + "rogerstanimoto", + "russellrao", + "seuclidean", + "sokalmichener", + "sokalsneath", + "sqeuclidean", + "yule", + "precomputed_affinity", + "precomputed_distance", + ], + ), + ) + n_svd = utils.attribute( + "n_svd", + default=100, + on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int), + ) + n_jobs = utils.attribute( + "n_jobs", on_set=partial(utils.check_if_not, None, utils.check_int) + ) + verbose = utils.attribute("verbose", default=0) + thresh = utils.attribute( + "thresh", + default=1e-4, + on_set=partial(utils.check_if_not, 0, utils.check_positive), + ) + + n_landmark = utils.attribute("n_landmark") + + @n_landmark.setter + def n_landmark(self, n_landmark): + self._n_landmark = n_landmark + utils.check_if_not( + None, utils.check_positive, utils.check_int, n_landmark=n_landmark + ) + if self.graph is not None: + if n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph): + self.graph = None + elif n_landmark is not None and not isinstance( + self.graph, graphs.LandmarkGraph + ): + self.graph = None + + def __init__( + self, + knn=5, + decay=40, + n_pca=100, + n_landmark=None, + random_state=None, + distance="euclidean", + n_svd=100, + n_jobs=1, + verbose=1, + thresh=1e-4, + **kwargs + ): + + if verbose is True: + verbose = 1 + elif verbose is False: + verbose = 0 + + self.n_pca = n_pca + self.n_landmark = n_landmark + self.random_state = random_state + self.knn = knn + self.decay = decay + self.distance = distance + self.n_svd = n_svd + self.n_jobs = n_jobs + self.verbose = verbose + self.thresh = thresh + self.kwargs = kwargs + _logger.set_level(self.verbose) + + def set_params(self, **params): + for p in params: + setattr(self, p, params[p]) + self._set_graph_params(**params) + + def _set_graph_params(self, **params): + if self.graph is not None: + try: + self.graph.set_params(**params) + except ValueError as e: + _logger.debug("Reset graph due to {}".format(str(e))) + self.graph = None + + def _reset_graph(self): + pass + + def _detect_precomputed_matrix_type(self, X): + if isinstance(X, sparse.coo_matrix): + X = X.tocsr() + if X[0, 0] == 0: + return "distance" + else: + return "affinity" + + def _parse_n_landmark(self, X): + if self.n_landmark is not None and self.n_landmark >= X.shape[0]: + return None + else: + return self.n_landmark + + def _parse_input(self, X): + # passing graphs as input + if isinstance(X, base.BaseGraph): + if isinstance(X, graphs.LandmarkGraph) or ( + isinstance(X, base.BaseGraph) and self.n_landmark is None + ): + # we can keep this graph + self.graph = X + X = X.data + n_pca = self.graph.n_pca + update_graph = False + if isinstance(self.graph, graphs.TraditionalGraph): + precomputed = self.graph.precomputed + else: + precomputed = None + return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph + else: + # n_landmark is set, but this is not a landmark graph + self.graph = None + X = X.kernel + precomputed = "affinity" + n_pca = None + update_graph = False + return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph + else: + try: + if isinstance(X, pygsp.graphs.Graph): + self.graph = None + X = X.W + precomputed = "adjacency" + update_graph = False + n_pca = None + return ( + X, + n_pca, + self._parse_n_landmark(X), + precomputed, + update_graph, + ) + except NameError: + # pygsp not installed + pass + + # checks on regular data + update_graph = True + try: + if isinstance(X, anndata.AnnData): + X = X.X + except NameError: + # anndata not installed + pass + if not callable(self.distance) and self.distance.startswith("precomputed"): + if self.distance == "precomputed": + # automatic detection + precomputed = self._detect_precomputed_matrix_type(X) + elif self.distance in ["precomputed_affinity", "precomputed_distance"]: + precomputed = self.distance.split("_")[1] + else: + raise ValueError( + "distance {} not recognized. Did you mean " + "'precomputed_distance', " + "'precomputed_affinity', or 'precomputed' " + "(automatically detects distance or affinity)?".format( + self.distance + ) + ) + n_pca = None + else: + precomputed = None + if self.n_pca is None or self.n_pca >= np.min(X.shape): + n_pca = None + else: + n_pca = self.n_pca + return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph + + def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): + if self.X is not None and not utils.matrix_is_equivalent(X, self.X): + """ + If the same data is used, we can reuse existing kernel and + diffusion matrices. Otherwise we have to recompute. + """ + self.graph = None + else: + try: + self.graph.set_params( + n_pca=n_pca, + precomputed=precomputed, + n_landmark=n_landmark, + random_state=self.random_state, + knn=self.knn, + decay=self.decay, + distance=self.distance, + n_svd=self.n_svd, + n_jobs=self.n_jobs, + thresh=self.thresh, + verbose=self.verbose, + **(self.kwargs) + ) + _logger.info("Using precomputed graph and diffusion operator...") + except ValueError as e: + # something changed that should have invalidated the graph + _logger.debug("Reset graph due to {}".format(str(e))) + self.graph = None + + def fit(self, X): + """Computes the graph + + Parameters + ---------- + X : array, shape=[n_samples, n_features] + input data with `n_samples` samples and `n_dimensions` + dimensions. Accepted data types: `numpy.ndarray`, + `scipy.sparse.spmatrix`, `pd.DataFrame`, `anndata.AnnData`. If + `knn_dist` is 'precomputed', `data` should be a n_samples x + n_samples distance or affinity matrix + + Returns + ------- + self : graphtools.estimator.GraphEstimator + """ + X, n_pca, n_landmark, precomputed, update_graph = self._parse_input(X) + + if precomputed is None: + _logger.info( + "Building graph on {} samples and {} features.".format( + X.shape[0], X.shape[1] + ) + ) + else: + _logger.info( + "Building graph on precomputed {} matrix with {} cells.".format( + precomputed, X.shape[0] + ) + ) + + if self.graph is not None and update_graph: + self._update_graph(X, precomputed, n_pca, n_landmark) + + self.X = X + + if self.graph is None: + with _logger.task("graph and diffusion operator"): + self.graph = api.Graph( + X, + n_pca=n_pca, + precomputed=precomputed, + n_landmark=n_landmark, + random_state=self.random_state, + knn=self.knn, + decay=self.decay, + distance=self.distance, + n_svd=self.n_svd, + n_jobs=self.n_jobs, + thresh=self.thresh, + verbose=self.verbose, + **(self.kwargs) + ) + return self diff --git a/graphtools/utils.py b/graphtools/utils.py index 64d1b08..1710a06 100644 --- a/graphtools/utils.py +++ b/graphtools/utils.py @@ -1,6 +1,7 @@ import numpy as np from scipy import sparse import numbers +from functools import partial def if_sparse(sparse_func, dense_func, *args, **kwargs): @@ -79,3 +80,133 @@ def to_array(X): elif isinstance(X, np.matrix): X = X.A return X + + +def matrix_is_equivalent(X, Y): + """ + Checks matrix equivalence with numpy, scipy and pandas + """ + return X is Y or ( + isinstance(X, Y.__class__) + and X.shape == Y.shape + and np.sum((X != Y).sum()) == 0 + ) + + +def check_positive(**params): + """Check that parameters are positive as expected + + Raises + ------ + ValueError : unacceptable choice of parameters + """ + for p in params: + if not isinstance(params[p], numbers.Number) or params[p] <= 0: + raise ValueError("Expected {} > 0, got {}".format(p, params[p])) + + +def check_int(**params): + """Check that parameters are integers as expected + + Raises + ------ + ValueError : unacceptable choice of parameters + """ + for p in params: + if not isinstance(params[p], numbers.Integral): + raise ValueError("Expected {} integer, got {}".format(p, params[p])) + + +def check_if_not(x, *checks, **params): + """Run checks only if parameters are not equal to a specified value + + Parameters + ---------- + + x : excepted value + Checks not run if parameters equal x + + checks : function + Unnamed arguments, check functions to be run + + params : object + Named arguments, parameters to be checked + + Raises + ------ + ValueError : unacceptable choice of parameters + """ + for p in params: + if params[p] is not x and params[p] != x: + [check(**{p: params[p]}) for check in checks] + + +def check_in(choices, **params): + """Checks parameters are in a list of allowed parameters + + Parameters + ---------- + + choices : array-like, accepted values + + params : object + Named arguments, parameters to be checked + + Raises + ------ + ValueError : unacceptable choice of parameters + """ + for p in params: + if params[p] not in choices: + raise ValueError( + "{} value {} not recognized. Choose from {}".format( + p, params[p], choices + ) + ) + + +def check_between(v_min, v_max, **params): + """Checks parameters are in a specified range + + Parameters + ---------- + + v_min : float, minimum allowed value (inclusive) + + v_max : float, maximum allowed value (inclusive) + + params : object + Named arguments, parameters to be checked + + Raises + ------ + ValueError : unacceptable choice of parameters + """ + for p in params: + if params[p] < v_min or params[p] > v_max: + raise ValueError( + "Expected {} between {} and {}, " + "got {}".format(p, v_min, v_max, params[p]) + ) + + +def attribute(attr, default=None, doc=None, on_set=None): + def getter(self, attr): + try: + return getattr(self, "_" + attr) + except AttributeError: + return default + + def setter(self, value, attr, on_set=None): + if on_set is not None: + if callable(on_set): + on_set = [on_set] + for fn in on_set: + fn(**{attr: value}) + setattr(self, "_" + attr, value) + + return property( + fget=partial(getter, attr=attr), + fset=partial(setter, attr=attr, on_set=on_set), + doc=doc, + ) From fe43da1f353c570101c4e2bdec19c99575b2c2d4 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 4 Feb 2020 18:03:38 -0500 Subject: [PATCH 02/15] bump version --- graphtools/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/version.py b/graphtools/version.py index daa50c7..5a6e033 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "1.4.2" +__version__ = "1.4.3a0" From b367813e6bbfebffeb0fb71bba49363ec881d87a Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 15:16:31 -0500 Subject: [PATCH 03/15] migrate matrix functions out of utils, test estimator --- graphtools/base.py | 16 +-- graphtools/estimator.py | 115 +++++++++++++-------- graphtools/graphs.py | 10 +- graphtools/utils.py | 221 ++++++++++++++++++---------------------- requirements.txt | 1 + setup.py | 1 + test/test_estimator.py | 35 +++++++ test/test_matrix.py | 166 ++++++++++++++++++++++++++++++ test/test_utils.py | 40 -------- 9 files changed, 388 insertions(+), 217 deletions(-) create mode 100644 test/test_estimator.py create mode 100644 test/test_matrix.py delete mode 100644 test/test_utils.py diff --git a/graphtools/base.py b/graphtools/base.py index 4fabcb0..838bf90 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -27,7 +27,7 @@ # anndata not installed pass -from . import utils +from . import matrix, utils _logger = tasklogger.get_tasklogger("graphtools") @@ -584,9 +584,9 @@ def symmetrize_kernel(self, K): K = K.multiply(K.T) elif self.kernel_symm == "mnn": _logger.debug("Using mnn symmetrization (theta = {}).".format(self.theta)) - K = self.theta * utils.elementwise_minimum(K, K.T) + ( + K = self.theta * matrix.elementwise_minimum(K, K.T) + ( 1 - self.theta - ) * utils.elementwise_maximum(K, K.T) + ) * matrix.elementwise_maximum(K, K.T) elif self.kernel_symm is None: _logger.debug("Using no symmetrization.") pass @@ -683,7 +683,9 @@ def kernel_degree(self): try: return self._kernel_degree except AttributeError: - self._kernel_degree = utils.to_array(self.kernel.sum(axis=1)).reshape(-1, 1) + self._kernel_degree = matrix.to_array(self.kernel.sum(axis=1)).reshape( + -1, 1 + ) return self._kernel_degree @property @@ -823,12 +825,12 @@ def to_igraph(self, attribute="weight", **kwargs): except AttributeError: # not a pygsp graph W = self.K.copy() - W = utils.set_diagonal(W, 0) + W = matrix.set_diagonal(W, 0) sources, targets = W.nonzero() edgelist = list(zip(sources, targets)) g = ig.Graph(W.shape[0], edgelist, **kwargs) weights = W[W.nonzero()] - weights = utils.to_array(weights) + weights = matrix.to_array(weights) g.es[attribute] = weights.flatten().tolist() return g @@ -987,7 +989,7 @@ def _build_weight_from_kernel(self, kernel): weight = kernel.copy() self._diagonal = weight.diagonal().copy() - weight = utils.set_diagonal(weight, 0) + weight = matrix.set_diagonal(weight, 0) return weight diff --git a/graphtools/estimator.py b/graphtools/estimator.py index 5979cf0..8d708ae 100644 --- a/graphtools/estimator.py +++ b/graphtools/estimator.py @@ -1,27 +1,40 @@ import numpy as np import tasklogger - -try: - import anndata -except ImportError: - # anndata not installed - pass - -try: - import pygsp -except ImportError: - # anndata not installed - pass +import pygsp +import abc from functools import partial from scipy import sparse from . import api, graphs, base, utils + +def attribute(attr, default=None, doc=None, on_set=None): + def getter(self, attr): + try: + return getattr(self, "_" + attr) + except AttributeError: + return default + + def setter(self, value, attr, on_set=None): + if on_set is not None: + if callable(on_set): + on_set = [on_set] + for fn in on_set: + fn(**{attr: value}) + setattr(self, "_" + attr, value) + + return property( + fget=partial(getter, attr=attr), + fset=partial(setter, attr=attr, on_set=on_set), + doc=doc, + ) + + _logger = tasklogger.get_tasklogger("graphtools") -class GraphEstimator(object): +class GraphEstimator(object, metaclass=abc.ABCMeta): """Estimator which builds a graphtools Graph Parameters @@ -35,7 +48,7 @@ class GraphEstimator(object): If None, alpha decaying kernel is not used n_landmark : int, optional, default: 2000 - number of landmarks to use in fast PHATE + number of landmarks to use in graph construction n_pca : int, optional, default: 100 Number of principal components to use for calculating @@ -69,15 +82,22 @@ class GraphEstimator(object): verbose : `int` or `boolean`, optional (default: 1) If `True` or `> 0`, print status messages - n_svd : (default: 100) + n_svd : int, optional (default: 100) + number of singular vectors to compute for landmarking - thresh : (default: 1e-4) + thresh : float, optional (default: 1e-4) + threshold below which to truncate kernel kwargs : additional arguments for graphtools.Graph + + Attributes + ---------- + + graph : graphtools.Graph """ - X = utils.attribute("X", doc="Stored input data") - graph = utils.attribute("graph", doc="graphtools Graph object") + X = attribute("X", doc="Stored input data") + graph = attribute("graph", doc="graphtools Graph object") @graph.setter def graph(self, G): @@ -85,18 +105,16 @@ def graph(self, G): if G is None: self._reset_graph() - n_pca = utils.attribute( + n_pca = attribute( "n_pca", default=100, on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int), ) - random_state = utils.attribute("random_state") + random_state = attribute("random_state") - knn = utils.attribute( - "knn", default=5, on_set=[utils.check_positive, utils.check_int] - ) - decay = utils.attribute("decay", default=40, on_set=utils.check_positive) - distance = utils.attribute( + knn = attribute("knn", default=5, on_set=[utils.check_positive, utils.check_int]) + decay = attribute("decay", default=40, on_set=utils.check_positive) + distance = attribute( "distance", default="euclidean", on_set=partial( @@ -132,22 +150,22 @@ def graph(self, G): ], ), ) - n_svd = utils.attribute( + n_svd = attribute( "n_svd", default=100, on_set=partial(utils.check_if_not, None, utils.check_positive, utils.check_int), ) - n_jobs = utils.attribute( + n_jobs = attribute( "n_jobs", on_set=partial(utils.check_if_not, None, utils.check_int) ) - verbose = utils.attribute("verbose", default=0) - thresh = utils.attribute( + verbose = attribute("verbose", default=0) + thresh = attribute( "thresh", default=1e-4, on_set=partial(utils.check_if_not, 0, utils.check_positive), ) - n_landmark = utils.attribute("n_landmark") + n_landmark = attribute("n_landmark") @n_landmark.setter def n_landmark(self, n_landmark): @@ -156,12 +174,18 @@ def n_landmark(self, n_landmark): None, utils.check_positive, utils.check_int, n_landmark=n_landmark ) if self.graph is not None: - if n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph): - self.graph = None - elif n_landmark is not None and not isinstance( - self.graph, graphs.LandmarkGraph + if ( + n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph) + ) or ( + n_landmark is not None + and not isinstance(self.graph, graphs.LandmarkGraph) ): + # new graph but the same kernel + # there may be a better way to do this + kernel = self.graph.kernel self.graph = None + self.fit(self.X, initialize=False) + self.graph._kernel = kernel def __init__( self, @@ -194,6 +218,7 @@ def __init__( self.verbose = verbose self.thresh = thresh self.kwargs = kwargs + self.logger = _logger _logger.set_level(self.verbose) def set_params(self, **params): @@ -209,7 +234,12 @@ def _set_graph_params(self, **params): _logger.debug("Reset graph due to {}".format(str(e))) self.graph = None + @abc.abstractmethod def _reset_graph(self): + """Trigger a reset of self.graph + + Any downstream effects of resetting the graph should override this function + """ pass def _detect_precomputed_matrix_type(self, X): @@ -271,12 +301,8 @@ def _parse_input(self, X): # checks on regular data update_graph = True - try: - if isinstance(X, anndata.AnnData): - X = X.X - except NameError: - # anndata not installed - pass + if utils.is_Anndata(X): + X = X.X if not callable(self.distance) and self.distance.startswith("precomputed"): if self.distance == "precomputed": # automatic detection @@ -330,7 +356,7 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): _logger.debug("Reset graph due to {}".format(str(e))) self.graph = None - def fit(self, X): + def fit(self, X, **kwargs): """Computes the graph Parameters @@ -342,6 +368,8 @@ def fit(self, X): `knn_dist` is 'precomputed', `data` should be a n_samples x n_samples distance or affinity matrix + kwargs : additional arguments for graphtools.Graph + Returns ------- self : graphtools.estimator.GraphEstimator @@ -356,7 +384,7 @@ def fit(self, X): ) else: _logger.info( - "Building graph on precomputed {} matrix with {} cells.".format( + "Building graph on precomputed {} matrix with {} samples.".format( precomputed, X.shape[0] ) ) @@ -381,6 +409,7 @@ def fit(self, X): n_jobs=self.n_jobs, thresh=self.thresh, verbose=self.verbose, - **(self.kwargs) + **(self.kwargs), + **kwargs ) return self diff --git a/graphtools/graphs.py b/graphtools/graphs.py index c51851c..3d6fbe7 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -12,7 +12,7 @@ import warnings import tasklogger -from . import utils +from . import matrix, utils from .base import DataGraph, PyGSPGraph _logger = tasklogger.get_tasklogger("graphtools") @@ -983,7 +983,7 @@ def build_kernel(self): isinstance(K, sparse.dok_matrix) or isinstance(K, sparse.lil_matrix) ): K = K.tolil() - K = utils.set_diagonal(K, 1) + K = matrix.set_diagonal(K, 1) else: with _logger.task("affinities"): if sparse.issparse(self.data_nu): @@ -1110,7 +1110,7 @@ def build_kernel_to_data(self, Y, knn=None, bandwidth=None, bandwidth_scale=None @property def weighted(self): if self.precomputed is not None: - return not utils.nonzero_discrete(self.K, [0.5, 1]) + return not matrix.nonzero_discrete(self.K, [0.5, 1]) else: return super().weighted @@ -1333,7 +1333,7 @@ def build_kernel(self): else: K = np.zeros([self.data_nu.shape[0], self.data_nu.shape[0]]) for i, X in enumerate(self.subgraphs): - K = utils.set_submatrix( + K = matrix.set_submatrix( K, self.sample_idx == self.samples[i], self.sample_idx == self.samples[i], @@ -1358,7 +1358,7 @@ def build_kernel(self): Kij = Kij.multiply(scale[:, None]) else: Kij = Kij * scale[:, None] - K = utils.set_submatrix( + K = matrix.set_submatrix( K, self.sample_idx == self.samples[i], self.sample_idx == self.samples[j], diff --git a/graphtools/utils.py b/graphtools/utils.py index ad9c3b0..8482d2a 100644 --- a/graphtools/utils.py +++ b/graphtools/utils.py @@ -1,9 +1,7 @@ -import numpy as np import numbers import warnings - -from scipy import sparse -from functools import partial +from deprecated import deprecated +from . import matrix try: import pandas as pd @@ -11,94 +9,37 @@ # pandas not installed pass - -def if_sparse(sparse_func, dense_func, *args, **kwargs): - if sparse.issparse(args[0]): - for arg in args[1:]: - assert sparse.issparse(arg) - return sparse_func(*args, **kwargs) - else: - return dense_func(*args, **kwargs) - - -def sparse_minimum(X, Y): - return X.minimum(Y) - - -def sparse_maximum(X, Y): - return X.maximum(Y) - - -def elementwise_minimum(X, Y): - return if_sparse(sparse_minimum, np.minimum, X, Y) - - -def elementwise_maximum(X, Y): - return if_sparse(sparse_maximum, np.maximum, X, Y) - - -def dense_set_diagonal(X, diag): - X[np.diag_indices(X.shape[0])] = diag - return X - - -def sparse_set_diagonal(X, diag): - cls = type(X) - if not isinstance(X, (sparse.lil_matrix, sparse.dia_matrix)): - X = X.tocoo() - X.setdiag(diag) - return cls(X) - - -def set_diagonal(X, diag): - return if_sparse(sparse_set_diagonal, dense_set_diagonal, X, diag=diag) - - -def set_submatrix(X, i, j, values): - X[np.ix_(i, j)] = values - return X - - -def sparse_nonzero_discrete(X, values): - if isinstance( - X, (sparse.bsr_matrix, sparse.dia_matrix, sparse.dok_matrix, sparse.lil_matrix) - ): - X = X.tocsr() - return dense_nonzero_discrete(X.data, values) - - -def dense_nonzero_discrete(X, values): - result = np.full_like(X, False, dtype=bool) - for value in values: - result = np.logical_or(result, X == value) - return np.all(result) - - -def nonzero_discrete(X, values): - if isinstance(values, numbers.Number): - values = [values] - if 0 not in values: - values.append(0) - return if_sparse(sparse_nonzero_discrete, dense_nonzero_discrete, X, values=values) +try: + import anndata +except ImportError: + # anndata not installed + pass -def to_array(X): - if sparse.issparse(X): - X = X.toarray() - elif isinstance(X, np.matrix): - X = X.A - return X +def is_SparseDataFrame(X): + try: + pd + except NameError: + # pandas not installed + return False + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The SparseDataFrame class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version", + FutureWarning, + ) + try: + return isinstance(X, pd.SparseDataFrame) + except AttributeError: + return False -def matrix_is_equivalent(X, Y): - """ - Checks matrix equivalence with numpy, scipy and pandas - """ - return X is Y or ( - isinstance(X, Y.__class__) - and X.shape == Y.shape - and np.sum((X != Y).sum()) == 0 - ) +def is_Anndata(X): + try: + return isinstance(X, anndata.AnnData) + except NameError: + # anndata not installed + return False def check_positive(**params): @@ -198,41 +139,77 @@ def check_between(v_min, v_max, **params): ) -def attribute(attr, default=None, doc=None, on_set=None): - def getter(self, attr): - try: - return getattr(self, "_" + attr) - except AttributeError: - return default +@deprecated(version="1.5.0", reason="Use graphtools.matrix.if_sparse instead") +def if_sparse(*args, **kwargs): + return matrix.if_sparse(*args, **kwargs) - def setter(self, value, attr, on_set=None): - if on_set is not None: - if callable(on_set): - on_set = [on_set] - for fn in on_set: - fn(**{attr: value}) - setattr(self, "_" + attr, value) - return property( - fget=partial(getter, attr=attr), - fset=partial(setter, attr=attr, on_set=on_set), - doc=doc, - ) +@deprecated(version="1.5.0", reason="Use graphtools.matrix.sparse_minimum instead") +def sparse_minimum(*args, **kwargs): + return matrix.sparse_minimum(*args, **kwargs) -def is_SparseDataFrame(X): - try: - pd - except NameError: - # pandas not installed - return False - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - "The SparseDataFrame class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version", - FutureWarning, - ) - try: - return isinstance(X, pd.SparseDataFrame) - except AttributeError: - return False +@deprecated(version="1.5.0", reason="Use graphtools.matrix.sparse_maximum instead") +def sparse_maximum(*args, **kwargs): + return matrix.sparse_maximum(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.elementwise_minimum instead") +def elementwise_minimum(*args, **kwargs): + return matrix.elementwise_minimum(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.elementwise_maximum instead") +def elementwise_maximum(*args, **kwargs): + return matrix.elementwise_maximum(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.dense_set_diagonal instead") +def dense_set_diagonal(*args, **kwargs): + return matrix.dense_set_diagonal(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.sparse_set_diagonal instead") +def sparse_set_diagonal(*args, **kwargs): + return matrix.sparse_set_diagonal(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.set_diagonal instead") +def set_diagonal(*args, **kwargs): + return matrix.set_diagonal(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.set_submatrix instead") +def set_submatrix(*args, **kwargs): + return matrix.set_submatrix(*args, **kwargs) + + +@deprecated( + version="1.5.0", reason="Use graphtools.matrix.sparse_nonzero_discrete instead" +) +def sparse_nonzero_discrete(*args, **kwargs): + return matrix.sparse_nonzero_discrete(*args, **kwargs) + + +@deprecated( + version="1.5.0", reason="Use graphtools.matrix.dense_nonzero_discrete instead" +) +def dense_nonzero_discrete(*args, **kwargs): + return matrix.dense_nonzero_discrete(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.nonzero_discrete instead") +def nonzero_discrete(*args, **kwargs): + return matrix.nonzero_discrete(*args, **kwargs) + + +@deprecated(version="1.5.0", reason="Use graphtools.matrix.to_array instead") +def to_array(*args, **kwargs): + return matrix.to_array(*args, **kwargs) + + +@deprecated( + version="1.5.0", reason="Use graphtools.matrix.matrix_is_equivalent instead" +) +def matrix_is_equivalent(*args, **kwargs): + return matrix.matrix_is_equivalent(*args, **kwargs) diff --git a/requirements.txt b/requirements.txt index 08fcd1b..c31163d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ pygsp>=>=0.5.1 scikit-learn>=0.20.0 future tasklogger>=1.0 +Deprecated diff --git a/setup.py b/setup.py index 4d7be50..4384517 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,7 @@ "scikit-learn>=0.20.0", "future", "tasklogger>=1.0", + "Deprecated", ] test_requires = [ diff --git a/test/test_estimator.py b/test/test_estimator.py new file mode 100644 index 0000000..4f6fc15 --- /dev/null +++ b/test/test_estimator.py @@ -0,0 +1,35 @@ +import graphtools +import graphtools.estimator +import numpy as np +from load_tests import data + + +class Estimator(graphtools.estimator.GraphEstimator): + def _reset_graph(self): + self.reset = True + + +def test_estimator(): + E = Estimator(verbose=True) + assert E.verbose == 1 + E = Estimator(verbose=False, n_landmark=None) + assert E.verbose == 0 + E.fit(data) + assert np.all(E.X == data) + assert isinstance(E.graph, graphtools.graphs.kNNGraph) + assert not isinstance(E.graph, graphtools.graphs.LandmarkGraph) + assert not hasattr(E, "reset") + # convert non landmark to landmark + E.set_params(n_landmark=data.shape[0] // 2) + assert E.reset + assert isinstance(E.graph, graphtools.graphs.LandmarkGraph) + del E.reset + # convert landmark to non landmark + E.set_params(n_landmark=None) + assert E.reset + assert not isinstance(E.graph, graphtools.graphs.LandmarkGraph) + del E.reset + # change parameters that force reset + E.set_params(knn=E.knn * 2) + assert E.reset + assert E.graph is None diff --git a/test/test_matrix.py b/test/test_matrix.py new file mode 100644 index 0000000..aac45a7 --- /dev/null +++ b/test/test_matrix.py @@ -0,0 +1,166 @@ +import graphtools.matrix +import graphtools.utils +from parameterized import parameterized +from scipy import sparse +import numpy as np +import graphtools +from load_tests import data +from load_tests import assert_warns_message + + +@parameterized( + [ + (np.array,), + (sparse.csr_matrix,), + (sparse.csc_matrix,), + (sparse.bsr_matrix,), + (sparse.lil_matrix,), + (sparse.coo_matrix,), + ] +) +def test_nonzero_discrete(matrix_class): + X = np.random.choice([0, 1, 2], p=[0.95, 0.025, 0.025], size=(100, 100)) + X = matrix_class(X) + assert graphtools.matrix.nonzero_discrete(X, [1, 2]) + assert not graphtools.matrix.nonzero_discrete(X, [1, 3]) + + +@parameterized([(0,), (1e-4,)]) +def test_nonzero_discrete_knngraph(thresh): + G = graphtools.Graph(data, n_pca=10, knn=5, decay=None, thresh=thresh) + assert graphtools.matrix.nonzero_discrete(G.K, [0.5, 1]) + + +@parameterized([(0,), (1e-4,)]) +def test_nonzero_discrete_decay_graph(thresh): + G = graphtools.Graph(data, n_pca=10, knn=5, decay=15, thresh=thresh) + assert not graphtools.matrix.nonzero_discrete(G.K, [0.5, 1]) + + +def test_nonzero_discrete_constant(): + assert graphtools.matrix.nonzero_discrete(2, [1, 2]) + assert not graphtools.matrix.nonzero_discrete(2, [1, 3]) + + +def test_if_sparse_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) if_sparse. (Use graphtools.matrix.if_sparse instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.if_sparse(lambda x: x, lambda x: x, np.zeros((4, 4))) + + +def test_sparse_minimum_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) sparse_minimum. (Use graphtools.matrix.sparse_minimum instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.sparse_minimum( + sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4)) + ) + + +def test_sparse_maximum_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) sparse_maximum. (Use graphtools.matrix.sparse_maximum instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.sparse_maximum( + sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4)) + ) + + +def test_elementwise_minimum_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) elementwise_minimum. (Use graphtools.matrix.elementwise_minimum instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.elementwise_minimum( + sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4)) + ) + + +def test_elementwise_maximum_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) elementwise_maximum. (Use graphtools.matrix.elementwise_maximum instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.elementwise_maximum( + sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4)) + ) + + +def test_dense_set_diagonal_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) dense_set_diagonal. (Use graphtools.matrix.dense_set_diagonal instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.dense_set_diagonal(np.zeros((4, 4)), 1) + + +def test_sparse_set_diagonal_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) sparse_set_diagonal. (Use graphtools.matrix.sparse_set_diagonal instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.sparse_set_diagonal(sparse.csr_matrix((4, 4)), 1) + + +def test_set_diagonal_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) set_diagonal. (Use graphtools.matrix.set_diagonal instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.set_diagonal(np.zeros((4, 4)), 1) + + +def test_set_submatrix_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) set_submatrix. (Use graphtools.matrix.set_submatrix instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.set_submatrix( + sparse.lil_matrix((4, 4)), [1, 2], [0, 1], np.array([[1, 2], [3, 4]]) + ) + + +def test_sparse_nonzero_discrete_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) sparse_nonzero_discrete. (Use graphtools.matrix.sparse_nonzero_discrete instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.sparse_nonzero_discrete(sparse.csr_matrix((4, 4)), [1]) + + +def test_dense_nonzero_discrete_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) dense_nonzero_discrete. (Use graphtools.matrix.dense_nonzero_discrete instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.dense_nonzero_discrete(np.zeros((4, 4)), [1]) + + +def test_nonzero_discrete_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) nonzero_discrete. (Use graphtools.matrix.nonzero_discrete instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.nonzero_discrete(np.zeros((4, 4)), [1]) + + +def test_to_array_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) to_array. (Use graphtools.matrix.to_array instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.to_array([1]) + + +def test_matrix_is_equivalent_deprecated(): + with assert_warns_message( + DeprecationWarning, + "Call to deprecated function (or staticmethod) matrix_is_equivalent. (Use graphtools.matrix.matrix_is_equivalent instead) -- Deprecated since version 1.5.0.", + ): + graphtools.utils.matrix_is_equivalent( + sparse.csr_matrix((4, 4)), sparse.bsr_matrix((4, 4)) + ) diff --git a/test/test_utils.py b/test/test_utils.py deleted file mode 100644 index 0d72174..0000000 --- a/test/test_utils.py +++ /dev/null @@ -1,40 +0,0 @@ -import graphtools.utils -from parameterized import parameterized -from scipy import sparse -import numpy as np -import graphtools -from load_tests import data - - -@parameterized( - [ - (np.array,), - (sparse.csr_matrix,), - (sparse.csc_matrix,), - (sparse.bsr_matrix,), - (sparse.lil_matrix,), - (sparse.coo_matrix,), - ] -) -def test_nonzero_discrete(matrix_class): - X = np.random.choice([0, 1, 2], p=[0.95, 0.025, 0.025], size=(100, 100)) - X = matrix_class(X) - assert graphtools.utils.nonzero_discrete(X, [1, 2]) - assert not graphtools.utils.nonzero_discrete(X, [1, 3]) - - -@parameterized([(0,), (1e-4,)]) -def test_nonzero_discrete_knngraph(thresh): - G = graphtools.Graph(data, n_pca=10, knn=5, decay=None, thresh=thresh) - assert graphtools.utils.nonzero_discrete(G.K, [0.5, 1]) - - -@parameterized([(0,), (1e-4,)]) -def test_nonzero_discrete_decay_graph(thresh): - G = graphtools.Graph(data, n_pca=10, knn=5, decay=15, thresh=thresh) - assert not graphtools.utils.nonzero_discrete(G.K, [0.5, 1]) - - -def test_nonzero_discrete_constant(): - assert graphtools.utils.nonzero_discrete(2, [1, 2]) - assert not graphtools.utils.nonzero_discrete(2, [1, 3]) From 37fc848d7d6d529b2f56799891a8aab73730f95d Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 15:19:51 -0500 Subject: [PATCH 04/15] bump version --- graphtools/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/version.py b/graphtools/version.py index 5a6e033..61a6a00 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "1.4.3a0" +__version__ = "1.5.0a0" From afe46f11fd7bfb9222ac50ad5216e6a98cd59c05 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 16:02:35 -0500 Subject: [PATCH 05/15] fix deprecated function call --- graphtools/estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/estimator.py b/graphtools/estimator.py index 8d708ae..8b1b24d 100644 --- a/graphtools/estimator.py +++ b/graphtools/estimator.py @@ -328,7 +328,7 @@ def _parse_input(self, X): return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): - if self.X is not None and not utils.matrix_is_equivalent(X, self.X): + if self.X is not None and not matrix.matrix_is_equivalent(X, self.X): """ If the same data is used, we can reuse existing kernel and diffusion matrices. Otherwise we have to recompute. From b65897dbda85afc369721f2ea75ecd79da887654 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 16:05:57 -0500 Subject: [PATCH 06/15] add missing matrix submodule --- graphtools/matrix.py | 93 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) create mode 100644 graphtools/matrix.py diff --git a/graphtools/matrix.py b/graphtools/matrix.py new file mode 100644 index 0000000..8c818f2 --- /dev/null +++ b/graphtools/matrix.py @@ -0,0 +1,93 @@ +import numpy as np +import numbers + +from scipy import sparse + + +def if_sparse(sparse_func, dense_func, *args, **kwargs): + if sparse.issparse(args[0]): + for arg in args[1:]: + assert sparse.issparse(arg) + return sparse_func(*args, **kwargs) + else: + return dense_func(*args, **kwargs) + + +def sparse_minimum(X, Y): + return X.minimum(Y) + + +def sparse_maximum(X, Y): + return X.maximum(Y) + + +def elementwise_minimum(X, Y): + return if_sparse(sparse_minimum, np.minimum, X, Y) + + +def elementwise_maximum(X, Y): + return if_sparse(sparse_maximum, np.maximum, X, Y) + + +def dense_set_diagonal(X, diag): + X[np.diag_indices(X.shape[0])] = diag + return X + + +def sparse_set_diagonal(X, diag): + cls = type(X) + if not isinstance(X, (sparse.lil_matrix, sparse.dia_matrix)): + X = X.tocoo() + X.setdiag(diag) + return cls(X) + + +def set_diagonal(X, diag): + return if_sparse(sparse_set_diagonal, dense_set_diagonal, X, diag=diag) + + +def set_submatrix(X, i, j, values): + X[np.ix_(i, j)] = values + return X + + +def sparse_nonzero_discrete(X, values): + if isinstance( + X, (sparse.bsr_matrix, sparse.dia_matrix, sparse.dok_matrix, sparse.lil_matrix) + ): + X = X.tocsr() + return dense_nonzero_discrete(X.data, values) + + +def dense_nonzero_discrete(X, values): + result = np.full_like(X, False, dtype=bool) + for value in values: + result = np.logical_or(result, X == value) + return np.all(result) + + +def nonzero_discrete(X, values): + if isinstance(values, numbers.Number): + values = [values] + if 0 not in values: + values.append(0) + return if_sparse(sparse_nonzero_discrete, dense_nonzero_discrete, X, values=values) + + +def to_array(X): + if sparse.issparse(X): + X = X.toarray() + elif isinstance(X, np.matrix): + X = X.A + return X + + +def matrix_is_equivalent(X, Y): + """ + Checks matrix equivalence with numpy, scipy and pandas + """ + return X is Y or ( + isinstance(X, Y.__class__) + and X.shape == Y.shape + and np.sum((X != Y).sum()) == 0 + ) From e9ea8bf946a6c6179440541ff16096369d87bfa0 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 16:08:42 -0500 Subject: [PATCH 07/15] add missing matrix import --- graphtools/estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/estimator.py b/graphtools/estimator.py index 8b1b24d..9196a08 100644 --- a/graphtools/estimator.py +++ b/graphtools/estimator.py @@ -6,7 +6,7 @@ from functools import partial from scipy import sparse -from . import api, graphs, base, utils +from . import api, graphs, base, utils, matrix def attribute(attr, default=None, doc=None, on_set=None): From 1511193a401ed1f6a64a348f814be996862f3d8c Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 17:22:52 -0500 Subject: [PATCH 08/15] test utils --- graphtools/utils.py | 31 ++++++++++++++++++++++++------- test/test_utils.py | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+), 7 deletions(-) create mode 100644 test/test_utils.py diff --git a/graphtools/utils.py b/graphtools/utils.py index 8482d2a..1ef9c3e 100644 --- a/graphtools/utils.py +++ b/graphtools/utils.py @@ -5,13 +5,13 @@ try: import pandas as pd -except ImportError: +except ImportError: # pragma: no cover # pandas not installed pass try: import anndata -except ImportError: +except ImportError: # pragma: no cover # anndata not installed pass @@ -19,7 +19,7 @@ def is_SparseDataFrame(X): try: pd - except NameError: + except NameError: # pragma: no cover # pandas not installed return False with warnings.catch_warnings(): @@ -37,11 +37,29 @@ def is_SparseDataFrame(X): def is_Anndata(X): try: return isinstance(X, anndata.AnnData) - except NameError: + except NameError: # pragma: no cover # anndata not installed return False +def check_greater(x, **params): + """Check that parameters are greater than x as expected + + Parameters + ---------- + + x : excepted boundary + Checks not run if parameters are greater than x + + Raises + ------ + ValueError : unacceptable choice of parameters + """ + for p in params: + if not isinstance(params[p], numbers.Number) or params[p] <= x: + raise ValueError("Expected {} > {}, got {}".format(p, x, params[p])) + + def check_positive(**params): """Check that parameters are positive as expected @@ -49,9 +67,7 @@ def check_positive(**params): ------ ValueError : unacceptable choice of parameters """ - for p in params: - if not isinstance(params[p], numbers.Number) or params[p] <= 0: - raise ValueError("Expected {} > 0, got {}".format(p, params[p])) + return check_greater(0, **params) def check_int(**params): @@ -131,6 +147,7 @@ def check_between(v_min, v_max, **params): ------ ValueError : unacceptable choice of parameters """ + check_greater(v_min, v_max=v_max) for p in params: if params[p] < v_min or params[p] > v_max: raise ValueError( diff --git a/test/test_utils.py b/test/test_utils.py new file mode 100644 index 0000000..1aadd82 --- /dev/null +++ b/test/test_utils.py @@ -0,0 +1,39 @@ +import graphtools +from load_tests import assert_raises_message + + +def test_check_in(): + graphtools.utils.check_in(["hello", "world"], foo="hello") + with assert_raises_message( + ValueError, "foo value bar not recognized. Choose from ['hello', 'world']" + ): + graphtools.utils.check_in(["hello", "world"], foo="bar") + + +def test_check_int(): + graphtools.utils.check_int(foo=5) + graphtools.utils.check_int(foo=-5) + with assert_raises_message(ValueError, "Expected foo integer, got 5.3"): + graphtools.utils.check_int(foo=5.3) + + +def test_check_positive(): + graphtools.utils.check_positive(foo=5) + with assert_raises_message(ValueError, "Expected foo > 0, got -5"): + graphtools.utils.check_positive(foo=-5) + with assert_raises_message(ValueError, "Expected foo > 0, got 0"): + graphtools.utils.check_positive(foo=0) + + +def test_check_if_not(): + graphtools.utils.check_if_not(-5, graphtools.utils.check_positive, foo=-5) + with assert_raises_message(ValueError, "Expected foo > 0, got -5"): + graphtools.utils.check_if_not(-4, graphtools.utils.check_positive, foo=-5) + + +def test_check_between(): + graphtools.utils.check_between(-5, -3, foo=-4) + with assert_raises_message(ValueError, "Expected foo between -5 and -3, got -6"): + graphtools.utils.check_between(-5, -3, foo=-6) + with assert_raises_message(ValueError, "Expected v_max > -3, got -5"): + graphtools.utils.check_between(-3, -5, foo=-6) From 418cf7286b5033f135f850a59e3e025a6cfdf8ff Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 17:26:43 -0500 Subject: [PATCH 09/15] migrate pandas to utils --- graphtools/base.py | 45 ++++++++++++--------------------------------- graphtools/utils.py | 8 ++++++++ 2 files changed, 20 insertions(+), 33 deletions(-) diff --git a/graphtools/base.py b/graphtools/base.py index 838bf90..a8671d2 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -15,18 +15,6 @@ import sys import tasklogger -try: - import pandas as pd -except ImportError: - # pandas not installed - pass - -try: - import anndata -except (ImportError, SyntaxError): - # anndata not installed - pass - from . import matrix, utils _logger = tasklogger.get_tasklogger("graphtools") @@ -126,28 +114,19 @@ def __init__( self._check_data(data) n_pca, rank_threshold = self._parse_n_pca_threshold(data, n_pca, rank_threshold) - try: - pd - except NameError: - # pandas not installed - pass - else: - if utils.is_SparseDataFrame(data): - data = data.to_coo() - elif isinstance(data, pd.DataFrame): - try: - data = data.sparse.to_coo() - except AttributeError: - data = np.array(data) - try: - anndata - except NameError: - # anndata not installed - pass - else: - if isinstance(data, anndata.AnnData): - data = data.X + if utils.is_SparseDataFrame(data): + data = data.to_coo() + elif utils.is_DataFrame(data): + try: + # sparse data + data = data.sparse.to_coo() + except AttributeError: + # dense data + data = np.array(data) + elif utils.is_Anndata(data): + data = data.X + self.data = data self.n_pca = n_pca self.rank_threshold = rank_threshold diff --git a/graphtools/utils.py b/graphtools/utils.py index 1ef9c3e..55e2bd3 100644 --- a/graphtools/utils.py +++ b/graphtools/utils.py @@ -16,6 +16,14 @@ pass +def is_DataFrame(X): + try: + return isinstance(X, pd.DataFrame) + except NameError: # pragma: no cover + # pandas not installed + return False + + def is_SparseDataFrame(X): try: pd From 02664d27a979193cfa0243fdd6bcb16b414ee295 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 17:28:52 -0500 Subject: [PATCH 10/15] clean up untestables --- graphtools/base.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/graphtools/base.py b/graphtools/base.py index a8671d2..4789b8a 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -570,11 +570,7 @@ def symmetrize_kernel(self, K): _logger.debug("Using no symmetrization.") pass else: - # this should never happen - raise ValueError( - "Expected kernel_symm in ['+', '*', 'mnn' or None]. " - "Got {}".format(self.theta) - ) + raise NotImplementedError return K def apply_anisotropy(self, K): @@ -795,7 +791,7 @@ def to_igraph(self, attribute="weight", **kwargs): """ try: import igraph as ig - except ImportError: + except ImportError: # pragma: no cover raise ImportError( "Please install igraph with " "`pip install --user python-igraph`." ) From f802646b83493de2333edb2fd4774d27388729ad Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 17:44:04 -0500 Subject: [PATCH 11/15] clean up logic --- graphtools/estimator.py | 152 ++++++++++++++++++---------------------- 1 file changed, 70 insertions(+), 82 deletions(-) diff --git a/graphtools/estimator.py b/graphtools/estimator.py index 9196a08..f68db58 100644 --- a/graphtools/estimator.py +++ b/graphtools/estimator.py @@ -223,16 +223,26 @@ def __init__( def set_params(self, **params): for p in params: - setattr(self, p, params[p]) + if not getattr(self, p) == params[p]: + setattr(self, p, params[p]) self._set_graph_params(**params) def _set_graph_params(self, **params): if self.graph is not None: + if "n_landmark" in params: + n_landmark = params["n_landmark"] + del params["n_landmark"] try: self.graph.set_params(**params) except ValueError as e: _logger.debug("Reset graph due to {}".format(str(e))) self.graph = None + else: + try: + # special way to reset the graph here + self.n_landmark = n_landmark + except NameError: + pass @abc.abstractmethod def _reset_graph(self): @@ -240,7 +250,7 @@ def _reset_graph(self): Any downstream effects of resetting the graph should override this function """ - pass + raise NotImplementedError def _detect_precomputed_matrix_type(self, X): if isinstance(X, sparse.coo_matrix): @@ -259,72 +269,54 @@ def _parse_n_landmark(self, X): def _parse_input(self, X): # passing graphs as input if isinstance(X, base.BaseGraph): - if isinstance(X, graphs.LandmarkGraph) or ( - isinstance(X, base.BaseGraph) and self.n_landmark is None - ): - # we can keep this graph - self.graph = X - X = X.data - n_pca = self.graph.n_pca - update_graph = False - if isinstance(self.graph, graphs.TraditionalGraph): - precomputed = self.graph.precomputed - else: - precomputed = None - return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph + # we can keep this graph + self.graph = X + X = X.data + # immutable graph properties override operator + n_pca = self.graph.n_pca + self.knn = X.knn + self.decay = X.decay + self.distance = X.distance + self.thresh = X.thresh + update_graph = False + if isinstance(self.graph, graphs.TraditionalGraph): + precomputed = self.graph.precomputed else: - # n_landmark is set, but this is not a landmark graph - self.graph = None - X = X.kernel - precomputed = "affinity" - n_pca = None - update_graph = False - return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph - else: - try: - if isinstance(X, pygsp.graphs.Graph): - self.graph = None - X = X.W - precomputed = "adjacency" - update_graph = False - n_pca = None - return ( - X, - n_pca, - self._parse_n_landmark(X), - precomputed, - update_graph, - ) - except NameError: - # pygsp not installed - pass - - # checks on regular data - update_graph = True - if utils.is_Anndata(X): - X = X.X - if not callable(self.distance) and self.distance.startswith("precomputed"): - if self.distance == "precomputed": - # automatic detection - precomputed = self._detect_precomputed_matrix_type(X) - elif self.distance in ["precomputed_affinity", "precomputed_distance"]: - precomputed = self.distance.split("_")[1] - else: - raise ValueError( - "distance {} not recognized. Did you mean " - "'precomputed_distance', " - "'precomputed_affinity', or 'precomputed' " - "(automatically detects distance or affinity)?".format( - self.distance - ) - ) + precomputed = None + elif isinstance(X, pygsp.graphs.Graph): + # convert pygsp to graphtools + self.graph = None + X = X.W + precomputed = "adjacency" + update_graph = False n_pca = None else: - precomputed = None - if self.n_pca is None or self.n_pca >= np.min(X.shape): + # data matrix + update_graph = True + if utils.is_Anndata(X): + X = X.X + if not callable(self.distance) and self.distance.startswith("precomputed"): + if self.distance == "precomputed": + # automatic detection + precomputed = self._detect_precomputed_matrix_type(X) + elif self.distance in ["precomputed_affinity", "precomputed_distance"]: + precomputed = self.distance.split("_")[1] + else: + raise ValueError( + "distance {} not recognized. Did you mean " + "'precomputed_distance', " + "'precomputed_affinity', or 'precomputed' " + "(automatically detects distance or affinity)?".format( + self.distance + ) + ) n_pca = None else: - n_pca = self.n_pca + precomputed = None + if self.n_pca is None or self.n_pca >= np.min(X.shape): + n_pca = None + else: + n_pca = self.n_pca return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): @@ -335,26 +327,22 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): """ self.graph = None else: - try: - self.graph.set_params( - n_pca=n_pca, - precomputed=precomputed, - n_landmark=n_landmark, - random_state=self.random_state, - knn=self.knn, - decay=self.decay, - distance=self.distance, - n_svd=self.n_svd, - n_jobs=self.n_jobs, - thresh=self.thresh, - verbose=self.verbose, - **(self.kwargs) - ) + self._set_graph_params( + n_pca=n_pca, + precomputed=precomputed, + n_landmark=n_landmark, + random_state=self.random_state, + knn=self.knn, + decay=self.decay, + distance=self.distance, + n_svd=self.n_svd, + n_jobs=self.n_jobs, + thresh=self.thresh, + verbose=self.verbose, + **(self.kwargs) + ) + if self.graph is not None: _logger.info("Using precomputed graph and diffusion operator...") - except ValueError as e: - # something changed that should have invalidated the graph - _logger.debug("Reset graph due to {}".format(str(e))) - self.graph = None def fit(self, X, **kwargs): """Computes the graph From 57f366fc09b5ed8bc82c70e203c9a16cca2b536e Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 18:02:07 -0500 Subject: [PATCH 12/15] test precomputed and graph input --- graphtools/estimator.py | 12 ++++++------ test/test_estimator.py | 40 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/graphtools/estimator.py b/graphtools/estimator.py index f68db58..66bd0c9 100644 --- a/graphtools/estimator.py +++ b/graphtools/estimator.py @@ -47,7 +47,7 @@ class GraphEstimator(object, metaclass=abc.ABCMeta): sets decay rate of kernel tails. If None, alpha decaying kernel is not used - n_landmark : int, optional, default: 2000 + n_landmark : int, optional, default: None number of landmarks to use in graph construction n_pca : int, optional, default: 100 @@ -253,7 +253,7 @@ def _reset_graph(self): raise NotImplementedError def _detect_precomputed_matrix_type(self, X): - if isinstance(X, sparse.coo_matrix): + if isinstance(X, (sparse.coo_matrix, sparse.dia_matrix)): X = X.tocsr() if X[0, 0] == 0: return "distance" @@ -274,10 +274,10 @@ def _parse_input(self, X): X = X.data # immutable graph properties override operator n_pca = self.graph.n_pca - self.knn = X.knn - self.decay = X.decay - self.distance = X.distance - self.thresh = X.thresh + self.knn = self.graph.knn + self.decay = self.graph.decay + self.distance = self.graph.distance + self.thresh = self.graph.thresh update_graph = False if isinstance(self.graph, graphs.TraditionalGraph): precomputed = self.graph.precomputed diff --git a/test/test_estimator.py b/test/test_estimator.py index 4f6fc15..997a25b 100644 --- a/test/test_estimator.py +++ b/test/test_estimator.py @@ -1,7 +1,10 @@ import graphtools import graphtools.estimator +import pygsp import numpy as np from load_tests import data +from scipy import sparse +from parameterized import parameterized class Estimator(graphtools.estimator.GraphEstimator): @@ -12,7 +15,7 @@ def _reset_graph(self): def test_estimator(): E = Estimator(verbose=True) assert E.verbose == 1 - E = Estimator(verbose=False, n_landmark=None) + E = Estimator(verbose=False) assert E.verbose == 0 E.fit(data) assert np.all(E.X == data) @@ -33,3 +36,38 @@ def test_estimator(): E.set_params(knn=E.knn * 2) assert E.reset assert E.graph is None + + +@parameterized( + [ + (1 - np.eye(10), "distance"), + (np.eye(10), "affinity"), + (sparse.coo_matrix(1 - np.eye(10)), "distance"), + (sparse.eye(10), "affinity"), + ] +) +def test_precomputed(X, precomputed): + E = Estimator(verbose=False, distance="precomputed") + assert E._detect_precomputed_matrix_type(X) == precomputed + E.fit(X) + assert isinstance(E.graph, graphtools.graphs.TraditionalGraph) + assert E.graph.precomputed == precomputed + + +def test_graph_input(): + X = np.random.normal(0, 1, (10, 2)) + E = Estimator(verbose=0) + G = graphtools.Graph(X) + E.fit(G) + assert E.graph == G + G = graphtools.Graph(X, knn=2, decay=5, distance="cosine", thresh=0) + E.fit(G) + assert E.graph == G + assert E.knn == G.knn + assert E.decay == G.decay + assert E.distance == G.distance + assert E.thresh == G.thresh + W = G.K - np.eye(X.shape[0]) + G = pygsp.graphs.Graph(W) + E.fit(G, use_pygsp=True) + assert np.all(E.graph.W.toarray() == W) From a352cecf5ea7c3e0830ea36f12517157d43112a8 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 18:26:52 -0500 Subject: [PATCH 13/15] test api more --- graphtools/estimator.py | 33 +++++++++++-------------- setup.py | 1 + test/test_estimator.py | 55 ++++++++++++++++++++++++++++++++++------- 3 files changed, 61 insertions(+), 28 deletions(-) diff --git a/graphtools/estimator.py b/graphtools/estimator.py index 66bd0c9..f494b40 100644 --- a/graphtools/estimator.py +++ b/graphtools/estimator.py @@ -173,7 +173,11 @@ def n_landmark(self, n_landmark): utils.check_if_not( None, utils.check_positive, utils.check_int, n_landmark=n_landmark ) + self._update_n_landmark(n_landmark) + + def _update_n_landmark(self, n_landmark): if self.graph is not None: + n_landmark = self._parse_n_landmark(self.graph.data_nu) if ( n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph) ) or ( @@ -229,20 +233,11 @@ def set_params(self, **params): def _set_graph_params(self, **params): if self.graph is not None: - if "n_landmark" in params: - n_landmark = params["n_landmark"] - del params["n_landmark"] try: self.graph.set_params(**params) except ValueError as e: _logger.debug("Reset graph due to {}".format(str(e))) self.graph = None - else: - try: - # special way to reset the graph here - self.n_landmark = n_landmark - except NameError: - pass @abc.abstractmethod def _reset_graph(self): @@ -266,6 +261,12 @@ def _parse_n_landmark(self, X): else: return self.n_landmark + def _parse_n_svd(self, X): + if self.n_svd >= X.shape[0]: + return X.shape[0] - 1 + else: + return self.n_svd + def _parse_input(self, X): # passing graphs as input if isinstance(X, base.BaseGraph): @@ -302,14 +303,7 @@ def _parse_input(self, X): elif self.distance in ["precomputed_affinity", "precomputed_distance"]: precomputed = self.distance.split("_")[1] else: - raise ValueError( - "distance {} not recognized. Did you mean " - "'precomputed_distance', " - "'precomputed_affinity', or 'precomputed' " - "(automatically detects distance or affinity)?".format( - self.distance - ) - ) + raise NotImplementedError n_pca = None else: precomputed = None @@ -327,6 +321,7 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): """ self.graph = None else: + self._update_n_landmark(n_landmark) self._set_graph_params( n_pca=n_pca, precomputed=precomputed, @@ -335,7 +330,7 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): knn=self.knn, decay=self.decay, distance=self.distance, - n_svd=self.n_svd, + n_svd=self._parse_n_svd(self.X), n_jobs=self.n_jobs, thresh=self.thresh, verbose=self.verbose, @@ -393,7 +388,7 @@ def fit(self, X, **kwargs): knn=self.knn, decay=self.decay, distance=self.distance, - n_svd=self.n_svd, + n_svd=self._parse_n_svd(self.X), n_jobs=self.n_jobs, thresh=self.thresh, verbose=self.verbose, diff --git a/setup.py b/setup.py index 4384517..0586273 100644 --- a/setup.py +++ b/setup.py @@ -20,6 +20,7 @@ "coveralls", "python-igraph", "parameterized", + "anndata", ] if sys.version_info[0] == 3: diff --git a/test/test_estimator.py b/test/test_estimator.py index 997a25b..062a332 100644 --- a/test/test_estimator.py +++ b/test/test_estimator.py @@ -1,8 +1,10 @@ import graphtools import graphtools.estimator import pygsp +import anndata +import warnings import numpy as np -from load_tests import data +from load_tests import data, assert_raises_message from scipy import sparse from parameterized import parameterized @@ -40,16 +42,19 @@ def test_estimator(): @parameterized( [ - (1 - np.eye(10), "distance"), - (np.eye(10), "affinity"), - (sparse.coo_matrix(1 - np.eye(10)), "distance"), - (sparse.eye(10), "affinity"), + ("precomputed", 1 - np.eye(10), "distance"), + ("precomputed", np.eye(10), "affinity"), + ("precomputed", sparse.coo_matrix(1 - np.eye(10)), "distance"), + ("precomputed", sparse.eye(10), "affinity"), + ("precomputed_affinity", 1 - np.eye(10), "affinity"), + ("precomputed_distance", np.ones((10, 10)), "distance"), ] ) -def test_precomputed(X, precomputed): - E = Estimator(verbose=False, distance="precomputed") - assert E._detect_precomputed_matrix_type(X) == precomputed - E.fit(X) +def test_precomputed(distance, X, precomputed): + E = Estimator(verbose=False, distance=distance) + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message="K should have a non-zero diagonal") + E.fit(X) assert isinstance(E.graph, graphtools.graphs.TraditionalGraph) assert E.graph.precomputed == precomputed @@ -71,3 +76,35 @@ def test_graph_input(): G = pygsp.graphs.Graph(W) E.fit(G, use_pygsp=True) assert np.all(E.graph.W.toarray() == W) + + +def test_anndata_input(): + X = np.random.normal(0, 1, (10, 2)) + E = Estimator(verbose=0) + E.fit(X) + E2 = Estimator(verbose=0) + E2.fit(anndata.AnnData(X)) + np.testing.assert_allclose( + E.graph.K.toarray(), E2.graph.K.toarray(), rtol=1e-6, atol=2e-7 + ) + + +def test_new_input(): + X = np.random.normal(0, 1, (10, 2)) + X2 = np.random.normal(0, 1, (10, 2)) + E = Estimator(verbose=0) + E.fit(X) + G = E.graph + E.fit(X) + assert E.graph is G + E.fit(X.copy()) + assert E.graph is G + E.n_landmark = 500 + E.fit(X) + assert E.graph is G + E.n_landmark = 5 + E.fit(X) + assert np.all(E.graph.K.toarray() == G.K.toarray()) + G = E.graph + E.fit(X2) + assert E.graph is not G From 31e46eb6b885bcdea1b7f751c57d16b69fcd10a0 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Mon, 24 Feb 2020 18:43:00 -0500 Subject: [PATCH 14/15] test pca --- graphtools/estimator.py | 52 ++++++++++++++++++++++++++++++----------- test/test_estimator.py | 14 +++++++++++ 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/graphtools/estimator.py b/graphtools/estimator.py index f494b40..5cb130f 100644 --- a/graphtools/estimator.py +++ b/graphtools/estimator.py @@ -177,7 +177,7 @@ def n_landmark(self, n_landmark): def _update_n_landmark(self, n_landmark): if self.graph is not None: - n_landmark = self._parse_n_landmark(self.graph.data_nu) + n_landmark = self._parse_n_landmark(self.graph.data_nu, n_landmark) if ( n_landmark is None and isinstance(self.graph, graphs.LandmarkGraph) ) or ( @@ -234,6 +234,18 @@ def set_params(self, **params): def _set_graph_params(self, **params): if self.graph is not None: try: + if "n_pca" in params: + params["n_pca"] = self._parse_n_pca( + self.graph.data_nu, params["n_pca"] + ) + if "n_svd" in params: + params["n_svd"] = self._parse_n_svd( + self.graph.data_nu, params["n_svd"] + ) + if "n_landmark" in params: + params["n_landmark"] = self._parse_n_landmark( + self.graph.data_nu, params["n_landmark"] + ) self.graph.set_params(**params) except ValueError as e: _logger.debug("Reset graph due to {}".format(str(e))) @@ -255,17 +267,26 @@ def _detect_precomputed_matrix_type(self, X): else: return "affinity" - def _parse_n_landmark(self, X): - if self.n_landmark is not None and self.n_landmark >= X.shape[0]: + @staticmethod + def _parse_n_landmark(X, n_landmark): + if n_landmark is not None and n_landmark >= X.shape[0]: return None else: - return self.n_landmark + return n_landmark - def _parse_n_svd(self, X): - if self.n_svd >= X.shape[0]: + @staticmethod + def _parse_n_pca(X, n_pca): + if n_pca is not None and n_pca >= min(X.shape): + return None + else: + return n_pca + + @staticmethod + def _parse_n_svd(X, n_svd): + if n_svd is not None and n_svd >= X.shape[0]: return X.shape[0] - 1 else: - return self.n_svd + return n_svd def _parse_input(self, X): # passing graphs as input @@ -307,11 +328,14 @@ def _parse_input(self, X): n_pca = None else: precomputed = None - if self.n_pca is None or self.n_pca >= np.min(X.shape): - n_pca = None - else: - n_pca = self.n_pca - return X, n_pca, self._parse_n_landmark(X), precomputed, update_graph + n_pca = self._parse_n_pca(X, self.n_pca) + return ( + X, + n_pca, + self._parse_n_landmark(X, self.n_landmark), + precomputed, + update_graph, + ) def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): if self.X is not None and not matrix.matrix_is_equivalent(X, self.X): @@ -330,7 +354,7 @@ def _update_graph(self, X, precomputed, n_pca, n_landmark, **kwargs): knn=self.knn, decay=self.decay, distance=self.distance, - n_svd=self._parse_n_svd(self.X), + n_svd=self._parse_n_svd(self.X, self.n_svd), n_jobs=self.n_jobs, thresh=self.thresh, verbose=self.verbose, @@ -388,7 +412,7 @@ def fit(self, X, **kwargs): knn=self.knn, decay=self.decay, distance=self.distance, - n_svd=self._parse_n_svd(self.X), + n_svd=self._parse_n_svd(self.X, self.n_svd), n_jobs=self.n_jobs, thresh=self.thresh, verbose=self.verbose, diff --git a/test/test_estimator.py b/test/test_estimator.py index 062a332..179abda 100644 --- a/test/test_estimator.py +++ b/test/test_estimator.py @@ -78,6 +78,20 @@ def test_graph_input(): assert np.all(E.graph.W.toarray() == W) +def test_pca(): + X = np.random.normal(0, 1, (10, 6)) + E = Estimator(verbose=0) + E.fit(X) + G = E.graph + E.set_params(n_pca=100) + E.fit(X) + assert E.graph is G + E.set_params(n_pca=3) + E.fit(X) + assert E.graph is not G + assert E.graph.n_pca == 3 + + def test_anndata_input(): X = np.random.normal(0, 1, (10, 2)) E = Estimator(verbose=0) From ee61f1a4ffd1d6f83a387f3ebcfd6879fe094afa Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 25 Feb 2020 11:05:02 -0500 Subject: [PATCH 15/15] bump version --- graphtools/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/version.py b/graphtools/version.py index 61a6a00..5b60188 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "1.5.0a0" +__version__ = "1.5.0"