From 8c31d5817d2bfca40edce764e3b09d5d40070da3 Mon Sep 17 00:00:00 2001
From: Scott Gigante
Date: Thu, 7 Jun 2018 16:15:40 -0400
Subject: [PATCH 1/3] remove duplicated code

---
 graphtools/graphs.py | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/graphtools/graphs.py b/graphtools/graphs.py
index 1eed886..81c0080 100644
--- a/graphtools/graphs.py
+++ b/graphtools/graphs.py
@@ -172,19 +172,7 @@ def build_kernel(self):
             symmetric matrix with ones down the diagonal with
             no non-negative entries.
         """
-        if self.decay is None or self.thresh == 1:
-            # binary connectivity matrix
-            # sklearn has a function for this
-            log_start("KNN search")
-            K = kneighbors_graph(self.knn_tree,
-                                 n_neighbors=self.knn,
-                                 metric=self.distance,
-                                 mode='connectivity',
-                                 include_self=True)
-            log_complete("KNN search")
-        else:
-            # sparse fast alpha decay
-            K = self.build_kernel_to_data(self.data_nu)
+        K = self.build_kernel_to_data(self.data_nu)
         # symmetrize
         K = (K + K.T) / 2
         return K

From b71444de72b5f048d27da7ed5fc750611187814e Mon Sep 17 00:00:00 2001
From: Scott Gigante
Date: Thu, 7 Jun 2018 16:15:46 -0400
Subject: [PATCH 2/3] fix typo

---
 graphtools/api.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/graphtools/api.py b/graphtools/api.py
index fc9722b..68c236e 100644
--- a/graphtools/api.py
+++ b/graphtools/api.py
@@ -201,7 +201,7 @@ def Graph(data,
     elif len(parent_classes) == 2:
         class Graph(parent_classes[0], parent_classes[1]):
             pass
-    elif len(parent_classes) == 2:
+    elif len(parent_classes) == 3:
         class Graph(parent_classes[0], parent_classes[1], parent_classes[2]):
             pass
     else:

From 96018b52e5203723aa59cc546f95376eb544b7c8 Mon Sep 17 00:00:00 2001
From: jay
Date: Thu, 7 Jun 2018 17:00:52 -0400
Subject: [PATCH 3/3] Reworked PCA interface to include access to PCA object

Included truncated SVD
Removed U, S, V interface
Removed sparse inverse SVD test from sparse data
---
 graphtools/base.py | 96 +++++++-----------------------------------------
 test/test_data.py  |  8 ----
 2 files changed, 13 insertions(+), 91 deletions(-)

diff --git a/graphtools/base.py b/graphtools/base.py
index 25de3b2..a76feee 100644
--- a/graphtools/base.py
+++ b/graphtools/base.py
@@ -3,8 +3,7 @@
 import numpy as np
 import abc
 import pygsp
-from sklearn.decomposition import PCA
-from sklearn.utils.extmath import randomized_svd
+from sklearn.decomposition import PCA, TruncatedSVD
 from sklearn.preprocessing import normalize
 from scipy import sparse
 import warnings
@@ -58,14 +57,8 @@ class Data(Base):
     data_nu : array-like, shape=[n_samples,n_pca]
         Reduced data matrix
 
-    U : array-like, shape=[n_samples, n_pca]
-        Left singular vectors from PCA calculation
-
-    S : array-like, shape=[n_pca]
-        Singular values from PCA calculation
-
-    V : array-like, shape=[n_features, n_pca]
-        Right singular vectors from SVD calculation
+    data_pca : sklearn.decomposition.PCA or sklearn.decomposition.TruncatedSVD
+        sklearn PCA operator
     """
 
     def __init__(self, data, n_pca=None, random_state=None, **kwargs):
@@ -90,7 +83,6 @@ def __init__(self, data, n_pca=None, random_state=None, **kwargs):
         self.data = data
         self.n_pca = n_pca
         self.random_state = random_state
-
         self.data_nu = self._reduce_data()
         super().__init__(**kwargs)
 
@@ -108,17 +100,16 @@ def _reduce_data(self):
         if self.n_pca is not None and self.n_pca < self.data.shape[1]:
             log_start("PCA")
             if sparse.issparse(self.data):
-                _, _, VT = randomized_svd(self.data, self.n_pca,
+                self.data_pca = TruncatedSVD(self.n_pca,
                                           random_state=self.random_state)
-                V = VT.T
-                self._right_singular_vectors = V
-                data_nu = self.data.dot(V)
+                self.data_pca.fit(self.data)
+                data_nu = self.data_pca.transform(self.data)
             else:
-                self.pca = PCA(self.n_pca,
+                self.data_pca = PCA(self.n_pca,
                                svd_solver='randomized',
                                random_state=self.random_state)
-                self.pca.fit(self.data)
-                data_nu = self.pca.transform(self.data)
+                self.data_pca.fit(self.data)
+                data_nu = self.data_pca.transform(self.data)
             log_complete("PCA")
             return data_nu
         else:
@@ -153,58 +144,6 @@ def set_params(self, **params):
             self.random_state = params['random_state']
         return self
 
-    @property
-    def U(self):
-        """Left singular vectors
-
-        Returns
-        -------
-        Left singular vectors from PCA calculation, shape=[n_samples, n_pca]
-
-        Raises
-        ------
-        AttributeError : PCA was not performed
-        """
-        try:
-            return self.pca.components_
-        except AttributeError:
-            return None
-
-    @property
-    def S(self):
-        """Singular values
-
-        Returns
-        -------
-        Singular values from PCA calculation, shape=[n_pca]
-
-        Raises
-        ------
-        AttributeError : PCA was not performed
-        """
-        try:
-            return self.pca.singular_values_
-        except AttributeError:
-            return None
-
-    @property
-    def V(self):
-        """Right singular vectors
-
-        TODO: can we get this from PCA as well?
-
-        Returns
-        -------
-        Right singular values from SVD calculation, shape=[n_features, n_pca]
-
-        Raises
-        ------
-        AttributeError : SVD was not performed
-        """
-        try:
-            return self._right_singular_vectors
-        except AttributeError:
-            return None
 
     def transform(self, Y):
         """Transform input data `Y` to reduced data space defined by `self.data`
@@ -227,13 +166,9 @@ def transform(self, Y):
         """
         try:
             # try PCA first
-            return self.pca.transform(Y)
-        except AttributeError:
-            # no PCA - try SVD instead
-            try:
-                return Y.dot(self._right_singular_vectors)
-            except AttributeError:
-                # no SVD either - check if we can just return as is
+
+            return self.data_pca.transform(Y)
+        except AttributeError:  # no PCA, try to return data
                 try:
                     if Y.shape[1] != self.data.shape[1]:
                         # shape is wrong
@@ -269,13 +204,8 @@ def inverse_transform(self, Y):
         """
         try:
             # try PCA first
-            return self.pca.inverse_transform(Y)
+            return self.data_pca.inverse_transform(Y)
         except AttributeError:
-            # no PCA - try SVD instead
-            try:
-                return Y.dot(self._right_singular_vectors.T)
-            except AttributeError:
-                # no SVD either - check if we can just return as is
                 try:
                     if Y.shape[1] != self.data_nu.shape[1]:
                         # shape is wrong
diff --git a/test/test_data.py b/test/test_data.py
index b502aaa..d09a502 100644
--- a/test/test_data.py
+++ b/test/test_data.py
@@ -113,14 +113,6 @@ def test_inverse_transform_dense_no_pca():
     assert_raises(ValueError, G.inverse_transform, G.data[:, :15])
 
 
-def test_inverse_transform_sparse_pca():
-    G = build_graph(data, sparse=True, n_pca=data.shape[1] - 1)
-    assert(np.allclose(G.data.toarray(), G.inverse_transform(G.data_nu)))
-    assert_raises(ValueError, G.inverse_transform, sp.csr_matrix(G.data)[:, 0])
-    assert_raises(ValueError, G.inverse_transform,
-                  sp.csr_matrix(G.data)[:, :15])
-
-
 def test_inverse_transform_sparse_no_pca():
     G = build_graph(data, sparse=True, n_pca=None)
     assert(np.sum(G.data != G.inverse_transform(G.data_nu)) == 0)
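
Note on PATCH 3/3: the patch replaces the separate U, S and V properties with a single fitted scikit-learn estimator stored as `data_pca`, using TruncatedSVD for sparse input (scikit-learn's PCA centers the data, which is why sparse matrices take the TruncatedSVD path here) and randomized PCA otherwise. Below is a minimal standalone sketch of that reduction logic under standard scikit-learn behaviour; the helper name `reduce_data` and the toy data are illustrative only and are not part of graphtools.

import numpy as np
from scipy import sparse
from sklearn.decomposition import PCA, TruncatedSVD


def reduce_data(data, n_pca, random_state=None):
    # Sparse input cannot be mean-centered without densifying, so fall back
    # to TruncatedSVD; dense input uses randomized PCA, as in the patch.
    if sparse.issparse(data):
        data_pca = TruncatedSVD(n_pca, random_state=random_state)
    else:
        data_pca = PCA(n_pca, svd_solver='randomized',
                       random_state=random_state)
    data_nu = data_pca.fit_transform(data)
    return data_pca, data_nu


X = np.random.normal(size=(100, 50))
data_pca, X_nu = reduce_data(X, n_pca=10, random_state=42)
# The fitted estimator is exposed directly, replacing the old U, S, V
# properties: right singular vectors and singular values live on it.
print(data_pca.components_.shape)          # (10, 50)
print(data_pca.singular_values_.shape)     # (10,)
X_back = data_pca.inverse_transform(X_nu)  # approximate reconstruction

Code that previously read U, S or V would now go through the estimator's own attributes (components_, singular_values_) or its transform/inverse_transform methods, which is what the updated Data.transform and Data.inverse_transform in the patch do.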
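
Note on PATCH 2/3: with the original typo, both elif branches tested len(parent_classes) == 2, so the three-parent branch was unreachable and any combination of three base classes fell through to the final else. The sketch below reproduces the factory pattern with placeholder base classes (KNNBase, DecayBase and MNNBase are stand-ins, not graphtools classes) and shows that the same result can also be obtained with type(), which avoids branching on the number of parents; this is an illustration, not the code in graphtools/api.py.

# Placeholder mixins standing in for the graphtools parent classes.
class KNNBase(object):
    pass


class DecayBase(object):
    pass


class MNNBase(object):
    pass


def make_graph_class(parent_classes):
    # type(name, bases, namespace) builds the class dynamically for any
    # number of bases, so no per-arity elif chain is needed.
    if not 1 <= len(parent_classes) <= 3:
        raise RuntimeError("unexpected number of parent classes")
    return type('Graph', tuple(parent_classes), {})


Graph = make_graph_class([KNNBase, DecayBase, MNNBase])
print([cls.__name__ for cls in Graph.__mro__])
# ['Graph', 'KNNBase', 'DecayBase', 'MNNBase', 'object']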