From 3885ecc6bc559b77da87abee20bd4e9cabf59a4f Mon Sep 17 00:00:00 2001 From: Daniel Burkhardt Date: Tue, 12 Feb 2019 10:29:57 -0500 Subject: [PATCH 01/12] Default theta = 1 --- graphtools/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphtools/base.py b/graphtools/base.py index 2dc8736..d400d74 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -316,7 +316,7 @@ class BaseGraph(with_metaclass(abc.ABCMeta, Base)): 'theta' : min-max 'none' : no symmetrization - theta: float (default: 0.5) + theta: float (default: 1) Min-max symmetrization constant. K = `theta * min(K, K.T) + (1 - theta) * max(K, K.T)` @@ -385,7 +385,7 @@ def _check_symmetrization(self, kernel_symm, theta): if theta is None: warnings.warn("kernel_symm='theta' but theta not given. " "Defaulting to theta=0.5.") - self.theta = theta = 0.5 + self.theta = theta = 1 elif not isinstance(theta, numbers.Number) or \ theta < 0 or theta > 1: raise ValueError("theta {} not recognized. Expected " From 679cc8b839831f49938f685097e2120352d95bd7 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 19 Feb 2019 09:55:29 -0500 Subject: [PATCH 02/12] import signature from inspect. fixes #34 --- graphtools/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/base.py b/graphtools/base.py index 2dc8736..5d82e48 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -3,7 +3,7 @@ import numpy as np import abc import pygsp -from sklearn.utils.fixes import signature +from inspect import signature from sklearn.decomposition import PCA, TruncatedSVD from sklearn.preprocessing import normalize from sklearn.utils.graph import graph_shortest_path From 7f6a1359d25abf5d8717a696aca1066fbab065fa Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Tue, 19 Feb 2019 09:56:09 -0500 Subject: [PATCH 03/12] remove unnecessary import --- graphtools/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/graphtools/base.py b/graphtools/base.py index c4934fd..84505a0 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -6,7 +6,6 @@ from inspect import signature from sklearn.decomposition import PCA, TruncatedSVD from sklearn.preprocessing import normalize -from sklearn.utils.graph import graph_shortest_path from scipy import sparse import warnings import numbers From 560ea76a5e9d0f69234532bec154fea42cbe3beb Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 21 Feb 2019 17:46:22 -0500 Subject: [PATCH 04/12] bump version --- graphtools/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/version.py b/graphtools/version.py index 5becc17..436b119 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "1.0.0" +__version__ = "1.0.1-alpha" From 34243be31736ae9391b8ca580ea757c1d6e04d3c Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 21 Feb 2019 17:47:22 -0500 Subject: [PATCH 05/12] handle n_pca < n_samples --- graphtools/base.py | 6 +++--- test/test_data.py | 6 ++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/graphtools/base.py b/graphtools/base.py index 84505a0..c2b2b2a 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -105,10 +105,10 @@ class Data(Base): def __init__(self, data, n_pca=None, random_state=None, **kwargs): self._check_data(data) - if n_pca is not None and data.shape[1] <= n_pca: + if n_pca is not None and np.min(data.shape) <= n_pca: warnings.warn("Cannot perform PCA to {} dimensions on " - "data with {} dimensions".format(n_pca, - data.shape[1]), + "data with min(n_samples, n_features) = {}".format( + n_pca, np.min(data.shape)), RuntimeWarning) n_pca = None try: diff --git a/test/test_data.py b/test/test_data.py index dfa0889..aed139c 100644 --- a/test/test_data.py +++ b/test/test_data.py @@ -44,6 +44,12 @@ def test_too_many_n_pca(): build_graph(data, n_pca=data.shape[1]) +@warns(RuntimeWarning) +def test_too_many_n_pca(): + build_graph(data[:data.shape[1] - 1], + n_pca=data.shape[1] - 1) + + @warns(RuntimeWarning) def test_precomputed_with_pca(): build_graph(squareform(pdist(data)), From f31ffc4e80334a0c07b5b908fcf2945552cafb82 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Thu, 21 Feb 2019 18:19:22 -0500 Subject: [PATCH 06/12] fix duplicates message --- graphtools/graphs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 298bc7e..8c27d32 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -245,7 +245,7 @@ def _check_duplicates(self, distances, indices): "Detected zero distance between {} pairs of samples. " "Consider removing duplicates to avoid errors in " "downstream processing.".format( - np.sum(np.sum(distances[:, 1:]))), + np.sum(np.sum(distances[:, 1:] == 0))), RuntimeWarning) def build_kernel_to_data(self, Y, knn=None, bandwidth=None, From 41996af65970009cd6250cffaa2daee3746d894e Mon Sep 17 00:00:00 2001 From: Daniel Burkhardt Date: Fri, 1 Mar 2019 13:23:35 -0500 Subject: [PATCH 07/12] adding to_pickle and read_pickle --- graphtools/__init__.py | 2 +- graphtools/api.py | 17 +++++++++++++++++ graphtools/base.py | 11 +++++++++++ test/test_api.py | 18 ++++++++++++++++++ 4 files changed, 47 insertions(+), 1 deletion(-) diff --git a/graphtools/__init__.py b/graphtools/__init__.py index 8fc8a50..7384afc 100644 --- a/graphtools/__init__.py +++ b/graphtools/__init__.py @@ -1,2 +1,2 @@ -from .api import Graph, from_igraph +from .api import Graph, from_igraph, read_pickle from .version import __version__ diff --git a/graphtools/api.py b/graphtools/api.py index 5d48316..794d28f 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -283,3 +283,20 @@ def from_igraph(G, attribute="weight", **kwargs): K = G.get_adjacency(attribute=None).data return Graph(sparse.coo_matrix(K), precomputed='adjacency', **kwargs) + + +def read_pickle(path): + """Load pickled Graphtools object (or any object) from file. + + Parameters + ---------- + path : str + File path where the pickled object will be loaded. + """ + import pickle + with open(path, 'rb') as f: + G = pickle.load(f) + + if not isinstance(G, base.BaseGraph): + warnings.warn('Returning object that is not a graphtools.base.BaseGraph') + return G diff --git a/graphtools/base.py b/graphtools/base.py index d400d74..cd78d6a 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -636,6 +636,17 @@ def to_igraph(self, attribute="weight", **kwargs): return ig.Graph.Weighted_Adjacency(utils.to_dense(W).tolist(), attr=attribute, **kwargs) + def to_pickle(self, path): + """Save the current Graph to a pickle. + + Parameters + ---------- + path : str + File path where the pickled object will be stored. + """ + import pickle + with open(path, 'wb') as f: + pickle.dump(self, f) class PyGSPGraph(with_metaclass(abc.ABCMeta, pygsp.graphs.Graph, Base)): """Interface between BaseGraph and PyGSP. diff --git a/test/test_api.py b/test/test_api.py index 5379cf5..55abf59 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -9,6 +9,8 @@ import igraph import numpy as np import graphtools +import tempfile +import os def test_from_igraph(): @@ -80,6 +82,22 @@ def test_to_igraph(): assert np.all(np.array(G2.get_adjacency( attribute="weight").data) == G.W) +def test_pickle_io(): + G = build_graph(data, use_pygsp=True) + with tempfile.TemporaryDirectory() as tempdir: + path = os.path.join(tempdir, 'tmp.pkl') + G.to_pickle(path) + G_prime = graphtools.read_pickle(path) + assert isinstance(G_prime, type(G)) + +@warns(UserWarning) +def test_pickle_bad_pickle(): + import pickle + with tempfile.TemporaryDirectory() as tempdir: + path = os.path.join(tempdir, 'tmp.pkl') + with open(path, 'wb') as f: + pickle.dump('hello world', f) + G = graphtools.read_pickle(path) @warns(UserWarning) def test_to_pygsp_invalid_precomputed(): From bc3ba3a04ca1438f32b19959e33ba073aca3db93 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 1 Mar 2019 16:46:06 -0500 Subject: [PATCH 08/12] remove unused import, move pickle import to top --- graphtools/base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graphtools/base.py b/graphtools/base.py index cd78d6a..6306935 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -6,11 +6,11 @@ from sklearn.utils.fixes import signature from sklearn.decomposition import PCA, TruncatedSVD from sklearn.preprocessing import normalize -from sklearn.utils.graph import graph_shortest_path from scipy import sparse import warnings import numbers import tasklogger +import pickle try: import pandas as pd @@ -644,10 +644,10 @@ def to_pickle(self, path): path : str File path where the pickled object will be stored. """ - import pickle with open(path, 'wb') as f: pickle.dump(self, f) + class PyGSPGraph(with_metaclass(abc.ABCMeta, pygsp.graphs.Graph, Base)): """Interface between BaseGraph and PyGSP. From e8c4fe4ee1898c0171604938f9999eef120b7608 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 1 Mar 2019 17:42:18 -0500 Subject: [PATCH 09/12] fix pygsp logger pickling issue --- graphtools/api.py | 8 ++++++-- graphtools/base.py | 7 +++++++ graphtools/graphs.py | 2 -- test/test_api.py | 38 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 49 insertions(+), 6 deletions(-) diff --git a/graphtools/api.py b/graphtools/api.py index 794d28f..3bb19c1 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -2,6 +2,8 @@ import warnings import tasklogger from scipy import sparse +import pickle +import pygsp from . import base from . import graphs @@ -293,10 +295,12 @@ def read_pickle(path): path : str File path where the pickled object will be loaded. """ - import pickle with open(path, 'rb') as f: G = pickle.load(f) if not isinstance(G, base.BaseGraph): - warnings.warn('Returning object that is not a graphtools.base.BaseGraph') + warnings.warn( + 'Returning object that is not a graphtools.base.BaseGraph') + elif isinstance(G, base.PyGSPGraph): + G.logger = pygsp.utils.build_logger(G.logger) return G diff --git a/graphtools/base.py b/graphtools/base.py index 6306935..d916db1 100644 --- a/graphtools/base.py +++ b/graphtools/base.py @@ -11,6 +11,7 @@ import numbers import tasklogger import pickle +import sys try: import pandas as pd @@ -644,8 +645,14 @@ def to_pickle(self, path): path : str File path where the pickled object will be stored. """ + if int(sys.version.split(".")[1]) < 7 and isinstance(self, pygsp.graphs.Graph): + # python 3.5, 3.6 + logger = self.logger + self.logger = logger.name with open(path, 'wb') as f: pickle.dump(self, f) + if int(sys.version.split(".")[1]) < 7 and isinstance(self, pygsp.graphs.Graph): + self.logger = logger class PyGSPGraph(with_metaclass(abc.ABCMeta, pygsp.graphs.Graph, Base)): diff --git a/graphtools/graphs.py b/graphtools/graphs.py index 298bc7e..77c0270 100644 --- a/graphtools/graphs.py +++ b/graphtools/graphs.py @@ -14,8 +14,6 @@ import tasklogger from .utils import (set_diagonal, - elementwise_minimum, - elementwise_maximum, set_submatrix) from .base import DataGraph, PyGSPGraph diff --git a/test/test_api.py b/test/test_api.py index 55abf59..e5ae0d8 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -82,14 +82,47 @@ def test_to_igraph(): assert np.all(np.array(G2.get_adjacency( attribute="weight").data) == G.W) -def test_pickle_io(): - G = build_graph(data, use_pygsp=True) + +def test_pickle_io_knngraph(): + G = build_graph(data, knn=5, decay=None) with tempfile.TemporaryDirectory() as tempdir: path = os.path.join(tempdir, 'tmp.pkl') G.to_pickle(path) G_prime = graphtools.read_pickle(path) assert isinstance(G_prime, type(G)) + +def test_pickle_io_traditionalgraph(): + G = build_graph(data, knn=5, decay=10, thresh=0) + with tempfile.TemporaryDirectory() as tempdir: + path = os.path.join(tempdir, 'tmp.pkl') + G.to_pickle(path) + G_prime = graphtools.read_pickle(path) + assert isinstance(G_prime, type(G)) + + +def test_pickle_io_landmarkgraph(): + G = build_graph(data, knn=5, decay=None, + n_landmark=data.shape[0] // 2) + L = G.landmark_op + with tempfile.TemporaryDirectory() as tempdir: + path = os.path.join(tempdir, 'tmp.pkl') + G.to_pickle(path) + G_prime = graphtools.read_pickle(path) + assert isinstance(G_prime, type(G)) + np.testing.assert_array_equal(L, G_prime._landmark_op) + + +def test_pickle_io_pygspgraph(): + G = build_graph(data, knn=5, decay=None, use_pygsp=True) + with tempfile.TemporaryDirectory() as tempdir: + path = os.path.join(tempdir, 'tmp.pkl') + G.to_pickle(path) + G_prime = graphtools.read_pickle(path) + assert isinstance(G_prime, type(G)) + assert G_prime.logger.name == G.logger.name + + @warns(UserWarning) def test_pickle_bad_pickle(): import pickle @@ -99,6 +132,7 @@ def test_pickle_bad_pickle(): pickle.dump('hello world', f) G = graphtools.read_pickle(path) + @warns(UserWarning) def test_to_pygsp_invalid_precomputed(): G = build_graph(data) From 1c53ee8e06625e22f75cc48a6249b4094369ff14 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 1 Mar 2019 17:44:58 -0500 Subject: [PATCH 10/12] bump version --- graphtools/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/version.py b/graphtools/version.py index 5becc17..6849410 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "1.0.0" +__version__ = "1.1.0" From 6abacc713478a1f514a281a5f06b59a51b0dd490 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 1 Mar 2019 17:49:34 -0500 Subject: [PATCH 11/12] only rebuild logger if it is a string --- graphtools/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/api.py b/graphtools/api.py index 3bb19c1..30a6f26 100644 --- a/graphtools/api.py +++ b/graphtools/api.py @@ -301,6 +301,6 @@ def read_pickle(path): if not isinstance(G, base.BaseGraph): warnings.warn( 'Returning object that is not a graphtools.base.BaseGraph') - elif isinstance(G, base.PyGSPGraph): + elif isinstance(G, base.PyGSPGraph) and isinstance(G.logger, str): G.logger = pygsp.utils.build_logger(G.logger) return G From 31ee73e9558968fe6138ade897b4644bbe31de92 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Fri, 1 Mar 2019 18:53:33 -0500 Subject: [PATCH 12/12] bump version --- graphtools/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/graphtools/version.py b/graphtools/version.py index 436b119..6849410 100644 --- a/graphtools/version.py +++ b/graphtools/version.py @@ -1 +1 @@ -__version__ = "1.0.1-alpha" +__version__ = "1.1.0"