From 0a03c6f95bdf638cf29074d045be21f52194427f Mon Sep 17 00:00:00 2001
From: dani
Date: Tue, 27 Mar 2018 16:43:43 -0700
Subject: [PATCH 1/9] use sp.special instead of sp.misc

---
 pygam/distributions.py | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/pygam/distributions.py b/pygam/distributions.py
index ff5acd33..f8ae4916 100644
--- a/pygam/distributions.py
+++ b/pygam/distributions.py
@@ -123,9 +123,9 @@ def __init__(self, scale=None):
         """
         super(NormalDist, self).__init__(name='normal', scale=scale)

-    def pdf(self, y, mu, weights=None):
+    def log_pdf(self, y, mu, weights=None):
         """
-        computes the pdf or pmf of the values under the current distribution
+        computes the log of the pdf or pmf of the values under the current distribution

         Parameters
         ----------
@@ -144,7 +144,7 @@ def pdf(self, y, mu, weights=None):
         if weights is None:
             weights = np.ones_like(mu)
         scale = self.scale / weights
-        return np.exp(-(y - mu)**2 / (2 * scale)) / (scale * 2 * np.pi)**0.5
+        return -(y - mu)**2 / (2 * scale) - 0.5 * np.log(scale * 2 * np.pi)

     @divide_weights
     def V(self, mu):
@@ -245,9 +245,9 @@ def __init__(self, levels=1):
         super(BinomialDist, self).__init__(name='binomial', scale=1.)
         self._exclude.append('scale')

-    def pdf(self, y, mu, weights=None):
+    def log_pdf(self, y, mu, weights=None):
         """
-        computes the pdf or pmf of the values under the current distribution
+        computes the log of the pdf or pmf of the values under the current distribution

         Parameters
         ----------
@@ -266,8 +266,8 @@ def pdf(self, y, mu, weights=None):
         if weights is None:
             weights = np.ones_like(mu)
         n = self.levels
-        return weights * (sp.misc.comb(n, y) * (mu / n)**y *
-                          (1 - (mu / n))**(n - y))
+        return (np.log(weights * sp.special.comb(n, y)) + np.log(mu / n) * y +
+                np.log(1 - (mu / n)) * (n - y))

     @divide_weights
     def V(self, mu):
@@ -352,9 +352,9 @@ def __init__(self):
         super(PoissonDist, self).__init__(name='poisson', scale=1.)
         self._exclude.append('scale')

-    def pdf(self, y, mu, weights=None):
+    def log_pdf(self, y, mu, weights=None):
         """
-        computes the pdf or pmf of the values under the current distribution
+        computes the log of the pdf or pmf of the values under the current distribution

         Parameters
         ----------
@@ -376,7 +376,7 @@ def pdf(self, y, mu, weights=None):
         # so we want to pump up all our predictions
         # NOTE: we assume the targets are unchanged
         mu = mu * weights
-        return (mu**y) * np.exp(-mu) / sp.misc.factorial(y)
+        return np.log(mu) * y - mu - np.log(sp.special.factorial(y))

     @divide_weights
     def V(self, mu):
@@ -459,9 +459,9 @@ def __init__(self, scale=None):
         """
         super(GammaDist, self).__init__(name='gamma', scale=scale)

-    def pdf(self, y, mu, weights=None):
+    def log_pdf(self, y, mu, weights=None):
         """
-        computes the pdf or pmf of the values under the current distribution
+        computes the log of the pdf or pmf of the values under the current distribution

         Parameters
         ----------
@@ -480,8 +480,8 @@ def pdf(self, y, mu, weights=None):
         if weights is None:
             weights = np.ones_like(mu)
         nu = weights / self.scale
-        return (1. / sp.special.gamma(nu) *
-                (nu / mu)**nu * y**(nu - 1) * np.exp(-nu * y / mu))
+        return (-np.log(sp.special.gamma(nu)) +
+                np.log(nu / mu) * nu + np.log(y)*(nu - 1) + -nu * y / mu)

     @divide_weights
     def V(self, mu):
@@ -570,9 +570,9 @@ def __init__(self, scale=None):
         """
         super(InvGaussDist, self).__init__(name='inv_gauss', scale=scale)

-    def pdf(self, y, mu, weights=None):
+    def log_pdf(self, y, mu, weights=None):
         """
-        computes the pdf or pmf of the values under the current distribution
+        computes the log of the pdf or pmf of the values under the current distribution

         Parameters
         ----------
@@ -591,8 +591,8 @@ def pdf(self, y, mu, weights=None):
         if weights is None:
             weights = np.ones_like(mu)
         gamma = weights / self.scale
-        return ((gamma / (2 * np.pi * y**3))**.5 *
-                np.exp(-gamma * (y - mu)**2 / (2 * mu**2 * y)))
+        return (0.5 * np.log(gamma / (2 * np.pi * y**3)) +
+                -gamma * (y - mu)**2 / (2 * mu**2 * y))

     @divide_weights
     def V(self, mu):
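Note: besides moving off the deprecated `sp.misc` helpers (`comb` and `factorial` now live in `scipy.special`), the switch from `pdf` to `log_pdf` above matters numerically. Computing `np.log(pdf(y))` underflows long before the log-density itself does, which corrupts log-likelihood sums. A minimal sketch of the failure mode for the normal case, where `scale` plays the role of the variance as in the hunk above:

    import numpy as np

    y, mu, scale = 40., 0., 1.
    pdf = np.exp(-(y - mu)**2 / (2 * scale)) / (scale * 2 * np.pi)**0.5
    print(np.log(pdf))  # -inf: exp(-800) underflows to 0.0 before the log
    print(-(y - mu)**2 / (2 * scale) - 0.5 * np.log(scale * 2 * np.pi))  # ~ -800.92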
From a94be032d1f58c2f08608eddc51ef0428a5a9a12 Mon Sep 17 00:00:00 2001
From: dani
Date: Tue, 27 Mar 2018 16:44:00 -0700
Subject: [PATCH 2/9] add verbosity option for warnings

---
 pygam/utils.py | 47 +++++++++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/pygam/utils.py b/pygam/utils.py
index fa9fd144..0bd0be09 100644
--- a/pygam/utils.py
+++ b/pygam/utils.py
@@ -24,7 +24,7 @@ class NotPositiveDefiniteError(ValueError):
     """

-def cholesky(A, sparse=True):
+def cholesky(A, sparse=True, verbose=True):
     """
     Choose the best possible cholesky factorizor.

@@ -39,6 +39,8 @@ def cholesky(A, sparse=True):
         array to decompose
     sparse : boolean, default: True
         whether to return a sparse array
+    verbose : bool, default: True
+        whether to print warnings
     """
     if SKSPIMPORT:
         A = sp.sparse.csc_matrix(A)
@@ -66,7 +68,8 @@ def cholesky(A, sparse=True):
               'monotonicity/convexity penalties and many splines.\n'\
               'See installation instructions for installing '\
               'Scikit-Sparse and Suite-Sparse via Conda.'
-        warnings.warn(msg)
+        if verbose:
+            warnings.warn(msg)

         if sp.sparse.issparse(A):
             A = A.todense()
@@ -146,7 +149,7 @@ def check_dtype(X, ratio=.95):
     return dtypes

-def make_2d(array):
+def make_2d(array, verbose=True):
     """
     tiny tool to expand 1D arrays the way i want

@@ -154,6 +157,9 @@ def make_2d(array):
     ----------
     array : array-like

+    verbose : bool, default: True
+        whether to print warnings
+
     Returns
     -------
     np.array of with ndim = 2
@@ -162,13 +168,14 @@ def make_2d(array):
     if array.ndim < 2:
         msg = 'Expected 2D input data array, but found {}D. '\
               'Expanding to 2D.'.format(array.ndim)
-        warnings.warn(msg)
+        if verbose:
+            warnings.warn(msg)
         array = np.atleast_1d(array)[:,None]
     return array

 def check_array(array, force_2d=False, n_feats=None, n_dims=None,
-                min_samples=1, name='Input data'):
+                min_samples=1, name='Input data', verbose=True):
     """
     tool to perform basic data validation.
     called by check_X and check_y.
@@ -193,6 +200,8 @@ def check_array(array, force_2d=False, n_feats=None, n_dims=None,
     min_samples : int, default: 1
     name : str, default: 'Input data'
         name to use when referring to the array
+    verbose : bool, default: True
+        whether to print warnings

     Returns
     -------
@@ -200,7 +209,7 @@ def check_array(array, force_2d=False, n_feats=None, n_dims=None,
     """
     # make array
     if force_2d:
-        array = make_2d(array)
+        array = make_2d(array, verbose=verbose)
         n_dims = 2
     else:
         array = np.array(array)
@@ -242,7 +251,7 @@ def check_array(array, force_2d=False, n_feats=None, n_dims=None,
     return array

-def check_y(y, link, dist, min_samples=1):
+def check_y(y, link, dist, min_samples=1, verbose=True):
     """
     tool to ensure that the targets:
     - are in the domain of the link function
@@ -256,6 +265,8 @@ def check_y(y, link, dist, min_samples=1):
     link : Link object
     dist : Distribution object
     min_samples : int, default: 1
+    verbose : bool, default: True
+        whether to print warnings

     Returns
     -------
@@ -264,7 +275,7 @@ def check_y(y, link, dist, min_samples=1):
     y = np.ravel(y)

     y = check_array(y, force_2d=False, min_samples=min_samples, n_dims=1,
-                    name='y data')
+                    name='y data', verbose=verbose)

     warnings.filterwarnings('ignore', 'divide by zero encountered in log')
     if np.any(np.isnan(link.link(y, dist))):
@@ -277,7 +288,8 @@ def check_y(y, link, dist, min_samples=1):
     return y

-def check_X(X, n_feats=None, min_samples=1, edge_knots=None, dtypes=None):
+def check_X(X, n_feats=None, min_samples=1, edge_knots=None, dtypes=None,
+            verbose=True):
     """
     tool to ensure that X:
     - is 2 dimensional
@@ -296,13 +308,15 @@ def check_X(X, n_feats=None, min_samples=1, edge_knots=None, dtypes=None):
     min_samples : int, default: 1
     edge_knots : list of arrays, default: None
     dtypes : list of strings, default: None
+    verbose : bool, default: True
+        whether to print warnings

     Returns
     -------
     X : array with ndims == 2 containing validated X-data
     """
     X = check_array(X, force_2d=True, n_feats=n_feats, min_samples=min_samples,
-                    name='X data')
+                    name='X data', verbose=verbose)

     if (edge_knots is not None) and (dtypes is not None):
         for i, (dt, ek, feat) in enumerate(zip(dtypes, edge_knots, X.T)):
@@ -505,7 +519,7 @@ def print_data(data_dict, width=-5, keep_decimals=3, fill=' ', title=None):
         filler = fill*(width - nk - nv)
         print(k + filler + v)

-def gen_edge_knots(data, dtype):
+def gen_edge_knots(data, dtype, verbose=True):
     """
     generate uniform knots from data including the edges of the data

@@ -515,6 +529,8 @@ def gen_edge_knots(data, dtype):
     ----------
     data : array-like with one dimension
     dtype : str in {'categorical', 'numerical'}
+    verbose : bool, default: True
+        whether to print warnings

     Returns
     -------
@@ -526,7 +542,7 @@ def gen_edge_knots(data, dtype):
         return np.r_[np.min(data) - 0.5, np.unique(data) + 0.5]
     else:
         knots = np.r_[np.min(data), np.max(data)]
-        if knots[0] == knots[1]:
+        if knots[0] == knots[1] and verbose:
             warnings.warn('Data contains constant feature. '\
                           'Consider removing and setting fit_intercept=True',
                           stacklevel=2)
@@ -534,7 +550,7 @@ def gen_edge_knots(data, dtype):

 def b_spline_basis(x, edge_knots, n_splines=20, spline_order=3, sparse=True,
-                   clamped=False):
+                   clamped=False, verbose=True):
     """
     tool to generate b-spline basis using vectorized De Boor recursion
     the basis functions extrapolate linearly past the end-knots.

@@ -560,6 +576,9 @@ def b_spline_basis(x, edge_knots, n_splines=20,
         on this property, like monotonicity and convexity are no longer
         valid.

+    verbose : bool, default: True
+        whether to print warnings
+
     Returns
     -------
     basis : sparse csc matrix or array containing b-spline basis functions
@@ -580,7 +599,7 @@ def b_spline_basis(x, edge_knots, n_splines=20,
                          'found: n_splines = {} and spline_order = {}'\
                          .format(n_splines, spline_order))

-    if n_splines == 0:
+    if n_splines == 0 and verbose:
         warnings.warn('Requested 1 spline. This is equivalent to '\
                       'fitting an intercept', stacklevel=2)
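Note: with the `verbose` flags above, warnings become opt-out at each call site instead of unconditional. A quick sketch of the intended usage, assuming the patched `pygam.utils`:

    import numpy as np
    from pygam.utils import make_2d

    x = np.arange(5.)
    make_2d(x)                 # warns: 'Expected 2D input data array, but found 1D...'
    make_2d(x, verbose=False)  # same (5, 1) result, but silent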
From 5ed87b7e1860e63e71660ac7e6e34324da31bce8 Mon Sep 17 00:00:00 2001
From: dani
Date: Tue, 27 Mar 2018 16:44:35 -0700
Subject: [PATCH 3/9] add verbosity attribute, use log_pdf instead of pdf for
 the log_likelihood

---
 pygam/pygam.py | 83 +++++++++++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 34 deletions(-)

diff --git a/pygam/pygam.py b/pygam/pygam.py
index c8f6bd52..fc0ac7bd 100644
--- a/pygam/pygam.py
+++ b/pygam/pygam.py
@@ -210,6 +210,9 @@ class GAM(Core):
     tol : float, default: 1e-4
         Tolerance for stopping criteria.

+    verbose : bool, default: False
+        whether to show pyGAM warnings
+
     Attributes
     ----------
     coef_ : array, shape (n_classes, m_features)
@@ -243,7 +246,7 @@ def __init__(self, lam=0.6, max_iter=100, n_splines=25, spline_order=3,
                  penalties='auto', tol=1e-4, distribution='normal',
                  link='identity', callbacks=['deviance', 'diffs'],
                  fit_intercept=True, fit_linear=False, fit_splines=True,
-                 dtype='auto', constraints=None):
+                 dtype='auto', constraints=None, verbose=False):

         self.max_iter = max_iter
         self.tol = tol
@@ -259,6 +262,7 @@ def __init__(self, lam=0.6, max_iter=100, n_splines=25, spline_order=3,
         self.fit_linear = fit_linear
         self.fit_splines = fit_splines
         self.dtype = dtype
+        self.verbose = verbose

         # created by other methods
         self._n_coeffs = []  # useful for indexing into model coefficients
@@ -496,7 +500,7 @@ def _validate_data_dep_params(self, X):
         for i, (dt, x) in enumerate(zip(self._dtype, X.T)):
             if dt == 'auto':
                 dt = check_dtype(x)[0]
-                if dt == 'categorical':
+                if dt == 'categorical' and self.verbose:
                     warnings.warn('detected catergorical data for feature {}'\
                                   .format(i), stacklevel=2)
             self._dtype[i] = dt
@@ -521,7 +525,7 @@ def _validate_data_dep_params(self, X):
         self._expand_attr('fit_splines', m_features)
         for i, (fl, c) in enumerate(zip(self._fit_linear, self._constraints)):
             if bool(c) and (c is not 'none'):
-                if fl:
+                if fl and self.verbose:
                     warnings.warn('cannot do fit_linear with constraints. '\
                                   'setting fit_linear=False for feature {}'\
                                   .format(i))
@@ -539,7 +543,7 @@ def _validate_data_dep_params(self, X):
         # expand spline_order, n_splines, and prepare edge_knots
         self._expand_attr('spline_order', X.shape[1], dt_alt=0)
         self._expand_attr('n_splines', X.shape[1], dt_alt=0)
-        self._edge_knots = [gen_edge_knots(feat, dtype) for feat, dtype in \
+        self._edge_knots = [gen_edge_knots(feat, dtype, verbose=self.verbose) for feat, dtype in \
                             zip(X.T, self._dtype)]

         # update our n_splines correcting for categorical features, no splines
@@ -580,11 +584,11 @@ def loglikelihood(self, X, y, weights=None):
         log-likelihood : np.array of shape (n,)
             containing log-likelihood scores
         """
-        y = check_y(y, self.link, self.distribution)
+        y = check_y(y, self.link, self.distribution, verbose=self.verbose)
         mu = self.predict_mu(X)

         if weights is not None:
-            weights = check_array(weights, name='sample weights')
+            weights = check_array(weights, name='sample weights', verbose=self.verbose)
             check_lengths(y, weights)
         else:
             weights = np.ones_like(y).astype('f')
@@ -609,7 +613,7 @@ def _loglikelihood(self, y, mu, weights=None):
         log-likelihood : np.array of shape (n,)
             containing log-likelihood scores
         """
-        return np.log(self.distribution.pdf(y=y, mu=mu, weights=weights)).sum()
+        return self.distribution.log_pdf(y=y, mu=mu, weights=weights).sum()

     def _linear_predictor(self, X=None, modelmat=None, b=None, feature=-1):
         """linear predictor
@@ -667,7 +671,8 @@ def predict_mu(self, X):
             raise AttributeError('GAM has not been fitted. Call fit first.')

         X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
-                    edge_knots=self._edge_knots, dtypes=self._dtype)
+                    edge_knots=self._edge_knots, dtypes=self._dtype,
+                    verbose=self.verbose)

         lp = self._linear_predictor(X)
         return self.link.mu(lp, self.distribution)
@@ -691,7 +696,8 @@ def predict(self, X):
             raise AttributeError('GAM has not been fitted. Call fit first.')

         X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
-                    edge_knots=self._edge_knots, dtypes=self._dtype)
+                    edge_knots=self._edge_knots, dtypes=self._dtype,
+                    verbose=self.verbose)

         return self.predict_mu(X)
@@ -715,7 +721,8 @@ def _modelmat(self, X, feature=-1):
             containing model matrix of the spline basis for selected features
         """
         X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
-                    edge_knots=self._edge_knots, dtypes=self._dtype)
+                    edge_knots=self._edge_knots, dtypes=self._dtype,
+                    verbose=self.verbose)

         if feature >= len(self._n_coeffs) or feature < -1:
             raise ValueError('feature {} out of range for X with shape {}'\
@@ -742,7 +749,8 @@ def _modelmat(self, X, feature=-1):
                                edge_knots=self._edge_knots[feature],
                                spline_order=self._spline_order[feature],
                                n_splines=self._n_splines[feature],
-                               sparse=True))
+                               sparse=True,
+                               verbose=self.verbose))

         return sp.sparse.hstack(featuremat, format='csc')
@@ -770,13 +778,14 @@ def _cholesky(self, A, **kwargs):
         constraint_l2 = self._constraint_l2
         while constraint_l2 <= self._constraint_l2_max:
             try:
-                L = cholesky(A, **kwargs)
+                L = cholesky(A, verbose=self.verbose, **kwargs)
                 self._constraint_l2 = constraint_l2
                 return L
             except NotPositiveDefiniteError:
-                warnings.warn('Matrix is not positive definite. \n'\
-                              'Increasing l2 reg by factor of 10.',
-                              stacklevel=2)
+                if self.verbose:
+                    warnings.warn('Matrix is not positive definite. \n'\
+                                  'Increasing l2 reg by factor of 10.',
+                                  stacklevel=2)
                 A -= constraint_l2 * diag
                 constraint_l2 *= 10
                 A += constraint_l2 * diag
@@ -1187,12 +1196,12 @@ def fit(self, X, y, weights=None):
         self._validate_params()

         # validate data
-        y = check_y(y, self.link, self.distribution)
-        X = check_X(X)
+        y = check_y(y, self.link, self.distribution, verbose=self.verbose)
+        X = check_X(X, verbose=self.verbose)
         check_X_y(X, y)

         if weights is not None:
-            weights = check_array(weights, name='sample weights')
+            weights = check_array(weights, name='sample weights', verbose=self.verbose)
             check_lengths(y, weights)
         else:
             weights = np.ones_like(y).astype('f')
@@ -1237,13 +1246,14 @@ def deviance_residuals(self, X, y, weights=None, scaled=False):
         if not self._is_fitted:
             raise AttributeError('GAM has not been fitted. Call fit first.')

-        y = check_y(y, self.link, self.distribution)
+        y = check_y(y, self.link, self.distribution, verbose=self.verbose)
         X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
-                    edge_knots=self._edge_knots, dtypes=self._dtype)
+                    edge_knots=self._edge_knots, dtypes=self._dtype,
+                    verbose=self.verbose)
         check_X_y(X, y)

         if weights is not None:
-            weights = check_array(weights, name='sample weights')
+            weights = check_array(weights, name='sample weights', verbose=self.verbose)
             check_lengths(y, weights)
         else:
             weights = np.ones_like(y).astype('f')
@@ -1521,7 +1531,8 @@ def confidence_intervals(self, X, width=.95, quantiles=None):
             raise AttributeError('GAM has not been fitted. Call fit first.')

         X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
-                    edge_knots=self._edge_knots, dtypes=self._dtype)
+                    edge_knots=self._edge_knots, dtypes=self._dtype,
+                    verbose=self.verbose)

         return self._get_quantiles(X, width, quantiles, prediction=False)
@@ -1666,7 +1677,7 @@ def partial_dependence(self, X, feature=-1, width=None, quantiles=None):
         m = len(self._n_coeffs) - self._fit_intercept
         X = check_X(X, n_feats=m,
                     edge_knots=self._edge_knots,
-                    dtypes=self._dtype)
+                    dtypes=self._dtype, verbose=self.verbose)

         p_deps = []

         compute_quantiles = (width is not None) or (quantiles is not None)
@@ -1797,12 +1808,12 @@ def gridsearch(self, X, y, weights=None, return_scores=False,
         if not self._is_fitted:
             self._validate_params()

-        y = check_y(y, self.link, self.distribution)
-        X = check_X(X)
+        y = check_y(y, self.link, self.distribution, verbose=self.verbose)
+        X = check_X(X, verbose=self.verbose)
         check_X_y(X, y)

         if weights is not None:
-            weights = check_array(weights, name='sample weights')
+            weights = check_array(weights, name='sample weights', verbose=self.verbose)
             check_lengths(y, weights)
         else:
             weights = np.ones_like(y).astype('f')
@@ -1899,7 +1910,8 @@ def gridsearch(self, X, y, weights=None, return_scores=False,
             except ValueError as error:
                 msg = str(error) + '\non model:\n' + str(gam)
                 msg += '\nskipping...\n'
-                warnings.warn(msg)
+                if self.verbose:
+                    warnings.warn(msg)
                 continue

             # record results
@@ -2376,7 +2388,8 @@ def prediction_intervals(self, X, width=.95, quantiles=None):
             raise AttributeError('GAM has not been fitted. Call fit first.')

         X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
-                    edge_knots=self._edge_knots, dtypes=self._dtype)
+                    edge_knots=self._edge_knots, dtypes=self._dtype,
+                    verbose=self.verbose)

         return self._get_quantiles(X, width, quantiles, prediction=True)
@@ -2557,10 +2570,11 @@ def accuracy(self, X=None, y=None, mu=None):
         if not self._is_fitted:
             raise AttributeError('GAM has not been fitted. Call fit first.')

-        y = check_y(y, self.link, self.distribution)
+        y = check_y(y, self.link, self.distribution, verbose=self.verbose)
         if X is not None:
             X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
-                        edge_knots=self._edge_knots, dtypes=self._dtype)
+                        edge_knots=self._edge_knots, dtypes=self._dtype,
+                        verbose=self.verbose)

         if mu is None:
             mu = self.predict_mu(X)
@@ -2778,7 +2792,7 @@ def _loglikelihood(self, y, mu, weights=None, rescale_y=True):
         if rescale_y:
             y = y * weights

-        return np.log(self.distribution.pdf(y=y, mu=mu, weights=weights)).sum()
+        return self.distribution.log_pdf(y=y, mu=mu, weights=weights).sum()

     def loglikelihood(self, X, y, exposure=None, weights=None):
         """
@@ -2801,11 +2815,11 @@ def loglikelihood(self, X, y, exposure=None, weights=None):
         log-likelihood : np.array of shape (n,)
             containing log-likelihood scores
         """
-        y = check_y(y, self.link, self.distribution)
+        y = check_y(y, self.link, self.distribution, verbose=self.verbose)
         mu = self.predict_mu(X)

         if weights is not None:
-            weights = check_array(weights, name='sample weights')
+            weights = check_array(weights, name='sample weights', verbose=self.verbose)
             check_lengths(y, weights)
         else:
             weights = np.ones_like(y).astype('f')
@@ -2908,7 +2922,8 @@ def predict(self, X, exposure=None):
             raise AttributeError('GAM has not been fitted. Call fit first.')

         X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept,
-                    edge_knots=self._edge_knots, dtypes=self._dtype)
+                    edge_knots=self._edge_knots, dtypes=self._dtype,
+                    verbose=self.verbose)

         if exposure is not None:
             exposure = np.array(exposure).astype('f')
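Note: patch 3 threads a single `self.verbose` attribute through every `check_*` and basis call, so one constructor argument now controls all pyGAM warnings, and the log-likelihood is computed from `log_pdf` directly rather than `np.log(pdf)`. A minimal sketch of the resulting API, assuming the patched `GAM`:

    import numpy as np
    from pygam import GAM

    X = np.random.rand(200, 2)
    y = X.sum(axis=1) + np.random.rand(200)

    gam = GAM(verbose=False).fit(X, y)  # no pyGAM warnings during fit
    print(gam.loglikelihood(X, y))      # summed via distribution.log_pdf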
\n'\ + 'Increasing l2 reg by factor of 10.', + stacklevel=2) A -= constraint_l2 * diag constraint_l2 *= 10 A += constraint_l2 * diag @@ -1187,12 +1196,12 @@ def fit(self, X, y, weights=None): self._validate_params() # validate data - y = check_y(y, self.link, self.distribution) - X = check_X(X) + y = check_y(y, self.link, self.distribution, verbose=self.verbose) + X = check_X(X, verbose=self.verbose) check_X_y(X, y) if weights is not None: - weights = check_array(weights, name='sample weights') + weights = check_array(weights, name='sample weights', verbose=self.verbose) check_lengths(y, weights) else: weights = np.ones_like(y).astype('f') @@ -1237,13 +1246,14 @@ def deviance_residuals(self, X, y, weights=None, scaled=False): if not self._is_fitted: raise AttributeError('GAM has not been fitted. Call fit first.') - y = check_y(y, self.link, self.distribution) + y = check_y(y, self.link, self.distribution, verbose=self.verbose) X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept, - edge_knots=self._edge_knots, dtypes=self._dtype) + edge_knots=self._edge_knots, dtypes=self._dtype, + verbose=self.verbose) check_X_y(X, y) if weights is not None: - weights = check_array(weights, name='sample weights') + weights = check_array(weights, name='sample weights', verbose=self.verbose) check_lengths(y, weights) else: weights = np.ones_like(y).astype('f') @@ -1521,7 +1531,8 @@ def confidence_intervals(self, X, width=.95, quantiles=None): raise AttributeError('GAM has not been fitted. Call fit first.') X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept, - edge_knots=self._edge_knots, dtypes=self._dtype) + edge_knots=self._edge_knots, dtypes=self._dtype, + verbose=self.verbose) return self._get_quantiles(X, width, quantiles, prediction=False) @@ -1666,7 +1677,7 @@ def partial_dependence(self, X, feature=-1, width=None, quantiles=None): m = len(self._n_coeffs) - self._fit_intercept X = check_X(X, n_feats=m, edge_knots=self._edge_knots, - dtypes=self._dtype) + dtypes=self._dtype, verbose=self.verbose) p_deps = [] compute_quantiles = (width is not None) or (quantiles is not None) @@ -1797,12 +1808,12 @@ def gridsearch(self, X, y, weights=None, return_scores=False, if not self._is_fitted: self._validate_params() - y = check_y(y, self.link, self.distribution) - X = check_X(X) + y = check_y(y, self.link, self.distribution, verbose=self.verbose) + X = check_X(X, verbose=self.verbose) check_X_y(X, y) if weights is not None: - weights = check_array(weights, name='sample weights') + weights = check_array(weights, name='sample weights', verbose=self.verbose) check_lengths(y, weights) else: weights = np.ones_like(y).astype('f') @@ -1899,7 +1910,8 @@ def gridsearch(self, X, y, weights=None, return_scores=False, except ValueError as error: msg = str(error) + '\non model:\n' + str(gam) msg += '\nskipping...\n' - warnings.warn(msg) + if self.verbose: + warnings.warn(msg) continue # record results @@ -2376,7 +2388,8 @@ def prediction_intervals(self, X, width=.95, quantiles=None): raise AttributeError('GAM has not been fitted. Call fit first.') X = check_X(X, n_feats=len(self._n_coeffs) - self._fit_intercept, - edge_knots=self._edge_knots, dtypes=self._dtype) + edge_knots=self._edge_knots, dtypes=self._dtype, + verbose=self.verbose) return self._get_quantiles(X, width, quantiles, prediction=True) @@ -2557,10 +2570,11 @@ def accuracy(self, X=None, y=None, mu=None): if not self._is_fitted: raise AttributeError('GAM has not been fitted. 
From fda20ba94d340ba0612fd8c8c97cd86d5c8d918c Mon Sep 17 00:00:00 2001
From: dani
Date: Wed, 28 Mar 2018 22:27:49 -0700
Subject: [PATCH 5/9] change weights to use float64, gridsearch can block
 progressbar

---
 pygam/pygam.py | 78 +++++++++++++++++++++++++++++---------------------
 1 file changed, 45 insertions(+), 33 deletions(-)

diff --git a/pygam/pygam.py b/pygam/pygam.py
index fc0ac7bd..aa2705ad 100644
--- a/pygam/pygam.py
+++ b/pygam/pygam.py
@@ -591,7 +591,7 @@ def loglikelihood(self, X, y, weights=None):
             weights = check_array(weights, name='sample weights', verbose=self.verbose)
             check_lengths(y, weights)
         else:
-            weights = np.ones_like(y).astype('f')
+            weights = np.ones_like(y).astype('float64')

         return self._loglikelihood(y, mu, weights=weights)
@@ -987,12 +987,12 @@ def _pirls(self, X, Y, weights):
             self.coef_ = np.ones(m) * np.sqrt(EPS)  # allow more training

         # make a reasonable initial parameter guess
-        if self._fit_intercept:
-            # set the intercept as if we had a constant model
-            const_model = (self.link.link(Y, self.distribution))
-            if np.isfinite(const_model).sum() > 0:
-                const_model = np.median(const_model[np.isfinite(const_model)])
-                self.coef_[0] += const_model
+        # if self._fit_intercept:
+        #     # set the intercept as if we had a constant model
+        #     const_model = (self.link.link(Y, self.distribution))
+        #     if np.isfinite(const_model).sum() > 0:
+        #         const_model = np.median(const_model[np.isfinite(const_model)])
+        #         self.coef_[0] += const_model

         # do our penalties require recomputing cholesky?
         chol_pen = np.ravel([np.ravel(p) for p in self._penalties])
@@ -1204,7 +1204,7 @@ def fit(self, X, y, weights=None):
             weights = check_array(weights, name='sample weights', verbose=self.verbose)
             check_lengths(y, weights)
         else:
-            weights = np.ones_like(y).astype('f')
+            weights = np.ones_like(y).astype('float64')

         # validate data-dependent parameters
         self._validate_data_dep_params(X)
@@ -1256,7 +1256,7 @@ def deviance_residuals(self, X, y, weights=None, scaled=False):
             weights = check_array(weights, name='sample weights', verbose=self.verbose)
             check_lengths(y, weights)
         else:
-            weights = np.ones_like(y).astype('f')
+            weights = np.ones_like(y).astype('float64')

         mu = self.predict_mu(X)
         sign = np.sign(y-mu)
@@ -1426,9 +1426,9 @@ def _estimate_r2(self, X=None, y=None, mu=None, weights=None):
             mu = self.predict_mu_(X=X)

         if weights is None:
-            weights = np.ones_like(y)
+            weights = np.ones_like(y).astype('float64')

-        null_mu = y.mean() * np.ones_like(y)
+        null_mu = y.mean() * np.ones_like(y).astype('float64')

         null_d = self.distribution.deviance(y=y, mu=null_mu, weights=weights)
         full_d = self.distribution.deviance(y=y, mu=mu, weights=weights)
@@ -1438,7 +1438,9 @@ def _estimate_r2(self, X=None, y=None, mu=None, weights=None):

         r2 = OrderedDict()
         r2['explained_deviance'] = 1. - full_d.sum()/null_d.sum()
-        r2['McFadden'] = 1. - full_ll/null_ll
+        print(full_ll)
+        print(null_ll)
+        r2['McFadden'] = full_ll/null_ll
         r2['McFadden_adj'] = 1. - (full_ll - self.statistics_['edof'])/null_ll

         return r2
@@ -1489,7 +1491,7 @@ def _estimate_GCV_UBRE(self, X=None, y=None, modelmat=None, gamma=1.4,
             modelmat = self._modelmat(X)

         if weights is None:
-            weights = np.ones_like(y)
+            weights = np.ones_like(y).astype('float64')

         lp = self._linear_predictor(modelmat=modelmat)
         mu = self.link.mu(lp, self.distribution)
@@ -1744,7 +1746,8 @@ def summary(self):
         print_data(self.statistics_['pseudo_r2'], title='Pseudo-R^2')

     def gridsearch(self, X, y, weights=None, return_scores=False,
-                   keep_best=True, objective='auto', **param_grids):
+                   keep_best=True, objective='auto', progress=True,
+                   **param_grids):
         """
         performs a grid search over a space of parameters for a given objective
@@ -1772,27 +1775,30 @@ def gridsearch(self, X, y, weights=None, return_scores=False,
             if None, defaults to array of ones

         return_scores : boolean, default False
-          whether to return the hyperpamaters
-          and score for each element in the grid
+            whether to return the hyperpamaters
+            and score for each element in the grid

         keep_best : boolean
-          whether to keep the best GAM as self.
-          default: True
+            whether to keep the best GAM as self.
+            default: True

         objective : string, default: 'auto'
-          metric to optimize. must be in ['AIC', 'AICc', 'GCV', 'UBRE', 'auto']
-          if 'auto', then grid search will optimize GCV for models with unknown
-          scale and UBRE for models with known scale.
+            metric to optimize. must be in ['AIC', 'AICc', 'GCV', 'UBRE', 'auto']
+            if 'auto', then grid search will optimize GCV for models with unknown
+            scale and UBRE for models with known scale.
+
+        progress : bool, default: True
+            whether to display a progress bar

         **kwargs : dict, default {'lam': np.logspace(-3, 3, 11)}
-          pairs of parameters and iterables of floats, or
-          parameters and iterables of iterables of floats.
+            pairs of parameters and iterables of floats, or
+            parameters and iterables of iterables of floats.

-          if iterable of iterables of floats, the outer iterable must have
-          length m_features.
+            if iterable of iterables of floats, the outer iterable must have
+            length m_features.

-          the method will make a grid of all the combinations of the parameters
-          and fit a GAM to each combination.
+            the method will make a grid of all the combinations of the parameters
+            and fit a GAM to each combination.

         Returns
@@ -1816,7 +1822,7 @@ def gridsearch(self, X, y, weights=None, return_scores=False,
             weights = check_array(weights, name='sample weights', verbose=self.verbose)
             check_lengths(y, weights)
         else:
-            weights = np.ones_like(y).astype('f')
+            weights = np.ones_like(y).astype('float64')

         # validate objective
         if objective not in ['auto', 'GCV', 'UBRE', 'AIC', 'AICc']:
@@ -1889,8 +1895,13 @@ def gridsearch(self, X, y, weights=None, return_scores=False,
             best_model = models[-1]
             best_score = scores[-1]

+        # make progressbar optional
+        if progress:
+            pbar = ProgressBar()
+        else:
+            pbar = lambda x: x
+
         # loop through candidate model params
-        pbar = ProgressBar()
         for param_grid in pbar(param_grid_list):

             # define new model
@@ -1926,7 +1937,8 @@ def gridsearch(self, X, y, weights=None, return_scores=False,
         # problems
         if len(models) == 0:
             msg = 'No models were fitted.'
-            warnings.warn(msg)
+            if self.verbose:
+                warnings.warn(msg)
             return self

         # copy over the best
@@ -2822,7 +2834,7 @@ def loglikelihood(self, X, y, exposure=None, weights=None):
             weights = check_array(weights, name='sample weights', verbose=self.verbose)
             check_lengths(y, weights)
         else:
-            weights = np.ones_like(y).astype('f')
+            weights = np.ones_like(y).astype('float64')

         y, weights = self._exposure_to_weights(y, exposure, weights)
         return self._loglikelihood(y, mu, weights=weights, rescale_y=True)
@@ -2852,7 +2864,7 @@ def _exposure_to_weights(self, y, exposure=None, weights=None):
         if exposure is not None:
             exposure = np.array(exposure).astype('f')
         else:
-            exposure = np.ones_like(y).astype('f')
+            exposure = np.ones_like(y).astype('float64')
         check_lengths(y, exposure)

         # normalize response
@@ -2861,7 +2873,7 @@ def _exposure_to_weights(self, y, exposure=None, weights=None):
         if weights is not None:
             weights = np.array(weights).astype('f')
         else:
-            weights = np.ones_like(y).astype('float64')
         check_lengths(weights, exposure)

         # set exposure as the weight
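Note: two independent fixes travel in patch 5. First, `.astype('f')` yields float32 default weights, and accumulating deviances and log-likelihoods in float32 loses precision, hence the move to 'float64'. Second, `gridsearch` gains a `progress` flag so the `ProgressBar` can be blocked in headless or logged runs. A sketch of both, assuming the patched API:

    import numpy as np
    from pygam import GAM

    y = np.zeros(10)                                 # float64
    print(np.ones_like(y).astype('f').dtype)         # float32 -- the old default weights
    print(np.ones_like(y).astype('float64').dtype)   # float64 -- the new ones

    X = np.random.rand(10, 1)
    gam = GAM(verbose=False).gridsearch(X, y, progress=False,
                                        lam=np.logspace(-3, 3, 5))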
From 48fd76fb14a217ce56505ca11f11ff8a7e994fba Mon Sep 17 00:00:00 2001
From: dani
Date: Tue, 3 Apr 2018 13:22:32 -0700
Subject: [PATCH 6/9] ignore log warnings in check link-domain, verbose false
 in tests_utils

---
 pygam/tests/test_utils.py | 18 +++++++++---------
 pygam/utils.py            |  1 +
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/pygam/tests/test_utils.py b/pygam/tests/test_utils.py
index 61260d0e..30db0a3f 100644
--- a/pygam/tests/test_utils.py
+++ b/pygam/tests/test_utils.py
@@ -66,32 +66,32 @@ def test_check_y_not_min_samples(wage, wage_gam):
     """check_y expects a minimum number of samples"""
     X, y = wage
     try:
-        check_y(y, wage_gam.link, wage_gam.distribution, min_samples=len(y)+1)
+        check_y(y, wage_gam.link, wage_gam.distribution, min_samples=len(y)+1, verbose=False)
         assert False
     except ValueError:
-        check_y(y, wage_gam.link, wage_gam.distribution, min_samples=len(y))
+        check_y(y, wage_gam.link, wage_gam.distribution, min_samples=len(y), verbose=False)
         assert True

-def test_check_y_not_in_doamin_link(default, default_gam):
+def test_check_y_not_in_domain_link(default, default_gam):
     """if you give labels outide of the links domain, check_y will raise an error"""
     X, y = default
     gam = default_gam

     try:
-        check_y(y + .1, default_gam.link, default_gam.distribution)
+        check_y(y + .1, default_gam.link, default_gam.distribution, verbose=False)
         assert False
     except ValueError:
-        check_y(y, default_gam.link, default_gam.distribution)
+        check_y(y, default_gam.link, default_gam.distribution, verbose=False)
         assert True

 def test_check_X_not_int_not_float():
     """X must be an in or a float"""
     try:
-        check_X(['hi'])
+        check_X(['hi'], verbose=False)
         assert False
     except ValueError:
-        check_X([4])
+        check_X([4], verbose=False)
         assert True

 def test_check_X_too_many_dims():
@@ -105,10 +105,10 @@ def test_check_X_too_many_dims():

 def test_check_X_not_min_samples():
     try:
-        check_X(np.ones((5)), min_samples=6)
+        check_X(np.ones((5)), min_samples=6, verbose=False)
         assert False
     except ValueError:
-        check_X(np.ones((5)), min_samples=5)
+        check_X(np.ones((5)), min_samples=5, verbose=False)
         assert True

 def test_check_X_y_different_lengths():
diff --git a/pygam/utils.py b/pygam/utils.py
index 0bd0be09..66e1537e 100644
--- a/pygam/utils.py
+++ b/pygam/utils.py
@@ -278,6 +278,7 @@ def check_y(y, link, dist, min_samples=1, verbose=True):
                     name='y data', verbose=verbose)

     warnings.filterwarnings('ignore', 'divide by zero encountered in log')
+    warnings.filterwarnings('ignore', 'invalid value encountered in log')
     if np.any(np.isnan(link.link(y, dist))):
         raise ValueError('y data is not in domain of {} link function. ' \
                          'Expected domain: {}, but found {}' \
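Note: `check_y` decides whether `y` is in the link's domain by probing `link.link(y, dist)` for NaNs, so numpy's RuntimeWarnings from `np.log` are pure noise here. The added filter mirrors the existing divide-by-zero one:

    import numpy as np
    import warnings

    warnings.filterwarnings('ignore', 'divide by zero encountered in log')
    warnings.filterwarnings('ignore', 'invalid value encountered in log')

    print(np.log(0.))   # -inf, warning suppressed
    print(np.log(-1.))  # nan,  warning suppressed -- caught later by np.isnan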
From 1f59906ab2ff0fcda7cc4f3bd8243941c3d4d601 Mon Sep 17 00:00:00 2001
From: dani
Date: Tue, 3 Apr 2018 15:34:57 -0700
Subject: [PATCH 7/9] replace all dist logpdfs with scipy implementation
 (except invgauss)

---
 pygam/distributions.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/pygam/distributions.py b/pygam/distributions.py
index f8ae4916..cee6780b 100644
--- a/pygam/distributions.py
+++ b/pygam/distributions.py
@@ -144,7 +144,7 @@ def log_pdf(self, y, mu, weights=None):
         if weights is None:
             weights = np.ones_like(mu)
         scale = self.scale / weights
-        return -(y - mu)**2 / (2 * scale) - 0.5 * np.log(scale * 2 * np.pi)
+        return sp.stats.norm.logpdf(y, loc=mu, scale=scale)

     @divide_weights
     def V(self, mu):
@@ -233,7 +233,7 @@ def __init__(self, levels=1):
         Parameters
         ----------
         levels : int of None, default: 1
-            number of levels in the binomial distribution
+            number of trials in the binomial distribution

         Returns
         -------
@@ -266,8 +266,8 @@ def log_pdf(self, y, mu, weights=None):
         if weights is None:
             weights = np.ones_like(mu)
         n = self.levels
-        return (np.log(weights * sp.special.comb(n, y)) + np.log(mu / n) * y +
-                np.log(1 - (mu / n)) * (n - y))
+        p = mu / self.levels
+        return sp.stats.binom.logpmf(y, n, p)

     @divide_weights
     def V(self, mu):
@@ -376,7 +376,7 @@ def log_pdf(self, y, mu, weights=None):
         # so we want to pump up all our predictions
         # NOTE: we assume the targets are unchanged
         mu = mu * weights
-        return np.log(mu) * y - mu - np.log(sp.special.factorial(y))
+        return sp.stats.poisson.logpmf(y, mu=mu)

     @divide_weights
     def V(self, mu):
@@ -480,8 +480,7 @@ def log_pdf(self, y, mu, weights=None):
         if weights is None:
             weights = np.ones_like(mu)
         nu = weights / self.scale
-        return (-np.log(sp.special.gamma(nu)) +
-                np.log(nu / mu) * nu + np.log(y)*(nu - 1) + -nu * y / mu)
+        return sp.stats.gamma.logpdf(x=y, a=nu, scale=mu / nu)

     @divide_weights
     def V(self, mu):
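Note: delegating to `sp.stats` removes the hand-rolled formulas, and equivalence is easy to spot-check. A sketch for the Poisson case, which agrees with the replaced expression (for the normal case, keep in mind that `sp.stats.norm.logpdf` expects a standard deviation for `scale`, while the variable `scale` in the hunk above is derived from a variance, so that mapping is worth double-checking):

    import numpy as np
    import scipy as sp
    import scipy.special
    import scipy.stats

    y = np.arange(5.)
    mu = np.full(5, 2.)
    by_hand = np.log(mu) * y - mu - np.log(sp.special.factorial(y))
    print(np.allclose(by_hand, sp.stats.poisson.logpmf(y, mu=mu)))  # True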
From c220798e9574d4190c079fc4dc36384df43d45a0 Mon Sep 17 00:00:00 2001
From: dani
Date: Tue, 3 Apr 2018 15:39:53 -0700
Subject: [PATCH 8/9] use scipy logpdf for invgauss dist

---
 pygam/distributions.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pygam/distributions.py b/pygam/distributions.py
index cee6780b..79ead1e9 100644
--- a/pygam/distributions.py
+++ b/pygam/distributions.py
@@ -590,8 +590,7 @@ def log_pdf(self, y, mu, weights=None):
         if weights is None:
             weights = np.ones_like(mu)
         gamma = weights / self.scale
-        return (0.5 * np.log(gamma / (2 * np.pi * y**3)) +
-                -gamma * (y - mu)**2 / (2 * mu**2 * y))
+        return sp.stats.invgauss.logpdf(y, mu, scale=1./gamma)

     @divide_weights
     def V(self, mu):

From 1195a908e55dba1522d54bed11774c71ff168e1c Mon Sep 17 00:00:00 2001
From: dani
Date: Tue, 3 Apr 2018 15:40:06 -0700
Subject: [PATCH 9/9] remove diagnostic print

---
 pygam/pygam.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pygam/pygam.py b/pygam/pygam.py
index aa2705ad..048b46b2 100644
--- a/pygam/pygam.py
+++ b/pygam/pygam.py
@@ -1438,8 +1438,6 @@ def _estimate_r2(self, X=None, y=None, mu=None, weights=None):

         r2 = OrderedDict()
         r2['explained_deviance'] = 1. - full_d.sum()/null_d.sum()
-        print(full_ll)
-        print(null_ll)
         r2['McFadden'] = full_ll/null_ll
         r2['McFadden_adj'] = 1. - (full_ll - self.statistics_['edof'])/null_ll
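Note: the inverse-Gaussian call deserves the same spot-check as the others, since scipy's `invgauss` takes a dimensionless shape parameter plus a `scale`. Comparing the closed form that patch 8 replaced against scipy directly, the mapping that reproduces it for general `gamma` appears to be shape `mu/gamma` with `scale=gamma`; the `invgauss.logpdf(y, mu, scale=1./gamma)` form used above coincides with it only when `gamma == 1` (the unweighted, unit-scale case). A quick numerical check:

    import numpy as np
    import scipy as sp
    import scipy.stats

    y, mu, gamma = 1.3, 1.0, 2.0
    closed_form = (0.5 * np.log(gamma / (2 * np.pi * y**3))
                   - gamma * (y - mu)**2 / (2 * mu**2 * y))
    print(closed_form)                                           # ~ -1.0352
    print(sp.stats.invgauss.logpdf(y, mu / gamma, scale=gamma))  # matches
    print(sp.stats.invgauss.logpdf(y, mu, scale=1. / gamma))     # differs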