diff --git a/pymc3_hmm/distributions.py b/pymc3_hmm/distributions.py
index 1c7ed45..0b8ebad 100644
--- a/pymc3_hmm/distributions.py
+++ b/pymc3_hmm/distributions.py
@@ -1,21 +1,9 @@
 import warnings
 
 import numpy as np
-
-try:  # pragma: no cover
-    import aesara
-    import aesara.tensor as at
-    from aesara.graph.op import get_test_value
-    from aesara.scalar import upcast
-    from aesara.tensor.extra_ops import broadcast_to as at_broadcast_to
-except ImportError:  # pragma: no cover
-    import theano as aesara
-    import theano.tensor as at
-    from theano.graph.op import get_test_value
-    from theano.scalar import upcast
-    from theano.tensor.extra_ops import broadcast_to as at_broadcast_to
-
 import pymc3 as pm
+import theano
+import theano.tensor as tt
 from pymc3.distributions.distribution import (
     Distribution,
     _DrawValuesContext,
@@ -23,6 +11,9 @@
     generate_samples,
 )
 from pymc3.distributions.mixture import _conversion_map, all_discrete
+from theano.graph.op import get_test_value
+from theano.scalar import upcast
+from theano.tensor.extra_ops import broadcast_to as tt_broadcast_to
 
 from pymc3_hmm.utils import tt_broadcast_arrays, vsearchsorted
 
@@ -62,7 +53,7 @@ def distribution_subset_args(dist, shape, idx):
     res = dict()
     for param in dist_param_names:
 
-        bcast_res = at_broadcast_to(getattr(dist, param), shape)
+        bcast_res = tt_broadcast_to(getattr(dist, param), shape)
 
         res[param] = bcast_res[idx]
 
@@ -110,7 +101,7 @@ def __init__(self, comp_dists, states, *args, **kwargs):
             equal to the size of `comp_dists`.
 
         """
-        self.states = at.as_tensor_variable(pm.intX(states))
+        self.states = tt.as_tensor_variable(pm.intX(states))
 
         if len(comp_dists) > 31:
             warnings.warn(
@@ -136,7 +127,7 @@ def __init__(self, comp_dists, states, *args, **kwargs):
             bcast_means = tt_broadcast_arrays(
                 *([self.states] + [d.mean.astype(dtype) for d in self.comp_dists])
             )
-            self.mean = at.choose(self.states, bcast_means[1:])
+            self.mean = tt.choose(self.states, bcast_means[1:])
 
             if "mean" not in defaults:
                 defaults.append("mean")
@@ -148,7 +139,7 @@ def __init__(self, comp_dists, states, *args, **kwargs):
             bcast_modes = tt_broadcast_arrays(
                 *([self.states] + [d.mode.astype(dtype) for d in self.comp_dists])
             )
-            self.mode = at.choose(self.states, bcast_modes[1:])
+            self.mode = tt.choose(self.states, bcast_modes[1:])
 
             if "mode" not in defaults:
                 defaults.append("mode")
@@ -161,16 +152,16 @@ def __init__(self, comp_dists, states, *args, **kwargs):
 
     def logp(self, obs):
         """Return the Theano log-likelihood at a point."""
-        obs_tt = at.as_tensor_variable(obs)
+        obs_tt = tt.as_tensor_variable(obs)
 
-        logp_val = at.alloc(-np.inf, *obs.shape)
+        logp_val = tt.alloc(-np.inf, *obs.shape)
 
         for i, dist in enumerate(self.comp_dists):
-            i_mask = at.eq(self.states, i)
+            i_mask = tt.eq(self.states, i)
             obs_i = obs_tt[i_mask]
             subset_dist_kwargs = distribution_subset_args(dist, obs.shape, i_mask)
             subset_dist = dist.dist(**subset_dist_kwargs)
-            logp_val = at.set_subtensor(logp_val[i_mask], subset_dist.logp(obs_i))
+            logp_val = tt.set_subtensor(logp_val[i_mask], subset_dist.logp(obs_i))
 
         return logp_val
 
@@ -255,8 +246,8 @@ def __init__(self, mu=None, states=None, **kwargs):
            A vector of integer 0-1 states that indicate which component of
            the mixture is active at each point/time.
""" - self.mu = at.as_tensor_variable(pm.floatX(mu)) - self.states = at.as_tensor_variable(states) + self.mu = tt.as_tensor_variable(pm.floatX(mu)) + self.states = tt.as_tensor_variable(states) super().__init__( [Constant.dist(np.array(0, dtype=np.int64)), pm.Poisson.dist(mu)], @@ -292,15 +283,15 @@ def __init__(self, Gammas, gamma_0, shape, **kwargs): Shape of the state sequence. The last dimension is `N`, i.e. the length of the state sequence(s). """ - self.gamma_0 = at.as_tensor_variable(pm.floatX(gamma_0)) + self.gamma_0 = tt.as_tensor_variable(pm.floatX(gamma_0)) assert Gammas.ndim >= 3 - self.Gammas = at.as_tensor_variable(pm.floatX(Gammas)) + self.Gammas = tt.as_tensor_variable(pm.floatX(Gammas)) shape = np.atleast_1d(shape) - dtype = _conversion_map[aesara.config.floatX] + dtype = _conversion_map[theano.config.floatX] self.mode = np.zeros(tuple(shape), dtype=dtype) super().__init__(shape=shape, **kwargs) @@ -324,23 +315,23 @@ def logp(self, states): """ # noqa: E501 - states_tt = at.as_tensor(states) + states_tt = tt.as_tensor(states) if states.ndim > 1 or self.Gammas.ndim > 3 or self.gamma_0.ndim > 1: raise NotImplementedError("Broadcasting not supported.") - Gammas_tt = at_broadcast_to( + Gammas_tt = tt_broadcast_to( self.Gammas, (states.shape[0],) + tuple(self.Gammas.shape)[-2:] ) gamma_0_tt = self.gamma_0 Gamma_1_tt = Gammas_tt[0] - P_S_1_tt = at.dot(gamma_0_tt, Gamma_1_tt)[states_tt[0]] + P_S_1_tt = tt.dot(gamma_0_tt, Gamma_1_tt)[states_tt[0]] # def S_logp_fn(S_tm1, S_t, Gamma): - # return at.log(Gamma[..., S_tm1, S_t]) + # return tt.log(Gamma[..., S_tm1, S_t]) # - # P_S_2T_tt, _ = aesara.scan( + # P_S_2T_tt, _ = theano.scan( # S_logp_fn, # sequences=[ # { @@ -350,10 +341,10 @@ def logp(self, states): # Gammas_tt, # ], # ) - P_S_2T_tt = Gammas_tt[at.arange(1, states.shape[0]), states[:-1], states[1:]] + P_S_2T_tt = Gammas_tt[tt.arange(1, states.shape[0]), states[:-1], states[1:]] - log_P_S_1T_tt = at.concatenate( - [at.shape_padright(at.log(P_S_1_tt)), at.log(P_S_2T_tt)] + log_P_S_1T_tt = tt.concatenate( + [tt.shape_padright(tt.log(P_S_1_tt)), tt.log(P_S_2T_tt)] ) res = log_P_S_1T_tt.sum() @@ -424,7 +415,7 @@ class Constant(Distribution): def __init__(self, c, shape=(), defaults=("mode",), **kwargs): - c = at.as_tensor_variable(c) + c = tt.as_tensor_variable(c) dtype = c.dtype @@ -448,7 +439,7 @@ def _random(c, dtype=self.dtype, size=None): ) def logp(self, value): - return at.switch(at.eq(value, self.c), 0.0, -np.inf) + return tt.switch(tt.eq(value, self.c), 0.0, -np.inf) def _distr_parameters_for_repr(self): return ["c"] diff --git a/pymc3_hmm/step_methods.py b/pymc3_hmm/step_methods.py index 9719db9..00f4130 100644 --- a/pymc3_hmm/step_methods.py +++ b/pymc3_hmm/step_methods.py @@ -1,47 +1,28 @@ +from functools import singledispatch from itertools import chain from typing import Callable, Tuple import numpy as np - -try: # pragma: no cover - import aesara.scalar as aes - import aesara.tensor as at - from aesara import config - from aesara.compile import optdb - from aesara.graph.basic import Variable, graph_inputs - from aesara.graph.fg import FunctionGraph - from aesara.graph.op import get_test_value as test_value - from aesara.graph.opt import OpRemove, pre_greedy_local_optimizer - from aesara.graph.optdb import Query - from aesara.scalar.basic import Dot - from aesara.sparse.basic import StructuredDot - from aesara.tensor.elemwise import DimShuffle, Elemwise - from aesara.tensor.subtensor import AdvancedIncSubtensor1 - from aesara.tensor.var import TensorConstant -except 
ImportError: # pragma: no cover - import theano.scalar as aes - import theano.tensor as at - from theano.compile import optdb - from theano.graph.basic import Variable, graph_inputs - from theano.graph.fg import FunctionGraph - from theano.graph.op import get_test_value as test_value - from theano.graph.opt import OpRemove, pre_greedy_local_optimizer - from theano.graph.optdb import Query - from theano.tensor.elemwise import DimShuffle, Elemwise - from theano.tensor.subtensor import AdvancedIncSubtensor1 - from theano.tensor.var import TensorConstant - from theano.tensor.basic import Dot - from theano.sparse.basic import StructuredDot - from theano import config - -from functools import singledispatch - import pymc3 as pm import scipy +import theano.scalar as ts +import theano.tensor as tt from pymc3.distributions.distribution import draw_values from pymc3.step_methods.arraystep import ArrayStep, BlockedStep, Competence from pymc3.util import get_untransformed_name from scipy.stats import invgamma +from theano import config +from theano.compile import optdb +from theano.graph.basic import Variable, graph_inputs +from theano.graph.fg import FunctionGraph +from theano.graph.op import get_test_value as test_value +from theano.graph.opt import OpRemove, pre_greedy_local_optimizer +from theano.graph.optdb import Query +from theano.sparse.basic import StructuredDot +from theano.tensor.basic import Dot +from theano.tensor.elemwise import DimShuffle, Elemwise +from theano.tensor.subtensor import AdvancedIncSubtensor1 +from theano.tensor.var import TensorConstant from pymc3_hmm.distributions import DiscreteMarkovChain, HorseShoe, SwitchingProcess from pymc3_hmm.utils import compute_trans_freqs @@ -185,7 +166,7 @@ def __init__(self, vars, values=None, model=None): for comp_dist in dependent_rv.distribution.comp_dists: comp_logps.append(comp_dist.logp(dependent_rv)) - comp_logp_stacked = at.stack(comp_logps) + comp_logp_stacked = tt.stack(comp_logps) else: raise TypeError( "This sampler only supports `SwitchingProcess` observations" @@ -193,7 +174,7 @@ def __init__(self, vars, values=None, model=None): dep_comps_logp_stacked.append(comp_logp_stacked) - comp_logp_stacked = at.sum(dep_comps_logp_stacked, axis=0) + comp_logp_stacked = tt.sum(dep_comps_logp_stacked, axis=0) (M,) = draw_values([var.distribution.gamma_0.shape[-1]], point=model.test_point) N = model.test_point[var.name].shape[-1] @@ -352,9 +333,9 @@ def _set_row_mappings(self, Gamma, dir_priors, model): Gamma = pre_greedy_local_optimizer( FunctionGraph([], []), [ - OpRemove(Elemwise(aes.Cast(aes.float32))), - OpRemove(Elemwise(aes.Cast(aes.float64))), - OpRemove(Elemwise(aes.identity)), + OpRemove(Elemwise(ts.Cast(ts.float32))), + OpRemove(Elemwise(ts.Cast(ts.float64))), + OpRemove(Elemwise(ts.identity)), ], Gamma, ) @@ -378,7 +359,7 @@ def _set_row_mappings(self, Gamma, dir_priors, model): Gamma_Join = Gamma_DimShuffle.inputs[0].owner - if not (isinstance(Gamma_Join.op, at.basic.Join)): + if not (isinstance(Gamma_Join.op, tt.basic.Join)): raise TypeError( "The transition matrix should be comprised of stacked row vectors" ) @@ -546,7 +527,7 @@ def hs_regression_model_Normal(dist, rv, model): mu = dist.mu y_X_fn = None if hasattr(rv, "observations"): - obs = at.as_tensor_variable(rv.observations) + obs = tt.as_tensor_variable(rv.observations) obs_fn = model.fn(obs) def y_X_fn(points, X): @@ -558,22 +539,22 @@ def y_X_fn(points, X): @hs_regression_model.register(pm.NegativeBinomial) def hs_regression_model_NegativeBinomial(dist, rv, model): - 
mu = at.as_tensor_variable(dist.mu) + mu = tt.as_tensor_variable(dist.mu) - if mu.owner and mu.owner.op == at.exp: + if mu.owner and mu.owner.op == tt.exp: eta = mu.owner.inputs[0] else: eta = mu - alpha = at.as_tensor_variable(dist.alpha) + alpha = tt.as_tensor_variable(dist.alpha) if hasattr(rv, "observations"): from polyagamma import random_polyagamma - obs = at.as_tensor_variable(rv.observations) + obs = tt.as_tensor_variable(rv.observations) h_z_alpha_fn = model.fn( [ alpha + obs, - eta.squeeze() - at.log(alpha), + eta.squeeze() - tt.log(alpha), alpha, obs, ] @@ -617,7 +598,7 @@ def find_dot(node, beta, model, y_fn): return node, X_fn, y_fn else: # if exp transformation - if isinstance(node.owner.op, at.elemwise.Elemwise): + if isinstance(node.owner.op, tt.elemwise.Elemwise): res = find_dot(node.owner.inputs[0], beta, model, y_fn) if res: node, X_fn, _ = res diff --git a/pymc3_hmm/utils.py b/pymc3_hmm/utils.py index 8f2047d..e05a077 100644 --- a/pymc3_hmm/utils.py +++ b/pymc3_hmm/utils.py @@ -3,21 +3,14 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd +import theano.tensor as tt from matplotlib import cm from matplotlib.axes import Axes from matplotlib.colors import Colormap from scipy.special import logsumexp - -try: # pragma: no cover - import aesara.tensor as at - from aesara.tensor.extra_ops import broadcast_shape - from aesara.tensor.extra_ops import broadcast_to as at_broadcast_to - from aesara.tensor.var import TensorVariable -except ImportError: # pragma: no cover - import theano.tensor as at - from theano.tensor.extra_ops import broadcast_shape - from theano.tensor.extra_ops import broadcast_to as at_broadcast_to - from theano.tensor.var import TensorVariable +from theano.tensor.extra_ops import broadcast_shape +from theano.tensor.extra_ops import broadcast_to as tt_broadcast_to +from theano.tensor.var import TensorVariable try: # pragma: no cover import datashader as ds @@ -44,8 +37,8 @@ def compute_steady_state(P): P = P[0] N_states = P.shape[-1] - Lam = (at.eye(N_states) - P + at.ones((N_states, N_states))).T - u = at.slinalg.solve(Lam, at.ones((N_states,))) + Lam = (tt.eye(N_states) - P + tt.ones((N_states, N_states))).T + u = tt.slinalg.solve(Lam, tt.ones((N_states,))) return u @@ -95,15 +88,15 @@ def compute_trans_freqs(states, N_states, counts_only=False): def tt_logsumexp(x, axis=None, keepdims=False): """Construct a Theano graph for a log-sum-exp calculation.""" - x_max_ = at.max(x, axis=axis, keepdims=True) + x_max_ = tt.max(x, axis=axis, keepdims=True) if x_max_.ndim > 0: - x_max_ = at.set_subtensor(x_max_[at.isinf(x_max_)], 0.0) - elif at.isinf(x_max_): - x_max_ = at.as_tensor(0.0) + x_max_ = tt.set_subtensor(x_max_[tt.isinf(x_max_)], 0.0) + elif tt.isinf(x_max_): + x_max_ = tt.as_tensor(0.0) - res = at.sum(at.exp(x - x_max_), axis=axis, keepdims=keepdims) - res = at.log(res) + res = tt.sum(tt.exp(x - x_max_), axis=axis, keepdims=keepdims) + res = tt.log(res) if not keepdims: # SciPy uses the `axis` keyword here, but Theano doesn't support that. 
@@ -193,7 +186,7 @@ def tt_broadcast_arrays(*args: TensorVariable):
 
     """
     bcast_shape = broadcast_shape(*args)
-    return tuple(at_broadcast_to(a, bcast_shape) for a in args)
+    return tuple(tt_broadcast_to(a, bcast_shape) for a in args)
 
 
 def multilogit_inv(ys):
@@ -216,7 +209,7 @@ def multilogit_inv(ys):
         lib = np
         lib_logsumexp = logsumexp
     else:
-        lib = at
+        lib = tt
         lib_logsumexp = tt_logsumexp
 
     # exp_ys = lib.exp(ys)
diff --git a/tests/test_distributions.py b/tests/test_distributions.py
index fb4c599..ec38eaf 100644
--- a/tests/test_distributions.py
+++ b/tests/test_distributions.py
@@ -1,14 +1,8 @@
 import numpy as np
-
-try:
-    import aesara
-    import aesara.tensor as at
-except ImportError:
-    import theano as aesara
-    import theano.tensor as at
-
 import pymc3 as pm
 import pytest
+import theano
+import theano.tensor as tt
 
 from pymc3_hmm.distributions import (
     Constant,
@@ -22,8 +16,8 @@
 
 
 def test_DiscreteMarkovChain_str():
-    Gammas = at.as_tensor(np.eye(2)[None, ...], name="Gammas")
-    gamma_0 = at.as_tensor(np.r_[0, 1], name="gamma_0")
+    Gammas = tt.as_tensor(np.eye(2)[None, ...], name="Gammas")
+    gamma_0 = tt.as_tensor(np.r_[0, 1], name="gamma_0")
 
     with pm.Model():
         test_dist = DiscreteMarkovChain("P_rv", Gammas, gamma_0, shape=(2,))
@@ -188,7 +182,7 @@ def test_DiscreteMarkovChain_random():
 
 
 def test_DiscreteMarkovChain_point():
-    test_Gammas = at.as_tensor_variable(np.array([[[1.0, 0.0], [0.0, 1.0]]]))
+    test_Gammas = tt.as_tensor_variable(np.array([[[1.0, 0.0], [0.0, 1.0]]]))
 
     with pm.Model():
         # XXX: `draw_values` won't use the `Deterministic`s values in the `point` map!
@@ -335,7 +329,7 @@ def test_DiscreteMarkovChain_point():
     ],
 )
 def test_DiscreteMarkovChain_logp(Gammas, gamma_0, obs, exp_res):
-    aesara.config.compute_test_value = "warn"
+    theano.config.compute_test_value = "warn"
     test_dist = DiscreteMarkovChain.dist(Gammas, gamma_0, shape=obs.shape[-1])
     test_logp_tt = test_dist.logp(obs)
     test_logp_val = test_logp_tt.eval()
@@ -516,9 +510,9 @@ def test_SwitchingProcess():
     with pytest.raises(TypeError):
         SwitchingProcess.dist([1], test_states)
 
-    with aesara.change_flags(compute_test_value="off"):
+    with theano.change_flags(compute_test_value="off"):
         # Test for the case when a default can't be computed
-        test_dist = pm.Poisson.dist(at.scalar())
+        test_dist = pm.Poisson.dist(tt.scalar())
 
         # Confirm that there's no default
         with pytest.raises(AttributeError):
@@ -529,13 +523,13 @@ def test_SwitchingProcess():
             SwitchingProcess.dist([test_dist], test_states)
 
     # Evaluate multiple observed state sequences in an extreme case
-    test_states = at.imatrix("states")
+    test_states = tt.imatrix("states")
     test_states.tag.test_value = np.zeros((10, 4)).astype("int32")
     test_dist = SwitchingProcess.dist([Constant.dist(0), Constant.dist(1)], test_states)
     test_obs = np.tile(np.arange(4), (10, 1)).astype("int32")
     test_logp = test_dist.logp(test_obs)
     exp_logp = np.tile(
-        np.array([0.0] + [-np.inf] * 3, dtype=aesara.config.floatX), (10, 1)
+        np.array([0.0] + [-np.inf] * 3, dtype=theano.config.floatX), (10, 1)
     )
     assert np.array_equal(test_logp.tag.test_value, exp_logp)
 
diff --git a/tests/test_estimation.py b/tests/test_estimation.py
index c84a872..7269c7f 100644
--- a/tests/test_estimation.py
+++ b/tests/test_estimation.py
@@ -4,26 +4,17 @@
 import numpy as np
 import pandas as pd
 import patsy
-
-try:
-    import aesara
-    import aesara.tensor as at
-    from aesara import shared
-
-    TV_CONFIG = {"aesara_config": {"compute_test_value": "ignore"}}
-except ImportError:
-    import theano as aesara
-    import theano.tensor as at
-    from theano import shared
-
-    TV_CONFIG = {"theano_config": {"compute_test_value": "ignore"}}
-
 import pymc3 as pm
+import theano
+import theano.tensor as tt
+from theano import shared
 
 from pymc3_hmm.distributions import Constant, DiscreteMarkovChain, SwitchingProcess
 from pymc3_hmm.step_methods import FFBSStep
 from pymc3_hmm.utils import multilogit_inv
 
+TV_CONFIG = {"theano_config": {"compute_test_value": "ignore"}}
+
 
 def gen_toy_data(days=-7 * 10):
     dates = date.today() + timedelta(days=days), date.today() + timedelta(days=2)
@@ -40,7 +31,7 @@ def gen_toy_data(days=-7 * 10):
 def create_dirac_zero_hmm(X, mu, xis, observed):
     S = 2
 
-    z_tt = at.stack([at.dot(X, xis[..., s, :]) for s in range(S)], axis=1)
+    z_tt = tt.stack([tt.dot(X, xis[..., s, :]) for s in range(S)], axis=1)
     Gammas_tt = pm.Deterministic("Gamma", multilogit_inv(z_tt))
 
     gamma_0_rv = pm.Dirichlet("gamma_0", np.ones((S,)), shape=S)
@@ -73,8 +64,8 @@ def test_only_positive_state():
         p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
         p_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)
 
-        P_tt = at.stack([p_0_rv, p_1_rv])
-        Gammas_tt = pm.Deterministic("P_tt", at.shape_padleft(P_tt))
+        P_tt = tt.stack([p_0_rv, p_1_rv])
+        Gammas_tt = pm.Deterministic("P_tt", tt.shape_padleft(P_tt))
 
         gamma_0_rv = pm.Dirichlet("gamma_0", np.ones((S,)), shape=S)
 
@@ -187,7 +178,7 @@ def test_time_varying_model():
 
     trace = posterior_trace.posterior.drop_vars(["Gamma", "V_t"])
 
-    with aesara.config.change_flags(compute_test_value="off"):
+    with theano.config.change_flags(compute_test_value="off"):
         adds_pois_ppc = pm.sample_posterior_predictive(
             trace, var_names=["V_t", "Y_t", "Gamma"], model=model
         )
diff --git a/tests/test_step_methods.py b/tests/test_step_methods.py
index 4c5508c..184b68a 100644
--- a/tests/test_step_methods.py
+++ b/tests/test_step_methods.py
@@ -1,22 +1,14 @@
 import warnings
 
 import numpy as np
-
-try:
-    import aesara.tensor as at
-    from aesara import shared
-    from aesara.graph.op import get_test_value
-    from aesara.sparse import structured_dot as sp_dot
-except ImportError:
-    import theano.tensor as at
-    from theano.graph.op import get_test_value
-    from theano.sparse import structured_dot as sp_dot
-    from theano import shared
-
 import pymc3 as pm
 import pytest
 import scipy as sp
+import theano.tensor as tt
 from pymc3.exceptions import SamplingError
+from theano import shared
+from theano.graph.op import get_test_value
+from theano.sparse import structured_dot as sp_dot
 
 from pymc3_hmm.distributions import DiscreteMarkovChain, HorseShoe, PoissonZeroProcess
 from pymc3_hmm.step_methods import (
@@ -143,8 +135,8 @@ def test_FFBSStep():
         p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
         p_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)
 
-        P_tt = at.stack([p_0_rv, p_1_rv])
-        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))
+        P_tt = tt.stack([p_0_rv, p_1_rv])
+        P_rv = pm.Deterministic("P_tt", tt.shape_padleft(P_tt))
 
         pi_0_tt = compute_steady_state(P_rv)
 
@@ -177,8 +169,8 @@ def test_FFBSStep_extreme():
         p_0_rv = poiszero_sim["p_0"]
         p_1_rv = poiszero_sim["p_1"]
 
-        P_tt = at.stack([p_0_rv, p_1_rv])
-        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))
+        P_tt = tt.stack([p_0_rv, p_1_rv])
+        P_rv = pm.Deterministic("P_tt", tt.shape_padleft(P_tt))
 
         pi_0_tt = poiszero_sim["pi_0"]
 
@@ -241,8 +233,8 @@ def test_TransMatConjugateStep():
         p_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
         p_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)
 
-        P_tt = at.stack([p_0_rv, p_1_rv])
-        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))
+        P_tt = tt.stack([p_0_rv, p_1_rv])
+        P_rv = pm.Deterministic("P_tt", tt.shape_padleft(P_tt))
 
         pi_0_tt = compute_steady_state(P_rv)
 
@@ -284,14 +276,14 @@ def test_TransMatConjugateStep_subtensors():
         d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
         d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)
 
-        p_0_rv = at.as_tensor([0, 0, 1])
-        p_1_rv = at.zeros(3)
-        p_1_rv = at.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
-        p_2_rv = at.zeros(3)
-        p_2_rv = at.set_subtensor(p_1_rv[[1, 2]], d_1_rv)
+        p_0_rv = tt.as_tensor([0, 0, 1])
+        p_1_rv = tt.zeros(3)
+        p_1_rv = tt.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
+        p_2_rv = tt.zeros(3)
+        p_2_rv = tt.set_subtensor(p_1_rv[[1, 2]], d_1_rv)
 
-        P_tt = at.stack([p_0_rv, p_1_rv, p_2_rv])
-        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))
+        P_tt = tt.stack([p_0_rv, p_1_rv, p_2_rv])
+        P_rv = pm.Deterministic("P_tt", tt.shape_padleft(P_tt))
 
         DiscreteMarkovChain("S_t", P_rv, np.r_[1, 0, 0], shape=(10,))
         transmat = TransMatConjugateStep(P_rv)
@@ -308,16 +300,16 @@ def test_TransMatConjugateStep_subtensors():
         d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
         d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)
 
-        p_0_rv = at.as_tensor([0, 0, 1])
-        p_1_rv = at.zeros(3)
-        p_1_rv = at.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
-        p_2_rv = at.zeros(3)
-        p_2_rv = at.set_subtensor(p_1_rv[[1, 2]], d_1_rv)
+        p_0_rv = tt.as_tensor([0, 0, 1])
+        p_1_rv = tt.zeros(3)
+        p_1_rv = tt.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
+        p_2_rv = tt.zeros(3)
+        p_2_rv = tt.set_subtensor(p_1_rv[[1, 2]], d_1_rv)
 
-        P_tt = at.horizontal_stack(
+        P_tt = tt.horizontal_stack(
             p_0_rv[..., None], p_1_rv[..., None], p_2_rv[..., None]
         )
-        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt.T))
+        P_rv = pm.Deterministic("P_tt", tt.shape_padleft(P_tt.T))
 
         DiscreteMarkovChain("S_t", P_rv, np.r_[1, 0, 0], shape=(10,))
         transmat = TransMatConjugateStep(P_rv)
@@ -334,16 +326,16 @@ def test_TransMatConjugateStep_subtensors():
         d_0_rv = pm.Dirichlet("p_0", np.r_[1, 1], shape=2)
         d_1_rv = pm.Dirichlet("p_1", np.r_[1, 1], shape=2)
 
-        p_0_rv = at.as_tensor([0, 0, 1])
-        p_1_rv = at.zeros(3)
-        p_1_rv = at.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
-        p_2_rv = at.zeros(3)
-        p_2_rv = at.set_subtensor(p_1_rv[[1, 2]], d_1_rv)
+        p_0_rv = tt.as_tensor([0, 0, 1])
+        p_1_rv = tt.zeros(3)
+        p_1_rv = tt.set_subtensor(p_0_rv[[0, 2]], d_0_rv)
+        p_2_rv = tt.zeros(3)
+        p_2_rv = tt.set_subtensor(p_1_rv[[1, 2]], d_1_rv)
 
-        P_tt = at.horizontal_stack(
+        P_tt = tt.horizontal_stack(
             p_0_rv[..., None], p_1_rv[..., None], p_2_rv[..., None]
         )
-        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt.T))
+        P_rv = pm.Deterministic("P_tt", tt.shape_padleft(P_tt.T))
 
         DiscreteMarkovChain(
             "S_t", P_rv, np.r_[1, 0, 0], shape=(4,), observed=np.r_[0, 1, 0, 2]
         )
@@ -499,7 +491,7 @@ def test_HSStep_sparse():
     M = X.shape[1]
     with pm.Model():
         beta = HorseShoe("beta", tau=1, shape=M)
-        pm.Normal("y", mu=sp_dot(X, at.shape_padright(beta)), sigma=1, observed=y)
+        pm.Normal("y", mu=sp_dot(X, tt.shape_padright(beta)), sigma=1, observed=y)
         hsstep = HSStep([beta])
         trace = pm.sample(
             draws=50,
@@ -526,7 +518,7 @@ def test_HSStep_NegativeBinomial():
     N_draws = 500
     with pm.Model():
         beta = HorseShoe("beta", tau=1, shape=M)
-        pm.NegativeBinomial("y", mu=at.exp(beta.dot(X.T)), alpha=1, observed=y_nb)
+        pm.NegativeBinomial("y", mu=tt.exp(beta.dot(X.T)), alpha=1, observed=y_nb)
         hsstep = HSStep([beta])
         trace = pm.sample(
             draws=N_draws,
@@ -558,7 +550,7 @@ def test_HSStep_NegativeBinomial():
     with pm.Model():
         beta = HorseShoe("beta", tau=1, shape=M, testval=beta_true * 0.1)
         eta = pm.NegativeBinomial("eta", mu=beta.dot(X.T), alpha=1, shape=N)
-        pm.Normal("y", mu=at.exp(eta), sigma=1, observed=y_nb)
+        pm.Normal("y", mu=tt.exp(eta), sigma=1, observed=y_nb)
 
         with pytest.raises(SamplingError):
             HSStep([beta])
@@ -585,7 +577,7 @@ def test_HSStep_NegativeBinomial_sparse():
     with pm.Model():
         beta = HorseShoe("beta", tau=1, shape=M)
         pm.NegativeBinomial(
-            "y", mu=at.exp(sp_dot(X, at.shape_padright(beta))), alpha=1, observed=y_nb
+            "y", mu=tt.exp(sp_dot(X, tt.shape_padright(beta))), alpha=1, observed=y_nb
         )
         hsstep = HSStep([beta])
         trace = pm.sample(
@@ -619,7 +611,7 @@ def test_HSStep_NegativeBinomial_sparse_shared_y():
         beta = HorseShoe("beta", tau=1, shape=M)
         pm.NegativeBinomial(
             "y",
-            mu=at.exp(sp_dot(X_tt, at.shape_padright(beta))),
+            mu=tt.exp(sp_dot(X_tt, tt.shape_padright(beta))),
             alpha=1,
             observed=y_tt,
         )
diff --git a/tests/test_utils.py b/tests/test_utils.py
index d9b62dd..df7f400 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,13 +1,8 @@
 import numpy as np
 import pytest
 import scipy as sp
-
-try:
-    import aesara
-    import aesara.tensor as at
-except ImportError:
-    import theano as aesara
-    import theano.tensor as at
+import theano
+import theano.tensor as tt
 
 from pymc3_hmm.utils import (
     compute_trans_freqs,
@@ -40,7 +35,7 @@ def test_compute_trans_freqs():
 )
 def test_logsumexp(test_input):
     np_res = sp.special.logsumexp(test_input)
-    tt_res = tt_logsumexp(at.as_tensor_variable(test_input)).eval()
+    tt_res = tt_logsumexp(tt.as_tensor_variable(test_input)).eval()
     assert np.array_equal(np_res, tt_res)
 
 
@@ -69,33 +64,31 @@ def test_logdotexp():
     assert np.allclose(A.dot(b), np.exp(test_res))
 
 
+@theano.config.change_flags(compute_test_value="warn")
+@np.errstate(over="ignore", under="ignore")
 def test_tt_logdotexp():
-    np.seterr(over="ignore", under="ignore")
-
-    aesara.config.compute_test_value = "warn"
-
     A = np.c_[[1.0, 2.0], [3.0, 4.0], [10.0, 20.0]]
     b = np.c_[[0.1], [0.2], [30.0]].T
-    A_tt = at.as_tensor_variable(A)
-    b_tt = at.as_tensor_variable(b)
-    test_res = tt_logdotexp(at.log(A_tt), at.log(b_tt)).eval()
+    A_tt = tt.as_tensor_variable(A)
+    b_tt = tt.as_tensor_variable(b)
+    test_res = tt_logdotexp(tt.log(A_tt), tt.log(b_tt)).eval()
     assert test_res.shape == (2, 1)
     assert np.allclose(A.dot(b), np.exp(test_res))
 
     b = np.r_[0.1, 0.2, 30.0]
-    test_res = tt_logdotexp(at.log(A), at.log(b)).eval()
+    test_res = tt_logdotexp(tt.log(A), tt.log(b)).eval()
     assert test_res.shape == (2,)
     assert np.allclose(A.dot(b), np.exp(test_res))
 
     A = np.c_[[1.0, 2.0], [10.0, 20.0]]
     b = np.c_[[0.1], [0.2]].T
-    test_res = tt_logdotexp(at.log(A), at.log(b)).eval()
+    test_res = tt_logdotexp(tt.log(A), tt.log(b)).eval()
     assert test_res.shape == (2, 1)
     assert np.allclose(A.dot(b), np.exp(test_res))
 
     b = np.r_[0.1, 0.2]
-    test_res = tt_logdotexp(at.log(A), at.log(b)).eval()
+    test_res = tt_logdotexp(tt.log(A), tt.log(b)).eval()
     assert test_res.shape == (2,)
     assert np.allclose(A.dot(b), np.exp(test_res))
 
 
@@ -122,6 +115,6 @@ def test_multilogit_inv(test_input, test_output):
     assert np.array_equal(res.round(2), test_output)
 
     # Theano testing
-    res = multilogit_inv(at.as_tensor_variable(test_input))
+    res = multilogit_inv(tt.as_tensor_variable(test_input))
     res = res.eval()
     assert np.array_equal(res.round(2), test_output)
diff --git a/tests/utils.py b/tests/utils.py
index c339ba1..29f742b 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -1,11 +1,6 @@
 import numpy as np
-
-try:
-    import aesara.tensor as at
-except ImportError:
-    import theano.tensor as at
-
 import pymc3 as pm
+import theano.tensor as tt
 
 from pymc3_hmm.distributions import DiscreteMarkovChain, PoissonZeroProcess
 
@@ -18,8 +13,8 @@ def simulate_poiszero_hmm(
        p_0_rv = pm.Dirichlet("p_0", p_0_a, shape=np.shape(pi_0_a))
        p_1_rv = pm.Dirichlet("p_1", p_1_a, shape=np.shape(pi_0_a))
 
-        P_tt = at.stack([p_0_rv, p_1_rv])
-        P_rv = pm.Deterministic("P_tt", at.shape_padleft(P_tt))
+        P_tt = tt.stack([p_0_rv, p_1_rv])
+        P_rv = pm.Deterministic("P_tt", tt.shape_padleft(P_tt))
 
        pi_0_tt = pm.Dirichlet("pi_0", pi_0_a, shape=np.shape(pi_0_a))
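-- 
A minimal smoke test of the Theano-only import style this patch settles on
(a sketch placed after the trailer so it stays outside the patch proper; it
mirrors the model setup in tests/utils.py, assumes pymc3 3.11 with
Theano-PyMC, and the mu/shape values are arbitrary):

import numpy as np
import pymc3 as pm
import theano.tensor as tt

from pymc3_hmm.distributions import DiscreteMarkovChain, PoissonZeroProcess

with pm.Model():
    # Two Dirichlet-distributed rows stacked into a transition matrix with a
    # leading time dimension, as the reverted test helpers do throughout.
    p_0_rv = pm.Dirichlet("p_0", np.r_[1.0, 1.0], shape=2)
    p_1_rv = pm.Dirichlet("p_1", np.r_[1.0, 1.0], shape=2)
    P_tt = tt.stack([p_0_rv, p_1_rv])
    P_rv = pm.Deterministic("P_tt", tt.shape_padleft(P_tt))

    pi_0_tt = pm.Dirichlet("pi_0", np.r_[1.0, 1.0], shape=2)

    # Hidden state sequence and zero-inflated Poisson emissions.
    S_rv = DiscreteMarkovChain("S_t", P_rv, pi_0_tt, shape=(100,))
    PoissonZeroProcess("Y_t", mu=10.0, states=S_rv)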