diff --git a/aitk/_version.py b/aitk/_version.py index b211de5..9e2806c 100644 --- a/aitk/_version.py +++ b/aitk/_version.py @@ -8,5 +8,5 @@ # # ************************************************************** -version_info = (2, 1, 0) +version_info = (3, 0, 0) __version__ = ".".join(map(str, version_info)) diff --git a/aitk/keras/README.md b/aitk/keras/README.md deleted file mode 100644 index 0521ade..0000000 --- a/aitk/keras/README.md +++ /dev/null @@ -1,93 +0,0 @@ -# Neural network models -This module implements building-blocks for larger neural network models in the -Keras-style. This module does _not_ implement a general autograd system in order -emphasize conceptual understanding over flexibility. - -1. **Activations**. Common activation nonlinearities. Includes: - - Rectified linear units (ReLU) ([Hahnloser et al., 2000](http://invibe.net/biblio_database_dyva/woda/data/att/6525.file.pdf)) - - Leaky rectified linear units - ([Maas, Hannun, & Ng, 2013](https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf)) - - Exponential linear units (ELU) ([Clevert, Unterthiner, & Hochreiter, 2016](http://arxiv.org/abs/1511.07289)) - - Scaled exponential linear units ([Klambauer, Unterthiner, & Mayr, 2017](https://arxiv.org/pdf/1706.02515.pdf)) - - Softplus units - - Hard sigmoid units - - Exponential units - - Hyperbolic tangent (tanh) - - Logistic sigmoid - - Affine - -2. **Losses**. Common loss functions. Includes: - - Squared error - - Categorical cross entropy - - VAE Bernoulli loss ([Kingma & Welling, 2014](https://arxiv.org/abs/1312.6114)) - - Wasserstein loss with gradient penalty ([Gulrajani et al., 2017](https://arxiv.org/pdf/1704.00028.pdf)) - - Noise contrastive estimation (NCE) loss ([Gutmann & Hyvärinen](https://www.cs.helsinki.fi/u/ahyvarin/papers/Gutmann10AISTATS.pdf); [Minh & Teh, 2012](https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf)) - -3. **Wrappers**. Layer wrappers. Includes: - - Dropout ([Srivastava, et al., 2014](http://www.jmlr.org/papers/volume15/srivastava14a/srivastava14a.pdf)) - -4. **Layers**. Common layers / layer-wise operations that can be composed to - create larger neural networks. Includes: - - Fully-connected - - Sparse evolutionary ([Mocanu et al., 2018](https://www.nature.com/articles/s41467-018-04316-3)) - - Dot-product attention ([Luong, Pho, & Manning, 2015](https://arxiv.org/pdf/1508.04025.pdf); [Vaswani et al., 2017](https://arxiv.org/pdf/1706.03762.pdf)) - - 1D and 2D convolution (with stride, padding, and dilation) ([van den Oord et al., 2016](https://arxiv.org/pdf/1609.03499.pdf); [Yu & Kolton, 2016](https://arxiv.org/pdf/1511.07122.pdf)) - - 2D "deconvolution" (with stride and padding) ([Zeiler et al., 2010](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)) - - Restricted Boltzmann machines (with CD-_n_ training) ([Smolensky, 1996](http://stanford.edu/~jlmcc/papers/PDP/Volume%201/Chap6_PDP86.pdf); [Carreira-Perpiñán & Hinton, 2005](http://www.cs.toronto.edu/~fritz/absps/cdmiguel.pdf)) - - Elementwise multiplication - - Embedding - - Summation - - Flattening - - Softmax - - Max & average pooling - - 1D and 2D batch normalization ([Ioffe & Szegedy, 2015](http://proceedings.mlr.press/v37/ioffe15.pdf)) - - 1D and 2D layer normalization ([Ba, Kiros, & Hinton, 2016](https://arxiv.org/pdf/1607.06450.pdf)) - - Recurrent ([Elman, 1990](https://crl.ucsd.edu/~elman/Papers/fsit.pdf)) - - Long short-term memory (LSTM) ([Hochreiter & Schmidhuber, 1997](http://www.bioinf.jku.at/publications/older/2604.pdf)) - -5. **Optimizers**. 
Common modifications to stochastic gradient descent. - Includes: - - SGD with momentum ([Rummelhart, Hinton, & Williams, 1986](https://www.cs.princeton.edu/courses/archive/spring18/cos495/res/backprop_old.pdf)) - - AdaGrad ([Duchi, Hazan, & Singer, 2011](http://jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)) - - RMSProp ([Tieleman & Hinton, 2012](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)) - - Adam ([Kingma & Ba, 2015](https://arxiv.org/pdf/1412.6980v8.pdf)) - -6. **Learning Rate Schedulers**. Common learning rate decay schedules. - - Constant - - Exponential decay - - Noam/Transformer scheduler ([Vaswani et al., 2017](https://arxiv.org/pdf/1706.03762.pdf)) - - King/Dlib scheduler ([King, 2018](http://blog.dlib.net/2018/02/automatic-learning-rate-scheduling-that.html)) - -6. **Initializers**. Common weight initialization strategies. - - Glorot/Xavier uniform and normal ([Glorot & Bengio, 2010](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf)) - - He/Kaiming uniform and normal ([He et al., 2015](https://arxiv.org/pdf/1502.01852v1.pdf)) - - Standard normal - - Truncated normal - -7. **Modules**. Common multi-layer blocks that appear across many deep networks. - Includes: - - Bidirectional LSTMs ([Schuster & Paliwal, 1997](https://pdfs.semanticscholar.org/4b80/89bc9b49f84de43acc2eb8900035f7d492b2.pdf)) - - ResNet-style "identity" (i.e., `same`-convolution) residual blocks ([He et al., 2015](https://arxiv.org/pdf/1512.03385.pdf)) - - ResNet-style "convolutional" (i.e., parametric) residual blocks ([He et al., 2015](https://arxiv.org/pdf/1512.03385.pdf)) - - WaveNet-style residual block with dilated causal convolutions ([van den Oord et al., 2016](https://arxiv.org/pdf/1609.03499.pdf)) - - Transformer-style multi-headed dot-product attention ([Vaswani et al., 2017](https://arxiv.org/pdf/1706.03762.pdf)) - -8. **Models**. Well-known network architectures. Includes: - - `vae.py`: Bernoulli variational autoencoder ([Kingma & Welling, 2014](https://arxiv.org/abs/1312.6114)) - - `wgan_gp.py`: Wasserstein generative adversarial network with gradient - penalty ([Gulrajani et al., 2017](https://arxiv.org/pdf/1704.00028.pdf); -[Goodfellow et al., 2014](https://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf)) - - `w2v.py`: word2vec model with CBOW and skip-gram architectures and - training via noise contrastive estimation ([Mikolov et al., 2012](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf)) - -8. **Utils**. Common helper functions, primarily for dealing with CNNs. - Includes: - - `im2col` - - `col2im` - - `conv1D` - - `conv2D` - - `dilate` - - `deconv2D` - - `minibatch` - - Various weight initialization utilities - - Various padding and convolution arithmetic utilities diff --git a/aitk/keras/__init__.py b/aitk/keras/__init__.py deleted file mode 100644 index 9accc14..0000000 --- a/aitk/keras/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# -*- coding: utf-8 -*- -# ************************************************************** -# aitk.keras: A Python Keras model API -# -# Copyright (c) 2021 AITK Developers -# -# https://github.com/ArtificialIntelligenceToolkit/aitk.keras -# -# ************************************************************** - -"""A module of basic building blcoks for constructing neural networks""" -from . import utils -from . import losses -from . import activations -from . import schedulers -from . import optimizers -from . import wrappers -from . 
import layers -from . import initializers -from . import modules -from . import models -from . import datasets - -import sys -import numpy - -# Create a fake module "backend" that is really numpy -backend = numpy -backend.image_data_format = lambda: 'channels_last' -sys.modules["aitk.keras.backend"] = backend diff --git a/aitk/keras/activations/README.md b/aitk/keras/activations/README.md deleted file mode 100644 index 6287b59..0000000 --- a/aitk/keras/activations/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# Activation Functions -The `activations` module implements several common activation functions: - -- Rectified linear units (ReLU) ([Hahnloser et al., 2000](http://invibe.net/biblio_database_dyva/woda/data/att/6525.file.pdf)) -- Leaky rectified linear units - ([Maas, Hannun, & Ng, 2013](https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf)) -- Exponential linear units ([Clevert, Unterthiner, & Hochreiter, 2016](https://arxiv.org/pdf/1511.07289.pdf)) -- Scaled exponential linear units ([Klambauer, Unterthiner, & Mayr, 2017](https://arxiv.org/pdf/1706.02515.pdf)) -- Softplus units -- Hard sigmoid units -- Exponential units -- Hyperbolic tangent (tanh) -- Logistic sigmoid -- Affine - - -## Plots -

-<img src="img/plot.png" />

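For reference, the deleted `activations` module exposed each nonlinearity as a small object with a callable forward pass and explicit `grad`/`grad2` methods rather than relying on autograd. Below is a minimal sketch of that interface, based on the removed `aitk/keras/activations/activations.py`; the import path assumes the pre-3.0.0 package layout that this change deletes.

```python
import numpy as np
# Import path prior to this removal; no longer available after 3.0.0.
from aitk.keras.activations import ReLU, Sigmoid, ELU

z = np.array([[-2.0, -0.5, 0.0, 0.5, 2.0]])

relu = ReLU()
print(relu(z))       # forward pass: clip(z, 0, inf)
print(relu.grad(z))  # first derivative: 1 where z > 0, else 0

sig = Sigmoid()
print(sig(z))        # 1 / (1 + exp(-z))
print(sig.grad(z))   # sigma(z) * (1 - sigma(z))

elu = ELU(alpha=1.0)
print(elu(z))        # z where z > 0, else alpha * (exp(z) - 1)
```

Because each activation object carries its own analytic `grad`/`grad2`, the layer implementations could backpropagate without a general autograd system, which is the design trade-off the deleted README calls out.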
diff --git a/aitk/keras/activations/__init__.py b/aitk/keras/activations/__init__.py deleted file mode 100644 index 8ba160e..0000000 --- a/aitk/keras/activations/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .activations import * diff --git a/aitk/keras/activations/activations.py b/aitk/keras/activations/activations.py deleted file mode 100644 index f2c1949..0000000 --- a/aitk/keras/activations/activations.py +++ /dev/null @@ -1,627 +0,0 @@ -"""A collection of activation function objects for building neural networks""" - -from abc import ABC, abstractmethod - -import numpy as np - - -class ActivationBase(ABC): - def __init__(self, **kwargs): - """Initialize the ActivationBase object""" - super().__init__() - - def __call__(self, z): - """Apply the activation function to an input""" - if z.ndim == 1: - z = z.reshape(1, -1) - return self.fn(z) - - @abstractmethod - def fn(self, z): - """Apply the activation function to an input""" - raise NotImplementedError - - @abstractmethod - def grad(self, x, **kwargs): - """Compute the gradient of the activation function wrt the input""" - raise NotImplementedError - - -class Sigmoid(ActivationBase): - def __init__(self): - """A logistic sigmoid activation function.""" - super().__init__() - - def __str__(self): - """Return a string representation of the activation function""" - return "Sigmoid" - - def fn(self, z): - r""" - Evaluate the logistic sigmoid, :math:`\sigma`, on the elements of input `z`. - - .. math:: - - \sigma(x_i) = \frac{1}{1 + e^{-x_i}} - """ - return 1 / (1 + np.exp(-z)) - - def grad(self, x): - r""" - Evaluate the first derivative of the logistic sigmoid on the elements of `x`. - - .. math:: - - \frac{\partial \sigma}{\partial x_i} = \sigma(x_i) (1 - \sigma(x_i)) - """ - fn_x = self.fn(x) - return fn_x * (1 - fn_x) - - def grad2(self, x): - r""" - Evaluate the second derivative of the logistic sigmoid on the elements of `x`. - - .. math:: - - \frac{\partial^2 \sigma}{\partial x_i^2} = - \frac{\partial \sigma}{\partial x_i} (1 - 2 \sigma(x_i)) - """ - fn_x = self.fn(x) - return fn_x * (1 - fn_x) * (1 - 2 * fn_x) - - -class ReLU(ActivationBase): - """ - A rectified linear activation function. - - Notes - ----- - "ReLU units can be fragile during training and can "die". For example, a - large gradient flowing through a ReLU neuron could cause the weights to - update in such a way that the neuron will never activate on any datapoint - again. If this happens, then the gradient flowing through the unit will - forever be zero from that point on. That is, the ReLU units can - irreversibly die during training since they can get knocked off the data - manifold. - - For example, you may find that as much as 40% of your network can be "dead" - (i.e. neurons that never activate across the entire training dataset) if - the learning rate is set too high. With a proper setting of the learning - rate this is less frequently an issue." [*]_ - - References - ---------- - .. [*] Karpathy, A. "CS231n: Convolutional neural networks for visual recognition". - """ - - def __init__(self): - super().__init__() - - def __str__(self): - """Return a string representation of the activation function""" - return "ReLU" - - def fn(self, z): - r""" - Evaulate the ReLU function on the elements of input `z`. - - .. math:: - - \text{ReLU}(z_i) - &= z_i \ \ \ \ &&\text{if }z_i > 0 \\ - &= 0 \ \ \ \ &&\text{otherwise} - """ - return np.clip(z, 0, np.inf) - - def grad(self, x): - r""" - Evaulate the first derivative of the ReLU function on the elements of input `x`. 
- - .. math:: - - \frac{\partial \text{ReLU}}{\partial x_i} - &= 1 \ \ \ \ &&\text{if }x_i > 0 \\ - &= 0 \ \ \ \ &&\text{otherwise} - """ - return (x > 0).astype(int) - - def grad2(self, x): - r""" - Evaulate the second derivative of the ReLU function on the elements of - input `x`. - - .. math:: - - \frac{\partial^2 \text{ReLU}}{\partial x_i^2} = 0 - """ - return np.zeros_like(x) - - -class LeakyReLU(ActivationBase): - """ - 'Leaky' version of a rectified linear unit (ReLU). - - Notes - ----- - Leaky ReLUs [*]_ are designed to address the vanishing gradient problem in - ReLUs by allowing a small non-zero gradient when `x` is negative. - - Parameters - ---------- - alpha: float - Activation slope when x < 0. Default is 0.3. - - References - ---------- - .. [*] Mass, L. M., Hannun, A. Y, & Ng, A. Y. (2013). "Rectifier - nonlinearities improve neural network acoustic models". *Proceedings of - the 30th International Conference of Machine Learning, 30*. - """ - - def __init__(self, alpha=0.3): - self.alpha = alpha - super().__init__() - - def __str__(self): - """Return a string representation of the activation function""" - return "Leaky ReLU(alpha={})".format(self.alpha) - - def fn(self, z): - r""" - Evaluate the leaky ReLU function on the elements of input `z`. - - .. math:: - - \text{LeakyReLU}(z_i) - &= z_i \ \ \ \ &&\text{if } z_i > 0 \\ - &= \alpha z_i \ \ \ \ &&\text{otherwise} - """ - _z = z.copy() - _z[z < 0] = _z[z < 0] * self.alpha - return _z - - def grad(self, x): - r""" - Evaluate the first derivative of the leaky ReLU function on the elements - of input `x`. - - .. math:: - - \frac{\partial \text{LeakyReLU}}{\partial x_i} - &= 1 \ \ \ \ &&\text{if }x_i > 0 \\ - &= \alpha \ \ \ \ &&\text{otherwise} - """ - out = np.ones_like(x) - out[x < 0] *= self.alpha - return out - - def grad2(self, x): - r""" - Evaluate the second derivative of the leaky ReLU function on the - elements of input `x`. - - .. math:: - - \frac{\partial^2 \text{LeakyReLU}}{\partial x_i^2} = 0 - """ - return np.zeros_like(x) - - -class Tanh(ActivationBase): - def __init__(self): - """A hyperbolic tangent activation function.""" - super().__init__() - - def __str__(self): - """Return a string representation of the activation function""" - return "Tanh" - - def fn(self, z): - """Compute the tanh function on the elements of input `z`.""" - return np.tanh(z) - - def grad(self, x): - r""" - Evaluate the first derivative of the tanh function on the elements - of input `x`. - - .. math:: - - \frac{\partial \tanh}{\partial x_i} = 1 - \tanh(x)^2 - """ - return 1 - np.tanh(x) ** 2 - - def grad2(self, x): - r""" - Evaluate the second derivative of the tanh function on the elements - of input `x`. - - .. math:: - - \frac{\partial^2 \tanh}{\partial x_i^2} = - -2 \tanh(x) \left(\frac{\partial \tanh}{\partial x_i}\right) - """ - tanh_x = np.tanh(x) - return -2 * tanh_x * (1 - tanh_x ** 2) - - -class Affine(ActivationBase): - def __init__(self, slope=1, intercept=0): - """ - An affine activation function. - - Parameters - ---------- - slope: float - Activation slope. Default is 1. - intercept: float - Intercept/offset term. Default is 0. - """ - self.slope = slope - self.intercept = intercept - super().__init__() - - def __str__(self): - """Return a string representation of the activation function""" - return "Affine(slope={}, intercept={})".format(self.slope, self.intercept) - - def fn(self, z): - r""" - Evaluate the Affine activation on the elements of input `z`. - - .. 
math:: - - \text{Affine}(z_i) = \text{slope} \times z_i + \text{intercept} - """ - return self.slope * z + self.intercept - - def grad(self, x): - r""" - Evaluate the first derivative of the Affine activation on the elements - of input `x`. - - .. math:: - - \frac{\partial \text{Affine}}{\partial x_i} = \text{slope} - """ - return self.slope * np.ones_like(x) - - def grad2(self, x): - r""" - Evaluate the second derivative of the Affine activation on the elements - of input `x`. - - .. math:: - - \frac{\partial^2 \text{Affine}}{\partial x_i^2} = 0 - """ - return np.zeros_like(x) - - -class Identity(Affine): - def __init__(self): - """ - Identity activation function. - - Notes - ----- - :class:`Identity` is just syntactic sugar for :class:`Affine` with - slope = 1 and intercept = 0. - """ - super().__init__(slope=1, intercept=0) - - def __str__(self): - """Return a string representation of the activation function""" - return "Identity" - - -class ELU(ActivationBase): - def __init__(self, alpha=1.0): - r""" - An exponential linear unit (ELU). - - Notes - ----- - ELUs are intended to address the fact that ReLUs are strictly nonnegative - and thus have an average activation > 0, increasing the chances of internal - covariate shift and slowing down learning. ELU units address this by (1) - allowing negative values when :math:`x < 0`, which (2) are bounded by a value - :math:`-\alpha`. Similar to :class:`LeakyReLU`, the negative activation - values help to push the average unit activation towards 0. Unlike - :class:`LeakyReLU`, however, the boundedness of the negative activation - allows for greater robustness in the face of large negative values, - allowing the function to avoid conveying the *degree* of "absence" - (negative activation) in the input. [*]_ - - Parameters - ---------- - alpha : float - Slope of negative segment. Default is 1. - - References - ---------- - .. [*] Clevert, D. A., Unterthiner, T., Hochreiter, S. (2016). "Fast - and accurate deep network learning by exponential linear units - (ELUs)". *4th International Conference on Learning - Representations*. - """ - self.alpha = alpha - super().__init__() - - def __str__(self): - """Return a string representation of the activation function""" - return "ELU(alpha={})".format(self.alpha) - - def fn(self, z): - r""" - Evaluate the ELU activation on the elements of input `z`. - - .. math:: - - \text{ELU}(z_i) - &= z_i \ \ \ \ &&\text{if }z_i > 0 \\ - &= \alpha (e^{z_i} - 1) \ \ \ \ &&\text{otherwise} - """ - # z if z > 0 else alpha * (e^z - 1) - return np.where(z > 0, z, self.alpha * (np.exp(z) - 1)) - - def grad(self, x): - r""" - Evaluate the first derivative of the ELU activation on the elements - of input `x`. - - .. math:: - - \frac{\partial \text{ELU}}{\partial x_i} - &= 1 \ \ \ \ &&\text{if } x_i > 0 \\ - &= \alpha e^{x_i} \ \ \ \ &&\text{otherwise} - """ - # 1 if x > 0 else alpha * e^(z) - return np.where(x > 0, np.ones_like(x), self.alpha * np.exp(x)) - - def grad2(self, x): - r""" - Evaluate the second derivative of the ELU activation on the elements - of input `x`. - - .. 
math:: - - \frac{\partial^2 \text{ELU}}{\partial x_i^2} - &= 0 \ \ \ \ &&\text{if } x_i > 0 \\ - &= \alpha e^{x_i} \ \ \ \ &&\text{otherwise} - """ - # 0 if x > 0 else alpha * e^(z) - return np.where(x >= 0, np.zeros_like(x), self.alpha * np.exp(x)) - - -class Exponential(ActivationBase): - def __init__(self): - """An exponential (base e) activation function""" - super().__init__() - - def __str__(self): - """Return a string representation of the activation function""" - return "Exponential" - - def fn(self, z): - r""" - Evaluate the activation function - - .. math:: - \text{Exponential}(z_i) = e^{z_i} - """ - return np.exp(z) - - def grad(self, x): - r""" - Evaluate the first derivative of the exponential activation on the elements - of input `x`. - - .. math:: - - \frac{\partial \text{Exponential}}{\partial x_i} = e^{x_i} - """ - return np.exp(x) - - def grad2(self, x): - r""" - Evaluate the second derivative of the exponential activation on the elements - of input `x`. - - .. math:: - - \frac{\partial^2 \text{Exponential}}{\partial x_i^2} = e^{x_i} - """ - return np.exp(x) - - -class SELU(ActivationBase): - r""" - A scaled exponential linear unit (SELU). - - Notes - ----- - SELU units, when used in conjunction with proper weight initialization and - regularization techniques, encourage neuron activations to converge to - zero-mean and unit variance without explicit use of e.g., batchnorm. - - For SELU units, the :math:`\alpha` and :math:`\text{scale}` values are - constants chosen so that the mean and variance of the inputs are preserved - between consecutive layers. As such the authors propose weights be - initialized using Lecun-Normal initialization: :math:`w_{ij} \sim - \mathcal{N}(0, 1 / \text{fan_in})`, and to use the dropout variant - :math:`\alpha`-dropout during regularization. [*]_ - - See the reference for more information (especially the appendix ;-) ). - - References - ---------- - .. [*] Klambauer, G., Unterthiner, T., & Hochreiter, S. (2017). - "Self-normalizing neural networks." *Advances in Neural Information - Processing Systems, 30.* - """ - - def __init__(self): - self.alpha = 1.6732632423543772848170429916717 - self.scale = 1.0507009873554804934193349852946 - self.elu = ELU(alpha=self.alpha) - super().__init__() - - def __str__(self): - """Return a string representation of the activation function""" - return "SELU" - - def fn(self, z): - r""" - Evaluate the SELU activation on the elements of input `z`. - - .. math:: - - \text{SELU}(z_i) = \text{scale} \times \text{ELU}(z_i, \alpha) - - which is simply - - .. math:: - - \text{SELU}(z_i) - &= \text{scale} \times z_i \ \ \ \ &&\text{if }z_i > 0 \\ - &= \text{scale} \times \alpha (e^{z_i} - 1) \ \ \ \ &&\text{otherwise} - """ - return self.scale * self.elu.fn(z) - - def grad(self, x): - r""" - Evaluate the first derivative of the SELU activation on the elements - of input `x`. - - .. math:: - - \frac{\partial \text{SELU}}{\partial x_i} - &= \text{scale} \ \ \ \ &&\text{if } x_i > 0 \\ - &= \text{scale} \times \alpha e^{x_i} \ \ \ \ &&\text{otherwise} - """ - return np.where( - x >= 0, np.ones_like(x) * self.scale, np.exp(x) * self.alpha * self.scale, - ) - - def grad2(self, x): - r""" - Evaluate the second derivative of the SELU activation on the elements - of input `x`. - - .. 
math:: - - \frac{\partial^2 \text{SELU}}{\partial x_i^2} - &= 0 \ \ \ \ &&\text{if } x_i > 0 \\ - &= \text{scale} \times \alpha e^{x_i} \ \ \ \ &&\text{otherwise} - """ - return np.where(x > 0, np.zeros_like(x), np.exp(x) * self.alpha * self.scale) - - -class HardSigmoid(ActivationBase): - def __init__(self): - """ - A "hard" sigmoid activation function. - - Notes - ----- - The hard sigmoid is a piecewise linear approximation of the logistic - sigmoid that is computationally more efficient to compute. - """ - super().__init__() - - def __str__(self): - """Return a string representation of the activation function""" - return "Hard Sigmoid" - - def fn(self, z): - r""" - Evaluate the hard sigmoid activation on the elements of input `z`. - - .. math:: - - \text{HardSigmoid}(z_i) - &= 0 \ \ \ \ &&\text{if }z_i < -2.5 \\ - &= 0.2 z_i + 0.5 \ \ \ \ &&\text{if }-2.5 \leq z_i \leq 2.5 \\ - &= 1 \ \ \ \ &&\text{if }z_i > 2.5 - """ - return np.clip((0.2 * z) + 0.5, 0.0, 1.0) - - def grad(self, x): - r""" - Evaluate the first derivative of the hard sigmoid activation on the elements - of input `x`. - - .. math:: - - \frac{\partial \text{HardSigmoid}}{\partial x_i} - &= 0.2 \ \ \ \ &&\text{if } -2.5 \leq x_i \leq 2.5\\ - &= 0 \ \ \ \ &&\text{otherwise} - """ - return np.where((x >= -2.5) & (x <= 2.5), 0.2, 0) - - def grad2(self, x): - r""" - Evaluate the second derivative of the hard sigmoid activation on the elements - of input `x`. - - .. math:: - - \frac{\partial^2 \text{HardSigmoid}}{\partial x_i^2} = 0 - """ - return np.zeros_like(x) - - -class SoftPlus(ActivationBase): - def __init__(self): - """ - A softplus activation function. - - Notes - ----- - In contrast to :class:`ReLU`, the softplus activation is differentiable - everywhere (including 0). It is, however, less computationally efficient to - compute. - - The derivative of the softplus activation is the logistic sigmoid. - """ - super().__init__() - - def __str__(self): - """Return a string representation of the activation function""" - return "SoftPlus" - - def fn(self, z): - r""" - Evaluate the softplus activation on the elements of input `z`. - - .. math:: - - \text{SoftPlus}(z_i) = \log(1 + e^{z_i}) - """ - return np.log(np.exp(z) + 1) - - def grad(self, x): - r""" - Evaluate the first derivative of the softplus activation on the elements - of input `x`. - - .. math:: - - \frac{\partial \text{SoftPlus}}{\partial x_i} = \frac{e^{x_i}}{1 + e^{x_i}} - """ - exp_x = np.exp(x) - return exp_x / (exp_x + 1) - - def grad2(self, x): - r""" - Evaluate the second derivative of the softplus activation on the elements - of input `x`. - - .. 
math:: - - \frac{\partial^2 \text{SoftPlus}}{\partial x_i^2} = - \frac{e^{x_i}}{(1 + e^{x_i})^2} - """ - exp_x = np.exp(x) - return exp_x / ((exp_x + 1) ** 2) diff --git a/aitk/keras/activations/img/plot.png b/aitk/keras/activations/img/plot.png deleted file mode 100644 index e77a10f..0000000 Binary files a/aitk/keras/activations/img/plot.png and /dev/null differ diff --git a/aitk/keras/callbacks.py b/aitk/keras/callbacks.py deleted file mode 100644 index 574c222..0000000 --- a/aitk/keras/callbacks.py +++ /dev/null @@ -1,225 +0,0 @@ -# -*- coding: utf-8 -*- -# ************************************************************** -# aitk.keras: A Python Keras model API -# -# Copyright (c) 2021 AITK Developers -# -# https://github.com/ArtificialIntelligenceToolkit/aitk.keras -# -# ************************************************************** - -class Callback: - def __init__(self): - self.validation_data = None - self.model = None - - def set_params(self, params): - self.params = params - - def set_model(self, model): - self.model = model - - def on_batch_begin(self, batch, logs=None): - """A backwards compatibility alias for `on_train_batch_begin`.""" - - def on_batch_end(self, batch, logs=None): - """A backwards compatibility alias for `on_train_batch_end`.""" - - def on_epoch_begin(self, epoch, logs=None): - """Called at the start of an epoch. - - Subclasses should override for any actions to run. This - function should only be called during TRAIN mode. - - Args: - epoch: Integer, index of epoch. - logs: Dict. Currently no data is passed to this argument for - this method but that may change in the future. - """ - - def on_epoch_end(self, epoch, logs=None): - """Called at the end of an epoch. - - Subclasses should override for any actions to run. This function - should only be called during TRAIN mode. - - Args: - epoch: Integer, index of epoch. - logs: Dict, metric results for this training epoch, and for the - validation epoch if validation is performed. Validation result keys - are prefixed with `val_`. For training epoch, the values of the - `Model`'s metrics are returned. Example : `{'loss': 0.2, 'accuracy': - 0.7}`. - """ - - def on_train_batch_begin(self, batch, logs=None): - """Called at the beginning of a training batch in `fit` methods. - - Subclasses should override for any actions to run. - - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. - - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. - """ - # For backwards compatibility. - self.on_batch_begin(batch, logs=logs) - - def on_train_batch_end(self, batch, logs=None): - """Called at the end of a training batch in `fit` methods. - - Subclasses should override for any actions to run. - - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. - - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Aggregated metric results up until this batch. - """ - # For backwards compatibility. - self.on_batch_end(batch, logs=logs) - - - def on_test_batch_begin(self, batch, logs=None): - """Called at the beginning of a batch in `evaluate` methods. - - Also called at the beginning of a validation batch in the `fit` - methods, if validation data is provided. 
- - Subclasses should override for any actions to run. - - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. - - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_test_batch_end(self, batch, logs=None): - """Called at the end of a batch in `evaluate` methods. - - Also called at the end of a validation batch in the `fit` - methods, if validation data is provided. - - Subclasses should override for any actions to run. - - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. - - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Aggregated metric results up until this batch. - """ - - def on_predict_batch_begin(self, batch, logs=None): - """Called at the beginning of a batch in `predict` methods. - - Subclasses should override for any actions to run. - - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. - - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_predict_batch_end(self, batch, logs=None): - """Called at the end of a batch in `predict` methods. - - Subclasses should override for any actions to run. - - Note that if the `steps_per_execution` argument to `compile` in - `tf.keras.Model` is set to `N`, this method will only be called every `N` - batches. - - Args: - batch: Integer, index of batch within the current epoch. - logs: Dict. Aggregated metric results up until this batch. - """ - - def on_train_begin(self, logs=None): - """Called at the beginning of training. - - Subclasses should override for any actions to run. - - Args: - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_train_end(self, logs=None): - """Called at the end of training. - - Subclasses should override for any actions to run. - - Args: - logs: Dict. Currently the output of the last call to `on_epoch_end()` - is passed to this argument for this method but that may change in - the future. - """ - - def on_test_begin(self, logs=None): - """Called at the beginning of evaluation or validation. - - Subclasses should override for any actions to run. - - Args: - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_test_end(self, logs=None): - """Called at the end of evaluation or validation. - - Subclasses should override for any actions to run. - - Args: - logs: Dict. Currently the output of the last call to - `on_test_batch_end()` is passed to this argument for this method - but that may change in the future. - """ - - def on_predict_begin(self, logs=None): - """Called at the beginning of prediction. - - Subclasses should override for any actions to run. - - Args: - logs: Dict. Currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_predict_end(self, logs=None): - """Called at the end of prediction. - - Subclasses should override for any actions to run. - - Args: - logs: Dict. 
Currently no data is passed to this argument for this method - but that may change in the future. - """ - -class History(Callback): - def __init__(self): - super().__init__() - self.history = {} - - def on_epoch_end(self, epoch, logs=None): - if logs: - for metric in logs: - if metric not in self.history: - self.history[metric] = [] - self.history[metric].append(logs[metric]) diff --git a/aitk/keras/datasets/BUILD b/aitk/keras/datasets/BUILD deleted file mode 100644 index af31da0..0000000 --- a/aitk/keras/datasets/BUILD +++ /dev/null @@ -1,38 +0,0 @@ -# Description: -# Contains the Keras datasets package (internal TensorFlow version). - -package( - default_visibility = [ - "//keras:__subpackages__", - ], - licenses = ["notice"], -) - -filegroup( - name = "all_py_srcs", - srcs = glob(["*.py"]), - visibility = ["//keras/google/private_tf_api_test:__pkg__"], -) - -py_library( - name = "datasets", - srcs = [ - "__init__.py", - "boston_housing.py", - "cifar.py", - "cifar10.py", - "cifar100.py", - "fashion_mnist.py", - "imdb.py", - "mnist.py", - "reuters.py", - ], - srcs_version = "PY3", - visibility = ["//visibility:public"], - deps = [ - "//:expect_numpy_installed", - "//:expect_tensorflow_installed", - "//keras:backend", - "//keras/utils:engine_utils", - ], -) diff --git a/aitk/keras/datasets/__init__.py b/aitk/keras/datasets/__init__.py deleted file mode 100644 index 098bf7b..0000000 --- a/aitk/keras/datasets/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Small NumPy datasets for debugging/testing.""" - diff --git a/aitk/keras/datasets/boston_housing.py b/aitk/keras/datasets/boston_housing.py deleted file mode 100644 index 0ac42bd..0000000 --- a/aitk/keras/datasets/boston_housing.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Boston housing price regression dataset.""" - -import numpy as np - -from .utils import get_file, get_file_async - - -def load_data(path='boston_housing.npz', test_split=0.2, seed=113): - """Loads the Boston Housing dataset. - - This is a dataset taken from the StatLib library which is maintained at - Carnegie Mellon University. - - Samples contain 13 attributes of houses at different locations around the - Boston suburbs in the late 1970s. Targets are the median values of - the houses at a location (in k$). - - The attributes themselves are defined in the - [StatLib website](http://lib.stat.cmu.edu/datasets/boston). - - Args: - path: path where to cache the dataset locally - (relative to `~/.keras/datasets`). - test_split: fraction of the data to reserve as test set. - seed: Random seed for shuffling the data - before computing the test split. - - Returns: - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train, x_test**: numpy arrays with shape `(num_samples, 13)` - containing either the training samples (for x_train), - or test samples (for y_train). 
- - **y_train, y_test**: numpy arrays of shape `(num_samples,)` containing the - target scalars. The targets are float scalars typically between 10 and - 50 that represent the home prices in k$. - """ - assert 0 <= test_split < 1 - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file( - path, - origin=origin_folder + 'boston_housing.npz', - file_hash= - 'f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5') - with np.load(path, allow_pickle=True) as f: # pylint: disable=unexpected-keyword-arg - x = f['x'] - y = f['y'] - - rng = np.random.RandomState(seed) - indices = np.arange(len(x)) - rng.shuffle(indices) - x = x[indices] - y = y[indices] - - x_train = np.array(x[:int(len(x) * (1 - test_split))]) - y_train = np.array(y[:int(len(x) * (1 - test_split))]) - x_test = np.array(x[int(len(x) * (1 - test_split)):]) - y_test = np.array(y[int(len(x) * (1 - test_split)):]) - return (x_train, y_train), (x_test, y_test) diff --git a/aitk/keras/datasets/cifar.py b/aitk/keras/datasets/cifar.py deleted file mode 100644 index af4f44b..0000000 --- a/aitk/keras/datasets/cifar.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Utilities common to CIFAR10 and CIFAR100 datasets.""" - -import _pickle as cPickle - - -def load_batch(fpath, label_key='labels'): - """Internal utility for parsing CIFAR data. - - Args: - fpath: path the file to parse. - label_key: key for label data in the retrieve - dictionary. - - Returns: - A tuple `(data, labels)`. - """ - with open(fpath, 'rb') as f: - d = cPickle.load(f, encoding='bytes') - # decode utf8 - d_decoded = {} - for k, v in d.items(): - d_decoded[k.decode('utf8')] = v - d = d_decoded - data = d['data'] - labels = d[label_key] - - data = data.reshape(data.shape[0], 3, 32, 32) - return data, labels diff --git a/aitk/keras/datasets/cifar10.py b/aitk/keras/datasets/cifar10.py deleted file mode 100644 index bd4af25..0000000 --- a/aitk/keras/datasets/cifar10.py +++ /dev/null @@ -1,107 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""CIFAR10 small images classification dataset.""" - -import os - -import numpy as np - -from ..backend import image_data_format -from .cifar import load_batch -from .utils import get_file - - -def load_data(): - """Loads the CIFAR10 dataset. - - This is a dataset of 50,000 32x32 color training images and 10,000 test - images, labeled over 10 categories. See more info at the - [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html). - - The classes are: - - | Label | Description | - |:-----:|-------------| - | 0 | airplane | - | 1 | automobile | - | 2 | bird | - | 3 | cat | - | 4 | deer | - | 5 | dog | - | 6 | frog | - | 7 | horse | - | 8 | ship | - | 9 | truck | - - Returns: - Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train**: uint8 NumPy array of grayscale image data with shapes - `(50000, 32, 32, 3)`, containing the training data. Pixel values range - from 0 to 255. - - **y_train**: uint8 NumPy array of labels (integers in range 0-9) - with shape `(50000, 1)` for the training data. - - **x_test**: uint8 NumPy array of grayscale image data with shapes - `(10000, 32, 32, 3)`, containing the test data. Pixel values range - from 0 to 255. - - **y_test**: uint8 NumPy array of labels (integers in range 0-9) - with shape `(10000, 1)` for the test data. - - Example: - - ```python - (x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data() - assert x_train.shape == (50000, 32, 32, 3) - assert x_test.shape == (10000, 32, 32, 3) - assert y_train.shape == (50000, 1) - assert y_test.shape == (10000, 1) - ``` - """ - dirname = 'cifar-10-batches-py' - origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' - path = get_file( - dirname, - origin=origin, - untar=True, - file_hash= - '6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce') - - num_train_samples = 50000 - - x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8') - y_train = np.empty((num_train_samples,), dtype='uint8') - - for i in range(1, 6): - fpath = os.path.join(path, 'data_batch_' + str(i)) - (x_train[(i - 1) * 10000:i * 10000, :, :, :], - y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath) - - fpath = os.path.join(path, 'test_batch') - x_test, y_test = load_batch(fpath) - - y_train = np.reshape(y_train, (len(y_train), 1)) - y_test = np.reshape(y_test, (len(y_test), 1)) - - if image_data_format() == 'channels_last': - x_train = x_train.transpose(0, 2, 3, 1) - x_test = x_test.transpose(0, 2, 3, 1) - - x_test = x_test.astype(x_train.dtype) - y_test = y_test.astype(y_train.dtype) - - return (x_train, y_train), (x_test, y_test) diff --git a/aitk/keras/datasets/cifar100.py b/aitk/keras/datasets/cifar100.py deleted file mode 100644 index 59bfee0..0000000 --- a/aitk/keras/datasets/cifar100.py +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""CIFAR100 small images classification dataset.""" - -import os - -import numpy as np - -from ..backend import image_data_format -from .cifar import load_batch -from .utils import get_file - - -def load_data(label_mode='fine'): - """Loads the CIFAR100 dataset. - - This is a dataset of 50,000 32x32 color training images and - 10,000 test images, labeled over 100 fine-grained classes that are - grouped into 20 coarse-grained classes. See more info at the - [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html). - - Args: - label_mode: one of "fine", "coarse". If it is "fine" the category labels - are the fine-grained labels, if it is "coarse" the output labels are the - coarse-grained superclasses. - - Returns: - Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train**: uint8 NumPy array of grayscale image data with shapes - `(50000, 32, 32, 3)`, containing the training data. Pixel values range - from 0 to 255. - - **y_train**: uint8 NumPy array of labels (integers in range 0-99) - with shape `(50000, 1)` for the training data. - - **x_test**: uint8 NumPy array of grayscale image data with shapes - `(10000, 32, 32, 3)`, containing the test data. Pixel values range - from 0 to 255. - - **y_test**: uint8 NumPy array of labels (integers in range 0-99) - with shape `(10000, 1)` for the test data. - - Example: - - ```python - (x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data() - assert x_train.shape == (50000, 32, 32, 3) - assert x_test.shape == (10000, 32, 32, 3) - assert y_train.shape == (50000, 1) - assert y_test.shape == (10000, 1) - ``` - """ - if label_mode not in ['fine', 'coarse']: - raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`. ' - f'Received: label_mode={label_mode}.') - - dirname = 'cifar-100-python' - origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' - path = get_file( - dirname, - origin=origin, - untar=True, - file_hash= - '85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7') - - fpath = os.path.join(path, 'train') - x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels') - - fpath = os.path.join(path, 'test') - x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels') - - y_train = np.reshape(y_train, (len(y_train), 1)) - y_test = np.reshape(y_test, (len(y_test), 1)) - - if image_data_format() == 'channels_last': - x_train = x_train.transpose(0, 2, 3, 1) - x_test = x_test.transpose(0, 2, 3, 1) - - return (x_train, y_train), (x_test, y_test) diff --git a/aitk/keras/datasets/fashion_mnist.py b/aitk/keras/datasets/fashion_mnist.py deleted file mode 100644 index 31bf238..0000000 --- a/aitk/keras/datasets/fashion_mnist.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""Fashion-MNIST dataset.""" - -import gzip -import os - -import numpy as np - -from .utils import get_file - - -def load_data(): - """Loads the Fashion-MNIST dataset. - - This is a dataset of 60,000 28x28 grayscale images of 10 fashion categories, - along with a test set of 10,000 images. This dataset can be used as - a drop-in replacement for MNIST. - - The classes are: - - | Label | Description | - |:-----:|-------------| - | 0 | T-shirt/top | - | 1 | Trouser | - | 2 | Pullover | - | 3 | Dress | - | 4 | Coat | - | 5 | Sandal | - | 6 | Shirt | - | 7 | Sneaker | - | 8 | Bag | - | 9 | Ankle boot | - - Returns: - Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train**: uint8 NumPy array of grayscale image data with shapes - `(60000, 28, 28)`, containing the training data. - - **y_train**: uint8 NumPy array of labels (integers in range 0-9) - with shape `(60000,)` for the training data. - - **x_test**: uint8 NumPy array of grayscale image data with shapes - (10000, 28, 28), containing the test data. - - **y_test**: uint8 NumPy array of labels (integers in range 0-9) - with shape `(10000,)` for the test data. - - Example: - - ```python - (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() - assert x_train.shape == (60000, 28, 28) - assert x_test.shape == (10000, 28, 28) - assert y_train.shape == (60000,) - assert y_test.shape == (10000,) - ``` - - License: - The copyright for Fashion-MNIST is held by Zalando SE. - Fashion-MNIST is licensed under the [MIT license]( - https://github.com/zalandoresearch/fashion-mnist/blob/master/LICENSE). - - """ - dirname = os.path.join('datasets', 'fashion-mnist') - base = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - files = [ - 'train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', - 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz' - ] - - paths = [] - for fname in files: - paths.append(get_file(fname, origin=base + fname, cache_subdir=dirname)) - - with gzip.open(paths[0], 'rb') as lbpath: - y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8) - - with gzip.open(paths[1], 'rb') as imgpath: - x_train = np.frombuffer( - imgpath.read(), np.uint8, offset=16).reshape(len(y_train), 28, 28) - - with gzip.open(paths[2], 'rb') as lbpath: - y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8) - - with gzip.open(paths[3], 'rb') as imgpath: - x_test = np.frombuffer( - imgpath.read(), np.uint8, offset=16).reshape(len(y_test), 28, 28) - - return (x_train, y_train), (x_test, y_test) diff --git a/aitk/keras/datasets/imdb.py b/aitk/keras/datasets/imdb.py deleted file mode 100644 index 1074cd2..0000000 --- a/aitk/keras/datasets/imdb.py +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== -"""IMDB sentiment classification dataset.""" - -import json - -from .utils import get_file - -import numpy as np -from keras_preprocessing import sequence -_remove_long_seq = sequence._remove_long_seq - -def load_data(path='imdb.npz', - num_words=None, - skip_top=0, - maxlen=None, - seed=113, - start_char=1, - oov_char=2, - index_from=3, - **kwargs): - """Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). - - This is a dataset of 25,000 movies reviews from IMDB, labeled by sentiment - (positive/negative). Reviews have been preprocessed, and each review is - encoded as a list of word indexes (integers). - For convenience, words are indexed by overall frequency in the dataset, - so that for instance the integer "3" encodes the 3rd most frequent word in - the data. This allows for quick filtering operations such as: - "only consider the top 10,000 most - common words, but eliminate the top 20 most common words". - - As a convention, "0" does not stand for a specific word, but instead is used - to encode any unknown word. - - Args: - path: where to cache the data (relative to `~/.keras/dataset`). - num_words: integer or None. Words are - ranked by how often they occur (in the training set) and only - the `num_words` most frequent words are kept. Any less frequent word - will appear as `oov_char` value in the sequence data. If None, - all words are kept. Defaults to None, so all words are kept. - skip_top: skip the top N most frequently occurring words - (which may not be informative). These words will appear as - `oov_char` value in the dataset. Defaults to 0, so no words are - skipped. - maxlen: int or None. Maximum sequence length. - Any longer sequence will be truncated. Defaults to None, which - means no truncation. - seed: int. Seed for reproducible data shuffling. - start_char: int. The start of a sequence will be marked with this - character. Defaults to 1 because 0 is usually the padding character. - oov_char: int. The out-of-vocabulary character. - Words that were cut out because of the `num_words` or - `skip_top` limits will be replaced with this character. - index_from: int. Index actual words with this index and higher. - **kwargs: Used for backwards compatibility. - - Returns: - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train, x_test**: lists of sequences, which are lists of indexes - (integers). If the num_words argument was specific, the maximum - possible index value is `num_words - 1`. If the `maxlen` argument was - specified, the largest possible sequence length is `maxlen`. - - **y_train, y_test**: lists of integer labels (1 or 0). - - Raises: - ValueError: in case `maxlen` is so low - that no input sequence could be kept. - - Note that the 'out of vocabulary' character is only used for - words that were present in the training set but are not included - because they're not making the `num_words` cut here. - Words that were not seen in the training set but are in the test set - have simply been skipped. 
- """ - # Legacy support - if 'nb_words' in kwargs: - print('The `nb_words` argument in `load_data` ' - 'has been renamed `num_words`.') - num_words = kwargs.pop('nb_words') - if kwargs: - raise TypeError(f'Unrecognized keyword arguments: {str(kwargs)}.') - - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file( - path, - origin=origin_folder + 'imdb.npz', - file_hash= - '69664113be75683a8fe16e3ed0ab59fda8886cb3cd7ada244f7d9544e4676b9f') - with np.load(path, allow_pickle=True) as f: # pylint: disable=unexpected-keyword-arg - x_train, labels_train = f['x_train'], f['y_train'] - x_test, labels_test = f['x_test'], f['y_test'] - - rng = np.random.RandomState(seed) - indices = np.arange(len(x_train)) - rng.shuffle(indices) - x_train = x_train[indices] - labels_train = labels_train[indices] - - indices = np.arange(len(x_test)) - rng.shuffle(indices) - x_test = x_test[indices] - labels_test = labels_test[indices] - - if start_char is not None: - x_train = [[start_char] + [w + index_from for w in x] for x in x_train] - x_test = [[start_char] + [w + index_from for w in x] for x in x_test] - elif index_from: - x_train = [[w + index_from for w in x] for x in x_train] - x_test = [[w + index_from for w in x] for x in x_test] - - if maxlen: - x_train, labels_train = _remove_long_seq(maxlen, x_train, labels_train) - x_test, labels_test = _remove_long_seq(maxlen, x_test, labels_test) - if not x_train or not x_test: - raise ValueError('After filtering for sequences shorter than maxlen=' - f'{str(maxlen)}, no sequence was kept. Increase maxlen.') - - xs = x_train + x_test - labels = np.concatenate([labels_train, labels_test]) - - if not num_words: - num_words = max(max(x) for x in xs) - - # by convention, use 2 as OOV word - # reserve 'index_from' (=3 by default) characters: - # 0 (padding), 1 (start), 2 (OOV) - if oov_char is not None: - xs = [ - [w if (skip_top <= w < num_words) else oov_char for w in x] for x in xs - ] - else: - xs = [[w for w in x if skip_top <= w < num_words] for x in xs] - - idx = len(x_train) - x_train, y_train = np.array(xs[:idx], dtype='object'), labels[:idx] - x_test, y_test = np.array(xs[idx:], dtype='object'), labels[idx:] - return (x_train, y_train), (x_test, y_test) - - -def get_word_index(path='imdb_word_index.json'): - """Retrieves a dict mapping words to their index in the IMDB dataset. - - Args: - path: where to cache the data (relative to `~/.keras/dataset`). - - Returns: - The word index dictionary. Keys are word strings, values are their index. - - Example: - - ```python - # Retrieve the training sequences. - (x_train, _), _ = keras.datasets.imdb.load_data() - # Retrieve the word index file mapping words to indices - word_index = keras.datasets.imdb.get_word_index() - # Reverse the word index to obtain a dict mapping indices to words - inverted_word_index = dict((i, word) for (word, i) in word_index.items()) - # Decode the first sequence in the dataset - decoded_sequence = " ".join(inverted_word_index[i] for i in x_train[0]) - ``` - """ - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file( - path, - origin=origin_folder + 'imdb_word_index.json', - file_hash='bfafd718b763782e994055a2d397834f') - with open(path) as f: - return json.load(f) diff --git a/aitk/keras/datasets/mnist.py b/aitk/keras/datasets/mnist.py deleted file mode 100644 index 69de521..0000000 --- a/aitk/keras/datasets/mnist.py +++ /dev/null @@ -1,152 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. 
All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""MNIST handwritten digits dataset.""" - -import numpy as np -import os - -from .utils import get_file, get_file_async - -origin_folders = [ - ('https://storage.googleapis.com/tensorflow/tf-keras-datasets/', '731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1'), - ("https://raw.githubusercontent.com/ArtificialIntelligenceToolkit/datasets/master/mnist/", None), -] - -def load_data(path='mnist.npz'): - """Loads the MNIST dataset. - - This is a dataset of 60,000 28x28 grayscale images of the 10 digits, - along with a test set of 10,000 images. - More info can be found at the - [MNIST homepage](http://yann.lecun.com/exdb/mnist/). - - Args: - path: path where to cache the dataset locally - (relative to `~/.keras/datasets`). - - Returns: - Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train**: uint8 NumPy array of grayscale image data with shapes - `(60000, 28, 28)`, containing the training data. Pixel values range - from 0 to 255. - - **y_train**: uint8 NumPy array of digit labels (integers in range 0-9) - with shape `(60000,)` for the training data. - - **x_test**: uint8 NumPy array of grayscale image data with shapes - (10000, 28, 28), containing the test data. Pixel values range - from 0 to 255. - - **y_test**: uint8 NumPy array of digit labels (integers in range 0-9) - with shape `(10000,)` for the test data. - - Example: - - ```python - (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() - assert x_train.shape == (60000, 28, 28) - assert x_test.shape == (10000, 28, 28) - assert y_train.shape == (60000,) - assert y_test.shape == (10000,) - ``` - - License: - Yann LeCun and Corinna Cortes hold the copyright of MNIST dataset, - which is a derivative work from original NIST datasets. - MNIST dataset is made available under the terms of the - [Creative Commons Attribution-Share Alike 3.0 license.]( - https://creativecommons.org/licenses/by-sa/3.0/) - """ - for origin_folder, file_hash in origin_folders: - download_path = None - try: - download_path = get_file( - path, - origin=origin_folder + 'mnist.npz', - file_hash=file_hash) - except Exception: - print("Failed dataset download; trying another URL...") - continue - - if download_path and os.path.isfile(download_path): - with np.load(download_path, allow_pickle=True) as f: - x_train, y_train = f['x_train'], f['y_train'] - x_test, y_test = f['x_test'], f['y_test'] - return (x_train, y_train), (x_test, y_test) - - -async def load_data_async(path='mnist.npz'): - """Loads the MNIST dataset. - - This is a dataset of 60,000 28x28 grayscale images of the 10 digits, - along with a test set of 10,000 images. - More info can be found at the - [MNIST homepage](http://yann.lecun.com/exdb/mnist/). - - Args: - path: path where to cache the dataset locally - (relative to `~/.keras/datasets`). 
- - Returns: - Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train**: uint8 NumPy array of grayscale image data with shapes - `(60000, 28, 28)`, containing the training data. Pixel values range - from 0 to 255. - - **y_train**: uint8 NumPy array of digit labels (integers in range 0-9) - with shape `(60000,)` for the training data. - - **x_test**: uint8 NumPy array of grayscale image data with shapes - (10000, 28, 28), containing the test data. Pixel values range - from 0 to 255. - - **y_test**: uint8 NumPy array of digit labels (integers in range 0-9) - with shape `(10000,)` for the test data. - - Example: - - ```python - (x_train, y_train), (x_test, y_test) = await keras.datasets.mnist.load_data_async() - assert x_train.shape == (60000, 28, 28) - assert x_test.shape == (10000, 28, 28) - assert y_train.shape == (60000,) - assert y_test.shape == (10000,) - ``` - - License: - Yann LeCun and Corinna Cortes hold the copyright of MNIST dataset, - which is a derivative work from original NIST datasets. - MNIST dataset is made available under the terms of the - [Creative Commons Attribution-Share Alike 3.0 license.]( - https://creativecommons.org/licenses/by-sa/3.0/) - """ - for origin_folder, file_hash in origin_folders: - download_path = None - if not os.path.isfile(path): - try: - download_path = await get_file_async(origin_folder, path) - except Exception: - print("Failed dataset download; trying another URL...") - continue - else: - download_path = path - - if download_path and os.path.isfile(download_path): - with np.load(download_path, allow_pickle=True) as f: - x_train, y_train = f['x_train'], f['y_train'] - x_test, y_test = f['x_test'], f['y_test'] - return (x_train, y_train), (x_test, y_test) diff --git a/aitk/keras/datasets/reuters.py b/aitk/keras/datasets/reuters.py deleted file mode 100644 index a649a75..0000000 --- a/aitk/keras/datasets/reuters.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Reuters topic classification dataset.""" - -import json - -import numpy as np - -from keras_preprocessing import sequence -_remove_long_seq = sequence._remove_long_seq - -from .utils import get_file - - -def load_data(path='reuters.npz', - num_words=None, - skip_top=0, - maxlen=None, - test_split=0.2, - seed=113, - start_char=1, - oov_char=2, - index_from=3, - **kwargs): - """Loads the Reuters newswire classification dataset. - - This is a dataset of 11,228 newswires from Reuters, labeled over 46 topics. - - This was originally generated by parsing and preprocessing the classic - Reuters-21578 dataset, but the preprocessing code is no longer packaged - with Keras. See this - [github discussion](https://github.com/keras-team/keras/issues/12072) - for more info. - - Each newswire is encoded as a list of word indexes (integers). 
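A usage sketch for the asynchronous loader, assuming the pre-3.0 `aitk.keras` package in a Pyodide/JupyterLite-style environment, where the `js` module used by the fetch fallback and top-level `await` are available:

```python
# Sketch: top-level await works in Pyodide/Jupyter; wrap in an async def elsewhere.
from aitk.keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = await mnist.load_data_async()

# Scale pixel values to [0, 1] before feeding a network.
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0
print(x_train.shape, y_train.shape)  # (60000, 28, 28) (60000,)
```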
- For convenience, words are indexed by overall frequency in the dataset, - so that for instance the integer "3" encodes the 3rd most frequent word in - the data. This allows for quick filtering operations such as: - "only consider the top 10,000 most - common words, but eliminate the top 20 most common words". - - As a convention, "0" does not stand for a specific word, but instead is used - to encode any unknown word. - - Args: - path: where to cache the data (relative to `~/.keras/dataset`). - num_words: integer or None. Words are - ranked by how often they occur (in the training set) and only - the `num_words` most frequent words are kept. Any less frequent word - will appear as `oov_char` value in the sequence data. If None, - all words are kept. Defaults to None, so all words are kept. - skip_top: skip the top N most frequently occurring words - (which may not be informative). These words will appear as - `oov_char` value in the dataset. Defaults to 0, so no words are - skipped. - maxlen: int or None. Maximum sequence length. - Any longer sequence will be truncated. Defaults to None, which - means no truncation. - test_split: Float between 0 and 1. Fraction of the dataset to be used - as test data. Defaults to 0.2, meaning 20% of the dataset is used as - test data. - seed: int. Seed for reproducible data shuffling. - start_char: int. The start of a sequence will be marked with this - character. Defaults to 1 because 0 is usually the padding character. - oov_char: int. The out-of-vocabulary character. - Words that were cut out because of the `num_words` or - `skip_top` limits will be replaced with this character. - index_from: int. Index actual words with this index and higher. - **kwargs: Used for backwards compatibility. - - Returns: - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - - **x_train, x_test**: lists of sequences, which are lists of indexes - (integers). If the num_words argument was specific, the maximum - possible index value is `num_words - 1`. If the `maxlen` argument was - specified, the largest possible sequence length is `maxlen`. - - **y_train, y_test**: lists of integer labels (1 or 0). - - Note: The 'out of vocabulary' character is only used for - words that were present in the training set but are not included - because they're not making the `num_words` cut here. - Words that were not seen in the training set but are in the test set - have simply been skipped. 
- """ - # Legacy support - if 'nb_words' in kwargs: - print('The `nb_words` argument in `load_data` ' - 'has been renamed `num_words`.') - num_words = kwargs.pop('nb_words') - if kwargs: - raise TypeError(f'Unrecognized keyword arguments: {str(kwargs)}') - - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file( - path, - origin=origin_folder + 'reuters.npz', - file_hash= - 'd6586e694ee56d7a4e65172e12b3e987c03096cb01eab99753921ef915959916') - with np.load(path, allow_pickle=True) as f: # pylint: disable=unexpected-keyword-arg - xs, labels = f['x'], f['y'] - - rng = np.random.RandomState(seed) - indices = np.arange(len(xs)) - rng.shuffle(indices) - xs = xs[indices] - labels = labels[indices] - - if start_char is not None: - xs = [[start_char] + [w + index_from for w in x] for x in xs] - elif index_from: - xs = [[w + index_from for w in x] for x in xs] - - if maxlen: - xs, labels = _remove_long_seq(maxlen, xs, labels) - - if not num_words: - num_words = max(max(x) for x in xs) - - # by convention, use 2 as OOV word - # reserve 'index_from' (=3 by default) characters: - # 0 (padding), 1 (start), 2 (OOV) - if oov_char is not None: - xs = [[w if skip_top <= w < num_words else oov_char for w in x] for x in xs] - else: - xs = [[w for w in x if skip_top <= w < num_words] for x in xs] - - idx = int(len(xs) * (1 - test_split)) - x_train, y_train = np.array(xs[:idx], dtype='object'), np.array(labels[:idx]) - x_test, y_test = np.array(xs[idx:], dtype='object'), np.array(labels[idx:]) - - return (x_train, y_train), (x_test, y_test) - - -def get_word_index(path='reuters_word_index.json'): - """Retrieves a dict mapping words to their index in the Reuters dataset. - - Args: - path: where to cache the data (relative to `~/.keras/dataset`). - - Returns: - The word index dictionary. Keys are word strings, values are their index. - """ - origin_folder = 'https://storage.googleapis.com/tensorflow/tf-keras-datasets/' - path = get_file( - path, - origin=origin_folder + 'reuters_word_index.json', - file_hash='4d44cc38712099c9e383dc6e5f11a921') - with open(path) as f: - return json.load(f) diff --git a/aitk/keras/datasets/utils.py b/aitk/keras/datasets/utils.py deleted file mode 100644 index 41bbb37..0000000 --- a/aitk/keras/datasets/utils.py +++ /dev/null @@ -1,871 +0,0 @@ -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================== - -"""Utilities for file download and caching.""" - -from abc import abstractmethod -from contextlib import closing -import functools -import hashlib -import multiprocessing.dummy -import os -import io -import pathlib -import queue -import random -import shutil -import tarfile -import threading -import time -import typing -import urllib -import weakref -import zipfile - -from six.moves.urllib.parse import urlsplit -import numpy as np -from six.moves.urllib.request import urlopen -from urllib.request import urlretrieve - -async def get_file_async(origin_folder, file_name): - try: - print("Downloading data from %s" % (origin_folder + file_name)) - import js - response = await js.fetch(origin_folder + file_name) - fp = io.BytesIO((await response.arrayBuffer()).to_py()) - bytes = fp.read() - with open(file_name, "wb") as fp: - fp.write(bytes) - except Exception: - print("Could not load dataset") - return - return file_name - -def path_to_string(path): - """Convert `PathLike` objects to their string representation. - - If given a non-string typed path object, converts it to its string - representation. - - If the object passed to `path` is not among the above, then it is - returned unchanged. This allows e.g. passthrough of file objects - through this function. - - Args: - path: `PathLike` object that represents a path - - Returns: - A string representation of the path argument, if Python support exists. - """ - if isinstance(path, os.PathLike): - return os.fspath(path) - return path - -def _extract_archive(file_path, path='.', archive_format='auto'): - """Extracts an archive if it matches tar, tar.gz, tar.bz, or zip formats. - - Args: - file_path: path to the archive file - path: path to extract the archive file - archive_format: Archive format to try for extracting the file. - Options are 'auto', 'tar', 'zip', and None. - 'tar' includes tar, tar.gz, and tar.bz files. - The default 'auto' is ['tar', 'zip']. - None or an empty list will return no matches found. - - Returns: - True if a match was found and an archive extraction was completed, - False otherwise. - """ - if archive_format is None: - return False - if archive_format == 'auto': - archive_format = ['tar', 'zip'] - if isinstance(archive_format, str): - archive_format = [archive_format] - - file_path = path_to_string(file_path) - path = path_to_string(path) - - for archive_type in archive_format: - if archive_type == 'tar': - open_fn = tarfile.open - is_match_fn = tarfile.is_tarfile - if archive_type == 'zip': - open_fn = zipfile.ZipFile - is_match_fn = zipfile.is_zipfile - - if is_match_fn(file_path): - with open_fn(file_path) as archive: - try: - archive.extractall(path) - except (tarfile.TarError, RuntimeError, KeyboardInterrupt): - if os.path.exists(path): - if os.path.isfile(path): - os.remove(path) - else: - shutil.rmtree(path) - raise - return True - return False - - -def get_file(fname=None, - origin=None, - untar=False, - md5_hash=None, - file_hash=None, - cache_subdir='datasets', - hash_algorithm='auto', - extract=False, - archive_format='auto', - cache_dir=None): - """Downloads a file from a URL if it not already in the cache. - - By default the file at the url `origin` is downloaded to the - cache_dir `~/.keras`, placed in the cache_subdir `datasets`, - and given the filename `fname`. The final location of a file - `example.txt` would therefore be `~/.keras/datasets/example.txt`. 
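A small sketch of how the `_extract_archive` helper above is called (pre-3.0 `aitk.keras`; the archive path is hypothetical and only illustrates the call shape):

```python
# Sketch: 'auto' tries tar (including .tar.gz/.tgz) first, then zip;
# returns False when neither format matches the file.
from aitk.keras.datasets.utils import _extract_archive

ok = _extract_archive("downloads/flower_photos.tgz", path="downloads",
                      archive_format="auto")
print("extracted:", ok)
```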
- - Files in tar, tar.gz, tar.bz, and zip formats can also be extracted. - Passing a hash will verify the file after download. The command line - programs `shasum` and `sha256sum` can compute the hash. - - Example: - - ```python - path_to_downloaded_file = tf.keras.utils.get_file( - "flower_photos", - "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz", - untar=True) - ``` - - Args: - fname: Name of the file. If an absolute path `/path/to/file.txt` is - specified the file will be saved at that location. If `None`, the - name of the file at `origin` will be used. - origin: Original URL of the file. - untar: Deprecated in favor of `extract` argument. - boolean, whether the file should be decompressed - md5_hash: Deprecated in favor of `file_hash` argument. - md5 hash of the file for verification - file_hash: The expected hash string of the file after download. - The sha256 and md5 hash algorithms are both supported. - cache_subdir: Subdirectory under the Keras cache dir where the file is - saved. If an absolute path `/path/to/folder` is - specified the file will be saved at that location. - hash_algorithm: Select the hash algorithm to verify the file. - options are `'md5'`, `'sha256'`, and `'auto'`. - The default 'auto' detects the hash algorithm in use. - extract: True tries extracting the file as an Archive, like tar or zip. - archive_format: Archive format to try for extracting the file. - Options are `'auto'`, `'tar'`, `'zip'`, and `None`. - `'tar'` includes tar, tar.gz, and tar.bz files. - The default `'auto'` corresponds to `['tar', 'zip']`. - None or an empty list will return no matches found. - cache_dir: Location to store cached files, when None it - defaults to the default directory `~/.keras/`. - - Returns: - Path to the downloaded file - """ - if origin is None: - raise ValueError('Please specify the "origin" argument (URL of the file ' - 'to download).') - - if cache_dir is None: - cache_dir = os.path.join(os.path.expanduser('~'), '.keras') - if md5_hash is not None and file_hash is None: - file_hash = md5_hash - hash_algorithm = 'md5' - datadir_base = os.path.expanduser(cache_dir) - if not os.access(datadir_base, os.W_OK): - datadir_base = os.path.join('/tmp', '.keras') - datadir = os.path.join(datadir_base, cache_subdir) - _makedirs_exist_ok(datadir) - - fname = path_to_string(fname) - if not fname: - fname = os.path.basename(urlsplit(origin).path) - if not fname: - raise ValueError( - f"Can't parse the file name from the origin provided: '{origin}'." - "Please specify the `fname` as the input param.") - - if untar: - if fname.endswith('.tar.gz'): - fname = pathlib.Path(fname) - # The 2 `.with_suffix()` are because of `.tar.gz` as pathlib - # considers it as 2 suffixes. - fname = fname.with_suffix('').with_suffix('') - fname = str(fname) - untar_fpath = os.path.join(datadir, fname) - fpath = untar_fpath + '.tar.gz' - else: - fpath = os.path.join(datadir, fname) - - download = False - if os.path.exists(fpath): - # File found; verify integrity if a hash was provided. 
- if file_hash is not None: - if not validate_file(fpath, file_hash, algorithm=hash_algorithm): - print('A local file was found, but it seems to be ' - 'incomplete or outdated because the ' + hash_algorithm + - ' file hash does not match the original value of ' + file_hash + - ' so we will re-download the data.') - download = True - else: - download = True - - if download: - print('Downloading data from', origin) - - error_msg = 'URL fetch failure on {}: {} -- {}' - try: - try: - urlretrieve(origin, fpath) - except urllib.error.HTTPError as e: - raise Exception(error_msg.format(origin, e.code, e.msg)) - except urllib.error.URLError as e: - raise Exception(error_msg.format(origin, e.errno, e.reason)) - except (Exception, KeyboardInterrupt) as e: - if os.path.exists(fpath): - os.remove(fpath) - raise - - if untar: - if not os.path.exists(untar_fpath): - _extract_archive(fpath, datadir, archive_format='tar') - return untar_fpath - - if extract: - _extract_archive(fpath, datadir, archive_format) - - return fpath - - -def _makedirs_exist_ok(datadir): - os.makedirs(datadir, exist_ok=True) # pylint: disable=unexpected-keyword-arg - - -def _resolve_hasher(algorithm, file_hash=None): - """Returns hash algorithm as hashlib function.""" - if algorithm == 'sha256': - return hashlib.sha256() - - if algorithm == 'auto' and file_hash is not None and len(file_hash) == 64: - return hashlib.sha256() - - # This is used only for legacy purposes. - return hashlib.md5() - - -def _hash_file(fpath, algorithm='sha256', chunk_size=65535): - """Calculates a file sha256 or md5 hash. - - Example: - - ```python - _hash_file('/path/to/file.zip') - 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' - ``` - - Args: - fpath: path to the file being validated - algorithm: hash algorithm, one of `'auto'`, `'sha256'`, or `'md5'`. - The default `'auto'` detects the hash algorithm in use. - chunk_size: Bytes to read at a time, important for large files. - - Returns: - The file hash - """ - if isinstance(algorithm, str): - hasher = _resolve_hasher(algorithm) - else: - hasher = algorithm - - with open(fpath, 'rb') as fpath_file: - for chunk in iter(lambda: fpath_file.read(chunk_size), b''): - hasher.update(chunk) - - return hasher.hexdigest() - - -def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535): - """Validates a file against a sha256 or md5 hash. - - Args: - fpath: path to the file being validated - file_hash: The expected hash string of the file. - The sha256 and md5 hash algorithms are both supported. - algorithm: Hash algorithm, one of 'auto', 'sha256', or 'md5'. - The default 'auto' detects the hash algorithm in use. - chunk_size: Bytes to read at a time, important for large files. - - Returns: - Whether the file is valid - """ - hasher = _resolve_hasher(algorithm, file_hash) - - if str(_hash_file(fpath, hasher, chunk_size)) == str(file_hash): - return True - else: - return False - - -class ThreadsafeIter: - """Wrap an iterator with a lock and propagate exceptions to all threads.""" - - def __init__(self, it): - self.it = it - self.lock = threading.Lock() - - # After a generator throws an exception all subsequent next() calls raise a - # StopIteration Exception. This, however, presents an issue when mixing - # generators and threading because it means the order of retrieval need not - # match the order in which the generator was called. This can make it appear - # that a generator exited normally when in fact the terminating exception is - # just in a different thread. 
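A sketch of the hashing helpers above, reusing the MNIST archive hash listed in `mnist.py` earlier in this diff (pre-3.0 `aitk.keras`; the local path is hypothetical):

```python
# Sketch: compute a file digest and validate it against an expected hash.
from aitk.keras.datasets.utils import _hash_file, validate_file

fpath = "downloads/mnist.npz"
print(_hash_file(fpath))  # sha256 hex digest by default

ok = validate_file(
    fpath,
    file_hash="731c5ac602752760c8e48fbffcf8c3b850d9dc2a2aedcf2cc48468fc17b673d1",
    algorithm="auto",  # a 64-character hash is treated as sha256
)
print("hash matches:", ok)
```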
In order to provide thread safety, once - # self.it has thrown an exception we continue to throw the same exception. - self._exception = None - - def __iter__(self): - return self - - def next(self): - return self.__next__() - - def __next__(self): - with self.lock: - if self._exception: - raise self._exception # pylint: disable=raising-bad-type - - try: - return next(self.it) - except Exception as e: - self._exception = e - raise - - -def threadsafe_generator(f): - - @functools.wraps(f) - def g(*a, **kw): - return ThreadsafeIter(f(*a, **kw)) - - return g - - -class Sequence: - """Base object for fitting to a sequence of data, such as a dataset. - - Every `Sequence` must implement the `__getitem__` and the `__len__` methods. - If you want to modify your dataset between epochs you may implement - `on_epoch_end`. - The method `__getitem__` should return a complete batch. - - Notes: - - `Sequence` are a safer way to do multiprocessing. This structure guarantees - that the network will only train once - on each sample per epoch which is not the case with generators. - - Examples: - - ```python - from skimage.io import imread - from skimage.transform import resize - import numpy as np - import math - - # Here, `x_set` is list of path to the images - # and `y_set` are the associated classes. - - class CIFAR10Sequence(Sequence): - - def __init__(self, x_set, y_set, batch_size): - self.x, self.y = x_set, y_set - self.batch_size = batch_size - - def __len__(self): - return math.ceil(len(self.x) / self.batch_size) - - def __getitem__(self, idx): - batch_x = self.x[idx * self.batch_size:(idx + 1) * - self.batch_size] - batch_y = self.y[idx * self.batch_size:(idx + 1) * - self.batch_size] - - return np.array([ - resize(imread(file_name), (200, 200)) - for file_name in batch_x]), np.array(batch_y) - ``` - """ - - @abstractmethod - def __getitem__(self, index): - """Gets batch at position `index`. - - Args: - index: position of the batch in the Sequence. - - Returns: - A batch - """ - raise NotImplementedError - - @abstractmethod - def __len__(self): - """Number of batch in the Sequence. - - Returns: - The number of batches in the Sequence. - """ - raise NotImplementedError - - def on_epoch_end(self): - """Method called at the end of every epoch. - """ - pass - - def __iter__(self): - """Create a generator that iterate over the Sequence.""" - for item in (self[i] for i in range(len(self))): - yield item - - -def iter_sequence_infinite(seq): - """Iterates indefinitely over a Sequence. - - Args: - seq: `Sequence` instance. - - Yields: - Batches of data from the `Sequence`. - """ - while True: - for item in seq: - yield item - - -# Global variables to be shared across processes -_SHARED_SEQUENCES = {} -# We use a Value to provide unique id to different processes. -_SEQUENCE_COUNTER = None - - -# Because multiprocessing pools are inherently unsafe, starting from a clean -# state can be essential to avoiding deadlocks. In order to accomplish this, we -# need to be able to check on the status of Pools that we create. -_DATA_POOLS = weakref.WeakSet() -_WORKER_ID_QUEUE = None # Only created if needed. 
-_WORKER_IDS = set() -_FORCE_THREADPOOL = False -_FORCE_THREADPOOL_LOCK = threading.RLock() - - -def dont_use_multiprocessing_pool(f): - @functools.wraps(f) - def wrapped(*args, **kwargs): - with _FORCE_THREADPOOL_LOCK: - global _FORCE_THREADPOOL - old_force_threadpool, _FORCE_THREADPOOL = _FORCE_THREADPOOL, True - out = f(*args, **kwargs) - _FORCE_THREADPOOL = old_force_threadpool - return out - return wrapped - - -def get_pool_class(use_multiprocessing): - global _FORCE_THREADPOOL - if not use_multiprocessing or _FORCE_THREADPOOL: - return multiprocessing.dummy.Pool # ThreadPool - return multiprocessing.Pool - - -def get_worker_id_queue(): - """Lazily create the queue to track worker ids.""" - global _WORKER_ID_QUEUE - if _WORKER_ID_QUEUE is None: - _WORKER_ID_QUEUE = multiprocessing.Queue() - return _WORKER_ID_QUEUE - - -def init_pool(seqs): - global _SHARED_SEQUENCES - _SHARED_SEQUENCES = seqs - - -def get_index(uid, i): - """Get the value from the Sequence `uid` at index `i`. - - To allow multiple Sequences to be used at the same time, we use `uid` to - get a specific one. A single Sequence would cause the validation to - overwrite the training Sequence. - - Args: - uid: int, Sequence identifier - i: index - - Returns: - The value at index `i`. - """ - return _SHARED_SEQUENCES[uid][i] - - -class SequenceEnqueuer: - """Base class to enqueue inputs. - - The task of an Enqueuer is to use parallelism to speed up preprocessing. - This is done with processes or threads. - - Example: - - ```python - enqueuer = SequenceEnqueuer(...) - enqueuer.start() - datas = enqueuer.get() - for data in datas: - # Use the inputs; training, evaluating, predicting. - # ... stop sometime. - enqueuer.stop() - ``` - - The `enqueuer.get()` should be an infinite stream of datas. - """ - - def __init__(self, sequence, - use_multiprocessing=False): - self.sequence = sequence - self.use_multiprocessing = use_multiprocessing - - global _SEQUENCE_COUNTER - if _SEQUENCE_COUNTER is None: - try: - _SEQUENCE_COUNTER = multiprocessing.Value('i', 0) - except OSError: - # In this case the OS does not allow us to use - # multiprocessing. We resort to an int - # for enqueuer indexing. - _SEQUENCE_COUNTER = 0 - - if isinstance(_SEQUENCE_COUNTER, int): - self.uid = _SEQUENCE_COUNTER - _SEQUENCE_COUNTER += 1 - else: - # Doing Multiprocessing.Value += x is not process-safe. - with _SEQUENCE_COUNTER.get_lock(): - self.uid = _SEQUENCE_COUNTER.value - _SEQUENCE_COUNTER.value += 1 - - self.workers = 0 - self.executor_fn = None - self.queue = None - self.run_thread = None - self.stop_signal = None - - def is_running(self): - return self.stop_signal is not None and not self.stop_signal.is_set() - - def start(self, workers=1, max_queue_size=10): - """Starts the handler's workers. - - Args: - workers: Number of workers. - max_queue_size: queue size - (when full, workers could block on `put()`) - """ - if self.use_multiprocessing: - self.executor_fn = self._get_executor_init(workers) - else: - # We do not need the init since it's threads. 
- self.executor_fn = lambda _: get_pool_class(False)(workers) - self.workers = workers - self.queue = queue.Queue(max_queue_size) - self.stop_signal = threading.Event() - self.run_thread = threading.Thread(target=self._run) - self.run_thread.daemon = True - self.run_thread.start() - - def _send_sequence(self): - """Sends current Iterable to all workers.""" - # For new processes that may spawn - _SHARED_SEQUENCES[self.uid] = self.sequence - - def stop(self, timeout=None): - """Stops running threads and wait for them to exit, if necessary. - - Should be called by the same thread which called `start()`. - - Args: - timeout: maximum time to wait on `thread.join()` - """ - self.stop_signal.set() - with self.queue.mutex: - self.queue.queue.clear() - self.queue.unfinished_tasks = 0 - self.queue.not_full.notify() - self.run_thread.join(timeout) - _SHARED_SEQUENCES[self.uid] = None - - def __del__(self): - if self.is_running(): - self.stop() - - @abstractmethod - def _run(self): - """Submits request to the executor and queue the `Future` objects.""" - raise NotImplementedError - - @abstractmethod - def _get_executor_init(self, workers): - """Gets the Pool initializer for multiprocessing. - - Args: - workers: Number of workers. - - Returns: - Function, a Function to initialize the pool - """ - raise NotImplementedError - - @abstractmethod - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. - # Returns - Generator yielding tuples `(inputs, targets)` - or `(inputs, targets, sample_weights)`. - """ - raise NotImplementedError - - -class OrderedEnqueuer(SequenceEnqueuer): - """Builds a Enqueuer from a Sequence. - - Args: - sequence: A `tf.keras.utils.data_utils.Sequence` object. - use_multiprocessing: use multiprocessing if True, otherwise threading - shuffle: whether to shuffle the data at the beginning of each epoch - """ - - def __init__(self, sequence, use_multiprocessing=False, shuffle=False): - super(OrderedEnqueuer, self).__init__(sequence, use_multiprocessing) - self.shuffle = shuffle - - def _get_executor_init(self, workers): - """Gets the Pool initializer for multiprocessing. - - Args: - workers: Number of workers. - - Returns: - Function, a Function to initialize the pool - """ - def pool_fn(seqs): - pool = get_pool_class(True)( - workers, initializer=init_pool_generator, - initargs=(seqs, None, get_worker_id_queue())) - _DATA_POOLS.add(pool) - return pool - - return pool_fn - - def _wait_queue(self): - """Wait for the queue to be empty.""" - while True: - time.sleep(0.1) - if self.queue.unfinished_tasks == 0 or self.stop_signal.is_set(): - return - - def _run(self): - """Submits request to the executor and queue the `Future` objects.""" - sequence = list(range(len(self.sequence))) - self._send_sequence() # Share the initial sequence - while True: - if self.shuffle: - random.shuffle(sequence) - - with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: - for i in sequence: - if self.stop_signal.is_set(): - return - - self.queue.put( - executor.apply_async(get_index, (self.uid, i)), block=True) - - # Done with the current epoch, waiting for the final batches - self._wait_queue() - - if self.stop_signal.is_set(): - # We're done - return - - # Call the internal on epoch end. - self.sequence.on_epoch_end() - self._send_sequence() # Update the pool - - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. - - Yields: - The next element in the queue, i.e. 
a tuple - `(inputs, targets)` or - `(inputs, targets, sample_weights)`. - """ - while self.is_running(): - try: - inputs = self.queue.get(block=True, timeout=5).get() - if self.is_running(): - self.queue.task_done() - if inputs is not None: - yield inputs - except queue.Empty: - pass - except Exception as e: # pylint: disable=broad-except - self.stop() - raise e - - -def init_pool_generator(gens, random_seed=None, id_queue=None): - """Initializer function for pool workers. - - Args: - gens: State which should be made available to worker processes. - random_seed: An optional value with which to seed child processes. - id_queue: A multiprocessing Queue of worker ids. This is used to indicate - that a worker process was created by Keras and can be terminated using - the cleanup_all_keras_forkpools utility. - """ - global _SHARED_SEQUENCES - _SHARED_SEQUENCES = gens - - worker_proc = multiprocessing.current_process() - - # name isn't used for anything, but setting a more descriptive name is helpful - # when diagnosing orphaned processes. - worker_proc.name = 'Keras_worker_{}'.format(worker_proc.name) - - if random_seed is not None: - np.random.seed(random_seed + worker_proc.ident) - - if id_queue is not None: - # If a worker dies during init, the pool will just create a replacement. - id_queue.put(worker_proc.ident, block=True, timeout=0.1) - - -def next_sample(uid): - """Gets the next value from the generator `uid`. - - To allow multiple generators to be used at the same time, we use `uid` to - get a specific one. A single generator would cause the validation to - overwrite the training generator. - - Args: - uid: int, generator identifier - - Returns: - The next value of generator `uid`. - """ - return next(_SHARED_SEQUENCES[uid]) - - -class GeneratorEnqueuer(SequenceEnqueuer): - """Builds a queue out of a data generator. - - The provided generator can be finite in which case the class will throw - a `StopIteration` exception. - - Args: - generator: a generator function which yields data - use_multiprocessing: use multiprocessing if True, otherwise threading - random_seed: Initial seed for workers, - will be incremented by one for each worker. - """ - - def __init__(self, generator, - use_multiprocessing=False, - random_seed=None): - super(GeneratorEnqueuer, self).__init__(generator, use_multiprocessing) - self.random_seed = random_seed - - def _get_executor_init(self, workers): - """Gets the Pool initializer for multiprocessing. - - Args: - workers: Number of works. - - Returns: - A Function to initialize the pool - """ - def pool_fn(seqs): - pool = get_pool_class(True)( - workers, initializer=init_pool_generator, - initargs=(seqs, self.random_seed, get_worker_id_queue())) - _DATA_POOLS.add(pool) - return pool - return pool_fn - - def _run(self): - """Submits request to the executor and queue the `Future` objects.""" - self._send_sequence() # Share the initial generator - with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: - while True: - if self.stop_signal.is_set(): - return - - self.queue.put( - executor.apply_async(next_sample, (self.uid,)), block=True) - - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. - - Yields: - The next element in the queue, i.e. a tuple - `(inputs, targets)` or - `(inputs, targets, sample_weights)`. 
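A sketch of driving an `OrderedEnqueuer` with a small `Sequence`, assuming the pre-3.0 `aitk.keras` package; `NumpySequence` is a hypothetical subclass written only for this example:

```python
# Sketch: thread-backed, ordered batch prefetching from a Sequence.
import math
import numpy as np
from aitk.keras.datasets.utils import Sequence, OrderedEnqueuer

class NumpySequence(Sequence):
    def __init__(self, x, y, batch_size=32):
        self.x, self.y, self.batch_size = x, y, batch_size
    def __len__(self):
        return math.ceil(len(self.x) / self.batch_size)
    def __getitem__(self, idx):
        s = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        return self.x[s], self.y[s]

seq = NumpySequence(np.random.rand(256, 8), np.random.randint(0, 2, size=256))
enqueuer = OrderedEnqueuer(seq, use_multiprocessing=False, shuffle=True)
enqueuer.start(workers=2, max_queue_size=8)
batches = enqueuer.get()            # infinite generator over (x, y) batches
for _ in range(len(seq)):           # consume one epoch's worth of batches
    x_batch, y_batch = next(batches)
enqueuer.stop()
```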
- """ - try: - while self.is_running(): - inputs = self.queue.get(block=True).get() - self.queue.task_done() - if inputs is not None: - yield inputs - except StopIteration: - # Special case for finite generators - last_ones = [] - while self.queue.qsize() > 0: - last_ones.append(self.queue.get(block=True)) - # Wait for them to complete - for f in last_ones: - f.wait() - # Keep the good ones - last_ones = [future.get() for future in last_ones if future.successful()] - for inputs in last_ones: - if inputs is not None: - yield inputs - except Exception as e: # pylint: disable=broad-except - self.stop() - if 'generator already executing' in str(e): - raise RuntimeError( - 'Your generator is NOT thread-safe. ' - 'Keras requires a thread-safe generator when ' - '`use_multiprocessing=False, workers > 1`. ') - raise e diff --git a/aitk/keras/initializers/README.md b/aitk/keras/initializers/README.md deleted file mode 100644 index ebbe2f0..0000000 --- a/aitk/keras/initializers/README.md +++ /dev/null @@ -1,4 +0,0 @@ -# Initializers -The `initializers.py` module contains objects for initializing optimizers, -activation functions, weight initializers, and learning rate schedulers from -strings or parameter dictionaries. diff --git a/aitk/keras/initializers/__init__.py b/aitk/keras/initializers/__init__.py deleted file mode 100644 index 91c82ab..0000000 --- a/aitk/keras/initializers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .initializers import * diff --git a/aitk/keras/initializers/initializers.py b/aitk/keras/initializers/initializers.py deleted file mode 100644 index a828fda..0000000 --- a/aitk/keras/initializers/initializers.py +++ /dev/null @@ -1,264 +0,0 @@ -import re -from functools import partial -from ast import literal_eval as eval - -import numpy as np - -from ..optimizers import OptimizerBase, SGD, AdaGrad, RMSProp, Adam -from ..activations import ActivationBase, Affine, ReLU, Tanh, Sigmoid, LeakyReLU -from ..schedulers import ( - SchedulerBase, - ConstantScheduler, - ExponentialScheduler, - NoamScheduler, - KingScheduler, -) - -from ..utils import ( - he_normal, - he_uniform, - glorot_normal, - glorot_uniform, - truncated_normal, -) - - -class ActivationInitializer(object): - def __init__(self, param=None): - """ - A class for initializing activation functions. 
Valid inputs are: - (a) __str__ representations of `ActivationBase` instances - (b) `ActivationBase` instances - - If `param` is `None`, return the identity function: f(X) = X - """ - self.param = param - - def __call__(self): - param = self.param - if param is None: - act = Affine(slope=1, intercept=0) - elif isinstance(param, ActivationBase): - act = param.copy() - elif isinstance(param, str): - act = self.init_from_str(param) - else: - raise ValueError("Unknown activation: {}".format(param)) - return act - - def init_from_str(self, act_str): - act_str = act_str.lower() - if act_str == "relu": - act_fn = ReLU() - elif act_str == "tanh": - act_fn = Tanh() - elif act_str == "sigmoid": - act_fn = Sigmoid() - elif "affine" in act_str: - r = r"affine\(slope=(.*), intercept=(.*)\)" - slope, intercept = re.match(r, act_str).groups() - act_fn = Affine(float(slope), float(intercept)) - elif "leaky relu" in act_str: - r = r"leaky relu\(alpha=(.*)\)" - alpha = re.match(r, act_str).groups()[0] - act_fn = LeakyReLU(float(alpha)) - else: - raise ValueError("Unknown activation: {}".format(act_str)) - return act_fn - - -class SchedulerInitializer(object): - def __init__(self, param=None, lr=None): - """ - A class for initializing learning rate schedulers. Valid inputs are: - (a) __str__ representations of `SchedulerBase` instances - (b) `SchedulerBase` instances - (c) Parameter dicts (e.g., as produced via the `summary` method in - `LayerBase` instances) - - If `param` is `None`, return the ConstantScheduler with learning rate - equal to `lr`. - """ - if all([lr is None, param is None]): - raise ValueError("lr and param cannot both be `None`") - - self.lr = lr - self.param = param - - def __call__(self): - param = self.param - if param is None: - scheduler = ConstantScheduler(self.lr) - elif isinstance(param, SchedulerBase): - scheduler = param.copy() - elif isinstance(param, str): - scheduler = self.init_from_str() - elif isinstance(param, dict): - scheduler = self.init_from_dict() - return scheduler - - def init_from_str(self): - r = r"([a-zA-Z]*)=([^,)]*)" - sch_str = self.param.lower() - kwargs = dict([(i, eval(j)) for (i, j) in re.findall(r, sch_str)]) - - if "constant" in sch_str: - scheduler = ConstantScheduler(**kwargs) - elif "exponential" in sch_str: - scheduler = ExponentialScheduler(**kwargs) - elif "noam" in sch_str: - scheduler = NoamScheduler(**kwargs) - elif "king" in sch_str: - scheduler = KingScheduler(**kwargs) - else: - raise NotImplementedError("{}".format(sch_str)) - return scheduler - - def init_from_dict(self): - S = self.param - sc = S["hyperparameters"] if "hyperparameters" in S else None - - if sc is None: - raise ValueError("Must have `hyperparameters` key: {}".format(S)) - - if sc and sc["id"] == "ConstantScheduler": - scheduler = ConstantScheduler() - elif sc and sc["id"] == "ExponentialScheduler": - scheduler = ExponentialScheduler() - elif sc and sc["id"] == "NoamScheduler": - scheduler = NoamScheduler() - elif sc: - raise NotImplementedError("{}".format(sc["id"])) - scheduler.set_params(sc) - return scheduler - - -class OptimizerInitializer(object): - def __init__(self, param=None): - """ - A class for initializing optimizers. Valid inputs are: - (a) __str__ representations of `OptimizerBase` instances - (b) `OptimizerBase` instances - (c) Parameter dicts (e.g., as produced via the `summary` method in - `LayerBase` instances) - - If `param` is `None`, return the SGD optimizer with default parameters. 
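A sketch of the string forms these initializer factories parse (pre-3.0 `aitk.keras`); the keyword names inside the strings follow the underlying activation, scheduler, and optimizer constructors and are assumptions here:

```python
# Sketch: build activation, scheduler, and optimizer objects from strings.
from aitk.keras.initializers import (
    ActivationInitializer,
    SchedulerInitializer,
    OptimizerInitializer,
)

act = ActivationInitializer("leaky relu(alpha=0.3)")()   # LeakyReLU(0.3)
sched = SchedulerInitializer("constant(lr=0.01)")()      # ConstantScheduler
opt = OptimizerInitializer("adam")()                     # Adam with defaults
print(act, sched, opt)
```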
- """ - self.param = param - - def __call__(self): - param = self.param - if param is None: - opt = SGD() - elif isinstance(param, OptimizerBase): - opt = param.copy() - elif isinstance(param, str): - opt = self.init_from_str() - elif isinstance(param, dict): - opt = self.init_from_dict() - return opt - - def init_from_str(self): - r = r"([a-zA-Z]*)=([^,)]*)" - opt_str = self.param.lower() - kwargs = dict([(i, eval(j)) for (i, j) in re.findall(r, opt_str)]) - if "sgd" in opt_str: - optimizer = SGD(**kwargs) - elif "adagrad" in opt_str: - optimizer = AdaGrad(**kwargs) - elif "rmsprop" in opt_str: - optimizer = RMSProp(**kwargs) - elif "adam" in opt_str: - optimizer = Adam(**kwargs) - else: - raise NotImplementedError("{}".format(opt_str)) - return optimizer - - def init_from_dict(self): - O = self.param - cc = O["cache"] if "cache" in O else None - op = O["hyperparameters"] if "hyperparameters" in O else None - - if op is None: - raise ValueError("Must have `hyperparemeters` key: {}".format(O)) - - if op and op["id"] == "SGD": - optimizer = SGD() - elif op and op["id"] == "RMSProp": - optimizer = RMSProp() - elif op and op["id"] == "AdaGrad": - optimizer = AdaGrad() - elif op and op["id"] == "Adam": - optimizer = Adam() - elif op: - raise NotImplementedError("{}".format(op["id"])) - optimizer.set_params(op, cc) - return optimizer - - -class WeightInitializer(object): - def __init__(self, act_fn_str, mode="glorot_uniform"): - """ - A factory for weight initializers. - - Parameters - ---------- - act_fn_str : str - The string representation for the layer activation function - mode : str (default: 'glorot_uniform') - The weight initialization strategy. Valid entries are {"he_normal", - "he_uniform", "glorot_normal", glorot_uniform", "std_normal", - "trunc_normal"} - """ - if mode not in [ - "he_normal", - "he_uniform", - "glorot_normal", - "glorot_uniform", - "std_normal", - "trunc_normal", - ]: - raise ValueError("Unrecognize initialization mode: {}".format(mode)) - - self.mode = mode - self.act_fn = act_fn_str - - if mode == "glorot_uniform": - self._fn = glorot_uniform - elif mode == "glorot_normal": - self._fn = glorot_normal - elif mode == "he_uniform": - self._fn = he_uniform - elif mode == "he_normal": - self._fn = he_normal - elif mode == "std_normal": - self._fn = np.random.randn - elif mode == "trunc_normal": - self._fn = partial(truncated_normal, mean=0, std=1) - - def __call__(self, weight_shape): - if "glorot" in self.mode: - gain = self._calc_glorot_gain() - W = self._fn(weight_shape, gain) - elif self.mode == "std_normal": - W = self._fn(*weight_shape) - else: - W = self._fn(weight_shape) - return W - - def _calc_glorot_gain(self): - """ - Values from: - https://pytorch.org/docs/stable/nn.html?#torch.nn.init.calculate_gain - """ - gain = 1.0 - act_str = self.act_fn.lower() - if act_str == "tanh": - gain = 5.0 / 3.0 - elif act_str == "relu": - gain = np.sqrt(2) - elif "leaky relu" in act_str: - r = r"leaky relu\(alpha=(.*)\)" - alpha = re.match(r, act_str).groups()[0] - gain = np.sqrt(2 / 1 + float(alpha) ** 2) - return gain diff --git a/aitk/keras/layers/README.md b/aitk/keras/layers/README.md deleted file mode 100644 index 81e888c..0000000 --- a/aitk/keras/layers/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# Layers -The `layers.py` module implements common layers / layer-wise operations that can -be composed to create larger neural networks. 
It includes: - -- Fully-connected layers -- Sparse evolutionary layers ([Mocanu et al., 2018](https://www.nature.com/articles/s41467-018-04316-3)) -- Dot-product attention layers ([Luong, Pho, & Manning, 2015](https://arxiv.org/pdf/1508.04025.pdf); [Vaswani et al., 2017](https://arxiv.org/pdf/1706.03762.pdf)) -- 1D and 2D convolution (with stride, padding, and dilation) layers ([van den Oord et al., 2016](https://arxiv.org/pdf/1609.03499.pdf); [Yu & Kolton, 2016](https://arxiv.org/pdf/1511.07122.pdf)) -- 2D "deconvolution" (with stride and padding) layers ([Zeiler et al., 2010](https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf)) -- Restricted Boltzmann machines (with CD-_n_ training) ([Smolensky, 1996](http://stanford.edu/~jlmcc/papers/PDP/Volume%201/Chap6_PDP86.pdf); [Carreira-Perpiñán & Hinton, 2005](http://www.cs.toronto.edu/~fritz/absps/cdmiguel.pdf)) -- Elementwise multiplication operation -- Summation operation -- Flattening operation -- Embedding layer -- Softmax layer -- Max & average pooling layer -- 1D and 2D batch normalization layers ([Ioffe & Szegedy, 2015](http://proceedings.mlr.press/v37/ioffe15.pdf)) -- 1D and 2D layer normalization layers ([Ba, Kiros, & Hinton, 2016](https://arxiv.org/pdf/1607.06450.pdf)) -- Recurrent layers ([Elman, 1990](https://crl.ucsd.edu/~elman/Papers/fsit.pdf)) -- Long short-term memory (LSTM) layers ([Hochreiter & Schmidhuber, 1997](http://www.bioinf.jku.at/publications/older/2604.pdf)) diff --git a/aitk/keras/layers/__init__.py b/aitk/keras/layers/__init__.py deleted file mode 100644 index 790b4fa..0000000 --- a/aitk/keras/layers/__init__.py +++ /dev/null @@ -1,4324 +0,0 @@ -# -*- coding: utf-8 -*- -# ************************************************************** -# aitk.keras: A Python Keras model API -# -# Copyright (c) 2021 AITK Developers -# -# https://github.com/ArtificialIntelligenceToolkit/aitk.keras -# -# ************************************************************** - -"""A collection of composable layer objects for building neural networks""" -from abc import ABC, abstractmethod - -import numpy as np - -from ..wrappers import init_wrappers, Dropout - -from ..initializers import ( - WeightInitializer, - OptimizerInitializer, - ActivationInitializer, -) - -from ..utils import ( - pad1D, - pad2D, - conv1D, - conv2D, - im2col, - col2im, - dilate, - deconv2D_naive, - calc_pad_dims_2D, -) - -class Activation(): - def __init__(self, activation): - self.activation = activation - -NAME_CACHE = {} - -class LayerBase(ABC): - def __init__(self, name=None): - """An abstract base class inherited by all neural network layers""" - self.X = [] - self.act_fn = None - self.trainable = True - self.name = self.make_name(name) - self.optimizer = None - self.default_kernel_optimizer = "glorot_uniform" - - self.gradients = {} - self.parameters = {} - self.derived_variables = {} - self.input_layers = [] - self.output_layers = [] - - super().__init__() - - def __call__(self, input_layer): - if isinstance(input_layer, (list, tuple)): - for layer in input_layer: - layer.output_layers.append(self) - self.input_layers.append(layer) - else: - input_layer.output_layers.append(self) - self.input_layers.append(input_layer) - return self - - def __str__(self): - return f"<{self.__class__.__name__}(name='{self.name}')>" - - def make_name(self, name): - if name is None: - class_name = self.__class__.__name__.lower() - count = NAME_CACHE.get(class_name, 0) - if count == 0: - new_name = class_name - else: - new_name = "%s_%s" % (class_name, count) - 
NAME_CACHE[class_name] = count + 1 - return new_name - else: - return name - - def set_optimizer(self, optimizer=None): - optimizer = optimizer or self.default_kernel_optimizer - self.optimizer = OptimizerInitializer(optimizer)() - - def has_trainable_params(self): - return self.parameters != {} - - @abstractmethod - def _init_params(self, **kwargs): - raise NotImplementedError - - @abstractmethod - def forward(self, z, **kwargs): - """Perform a forward pass through the layer""" - raise NotImplementedError - - @abstractmethod - def backward(self, out, **kwargs): - """Perform a backward pass through the layer""" - raise NotImplementedError - - def freeze(self): - """ - Freeze the layer parameters at their current values so they can no - longer be updated. - """ - self.trainable = False - - def unfreeze(self): - """Unfreeze the layer parameters so they can be updated.""" - self.trainable = True - - def flush_gradients(self): - """Erase all the layer's derived variables and gradients.""" - assert self.trainable, "Layer is frozen" - self.X = [] - for k, v in self.derived_variables.items(): - self.derived_variables[k] = [] - - for k, v in self.gradients.items(): - self.gradients[k] = np.zeros_like(v) - - def update(self, cur_loss=None): - """ - Update the layer parameters using the accrued gradients and layer - optimizer. Flush all gradients once the update is complete. - """ - assert self.trainable, "Layer is frozen" - self.optimizer.step() - for k, v in self.gradients.items(): - if k in self.parameters: - self.parameters[k] = self.optimizer(self.parameters[k], v, k, cur_loss) - self.flush_gradients() - - def set_params(self, summary_dict): - """ - Set the layer parameters from a dictionary of values. - - Parameters - ---------- - summary_dict : dict - A dictionary of layer parameters and hyperparameters. If a required - parameter or hyperparameter is not included within `summary_dict`, - this method will use the value in the current layer's - :meth:`summary` method. - - Returns - ------- - layer : :doc:`Layer ` object - The newly-initialized layer. 
- """ - layer, sd = self, summary_dict - - # collapse `parameters` and `hyperparameters` nested dicts into a single - # merged dictionary - flatten_keys = ["parameters", "hyperparameters"] - for k in flatten_keys: - if k in sd: - entry = sd[k] - sd.update(entry) - del sd[k] - - for k, v in sd.items(): - if k in self.parameters: - layer.parameters[k] = v - if k in self.hyperparameters: - if k == "act_fn": - layer.act_fn = ActivationInitializer(v)() - elif k == "optimizer": - layer.optimizer = OptimizerInitializer(sd[k])() - elif k == "wrappers": - layer = init_wrappers(layer, sd[k]) - elif k not in ["wrappers", "optimizer"]: - setattr(layer, k, v) - return layer - - def get_weights(self): - # Returns pointers to weight matrices, in order: - return [self.parameters[key] for key in self.parameters] - - def set_weights(self, weights, copy=True): - # Ordered set of parameters: - for i, key in enumerate(self.parameters): - if copy: - self.parameters[key] = weights[i].copy() - else: - self.parameters[key] = weights[i] - self.weights_set = True - - def summary(self): - """Return a dict of the layer parameters, hyperparameters, and ID.""" - return { - "layer": self.hyperparameters["layer"], - "parameters": self.parameters, - "hyperparameters": self.hyperparameters, - } - - -class Input(LayerBase): - def __init__(self, input_shape, batch_size=None, name=None): - super().__init__(name=name) - self.n_out = input_shape - self.trainable = False - - def forward(self, z, **kwargs): - """Perform a forward pass through the layer""" - return z - - def backward(self, out, **kwargs): - """Perform a backward pass through the layer""" - raise NotImplementedError - - def _init_params(self, **kwargs): - raise NotImplementedError - -InputLayer = Input - -class DotProductAttention(LayerBase): - def __init__(self, scale=True, dropout_p=0, kernel_initializer="glorot_uniform", name=None): - r""" - A single "attention head" layer using a dot-product for the scoring function. - - Notes - ----- - The equations for a dot product attention layer are: - - .. math:: - - \mathbf{Z} &= \mathbf{K Q}^\\top \ \ \ \ &&\text{if scale = False} \\ - &= \mathbf{K Q}^\top / \sqrt{d_k} \ \ \ \ &&\text{if scale = True} \\ - \mathbf{Y} &= \text{dropout}(\text{softmax}(\mathbf{Z})) \mathbf{V} - - Parameters - ---------- - scale : bool - Whether to scale the the key-query dot product by the square root - of the key/query vector dimensionality before applying the Softmax. - This is useful, since the scale of dot product will otherwise - increase as query / key dimensions grow. Default is True. - dropout_p : float in [0, 1) - The dropout propbability during training, applied to the output of - the softmax. If 0, no dropout is applied. Default is 0. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. - Unused. 
- """ # noqa: E501 - super().__init__(name=name) - - self.kernel_initializer = kernel_initializer - self.scale = scale - self.dropout_p = dropout_p - self._init_params() - - def _init_params(self): - self.softmax = Dropout(Softmax(), self.dropout_p) - smdv = self.softmax.derived_variables - self.derived_variables = { - "attention_weights": [], - "dropout_mask": smdv["wrappers"][0]["dropout_mask"], - } - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "DotProductAttention", - "kernel_initializer": self.kernel_initializer, - "scale": self.scale, - "dropout_p": self.dropout_p, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def freeze(self): - """ - Freeze the layer parameters at their current values so they can no - longer be updated. - """ - self.trainable = False - self.softmax.freeze() - - def unfreeze(self): - """Unfreeze the layer parameters so they can be updated.""" - self.trainable = True - self.softmax.unfreeze() - - def forward(self, Q, K, V, retain_derived=True): - r""" - Compute the attention-weighted output of a collection of keys, values, - and queries. - - Notes - ----- - In the most abstract (ie., hand-wave-y) sense: - - - Query vectors ask questions - - Key vectors advertise their relevancy to questions - - Value vectors give possible answers to questions - - The dot product between Key and Query vectors provides scores for - each of the the `n_ex` different Value vectors - - For a single query and `n` key-value pairs, dot-product attention (with - scaling) is:: - - w0 = dropout(softmax( (query @ key[0]) / sqrt(d_k) )) - w1 = dropout(softmax( (query @ key[1]) / sqrt(d_k) )) - ... - wn = dropout(softmax( (query @ key[n]) / sqrt(d_k) )) - - y = np.array([w0, ..., wn]) @ values - (1 × n_ex) (n_ex × d_v) - - In words, keys and queries are combined via dot-product to produce a - score, which is then passed through a softmax to produce a weight on - each value vector in Values. We elementwise multiply each value vector - by its weight, and then take the elementwise sum of each weighted value - vector to get the :math:`1 \times d_v` output for the current example. - - In vectorized form, - - .. math:: - - \mathbf{Y} = \text{dropout}( - \text{softmax}(\mathbf{KQ}^\top / \sqrt{d_k}) - ) \mathbf{V} - - Parameters - ---------- - Q : :py:class:`ndarray ` of shape `(n_ex, *, d_k)` - A set of `n_ex` query vectors packed into a single matrix. - Optional middle dimensions can be used to specify, e.g., the number - of parallel attention heads. - K : :py:class:`ndarray ` of shape `(n_ex, *, d_k)` - A set of `n_ex` key vectors packed into a single matrix. Optional - middle dimensions can be used to specify, e.g., the number of - parallel attention heads. - V : :py:class:`ndarray ` of shape `(n_ex, *, d_v)` - A set of `n_ex` value vectors packed into a single matrix. Optional - middle dimensions can be used to specify, e.g., the number of - parallel attention heads. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. 
- - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, *, d_v)` - The attention-weighted output values - """ - Y, weights = self._fwd(Q, K, V) - - if retain_derived: - self.X.append((Q, K, V)) - self.derived_variables["attention_weights"].append(weights) - - return Y - - def _fwd(self, Q, K, V): - """Actual computation of forward pass""" - scale = 1 / np.sqrt(Q.shape[-1]) if self.scale else 1 - scores = Q @ K.swapaxes(-2, -1) * scale # attention scores - weights = self.softmax.forward(scores) # attention weights - Y = weights @ V - return Y, weights - - def backward(self, dLdy, retain_grads=True): - r""" - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(n_ex, *, d_v)` - The gradient of the loss wrt. the layer output `Y` - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dQ : :py:class:`ndarray ` of shape `(n_ex, *, d_k)` or list of arrays - The gradient of the loss wrt. the layer query matrix/matrices `Q`. - dK : :py:class:`ndarray ` of shape `(n_ex, *, d_k)` or list of arrays - The gradient of the loss wrt. the layer key matrix/matrices `K`. - dV : :py:class:`ndarray ` of shape `(n_ex, *, d_v)` or list of arrays - The gradient of the loss wrt. the layer value matrix/matrices `V`. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - dQ, dK, dV = [], [], [] - weights = self.derived_variables["attention_weights"] - for dy, (q, k, v), w in zip(dLdy, self.X, weights): - dq, dk, dv = self._bwd(dy, q, k, v, w) - dQ.append(dq) - dK.append(dk) - dV.append(dv) - - if len(self.X) == 1: - dQ, dK, dV = dQ[0], dK[0], dV[0] - - return dQ, dK, dV - - def _bwd(self, dy, q, k, v, weights): - """Actual computation of the gradient of the loss wrt. q, k, and v""" - d_k = k.shape[-1] - scale = 1 / np.sqrt(d_k) if self.scale else 1 - - dV = weights.swapaxes(-2, -1) @ dy - dWeights = dy @ v.swapaxes(-2, -1) - dScores = self.softmax.backward(dWeights) - dQ = dScores @ k * scale - dK = dScores.swapaxes(-2, -1) @ q * scale - return dQ, dK, dV - - -class RBM(LayerBase): - def __init__(self, n_out, K=1, kernel_initializer="glorot_uniform", name=None): - """ - A Restricted Boltzmann machine with Bernoulli visible and hidden units. - - Parameters - ---------- - n_out : int - The number of output dimensions/units. - K : int - The number of contrastive divergence steps to run before computing - a single gradient update. Default is 1. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. 
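A forward-pass sketch for a single attention head over a small batch (pre-3.0 `aitk.keras`); the shapes are arbitrary example values:

```python
# Sketch: one query attending over five key/value pairs per example.
import numpy as np
from aitk.keras.layers import DotProductAttention

n_ex, d_k, d_v = 4, 8, 8
Q = np.random.randn(n_ex, 1, d_k)   # one query per example
K = np.random.randn(n_ex, 5, d_k)   # five key vectors per example
V = np.random.randn(n_ex, 5, d_v)   # five value vectors per example

attn = DotProductAttention(scale=True, dropout_p=0)
Y = attn.forward(Q, K, V)           # attention-weighted values, shape (n_ex, 1, d_v)
print(Y.shape)
```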
- """ # noqa: E501 - super().__init__(name=name) - - self.K = K # CD-K - self.kernel_initializer = kernel_initializer - self.n_in = None - self.n_out = n_out - self.is_initialized = False - self.weights_set = False - self.act_fn_V = ActivationInitializer("Sigmoid")() - self.act_fn_H = ActivationInitializer("Sigmoid")() - self.parameters = {"W": None, "b_in": None, "b_out": None} - - self._init_params() - - def _init_params(self): - if not self.weights_set: - b_in = np.zeros((1, self.n_in)) - b_out = np.zeros((1, self.n_out)) - init_weights = WeightInitializer(str(self.act_fn_V), mode=self.kernel_initializer) - W = init_weights((self.n_in, self.n_out)) - else: - W, b_in, b_out = self.get_weights() - - self.parameters = {"W": W, "b_in": b_in, "b_out": b_out} - self.gradients = { - "W": np.zeros_like(W), - "b_in": np.zeros_like(b_in), - "b_out": np.zeros_like(b_out), - } - - self.derived_variables = { - "V": None, - "p_H": None, - "p_V_prime": None, - "p_H_prime": None, - "positive_grad": None, - "negative_grad": None, - } - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "RBM", - "K": self.K, - "n_in": self.n_in, - "n_out": self.n_out, - "kernel_initializer": self.kernel_initializer, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameterse, - }, - } - - def CD_update(self, X): - """ - Perform a single contrastive divergence-`k` training update using the - visible inputs `X` as a starting point for the Gibbs sampler. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Layer input, representing the `n_in`-dimensional features for a - minibatch of `n_ex` examples. Each feature in X should ideally be - binary-valued, although it is possible to also train on real-valued - features ranging between (0, 1) (e.g., grayscale images). - """ - self.forward(X) - self.backward() - - def forward(self, V, K=None, retain_derived=True): - """ - Perform the CD-`k` "forward pass" of visible inputs into hidden units - and back. - - Notes - ----- - This implementation follows [1]_'s recommendations for the RBM forward - pass: - - - Use real-valued probabilities for both the data and the visible - unit reconstructions. - - Only the final update of the hidden units should use the actual - probabilities -- all others should be sampled binary states. - - When collecting the pairwise statistics for learning weights or - the individual statistics for learning biases, use the - probabilities, not the binary states. - - References - ---------- - .. [1] Hinton, G. (2010). "A practical guide to training restricted - Boltzmann machines". *UTML TR 2010-003* - - Parameters - ---------- - V : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Visible input, representing the `n_in`-dimensional features for a - minibatch of `n_ex` examples. Each feature in V should ideally be - binary-valued, although it is possible to also train on real-valued - features ranging between (0, 1) (e.g., grayscale images). - K : int - The number of steps of contrastive divergence steps to run before - computing the gradient update. If None, use ``self.K``. Default is - None. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. 
- """ - if not self.is_initialized: - self.n_in = V.shape[1] - self._init_params() - - # override self.K if necessary - K = self.K if K is None else K - - W = self.parameters["W"] - b_in = self.parameters["b_in"] - b_out = self.parameters["b_out"] - - # compute hidden unit probabilities - Z_H = V @ W + b_out - p_H = self.act_fn_H.fn(Z_H) - - # sample hidden states (stochastic binary values) - H = np.random.rand(*p_H.shape) <= p_H - H = H.astype(float) - - # always use probabilities when computing gradients - positive_grad = V.T @ p_H - - # perform CD-k - # TODO: use persistent CD-k - # https://www.cs.toronto.edu/~tijmen/pcd/pcd.pdf - H_prime = H.copy() - for k in range(K): - # resample v' given h (H_prime is binary for all but final step) - Z_V_prime = H_prime @ W.T + b_in - p_V_prime = self.act_fn_V.fn(Z_V_prime) - - # don't resample visual units - always use raw probabilities! - V_prime = p_V_prime - - # compute p(h' | v') - Z_H_prime = V_prime @ W + b_out - p_H_prime = self.act_fn_H.fn(Z_H_prime) - - # if this is the final iteration of CD, keep hidden state - # probabilities (don't sample) - H_prime = p_H_prime - if k != self.K - 1: - H_prime = np.random.rand(*p_H_prime.shape) <= p_H_prime - H_prime = H_prime.astype(float) - - negative_grad = p_V_prime.T @ p_H_prime - - if retain_derived: - self.derived_variables["V"] = V - self.derived_variables["p_H"] = p_H - self.derived_variables["p_V_prime"] = p_V_prime - self.derived_variables["p_H_prime"] = p_H_prime - self.derived_variables["positive_grad"] = positive_grad - self.derived_variables["negative_grad"] = negative_grad - - def backward(self, retain_grads=True, *args): - """ - Perform a gradient update on the layer parameters via the contrastive - divergence equations. - - Parameters - ---------- - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - """ - V = self.derived_variables["V"] - p_H = self.derived_variables["p_H"] - p_V_prime = self.derived_variables["p_V_prime"] - p_H_prime = self.derived_variables["p_H_prime"] - positive_grad = self.derived_variables["positive_grad"] - negative_grad = self.derived_variables["negative_grad"] - - if retain_grads: - self.gradients["b_in"] = V - p_V_prime - self.gradients["b_out"] = p_H - p_H_prime - self.gradients["W"] = positive_grad - negative_grad - - def reconstruct(self, X, n_steps=10, return_prob=False): - """ - Reconstruct an input `X` by running the trained Gibbs sampler for - `n_steps`-worth of CD-`k`. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Layer input, representing the `n_in`-dimensional features for a - minibatch of `n_ex` examples. Each feature in `X` should ideally be - binary-valued, although it is possible to also train on real-valued - features ranging between (0, 1) (e.g., grayscale images). If `X` has - missing values, it may be sufficient to mark them with random - entries and allow the reconstruction to impute them. - n_steps : int - The number of Gibbs sampling steps to perform when generating the - reconstruction. Default is 10. - return_prob : bool - Whether to return the real-valued feature probabilities for the - reconstruction or the binary samples. Default is False. - - Returns - ------- - V : :py:class:`ndarray ` of shape `(n_ex, in_ch)` - The reconstruction (or feature probabilities if `return_prob` is - true) of the visual input `X` after running the Gibbs sampler for - `n_steps`. 
- """ - self.forward(X, K=n_steps) - p_V_prime = self.derived_variables["p_V_prime"] - - # ignore the gradients produced during this reconstruction - self.flush_gradients() - - # sample V_prime reconstruction if return_prob is False - V = p_V_prime - if not return_prob: - V = (np.random.rand(*p_V_prime.shape) <= p_V_prime).astype(float) - return V - - -####################################################################### -# Layer Ops # -####################################################################### - - -class Add(LayerBase): - def __init__(self, act_fn=None, name=None): - """ - An "addition" layer that returns the sum of its inputs, passed through - an optional nonlinearity. - - Parameters - ---------- - act_fn : str, :doc:`Activation ` object, or None - The element-wise output nonlinearity used in computing the final - output. If None, use the identity function :math:`f(x) = x`. - Default is None. - """ # noqa: E501 - super().__init__(name=name) - self.act_fn = ActivationInitializer(act_fn)() - self._init_params() - - def _init_params(self): - self.derived_variables = {"sum": []} - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "Sum", - "act_fn": str(self.act_fn), - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - r""" - Compute the layer output on a single minibatch. - - Parameters - ---------- - X : list of length `n_inputs` - A list of tensors, all of the same shape. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, *)` - The sum over the `n_ex` examples. - """ - out = X[0].copy() - for i in range(1, len(X)): - out += X[i] - if retain_derived: - self.X.append(X) - self.derived_variables["sum"].append(out) - return self.act_fn(out) - - def backward(self, dLdY, retain_grads=True): - r""" - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(n_ex, *)` - The gradient of the loss wrt. the layer output `Y`. - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : list of length `n_inputs` - The gradient of the loss wrt. each input in `X`. - """ - if not isinstance(dLdY, list): - dLdY = [dLdY] - - X = self.X - _sum = self.derived_variables["sum"] - grads = [self._bwd(dy, x, ss) for dy, x, ss in zip(dLdY, X, _sum)] - return grads[0] if len(X) == 1 else grads - - def _bwd(self, dLdY, X, _sum): - """Actual computation of gradient of the loss wrt. each input""" - grads = [dLdY * self.act_fn.grad(_sum) for _ in X] - return grads - - -class Multiply(LayerBase): - def __init__(self, act_fn=None, name=None): - """ - A multiplication layer that returns the *elementwise* product of its - inputs, passed through an optional nonlinearity. - - Parameters - ---------- - act_fn : str, :doc:`Activation ` object, or None - The element-wise output nonlinearity used in computing the final - output. If None, use the identity function :math:`f(x) = x`. - Default is None. 
- """ # noqa: E501 - super().__init__(name=name) - self.act_fn = ActivationInitializer(act_fn)() - self._init_params() - - def _init_params(self): - self.derived_variables = {"product": []} - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "Multiply", - "act_fn": str(self.act_fn), - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - r""" - Compute the layer output on a single minibatch. - - Parameters - ---------- - X : list of length `n_inputs` - A list of tensors, all of the same shape. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, *)` - The product over the `n_ex` examples. - """ # noqa: E501 - out = X[0].copy() - for i in range(1, len(X)): - out *= X[i] - if retain_derived: - self.X.append(X) - self.derived_variables["product"].append(out) - return self.act_fn(out) - - def backward(self, dLdY, retain_grads=True): - r""" - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(n_ex, *)` - The gradient of the loss wrt. the layer output `Y`. - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : list of length `n_inputs` - The gradient of the loss wrt. each input in `X`. - """ - if not isinstance(dLdY, list): - dLdY = [dLdY] - - X = self.X - _prod = self.derived_variables["product"] - grads = [self._bwd(dy, x, pr) for dy, x, pr in zip(dLdY, X, _prod)] - return grads[0] if len(X) == 1 else grads - - def _bwd(self, dLdY, X, prod): - """Actual computation of gradient of loss wrt. each input""" - grads = [dLdY * self.act_fn.grad(prod)] * len(X) - for i, x in enumerate(X): - grads = [g * x if j != i else g for j, g in enumerate(grads)] - return grads - - -class Flatten(LayerBase): - def __init__(self, keep_dim="first", name=None): - """ - Flatten a multidimensional input into a 2D matrix. - - Parameters - ---------- - keep_dim : {'first', 'last', -1} - The dimension of the original input to retain. Typically used for - retaining the minibatch dimension.. If -1, flatten all dimensions. - Default is 'first'. - """ # noqa: E501 - super().__init__(name=name) - self.n_out = 0 - self.n_in = [] - - self.keep_dim = keep_dim - self._init_params() - - def _init_params(self): - self.X = [] - self.gradients = {} - self.parameters = {} - self.derived_variables = {"in_dims": []} - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "Flatten", - "keep_dim": self.keep_dim, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - r""" - Compute the layer output on a single minibatch. - - Parameters - ---------- - X : :py:class:`ndarray ` - Input volume to flatten. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. 
- - Returns - ------- - Y : :py:class:`ndarray ` of shape `(*out_dims)` - Flattened output. If `keep_dim` is `'first'`, `X` is reshaped to - ``(X.shape[0], -1)``, otherwise ``(-1, X.shape[0])``. - """ - self.n_in = X.shape - if retain_derived: - self.derived_variables["in_dims"].append(X.shape) - if self.keep_dim == -1: - return X.flatten().reshape(1, -1) - rs = (X.shape[0], -1) if self.keep_dim == "first" else (-1, X.shape[-1]) - self.n_out = rs - return X.reshape(*rs) - - def backward(self, dLdy, retain_grads=True): - r""" - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(*out_dims)` - The gradient of the loss wrt. the layer output `Y`. - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(*in_dims)` or list of arrays - The gradient of the loss wrt. the layer input(s) `X`. - """ # noqa: E501 - if not isinstance(dLdy, list): - dLdy = [dLdy] - in_dims = self.derived_variables["in_dims"] - out = [dy.reshape(*dims) for dy, dims in zip(dLdy, in_dims)] - return out[0] if len(dLdy) == 1 else out - -class Concatenate(LayerBase): - def __init__(self, name=None): - """ - Concatenate a list of input layers into one. - """ # noqa: E501 - super().__init__(name=name) - self.n_out = 0 - self.n_in = [] - - self._init_params() - - def _init_params(self): - self.X = [] - self.gradients = {} - self.parameters = {} - self.derived_variables = {} - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "Concatenate", - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - r""" - Compute the layer output on a single minibatch. - - Parameters - ---------- - X : :py:class:`ndarray ` - Input volume to flatten. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : - """ - result = np.concatenate(X, -1) - self.n_out = result.shape[1:] - self.n_in = [layer.n_out for layer in self.input_layers] - return result - - def backward(self, dLdy, retain_grads=True): - r""" - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(*out_dims)` - The gradient of the loss wrt. the layer output `Y`. - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : - """ # noqa: E501 - return dLdy - - -####################################################################### -# Normalization Layers # -####################################################################### - - -class BatchNorm2D(LayerBase): - def __init__(self, momentum=0.9, epsilon=1e-5, name=None): - """ - A batch normalization layer for two-dimensional inputs with an - additional channel dimension. - - Notes - ----- - BatchNorm is an attempt address the problem of internal covariate - shift (ICS) during training by normalizing layer inputs. 
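`Concatenate.backward` above returns `dLdy` unchanged; with several inputs the upstream gradient generally has to be split back along the concatenation axis. A hedged sketch of what that split could look like (this is not the module's implementation; `concat_backward` and its arguments are illustrative):

```python
import numpy as np

def concat_backward(dLdY, input_widths, axis=-1):
    """Split the upstream gradient back into one piece per concatenated input."""
    # np.split expects cumulative boundaries, not the widths themselves
    boundaries = np.cumsum(input_widths)[:-1]
    return np.split(dLdY, boundaries, axis=axis)

dLdY = np.arange(12.0).reshape(2, 6)        # upstream gradient for a (2, 6) concat output
pieces = concat_backward(dLdY, [2, 3, 1])   # the inputs had widths 2, 3, and 1
assert [p.shape for p in pieces] == [(2, 2), (2, 3), (2, 1)]
```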
- - ICS refers to the change in the distribution of layer inputs during - training as a result of the changing parameters of the previous - layer(s). ICS can make it difficult to train models with saturating - nonlinearities, and in general can slow training by requiring a lower - learning rate. - - Equations [train]:: - - Y = scaler * norm(X) + intercept - norm(X) = (X - mean(X)) / sqrt(var(X) + epsilon) - - Equations [test]:: - - Y = scaler * running_norm(X) + intercept - running_norm(X) = (X - running_mean) / sqrt(running_var + epsilon) - - In contrast to :class:`LayerNorm2D`, the BatchNorm layer calculates - the mean and var across the *batch* rather than the output features. - This has two disadvantages: - - 1. It is highly affected by batch size: smaller mini-batch sizes - increase the variance of the estimates for the global mean and - variance. - - 2. It is difficult to apply in RNNs -- one must fit a separate - BatchNorm layer for *each* time-step. - - Parameters - ---------- - momentum : float - The momentum term for the running mean/running std calculations. - The closer this is to 1, the less weight will be given to the - mean/std of the current batch (i.e., higher smoothing). Default is - 0.9. - epsilon : float - A small smoothing constant to use during computation of ``norm(X)`` - to avoid divide-by-zero errors. Default is 1e-5. - """ # noqa: E501 - super().__init__(name=name) - - self.in_ch = None - self.out_ch = None - self.epsilon = epsilon - self.momentum = momentum - self.parameters = { - "scaler": None, - "intercept": None, - "running_var": None, - "running_mean": None, - } - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - scaler = np.random.rand(self.in_ch) - intercept = np.zeros(self.in_ch) - - # init running mean and std at 0 and 1, respectively - running_mean = np.zeros(self.in_ch) - running_var = np.ones(self.in_ch) - - self.parameters = { - "scaler": scaler, - "intercept": intercept, - "running_var": running_var, - "running_mean": running_mean, - } - - self.gradients = { - "scaler": np.zeros_like(scaler), - "intercept": np.zeros_like(intercept), - } - - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "BatchNorm2D", - "act_fn": None, - "in_ch": self.in_ch, - "out_ch": self.out_ch, - "epsilon": self.epsilon, - "momentum": self.momentum, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def reset_running_stats(self): - """Reset the running mean and variance estimates to 0 and 1.""" - assert self.trainable, "Layer is frozen" - self.parameters["running_mean"] = np.zeros(self.in_ch) - self.parameters["running_var"] = np.ones(self.in_ch) - - def forward(self, X, retain_derived=True): - """ - Compute the layer output on a single minibatch. - - Notes - ----- - Equations [train]:: - - Y = scaler * norm(X) + intercept - norm(X) = (X - mean(X)) / sqrt(var(X) + epsilon) - - Equations [test]:: - - Y = scaler * running_norm(X) + intercept - running_norm(X) = (X - running_mean) / sqrt(running_var + epsilon) - - In contrast to :class:`LayerNorm2D`, the BatchNorm layer calculates the - mean and var across the *batch* rather than the output features. 
- - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - Input volume containing the `in_rows` x `in_cols`-dimensional - features for a minibatch of `n_ex` examples. - retain_derived : bool - Whether to use the current intput to adjust the running mean and - running_var computations. Setting this to True is the same as - freezing the layer for the current input. Default is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - Layer output for each of the `n_ex` examples. - """ # noqa: E501 - if not self.is_initialized: - self.in_ch = self.out_ch = X.shape[3] - self._init_params() - - ep = self.hyperparameters["epsilon"] - mm = self.hyperparameters["momentum"] - rm = self.parameters["running_mean"] - rv = self.parameters["running_var"] - - scaler = self.parameters["scaler"] - intercept = self.parameters["intercept"] - - # if the layer is frozen, use our running mean/std values rather - # than the mean/std values for the new batch - X_mean = self.parameters["running_mean"] - X_var = self.parameters["running_var"] - - if self.trainable and retain_derived: - X_mean, X_var = X.mean(axis=(0, 1, 2)), X.var(axis=(0, 1, 2)) # , ddof=1) - self.parameters["running_mean"] = mm * rm + (1.0 - mm) * X_mean - self.parameters["running_var"] = mm * rv + (1.0 - mm) * X_var - - if retain_derived: - self.X.append(X) - - N = (X - X_mean) / np.sqrt(X_var + ep) - y = scaler * N + intercept - return y - - def backward(self, dLdy, retain_grads=True): - """ - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The gradient of the loss wrt. the layer output `Y`. - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The gradient of the loss wrt. the layer input `X`. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - dX = [] - X = self.X - for dy, x in zip(dLdy, X): - dx, dScaler, dIntercept = self._bwd(dy, x) - dX.append(dx) - - if retain_grads: - self.gradients["scaler"] += dScaler - self.gradients["intercept"] += dIntercept - - return dX[0] if len(X) == 1 else dX - - def _bwd(self, dLdy, X): - """Computation of gradient of loss wrt. X, scaler, and intercept""" - scaler = self.parameters["scaler"] - ep = self.hyperparameters["epsilon"] - - # reshape to 2D, retaining channel dim - X_shape = X.shape - X = np.reshape(X, (-1, X.shape[3])) - dLdy = np.reshape(dLdy, (-1, dLdy.shape[3])) - - # apply 1D batchnorm backward pass on reshaped array - n_ex, in_ch = X.shape - X_mean, X_var = X.mean(axis=0), X.var(axis=0) # , ddof=1) - - N = (X - X_mean) / np.sqrt(X_var + ep) - dIntercept = dLdy.sum(axis=0) - dScaler = np.sum(dLdy * N, axis=0) - - dN = dLdy * scaler - dX = (n_ex * dN - dN.sum(axis=0) - N * (dN * N).sum(axis=0)) / ( - n_ex * np.sqrt(X_var + ep) - ) - - return np.reshape(dX, X_shape), dScaler, dIntercept - - -class BatchNorm1D(LayerBase): - def __init__(self, momentum=0.9, epsilon=1e-5, name=None): - """ - A batch normalization layer for 1D inputs. - - Notes - ----- - BatchNorm is an attempt address the problem of internal covariate - shift (ICS) during training by normalizing layer inputs. 
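A condensed sketch of the train-time versus frozen behaviour implemented by `forward` above: per-channel statistics over axes `(0, 1, 2)` of an NHWC input, with exponential moving averages controlled by `momentum`. Function and argument names are illustrative:

```python
import numpy as np

def batchnorm2d_forward(X, scaler, intercept, running_mean, running_var,
                        momentum=0.9, epsilon=1e-5, training=True):
    if training:
        mean = X.mean(axis=(0, 1, 2))             # per-channel statistics over the batch
        var = X.var(axis=(0, 1, 2))
        # exponential moving average of the batch statistics
        running_mean = momentum * running_mean + (1 - momentum) * mean
        running_var = momentum * running_var + (1 - momentum) * var
    else:
        mean, var = running_mean, running_var     # frozen layer: reuse the running estimates
    N = (X - mean) / np.sqrt(var + epsilon)
    return scaler * N + intercept, running_mean, running_var

rng = np.random.default_rng(0)
X = rng.standard_normal((8, 5, 5, 3))
Y, rm, rv = batchnorm2d_forward(X, np.ones(3), np.zeros(3), np.zeros(3), np.ones(3))
assert Y.shape == X.shape and np.allclose(Y.mean(axis=(0, 1, 2)), 0, atol=1e-6)
```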
- - ICS refers to the change in the distribution of layer inputs during - training as a result of the changing parameters of the previous - layer(s). ICS can make it difficult to train models with saturating - nonlinearities, and in general can slow training by requiring a lower - learning rate. - - Equations [train]:: - - Y = scaler * norm(X) + intercept - norm(X) = (X - mean(X)) / sqrt(var(X) + epsilon) - - Equations [test]:: - - Y = scaler * running_norm(X) + intercept - running_norm(X) = (X - running_mean) / sqrt(running_var + epsilon) - - In contrast to :class:`LayerNorm1D`, the BatchNorm layer calculates - the mean and var across the *batch* rather than the output features. - This has two disadvantages: - - 1. It is highly affected by batch size: smaller mini-batch sizes - increase the variance of the estimates for the global mean and - variance. - - 2. It is difficult to apply in RNNs -- one must fit a separate - BatchNorm layer for *each* time-step. - - Parameters - ---------- - momentum : float - The momentum term for the running mean/running std calculations. - The closer this is to 1, the less weight will be given to the - mean/std of the current batch (i.e., higher smoothing). Default is - 0.9. - epsilon : float - A small smoothing constant to use during computation of ``norm(X)`` - to avoid divide-by-zero errors. Default is 1e-5. - """ # noqa: E501 - super().__init__(name=name) - - self.n_in = None - self.n_out = None - self.epsilon = epsilon - self.momentum = momentum - self.parameters = { - "scaler": None, - "intercept": None, - "running_var": None, - "running_mean": None, - } - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - scaler = np.random.rand(self.n_in) - intercept = np.zeros(self.n_in) - - # init running mean and std at 0 and 1, respectively - running_mean = np.zeros(self.n_in) - running_var = np.ones(self.n_in) - - self.parameters = { - "scaler": scaler, - "intercept": intercept, - "running_mean": running_mean, - "running_var": running_var, - } - - self.gradients = { - "scaler": np.zeros_like(scaler), - "intercept": np.zeros_like(intercept), - } - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "BatchNorm1D", - "act_fn": None, - "n_in": self.n_in, - "n_out": self.n_out, - "epsilon": self.epsilon, - "momentum": self.momentum, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def reset_running_stats(self): - """Reset the running mean and variance estimates to 0 and 1.""" - assert self.trainable, "Layer is frozen" - self.parameters["running_mean"] = np.zeros(self.n_in) - self.parameters["running_var"] = np.ones(self.n_in) - - def forward(self, X, retain_derived=True): - """ - Compute the layer output on a single minibatch. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Layer input, representing the `n_in`-dimensional features for a - minibatch of `n_ex` examples. - retain_derived : bool - Whether to use the current intput to adjust the running mean and - running_var computations. Setting this to True is the same as - freezing the layer for the current input. Default is True. 
- - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Layer output for each of the `n_ex` examples - """ - if not self.is_initialized: - self.n_in = self.n_out = X.shape[1] - self._init_params() - - ep = self.hyperparameters["epsilon"] - mm = self.hyperparameters["momentum"] - rm = self.parameters["running_mean"] - rv = self.parameters["running_var"] - - scaler = self.parameters["scaler"] - intercept = self.parameters["intercept"] - - # if the layer is frozen, use our running mean/std values rather - # than the mean/std values for the new batch - X_mean = self.parameters["running_mean"] - X_var = self.parameters["running_var"] - - if self.trainable and retain_derived: - X_mean, X_var = X.mean(axis=0), X.var(axis=0) # , ddof=1) - self.parameters["running_mean"] = mm * rm + (1.0 - mm) * X_mean - self.parameters["running_var"] = mm * rv + (1.0 - mm) * X_var - - if retain_derived: - self.X.append(X) - - N = (X - X_mean) / np.sqrt(X_var + ep) - y = scaler * N + intercept - return y - - def backward(self, dLdy, retain_grads=True): - """ - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(n_ex, n_in)` - The gradient of the loss wrt. the layer output `Y`. - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(n_ex, n_in)` - The gradient of the loss wrt. the layer input `X`. - """ - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - dX = [] - X = self.X - for dy, x in zip(dLdy, X): - dx, dScaler, dIntercept = self._bwd(dy, x) - dX.append(dx) - - if retain_grads: - self.gradients["scaler"] += dScaler - self.gradients["intercept"] += dIntercept - - return dX[0] if len(X) == 1 else dX - - def _bwd(self, dLdy, X): - """Computation of gradient of loss wrt X, scaler, and intercept""" - scaler = self.parameters["scaler"] - ep = self.hyperparameters["epsilon"] - - n_ex, n_in = X.shape - X_mean, X_var = X.mean(axis=0), X.var(axis=0) # , ddof=1) - - N = (X - X_mean) / np.sqrt(X_var + ep) - dIntercept = dLdy.sum(axis=0) - dScaler = np.sum(dLdy * N, axis=0) - - dN = dLdy * scaler - dX = (n_ex * dN - dN.sum(axis=0) - N * (dN * N).sum(axis=0)) / ( - n_ex * np.sqrt(X_var + ep) - ) - - return dX, dScaler, dIntercept - - -class LayerNorm2D(LayerBase): - def __init__(self, epsilon=1e-5, name=None): - """ - A layer normalization layer for 2D inputs with an additional channel - dimension. - - Notes - ----- - In contrast to :class:`BatchNorm2D`, the LayerNorm layer calculates the - mean and variance across *features* rather than examples in the batch - ensuring that the mean and variance estimates are independent of batch - size and permitting straightforward application in RNNs. - - Equations [train & test]:: - - Y = scaler * norm(X) + intercept - norm(X) = (X - mean(X)) / sqrt(var(X) + epsilon) - - Also in contrast to :class:`BatchNorm2D`, `scaler` and `intercept` are applied - *elementwise* to ``norm(X)``. - - Parameters - ---------- - epsilon : float - A small smoothing constant to use during computation of ``norm(X)`` - to avoid divide-by-zero errors. Default is 1e-5. 
- """ # noqa: E501 - super().__init__(name=name) - - self.in_ch = None - self.out_ch = None - self.epsilon = epsilon - self.parameters = {"scaler": None, "intercept": None} - self.is_initialized = False - self.weights_set = False - - def _init_params(self, X_shape): - n_ex, in_rows, in_cols, in_ch = X_shape - - scaler = np.random.rand(in_rows, in_cols, in_ch) - intercept = np.zeros((in_rows, in_cols, in_ch)) - - self.parameters = {"scaler": scaler, "intercept": intercept} - - self.gradients = { - "scaler": np.zeros_like(scaler), - "intercept": np.zeros_like(intercept), - } - - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "LayerNorm2D", - "act_fn": None, - "in_ch": self.in_ch, - "out_ch": self.out_ch, - "epsilon": self.epsilon, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - """ - Compute the layer output on a single minibatch. - - Notes - ----- - Equations [train & test]:: - - Y = scaler * norm(X) + intercept - norm(X) = (X - mean(X)) / sqrt(var(X) + epsilon) - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - Input volume containing the `in_rows` by `in_cols`-dimensional - features for a minibatch of `n_ex` examples. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - Layer output for each of the `n_ex` examples. - """ # noqa: E501 - if not self.is_initialized: - self.in_ch = self.out_ch = X.shape[3] - self._init_params(X.shape) - - scaler = self.parameters["scaler"] - ep = self.hyperparameters["epsilon"] - intercept = self.parameters["intercept"] - - if retain_derived: - self.X.append(X) - - X_var = X.var(axis=(1, 2, 3), keepdims=True) - X_mean = X.mean(axis=(1, 2, 3), keepdims=True) - lnorm = (X - X_mean) / np.sqrt(X_var + ep) - y = scaler * lnorm + intercept - return y - - def backward(self, dLdy, retain_grads=True): - """ - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The gradient of the loss wrt. the layer output `Y`. - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The gradient of the loss wrt. the layer input `X`. 
- """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - dX = [] - X = self.X - for dy, x in zip(dLdy, X): - dx, dScaler, dIntercept = self._bwd(dy, x) - dX.append(dx) - - if retain_grads: - self.gradients["scaler"] += dScaler - self.gradients["intercept"] += dIntercept - - return dX[0] if len(X) == 1 else dX - - def _bwd(self, dy, X): - """Computation of gradient of the loss wrt X, scaler, intercept""" - scaler = self.parameters["scaler"] - ep = self.hyperparameters["epsilon"] - - X_mean = X.mean(axis=(1, 2, 3), keepdims=True) - X_var = X.var(axis=(1, 2, 3), keepdims=True) - lnorm = (X - X_mean) / np.sqrt(X_var + ep) - - dLnorm = dy * scaler - dIntercept = dy.sum(axis=0) - dScaler = np.sum(dy * lnorm, axis=0) - - n_in = np.prod(X.shape[1:]) - lnorm = lnorm.reshape(-1, n_in) - dLnorm = dLnorm.reshape(lnorm.shape) - X_var = X_var.reshape(X_var.shape[:2]) - - dX = ( - n_in * dLnorm - - dLnorm.sum(axis=1, keepdims=True) - - lnorm * (dLnorm * lnorm).sum(axis=1, keepdims=True) - ) / (n_in * np.sqrt(X_var + ep)) - - # reshape X gradients back to proper dimensions - return np.reshape(dX, X.shape), dScaler, dIntercept - - -class LayerNorm1D(LayerBase): - def __init__(self, epsilon=1e-5, name=None): - """ - A layer normalization layer for 1D inputs. - - Notes - ----- - In contrast to :class:`BatchNorm1D`, the LayerNorm layer calculates the - mean and variance across *features* rather than examples in the batch - ensuring that the mean and variance estimates are independent of batch - size and permitting straightforward application in RNNs. - - Equations [train & test]:: - - Y = scaler * norm(X) + intercept - norm(X) = (X - mean(X)) / sqrt(var(X) + epsilon) - - Also in contrast to :class:`BatchNorm1D`, `scaler` and `intercept` are applied - *elementwise* to ``norm(X)``. - - Parameters - ---------- - epsilon : float - A small smoothing constant to use during computation of ``norm(X)`` - to avoid divide-by-zero errors. Default is 1e-5. - """ # noqa: E501 - super().__init__(name=name) - - self.n_in = None - self.n_out = None - self.epsilon = epsilon - self.parameters = {"scaler": None, "intercept": None} - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - scaler = np.random.rand(self.n_in) - intercept = np.zeros(self.n_in) - - self.parameters = {"scaler": scaler, "intercept": intercept} - - self.gradients = { - "scaler": np.zeros_like(scaler), - "intercept": np.zeros_like(intercept), - } - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "LayerNorm1D", - "act_fn": None, - "n_in": self.n_in, - "n_out": self.n_out, - "epsilon": self.epsilon, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - """ - Compute the layer output on a single minibatch. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Layer input, representing the `n_in`-dimensional features for a - minibatch of `n_ex` examples. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Layer output for each of the `n_ex` examples. 
- """ - if not self.is_initialized: - self.n_in = self.n_out = X.shape[1] - self._init_params() - - scaler = self.parameters["scaler"] - ep = self.hyperparameters["epsilon"] - intercept = self.parameters["intercept"] - - if retain_derived: - self.X.append(X) - - X_mean, X_var = X.mean(axis=1, keepdims=True), X.var(axis=1, keepdims=True) - lnorm = (X - X_mean) / np.sqrt(X_var + ep) - y = scaler * lnorm + intercept - return y - - def backward(self, dLdy, retain_grads=True): - """ - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(n_ex, n_in)` - The gradient of the loss wrt. the layer output `Y`. - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(n_ex, n_in)` - The gradient of the loss wrt. the layer input `X`. - """ - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - dX = [] - X = self.X - for dy, x in zip(dLdy, X): - dx, dScaler, dIntercept = self._bwd(dy, x) - dX.append(dx) - - if retain_grads: - self.gradients["scaler"] += dScaler - self.gradients["intercept"] += dIntercept - - return dX[0] if len(X) == 1 else dX - - def _bwd(self, dLdy, X): - """Computation of gradient of the loss wrt X, scaler, intercept""" - scaler = self.parameters["scaler"] - ep = self.hyperparameters["epsilon"] - - n_ex, n_in = X.shape - X_mean, X_var = X.mean(axis=1, keepdims=True), X.var(axis=1, keepdims=True) - - lnorm = (X - X_mean) / np.sqrt(X_var + ep) - dIntercept = dLdy.sum(axis=0) - dScaler = np.sum(dLdy * lnorm, axis=0) - - dLnorm = dLdy * scaler - dX = ( - n_in * dLnorm - - dLnorm.sum(axis=1, keepdims=True) - - lnorm * (dLnorm * lnorm).sum(axis=1, keepdims=True) - ) / (n_in * np.sqrt(X_var + ep)) - - return dX, dScaler, dIntercept - - -####################################################################### -# MLP Layers # -####################################################################### - - -class Embedding(LayerBase): - def __init__( - self, n_out, vocab_size, pool=None, kernel_initializer="glorot_uniform", name=None - ): - """ - An embedding layer. - - Notes - ----- - Equations:: - - Y = W[x] - - NB. This layer must be the first in a neural network as the gradients - do not get passed back through to the inputs. - - Parameters - ---------- - n_out : int - The dimensionality of the embeddings - vocab_size : int - The total number of items in the vocabulary. All integer indices - are expected to range between 0 and `vocab_size - 1`. - pool : {'sum', 'mean', None} - If not None, apply this function to the collection of `n_in` - encodings in each example to produce a single, pooled embedding. - Default is None. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. 
- """ # noqa: E501 - super().__init__(name=name) - fstr = "'pool' must be either 'sum', 'mean', or None but got '{}'" - assert pool in ["sum", "mean", None], fstr.format(pool) - - self.kernel_initializer = kernel_initializer - self.pool = pool - self.n_out = n_out - self.vocab_size = vocab_size - self.parameters = {"W": None} - self.is_initialized = False - self.weights_set = False - self._init_params() - - def _init_params(self): - if not self.weights_set: - init_weights = WeightInitializer("Affine(slope=1, intercept=0)", mode=self.kernel_initializer) - W = init_weights((self.vocab_size, self.n_out)) - else: - W = self.get_weights() - - self.parameters = {"W": W} - self.derived_variables = {} - self.gradients = {"W": np.zeros_like(W)} - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "Embedding", - "kernel_initializer": self.kernel_initializer, - "pool": self.pool, - "n_out": self.n_out, - "vocab_size": self.vocab_size, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def lookup(self, ids): - """ - Return the embeddings associated with the IDs in `ids`. - - Parameters - ---------- - word_ids : :py:class:`ndarray ` of shape (`M`,) - An array of `M` IDs to retrieve embeddings for. - - Returns - ------- - embeddings : :py:class:`ndarray ` of shape (`M`, `n_out`) - The embedding vectors for each of the `M` IDs. - """ - return self.parameters["W"][ids] - - def forward(self, X, retain_derived=True): - """ - Compute the layer output on a single minibatch. - - Notes - ----- - Equations: - Y = W[x] - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in)` or list of length `n_ex` - Layer input, representing a minibatch of `n_ex` examples. If - ``self.pool`` is None, each example must consist of exactly `n_in` - integer token IDs. Otherwise, `X` can be a ragged array, with each - example consisting of a variable number of token IDs. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through with regard to this input. - Default is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, n_in, n_out)` - Embeddings for each coordinate of each of the `n_ex` examples - """ # noqa: E501 - # if X is a ragged array - if isinstance(X, list) and not issubclass(X[0].dtype.type, np.integer): - fstr = "Input to Embedding layer must be an array of integers, got '{}'" - raise TypeError(fstr.format(X[0].dtype.type)) - - # otherwise - if isinstance(X, np.ndarray) and not issubclass(X.dtype.type, np.integer): - fstr = "Input to Embedding layer must be an array of integers, got '{}'" - raise TypeError(fstr.format(X.dtype.type)) - - Y = self._fwd(X) - if retain_derived: - self.X.append(X) - return Y - - def _fwd(self, X): - """Actual computation of forward pass""" - W = self.parameters["W"] - if self.pool is None: - emb = W[X] - elif self.pool == "sum": - emb = np.array([W[x].sum(axis=0) for x in X])[:, None, :] - elif self.pool == "mean": - emb = np.array([W[x].mean(axis=0) for x in X])[:, None, :] - return emb - - def backward(self, dLdy, retain_grads=True): - """ - Backprop from layer outputs to embedding weights. 
- - Notes - ----- - Because the items in `X` are interpreted as indices, we cannot compute - the gradient of the layer output wrt. `X`. - - Parameters - ---------- - dLdy : :py:class:`ndarray ` of shape `(n_ex, n_in, n_out)` or list of arrays - The gradient(s) of the loss wrt. the layer output(s) - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - for dy, x in zip(dLdy, self.X): - dw = self._bwd(dy, x) - - if retain_grads: - self.gradients["W"] += dw - - def _bwd(self, dLdy, X): - """Actual computation of gradient of the loss wrt. W""" - dW = np.zeros_like(self.parameters["W"]) - dLdy = dLdy.reshape(-1, self.n_out) - - if self.pool is None: - for ix, v_id in enumerate(X.flatten()): - dW[v_id] += dLdy[ix] - elif self.pool == "sum": - for ix, v_ids in enumerate(X): - dW[v_ids] += dLdy[ix] - elif self.pool == "mean": - for ix, v_ids in enumerate(X): - dW[v_ids] += dLdy[ix] / len(v_ids) - return dW - - -class Dense(LayerBase): - def __init__(self, n_out, activation=None, kernel_initializer="glorot_uniform", name=None): - r""" - A fully-connected (dense) layer. - - Notes - ----- - A fully connected layer computes the function - - .. math:: - - \mathbf{Y} = f( \mathbf{WX} + \mathbf{b} ) - - where `f` is the activation nonlinearity, **W** and **b** are - parameters of the layer, and **X** is the minibatch of input examples. - - Parameters - ---------- - n_out : int - The dimensionality of the layer output - act_fn : str, :doc:`Activation ` object, or None - The element-wise output nonlinearity used in computing `Y`. If None, - use the identity function :math:`f(X) = X`. Default is None. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. - """ # noqa: E501 - super().__init__(name=name) - - self.kernel_initializer = kernel_initializer - self.n_in = None - self.n_out = n_out - self.act_fn = ActivationInitializer(activation)() - self.parameters = {"W": None, "b": None} - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - if not self.weights_set: - init_weights = WeightInitializer(str(self.act_fn), mode=self.kernel_initializer) - W = init_weights((self.n_in, self.n_out)) - b = np.zeros((1, self.n_out)) - else: - W, b = self.get_weights() - - self.parameters = {"W": W, "b": b} - self.derived_variables = {"Z": []} - self.gradients = {"W": np.zeros_like(W), "b": np.zeros_like(b)} - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "Dense", - "kernel_initializer": self.kernel_initializer, - "n_in": self.n_in, - "n_out": self.n_out, - "act_fn": str(self.act_fn), - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - """ - Compute the layer output on a single minibatch. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Layer input, representing the `n_in`-dimensional features for a - minibatch of `n_ex` examples. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. 
If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, n_out)` - Layer output for each of the `n_ex` examples. - """ - if not self.is_initialized: - self.n_in = X.shape[1] - self._init_params() - - Y, Z = self._fwd(X) - - if retain_derived: - self.X.append(X) - self.derived_variables["Z"].append(Z) - - return Y - - def _fwd(self, X): - """Actual computation of forward pass""" - W = self.parameters["W"] - b = self.parameters["b"] - - Z = X @ W + b - Y = self.act_fn(Z) - return Y, Z - - def backward(self, dLdy, retain_grads=True): - """ - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdy : :py:class:`ndarray ` of shape `(n_ex, n_out)` or list of arrays - The gradient(s) of the loss wrt. the layer output(s). - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dLdX : :py:class:`ndarray ` of shape `(n_ex, n_in)` or list of arrays - The gradient of the loss wrt. the layer input(s) `X`. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - dX = [] - X = self.X - for dy, x in zip(dLdy, X): - dx, dw, db = self._bwd(dy, x) - dX.append(dx) - - if retain_grads: - self.gradients["W"] += dw - self.gradients["b"] += db - - return dX[0] if len(X) == 1 else dX - - def _bwd(self, dLdy, X): - """Actual computation of gradient of the loss wrt. X, W, and b""" - W = self.parameters["W"] - b = self.parameters["b"] - - Z = X @ W + b - dZ = dLdy * self.act_fn.grad(Z) - - dX = dZ @ W.T - dW = X.T @ dZ - dB = dZ.sum(axis=0) # don't keep dimensions - return dX, dW, dB - - def _bwd2(self, dLdy, X, dLdy_bwd): - """Compute second derivatives / deriv. of loss wrt. dX, dW, and db""" - W = self.parameters["W"] - b = self.parameters["b"] - - dZ = self.act_fn.grad(X @ W + b) - ddZ = self.act_fn.grad2(X @ W + b) - - ddX = dLdy @ W * dZ - ddW = dLdy.T @ (dLdy_bwd * dZ) - ddB = np.sum(dLdy @ W * dLdy_bwd * ddZ, axis=0, keepdims=True) - return ddX, ddW, ddB - - -class Softmax(LayerBase): - def __init__(self, dim=-1, name=None): - r""" - A softmax nonlinearity layer. - - Notes - ----- - This is implemented as a layer rather than an activation primarily - because it requires retaining the layer input in order to compute the - softmax gradients properly. In other words, in contrast to other - simple activations, the softmax function and its gradient are not - computed elementwise, and thus are more easily expressed as a layer. - - The softmax function computes: - - .. math:: - - y_i = \frac{e^{x_i}}{\sum_j e^{x_j}} - - where :math:`x_i` is the `i` th element of input example **x**. - - Parameters - ---------- - dim: int - The dimension in `X` along which the softmax will be computed. - Default is -1. 
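For a single input row `x` with output `y = softmax(x)`, the Jacobian used by `_bwd` below is `diag(y) - y yᵀ`, i.e. `y_i * (1 - y_i)` on the diagonal and `-y_i * y_j` off it (note the diagonal entries are positive). A quick finite-difference check:

```python
import numpy as np

def softmax(x):
    e = np.exp(x - x.max())
    return e / e.sum()

def softmax_jacobian(x):
    # J[i, j] = y_i * (1 - y_i) if i == j, else -y_i * y_j
    y = softmax(x).reshape(-1, 1)
    return np.diagflat(y) - y @ y.T

x = np.array([0.2, -1.3, 0.7, 0.0])
J = softmax_jacobian(x)
eps = 1e-6
for j in range(x.size):
    bump = np.zeros_like(x)
    bump[j] = eps
    col = (softmax(x + bump) - softmax(x - bump)) / (2 * eps)   # numerical d y / d x_j
    assert np.allclose(col, J[:, j], atol=1e-6)
```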
- """ # noqa: E501 - super().__init__(name=name) - - self.dim = dim - self.n_in = None - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - self.gradients = {} - self.parameters = {} - self.derived_variables = {} - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "SoftmaxLayer", - "n_in": self.n_in, - "n_out": self.n_in, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - """ - Compute the layer output on a single minibatch. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Layer input, representing the `n_in`-dimensional features for a - minibatch of `n_ex` examples. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, n_out)` - Layer output for each of the `n_ex` examples. - """ - if not self.is_initialized: - self.n_in = X.shape[1] - self._init_params() - - Y = self._fwd(X) - - if retain_derived: - self.X.append(X) - - return Y - - def _fwd(self, X): - """Actual computation of softmax forward pass""" - # center data to avoid overflow - e_X = np.exp(X - np.max(X, axis=self.dim, keepdims=True)) - return e_X / e_X.sum(axis=self.dim, keepdims=True) - - def backward(self, dLdy, retain_grads=True): - """ - Backprop from layer outputs to inputs. - - Parameters - ---------- - dLdy : :py:class:`ndarray ` of shape `(n_ex, n_out)` or list of arrays - The gradient(s) of the loss wrt. the layer output(s). - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dLdX : :py:class:`ndarray ` of shape `(n_ex, n_in)` - The gradient of the loss wrt. the layer input `X`. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - dX = [] - X = self.X - for dy, x in zip(dLdy, X): - dx = self._bwd(dy, x) - dX.append(dx) - - return dX[0] if len(X) == 1 else dX - - def _bwd(self, dLdy, X): - """ - Actual computation of the gradient of the loss wrt. the input X. - - The Jacobian, J, of the softmax for input x = [x1, ..., xn] is: - J[i, j] = - softmax(x_i) * (1 - softmax(x_j)) if i = j - -softmax(x_i) * softmax(x_j) if i != j - where - x_n is input example n (ie., the n'th row in X) - """ - dX = [] - for dy, x in zip(dLdy, X): - dxi = [] - for dyi, xi in zip(*np.atleast_2d(dy, x)): - yi = self._fwd(xi.reshape(1, -1)).reshape(-1, 1) - dyidxi = np.diagflat(yi) - yi @ yi.T # jacobian wrt. input sample xi - dxi.append(dyi @ dyidxi) - dX.append(dxi) - return np.array(dX).reshape(*X.shape) - - -class SparseEvolution(LayerBase): - def __init__( - self, - n_out, - zeta=0.3, - epsilon=20, - act_fn=None, - kernel_initializer="glorot_uniform", - name=None, - ): - r""" - A sparse Erdos-Renyi layer with evolutionary rewiring via the sparse - evolutionary training (SET) algorithm. - - Notes - ----- - .. 
math::
-
- Y = f( (\mathbf{W} \odot \mathbf{W}_{mask}) \mathbf{X} + \mathbf{b} )
-
- where :math:`\odot` is the elementwise multiplication operation, `f` is
- the layer activation function, and :math:`\mathbf{W}_{mask}` is an
- evolved binary mask.
-
- Parameters
- ----------
- n_out : int
- The dimensionality of the layer output
- zeta : float
- Proportion of the positive and negative weights closest to zero to
- drop after each training update. Default is 0.3.
- epsilon : float
- Layer sparsity parameter. Default is 20.
- act_fn : str, :doc:`Activation ` object, or None
- The element-wise output nonlinearity used in computing `Y`. If None,
- use the identity function :math:`f(X) = X`. Default is None.
- kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'}
- The weight initialization strategy. Default is `'glorot_uniform'`.
- """ # noqa: E501
- super().__init__(name=name)
-
- self.kernel_initializer = kernel_initializer
- self.n_in = None
- self.zeta = zeta
- self.n_out = n_out
- self.epsilon = epsilon
- self.act_fn = ActivationInitializer(act_fn)()
- self.parameters = {"W": None, "b": None}
- self.is_initialized = False
- self.weights_set = False
-
- def _init_params(self):
- if not self.weights_set:
- init_weights = WeightInitializer(str(self.act_fn), mode=self.kernel_initializer)
- W = init_weights((self.n_in, self.n_out))
- b = np.zeros((1, self.n_out))
- # convert a fully connected base layer into a sparse layer
- n_in, n_out = W.shape
- p = (self.epsilon * (n_in + n_out)) / (n_in * n_out)
- mask = np.random.binomial(1, p, size=W.shape)
- else:
- W, b, mask = self.get_weights()
-
- self.derived_variables = {"Z": []}
- self.parameters = {"W": W, "b": b, "W_mask": mask}
- self.gradients = {"W": np.zeros_like(W), "b": np.zeros_like(b)}
- self.is_initialized = True
- self.weights_set = True
-
- @property
- def hyperparameters(self):
- """Return a dictionary containing the layer hyperparameters."""
- return {
- "layer": "SparseEvolutionary",
- "kernel_initializer": self.kernel_initializer,
- "zeta": self.zeta,
- "n_in": self.n_in,
- "n_out": self.n_out,
- "epsilon": self.epsilon,
- "act_fn": str(self.act_fn),
- "optimizer": {
- "cache": self.optimizer.cache,
- "hyperparameters": self.optimizer.hyperparameters,
- },
- }
-
- def forward(self, X, retain_derived=True):
- """
- Compute the layer output on a single minibatch.
-
- Parameters
- ----------
- X : :py:class:`ndarray ` of shape `(n_ex, n_in)`
- Layer input, representing the `n_in`-dimensional features for a
- minibatch of `n_ex` examples.
- retain_derived : bool
- Whether to retain the variables calculated during the forward pass
- for use later during backprop. If False, this suggests the layer
- will not be expected to backprop through wrt. this input. Default
- is True.
-
- Returns
- -------
- Y : :py:class:`ndarray ` of shape `(n_ex, n_out)`
- Layer output for each of the `n_ex` examples.
- """ - if not self.is_initialized: - self.n_in = X.shape[1] - self._init_params() - - Y, Z = self._fwd(X) - - if retain_derived: - self.X.append(X) - self.derived_variables["Z"].append(Z) - - return Y - - def _fwd(self, X): - """Actual computation of forward pass""" - W = self.parameters["W"] - b = self.parameters["b"] - W_mask = self.parameters["W_mask"] - - Z = X @ (W * W_mask) + b - Y = self.act_fn(Z) - return Y, Z - - def backward(self, dLdy, retain_grads=True): - """ - Backprop from layer outputs to inputs - - Parameters - ---------- - dLdy : :py:class:`ndarray ` of shape `(n_ex, n_out)` or list of arrays - The gradient(s) of the loss wrt. the layer output(s). - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dLdX : :py:class:`ndarray ` of shape `(n_ex, n_in)` - The gradient of the loss wrt. the layer input `X`. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - dX = [] - X = self.X - for dy, x in zip(dLdy, X): - dx, dw, db = self._bwd(dy, x) - dX.append(dx) - - if retain_grads: - self.gradients["W"] += dw - self.gradients["b"] += db - - return dX[0] if len(X) == 1 else dX - - def _bwd(self, dLdy, X): - """Actual computation of gradient of the loss wrt. X, W, and b""" - W = self.parameters["W"] - b = self.parameters["b"] - W_sparse = W * self.parameters["W_mask"] - - Z = X @ W_sparse + b - dZ = dLdy * self.act_fn.grad(Z) - - dX = dZ @ W_sparse.T - dW = X.T @ dZ - dB = dZ.sum(axis=0, keepdims=True) - return dX, dW, dB - - def _bwd2(self, dLdy, X, dLdy_bwd): - """Compute second derivatives / deriv. of loss wrt. dX, dW, and db""" - W = self.parameters["W"] - b = self.parameters["b"] - W_sparse = W * self.parameters["W_mask"] - - dZ = self.act_fn.grad(X @ W_sparse + b) - ddZ = self.act_fn.grad2(X @ W_sparse + b) - - ddX = dLdy @ W * dZ - ddW = dLdy.T @ (dLdy_bwd * dZ) - ddB = np.sum(dLdy @ W_sparse * dLdy_bwd * ddZ, axis=0, keepdims=True) - return ddX, ddW, ddB - - def update(self): - """ - Update parameters using current gradients and evolve network - connections via SET. 
- """ - assert self.trainable, "Layer is frozen" - for k, v in self.gradients.items(): - if k in self.parameters: - self.parameters[k] = self.optimizer(self.parameters[k], v, k) - self.flush_gradients() - self._evolve_connections() - - def _evolve_connections(self): - assert self.trainable, "Layer is frozen" - W = self.parameters["W"] - W_mask = self.parameters["W_mask"] - W_flat = (W * W_mask).reshape(-1) - - k = int(np.prod(W.shape) * self.zeta) - - (p_ix,) = np.where(W_flat > 0) - (n_ix,) = np.where(W_flat < 0) - - # remove the k largest negative and k smallest positive weights - k_smallest_p = p_ix[np.argsort(W_flat[p_ix])][:k] - k_largest_n = n_ix[np.argsort(W_flat[n_ix])][-k:] - n_rewired = len(k_smallest_p) + len(k_largest_n) - - self.mask = np.ones_like(W_flat) - self.mask[k_largest_n] = 0 - self.mask[k_smallest_p] = 0 - - zero_ixs = np.where(self.mask == 0) - - # resample new connections and update mask - np.shuffle(zero_ixs) - self.mask[zero_ixs[:n_rewired]] = 1 - self.mask = self.mask.reshape(*W.shape) - - -####################################################################### -# Convolutional Layers # -####################################################################### - - -class Conv1D(LayerBase): - def __init__( - self, - out_ch, - kernel_width, - pad=0, - stride=1, - dilation=0, - act_fn=None, - kernel_initializer="glorot_uniform", - name=None, - ): - """ - Apply a one-dimensional convolution kernel over an input volume. - - Notes - ----- - Equations:: - - out = act_fn(pad(X) * W + b) - out_dim = floor(1 + (n_rows_in + pad_left + pad_right - kernel_width) / stride) - - where '`*`' denotes the cross-correlation operation with stride `s` and dilation `d`. - - Parameters - ---------- - out_ch : int - The number of filters/kernels to compute in the current layer - kernel_width : int - The width of a single 1D filter/kernel in the current layer - act_fn : str, :doc:`Activation ` object, or None - The activation function for computing ``Y[t]``. If None, use the - identity function :math:`f(x) = x` by default. Default is None. - pad : int, tuple, or {'same', 'causal'} - The number of rows/columns to zero-pad the input with. If `'same'`, - calculate padding to ensure the output length matches in the input - length. If `'causal'` compute padding such that the output both has - the same length as the input AND ``output[t]`` does not depend on - ``input[t + 1:]``. Default is 0. - stride : int - The stride/hop of the convolution kernels as they move over the - input volume. Default is 1. - dilation : int - Number of pixels inserted between kernel elements. Effective kernel - shape after dilation is: ``[kernel_rows * (d + 1) - d, kernel_cols - * (d + 1) - d]``. Default is 0. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. 
- """ # noqa: E501 - super().__init__(name=name) - - self.pad = pad - self.kernel_initializer = kernel_initializer - self.in_ch = None - self.out_ch = out_ch - self.stride = stride - self.dilation = dilation - self.kernel_width = kernel_width - self.act_fn = ActivationInitializer(act_fn)() - self.parameters = {"W": None, "b": None} - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - if not self.weights_set: - init_weights = WeightInitializer(str(self.act_fn), mode=self.kernel_initializer) - W = init_weights((self.kernel_width, self.in_ch, self.out_ch)) - b = np.zeros((1, 1, self.out_ch)) - else: - W, b = self.get_weights() - - self.parameters = {"W": W, "b": b} - self.gradients = {"W": np.zeros_like(W), "b": np.zeros_like(b)} - self.derived_variables = {"Z": [], "out_rows": [], "out_cols": []} - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "Conv1D", - "pad": self.pad, - "kernel_initializer": self.kernel_initializer, - "in_ch": self.in_ch, - "out_ch": self.out_ch, - "stride": self.stride, - "dilation": self.dilation, - "act_fn": str(self.act_fn), - "kernel_width": self.kernel_width, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - """ - Compute the layer output given input volume `X`. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, l_in, in_ch)` - The input volume consisting of `n_ex` examples, each of length - `l_in` and with `in_ch` input channels - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, l_out, out_ch)` - The layer output. - """ - if not self.is_initialized: - self.in_ch = X.shape[2] - self._init_params() - - W = self.parameters["W"] - b = self.parameters["b"] - - n_ex, l_in, in_ch = X.shape - s, p, d = self.stride, self.pad, self.dilation - - # pad the input and perform the forward convolution - Z = conv1D(X, W, s, p, d) + b - Y = self.act_fn(Z) - - if retain_derived: - self.X.append(X) - self.derived_variables["Z"].append(Z) - self.derived_variables["out_rows"].append(Z.shape[1]) - self.derived_variables["out_cols"].append(Z.shape[2]) - - return Y - - def backward(self, dLdy, retain_grads=True): - """ - Compute the gradient of the loss with respect to the layer parameters. - - Notes - ----- - Relies on :meth:`~numpy_ml.neural_nets.utils.im2col` and - :meth:`~numpy_ml.neural_nets.utils.col2im` to vectorize the - gradient calculation. See the private method :meth:`_backward_naive` - for a more straightforward implementation. - - Parameters - ---------- - dLdy : :py:class:`ndarray ` of shape `(n_ex, l_out, out_ch)` or list of arrays - The gradient(s) of the loss with respect to the layer output(s). - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(n_ex, l_in, in_ch)` - The gradient of the loss with respect to the layer input volume. 
- """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - X = self.X - Z = self.derived_variables["Z"] - - dX = [] - for dy, x, z in zip(dLdy, X, Z): - dx, dw, db = self._bwd(dy, x, z) - dX.append(dx) - - if retain_grads: - self.gradients["W"] += dw - self.gradients["b"] += db - - return dX[0] if len(X) == 1 else dX - - def _bwd(self, dLdy, X, Z): - """Actual computation of gradient of the loss wrt. X, W, and b""" - W = self.parameters["W"] - - # add a row dimension to X, W, and dZ to permit us to use im2col/col2im - X2D = np.expand_dims(X, axis=1) - W2D = np.expand_dims(W, axis=0) - dLdZ = np.expand_dims(dLdy * self.act_fn.grad(Z), axis=1) - - d = self.dilation - fr, fc, in_ch, out_ch = W2D.shape - n_ex, l_out, out_ch = dLdy.shape - fr, fc, s = 1, self.kernel_width, self.stride - - # use pad1D here in order to correctly handle self.pad = 'causal', - # which isn't defined for pad2D - _, p = pad1D(X, self.pad, self.kernel_width, s, d) - p2D = (0, 0, p[0], p[1]) - - # columnize W, X, and dLdy - dLdZ_col = dLdZ.transpose(3, 1, 2, 0).reshape(out_ch, -1) - W_col = W2D.transpose(3, 2, 0, 1).reshape(out_ch, -1).T - X_col, _ = im2col(X2D, W2D.shape, p2D, s, d) - - # compute gradients via matrix multiplication and reshape - dB = dLdZ_col.sum(axis=1).reshape(1, 1, -1) - dW = (dLdZ_col @ X_col.T).reshape(out_ch, in_ch, fr, fc).transpose(2, 3, 1, 0) - - # reshape columnized dX back into the same format as the input volume - dX_col = W_col @ dLdZ_col - dX = col2im(dX_col, X2D.shape, W2D.shape, p2D, s, d).transpose(0, 2, 3, 1) - - return np.squeeze(dX, axis=1), np.squeeze(dW, axis=0), dB - - def _backward_naive(self, dLdy, retain_grads=True): - """ - A slower (ie., non-vectorized) but more straightforward implementation - of the gradient computations for a 2D conv layer. - - Parameters - ---------- - dLdy : :py:class:`ndarray ` of shape `(n_ex, l_out, out_ch)` or list of arrays - The gradient(s) of the loss with respect to the layer output(s). - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(n_ex, l_in, in_ch)` - The gradient of the loss with respect to the layer input volume. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - W = self.parameters["W"] - b = self.parameters["b"] - Zs = self.derived_variables["Z"] - - Xs, d = self.X, self.dilation - fw, s, p = self.kernel_width, self.stride, self.pad - - dXs = [] - for X, Z, dy in zip(Xs, Zs, dLdy): - n_ex, l_out, out_ch = dy.shape - X_pad, (pr1, pr2) = pad1D(X, p, self.kernel_width, s, d) - - dX = np.zeros_like(X_pad) - dZ = dy * self.act_fn.grad(Z) - - dW, dB = np.zeros_like(W), np.zeros_like(b) - for m in range(n_ex): - for i in range(l_out): - for c in range(out_ch): - # compute window boundaries w. 
stride and dilation - i0, i1 = i * s, (i * s) + fw * (d + 1) - d - - wc = W[:, :, c] - kernel = dZ[m, i, c] - window = X_pad[m, i0 : i1 : (d + 1), :] - - dB[:, :, c] += kernel - dW[:, :, c] += window * kernel - dX[m, i0 : i1 : (d + 1), :] += wc * kernel - - if retain_grads: - self.gradients["W"] += dW - self.gradients["b"] += dB - - pr2 = None if pr2 == 0 else -pr2 - dXs.append(dX[:, pr1:pr2, :]) - return dXs[0] if len(Xs) == 1 else dXs - - -class Conv2D(LayerBase): - def __init__( - self, - out_ch, - kernel_shape, - pad=0, - stride=1, - dilation=0, - act_fn=None, - kernel_initializer="glorot_uniform", - name=None, - ): - """ - Apply a two-dimensional convolution kernel over an input volume. - - Notes - ----- - Equations:: - - out = act_fn(pad(X) * W + b) - n_rows_out = floor(1 + (n_rows_in + pad_left + pad_right - filter_rows) / stride) - n_cols_out = floor(1 + (n_cols_in + pad_top + pad_bottom - filter_cols) / stride) - - where `'*'` denotes the cross-correlation operation with stride `s` and - dilation `d`. - - Parameters - ---------- - out_ch : int - The number of filters/kernels to compute in the current layer - kernel_shape : 2-tuple - The dimension of a single 2D filter/kernel in the current layer - act_fn : str, :doc:`Activation ` object, or None - The activation function for computing ``Y[t]``. If None, use the - identity function :math:`f(X) = X` by default. Default is None. - pad : int, tuple, or 'same' - The number of rows/columns to zero-pad the input with. Default is - 0. - stride : int - The stride/hop of the convolution kernels as they move over the - input volume. Default is 1. - dilation : int - Number of pixels inserted between kernel elements. Effective kernel - shape after dilation is: ``[kernel_rows * (d + 1) - d, kernel_cols - * (d + 1) - d]``. Default is 0. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. - """ # noqa: E501 - super().__init__(name=name) - - self.pad = pad - self.kernel_initializer = kernel_initializer - self.in_ch = None - self.out_ch = out_ch - self.stride = stride - self.dilation = dilation - self.kernel_shape = kernel_shape - self.act_fn = ActivationInitializer(act_fn)() - self.parameters = {"W": None, "b": None} - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - fr, fc = self.kernel_shape - if not self.weights_set: - init_weights = WeightInitializer(str(self.act_fn), mode=self.kernel_initializer) - W = init_weights((fr, fc, self.in_ch, self.out_ch)) - b = np.zeros((1, 1, 1, self.out_ch)) - else: - W, b = self.get_weights() - - self.parameters = {"W": W, "b": b} - self.gradients = {"W": np.zeros_like(W), "b": np.zeros_like(b)} - self.derived_variables = {"Z": [], "out_rows": [], "out_cols": []} - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "Conv2D", - "pad": self.pad, - "kernel_initializer": self.kernel_initializer, - "in_ch": self.in_ch, - "out_ch": self.out_ch, - "stride": self.stride, - "dilation": self.dilation, - "act_fn": str(self.act_fn), - "kernel_shape": self.kernel_shape, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - """ - Compute the layer output given input volume `X`. 
- - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The input volume consisting of `n_ex` examples, each with dimension - (`in_rows`, `in_cols`, `in_ch`). - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - The layer output. - """ # noqa: E501 - if not self.is_initialized: - self.in_ch = X.shape[3] - self._init_params() - - W = self.parameters["W"] - b = self.parameters["b"] - - n_ex, in_rows, in_cols, in_ch = X.shape - s, p, d = self.stride, self.pad, self.dilation - - # pad the input and perform the forward convolution - Z = conv2D(X, W, s, p, d) + b - Y = self.act_fn(Z) - - if retain_derived: - self.X.append(X) - self.derived_variables["Z"].append(Z) - self.derived_variables["out_rows"].append(Z.shape[1]) - self.derived_variables["out_cols"].append(Z.shape[2]) - - return Y - - def backward(self, dLdy, retain_grads=True): - """ - Compute the gradient of the loss with respect to the layer parameters. - - Notes - ----- - Relies on :meth:`~numpy_ml.neural_nets.utils.im2col` and - :meth:`~numpy_ml.neural_nets.utils.col2im` to vectorize the - gradient calculation. - - See the private method :meth:`_backward_naive` for a more straightforward - implementation. - - Parameters - ---------- - dLdy : :py:class:`ndarray ` of shape `(n_ex, out_rows, - out_cols, out_ch)` or list of arrays - The gradient(s) of the loss with respect to the layer output(s). - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The gradient of the loss with respect to the layer input volume. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - dX = [] - X = self.X - Z = self.derived_variables["Z"] - - for dy, x, z in zip(dLdy, X, Z): - dx, dw, db = self._bwd(dy, x, z) - dX.append(dx) - - if retain_grads: - self.gradients["W"] += dw - self.gradients["b"] += db - - return dX[0] if len(X) == 1 else dX - - def _bwd(self, dLdy, X, Z): - """Actual computation of gradient of the loss wrt. X, W, and b""" - W = self.parameters["W"] - - d = self.dilation - fr, fc, in_ch, out_ch = W.shape - n_ex, out_rows, out_cols, out_ch = dLdy.shape - (fr, fc), s, p = self.kernel_shape, self.stride, self.pad - - # columnize W, X, and dLdy - dLdZ = dLdy * self.act_fn.grad(Z) - dLdZ_col = dLdZ.transpose(3, 1, 2, 0).reshape(out_ch, -1) - W_col = W.transpose(3, 2, 0, 1).reshape(out_ch, -1).T - X_col, p = im2col(X, W.shape, p, s, d) - - # compute gradients via matrix multiplication and reshape - dB = dLdZ_col.sum(axis=1).reshape(1, 1, 1, -1) - dW = (dLdZ_col @ X_col.T).reshape(out_ch, in_ch, fr, fc).transpose(2, 3, 1, 0) - - # reshape columnized dX back into the same format as the input volume - dX_col = W_col @ dLdZ_col - dX = col2im(dX_col, X.shape, W.shape, p, s, d).transpose(0, 2, 3, 1) - - return dX, dW, dB - - def _backward_naive(self, dLdy, retain_grads=True): - """ - A slower (ie., non-vectorized) but more straightforward implementation - of the gradient computations for a 2D conv layer. 
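The vectorized backward pass above leans on `im2col`/`col2im` to express the convolution and its gradients as matrix products. As a rough illustration of why that works, the sketch below implements a deliberately naive `im2col` for stride 1 with no padding or dilation and uses it to compute a 2D cross-correlation as a single matmul. It is a toy stand-in, not the utility function the layer actually calls.

```python
import numpy as np

def im2col_naive(X, fr, fc):
    """Stack every (fr, fc, in_ch) patch of X (n_ex, rows, cols, in_ch) as a column."""
    n_ex, rows, cols, in_ch = X.shape
    out_r, out_c = rows - fr + 1, cols - fc + 1
    patches = []
    for m in range(n_ex):
        for i in range(out_r):
            for j in range(out_c):
                patches.append(X[m, i:i + fr, j:j + fc, :].reshape(-1))
    return np.stack(patches, axis=1)              # (fr*fc*in_ch, n_ex*out_r*out_c)

rng = np.random.default_rng(0)
X = rng.normal(size=(2, 5, 5, 3))                 # NHWC input volume
W = rng.normal(size=(3, 3, 3, 4))                 # (fr, fc, in_ch, out_ch)

X_col = im2col_naive(X, 3, 3)                     # (27, 18)
W_col = W.reshape(-1, 4).T                        # (out_ch, fr*fc*in_ch)
Z = (W_col @ X_col).T.reshape(2, 3, 3, 4)         # cross-correlation as one matmul

# spot-check one output element against the direct definition
direct = (X[0, 1:4, 2:5, :] * W[..., 1]).sum()
print(np.allclose(Z[0, 1, 2, 1], direct))         # True
```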
- - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - The gradient of the loss with respect to the layer output. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The gradient of the loss with respect to the layer input volume. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdy, list): - dLdy = [dLdy] - - W = self.parameters["W"] - b = self.parameters["b"] - Zs = self.derived_variables["Z"] - - Xs, d = self.X, self.dilation - (fr, fc), s, p = self.kernel_shape, self.stride, self.pad - - dXs = [] - for X, Z, dy in zip(Xs, Zs, dLdy): - n_ex, out_rows, out_cols, out_ch = dy.shape - X_pad, (pr1, pr2, pc1, pc2) = pad2D(X, p, self.kernel_shape, s, d) - - dZ = dLdy * self.act_fn.grad(Z) - - dX = np.zeros_like(X_pad) - dW, dB = np.zeros_like(W), np.zeros_like(b) - for m in range(n_ex): - for i in range(out_rows): - for j in range(out_cols): - for c in range(out_ch): - # compute window boundaries w. stride and dilation - i0, i1 = i * s, (i * s) + fr * (d + 1) - d - j0, j1 = j * s, (j * s) + fc * (d + 1) - d - - wc = W[:, :, :, c] - kernel = dZ[m, i, j, c] - window = X_pad[m, i0 : i1 : (d + 1), j0 : j1 : (d + 1), :] - - dB[:, :, :, c] += kernel - dW[:, :, :, c] += window * kernel - dX[m, i0 : i1 : (d + 1), j0 : j1 : (d + 1), :] += ( - wc * kernel - ) - - if retain_grads: - self.gradients["W"] += dW - self.gradients["b"] += dB - - pr2 = None if pr2 == 0 else -pr2 - pc2 = None if pc2 == 0 else -pc2 - dXs.append(dX[:, pr1:pr2, pc1:pc2, :]) - return dXs[0] if len(Xs) == 1 else dXs - - -class Pool2D(LayerBase): - def __init__(self, kernel_shape, stride=1, pad=0, mode="max", name=None): - """ - A single two-dimensional pooling layer. - - Parameters - ---------- - kernel_shape : 2-tuple - The dimension of a single 2D filter/kernel in the current layer - stride : int - The stride/hop of the convolution kernels as they move over the - input volume. Default is 1. - pad : int, tuple, or 'same' - The number of rows/columns of 0's to pad the input. Default is 0. - mode : {"max", "average"} - The pooling function to apply. - """ # noqa: E501 - super().__init__(name=name) - - self.pad = pad - self.mode = mode - self.in_ch = None - self.out_ch = None - self.stride = stride - self.kernel_shape = kernel_shape - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - self.derived_variables = {"out_rows": [], "out_cols": []} - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "Pool2D", - "act_fn": None, - "pad": self.pad, - "mode": self.mode, - "in_ch": self.in_ch, - "out_ch": self.out_ch, - "stride": self.stride, - "kernel_shape": self.kernel_shape, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - """ - Compute the layer output given input volume `X`. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The input volume consisting of `n_ex` examples, each with dimension - (`in_rows`,`in_cols`, `in_ch`) - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. 
- - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - The layer output. - """ # noqa: E501 - if not self.is_initialized: - self.in_ch = self.out_ch = X.shape[3] - self._init_params() - - n_ex, in_rows, in_cols, nc_in = X.shape - (fr, fc), s, p = self.kernel_shape, self.stride, self.pad - X_pad, (pr1, pr2, pc1, pc2) = pad2D(X, p, self.kernel_shape, s) - - out_rows = np.floor(1 + (in_rows + pr1 + pr2 - fr) / s).astype(int) - out_cols = np.floor(1 + (in_cols + pc1 + pc2 - fc) / s).astype(int) - - if self.mode == "max": - pool_fn = np.max - elif self.mode == "average": - pool_fn = np.mean - - Y = np.zeros((n_ex, out_rows, out_cols, self.out_ch)) - for m in range(n_ex): - for i in range(out_rows): - for j in range(out_cols): - for c in range(self.out_ch): - # calculate window boundaries, incorporating stride - i0, i1 = i * s, (i * s) + fr - j0, j1 = j * s, (j * s) + fc - - xi = X_pad[m, i0:i1, j0:j1, c] - Y[m, i, j, c] = pool_fn(xi) - - if retain_derived: - self.X.append(X) - self.derived_variables["out_rows"].append(out_rows) - self.derived_variables["out_cols"].append(out_cols) - - return Y - - def backward(self, dLdY, retain_grads=True): - """ - Backprop from layer outputs to inputs - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The gradient of the loss wrt. the layer output `Y`. - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The gradient of the loss wrt. the layer input `X`. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdY, list): - dLdY = [dLdY] - - Xs = self.X - out_rows = self.derived_variables["out_rows"] - out_cols = self.derived_variables["out_cols"] - - (fr, fc), s, p = self.kernel_shape, self.stride, self.pad - - dXs = [] - for X, dy, out_row, out_col in zip(Xs, dLdY, out_rows, out_cols): - n_ex, in_rows, in_cols, nc_in = X.shape - X_pad, (pr1, pr2, pc1, pc2) = pad2D(X, p, self.kernel_shape, s) - - dX = np.zeros_like(X_pad) - for m in range(n_ex): - for i in range(out_row): - for j in range(out_col): - for c in range(self.out_ch): - # calculate window boundaries, incorporating stride - i0, i1 = i * s, (i * s) + fr - j0, j1 = j * s, (j * s) + fc - - if self.mode == "max": - xi = X[m, i0:i1, j0:j1, c] - - # enforce that the mask can only consist of a - # single `True` entry, even if multiple entries in - # xi are equal to max(xi) - mask = np.zeros_like(xi).astype(bool) - x, y = np.argwhere(xi == np.max(xi))[0] - mask[x, y] = True - - dX[m, i0:i1, j0:j1, c] += mask * dy[m, i, j, c] - elif self.mode == "average": - frame = np.ones((fr, fc)) * dy[m, i, j, c] - dX[m, i0:i1, j0:j1, c] += frame / np.prod((fr, fc)) - - pr2 = None if pr2 == 0 else -pr2 - pc2 = None if pc2 == 0 else -pc2 - dXs.append(dX[:, pr1:pr2, pc1:pc2, :]) - return dXs[0] if len(Xs) == 1 else dXs - - -class Deconv2D(LayerBase): - def __init__( - self, - out_ch, - kernel_shape, - pad=0, - stride=1, - act_fn=None, - kernel_initializer="glorot_uniform", - name=None, - ): - """ - Apply a two-dimensional "deconvolution" to an input volume. - - Notes - ----- - The term "deconvolution" in this context does not correspond with the - deconvolution operation in mathematics. More accurately, this layer is - computing a transposed convolution / fractionally-strided convolution. 
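A small detail from the `Pool2D` backward pass above is worth an isolated illustration: when several entries in a window tie for the maximum, the mask is restricted to a single `True` entry so the upstream gradient is routed to exactly one input position. The window values and the upstream gradient of 5.0 below are made up.

```python
import numpy as np

xi = np.array([[1.0, 3.0],
               [3.0, 0.5]])                # pooling window with a tied maximum
dy = 5.0                                   # upstream gradient for this output cell

mask = np.zeros_like(xi, dtype=bool)
r, c = np.argwhere(xi == xi.max())[0]      # keep only the first maximum
mask[r, c] = True

print(mask * dy)
# [[0. 5.]
#  [0. 0.]]
```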
- - Parameters - ---------- - out_ch : int - The number of filters/kernels to compute in the current layer - kernel_shape : 2-tuple - The dimension of a single 2D filter/kernel in the current layer - act_fn : str, :doc:`Activation ` object, or None - The activation function for computing ``Y[t]``. If None, use - :class:`~numpy_ml.neural_nets.activations.Affine` - activations by default. Default is None. - pad : int, tuple, or 'same' - The number of rows/columns to zero-pad the input with. Default is 0. - stride : int - The stride/hop of the convolution kernels as they move over the - input volume. Default is 1. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. - """ # noqa: E501 - super().__init__(name=name) - - self.pad = pad - self.kernel_initializer = kernel_initializer - self.in_ch = None - self.stride = stride - self.out_ch = out_ch - self.kernel_shape = kernel_shape - self.act_fn = ActivationInitializer(act_fn)() - self.parameters = {"W": None, "b": None} - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - fr, fc = self.kernel_shape - if not self.weights_set: - init_weights = WeightInitializer(str(self.act_fn), mode=self.kernel_initializer) - W = init_weights((fr, fc, self.in_ch, self.out_ch)) - b = np.zeros((1, 1, 1, self.out_ch)) - else: - W, b = self.get_weights() - - self.parameters = {"W": W, "b": b} - self.gradients = {"W": np.zeros_like(W), "b": np.zeros_like(b)} - self.derived_variables = {"Z": [], "out_rows": [], "out_cols": []} - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "Deconv2D", - "pad": self.pad, - "kernel_initializer": self.kernel_initializer, - "in_ch": self.in_ch, - "out_ch": self.out_ch, - "stride": self.stride, - "act_fn": str(self.act_fn), - "kernel_shape": self.kernel_shape, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, X, retain_derived=True): - """ - Compute the layer output given input volume `X`. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The input volume consisting of `n_ex` examples, each with dimension - (`in_rows`, `in_cols`, `in_ch`). - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - The layer output. - """ # noqa: E501 - if not self.is_initialized: - self.in_ch = X.shape[3] - self._init_params() - - W = self.parameters["W"] - b = self.parameters["b"] - - s, p = self.stride, self.pad - n_ex, in_rows, in_cols, in_ch = X.shape - - # pad the input and perform the forward deconvolution - Z = deconv2D_naive(X, W, s, p, 0) + b - Y = self.act_fn(Z) - - if retain_derived: - self.X.append(X) - self.derived_variables["Z"].append(Z) - self.derived_variables["out_rows"].append(Z.shape[1]) - self.derived_variables["out_cols"].append(Z.shape[2]) - - return Y - - def backward(self, dLdY, retain_grads=True): - """ - Compute the gradient of the loss with respect to the layer parameters. 
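The spatial size of a transposed convolution follows the usual rule: with symmetric padding `p`, kernel size `k`, and stride `s`, the output dimension is `s * (in - 1) + k - 2p`, which inverts the forward convolution's `floor((in + 2p - k) / s) + 1` whenever that division is exact. The backward pass below re-derives an equivalent quantity from the padded input; the sketch here just shows the closed-form arithmetic with made-up sizes.

```python
def conv2d_out_dim(in_dim, kernel, stride, pad):
    """Spatial output size of a standard (forward) convolution."""
    return (in_dim + 2 * pad - kernel) // stride + 1

def deconv2d_out_dim(in_dim, kernel, stride, pad):
    """Spatial output size of the matching transposed convolution."""
    return stride * (in_dim - 1) + kernel - 2 * pad

# stride 1: conv maps 8 -> 6, and the transposed conv maps 6 -> 8
print(conv2d_out_dim(8, kernel=3, stride=1, pad=0))     # 6
print(deconv2d_out_dim(6, kernel=3, stride=1, pad=0))   # 8

# stride 2 with kernel 4 and pad 1 also round-trips: 8 -> 4 -> 8
print(conv2d_out_dim(8, kernel=4, stride=2, pad=1))     # 4
print(deconv2d_out_dim(4, kernel=4, stride=2, pad=1))   # 8
```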
- - Notes - ----- - Relies on :meth:`~numpy_ml.neural_nets.utils.im2col` and - :meth:`~numpy_ml.neural_nets.utils.col2im` to vectorize the - gradient calculations. - - Parameters - ---------- - dLdY : :py:class:`ndarray ` of shape (`n_ex, out_rows, out_cols, out_ch`) - The gradient of the loss with respect to the layer output. - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape (`n_ex, in_rows, in_cols, in_ch`) - The gradient of the loss with respect to the layer input volume. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - if not isinstance(dLdY, list): - dLdY = [dLdY] - - dX = [] - X, Z = self.X, self.derived_variables["Z"] - - for dy, x, z in zip(dLdY, X, Z): - dx, dw, db = self._bwd(dy, x, z) - dX.append(dx) - - if retain_grads: - self.gradients["W"] += dw - self.gradients["b"] += db - - return dX[0] if len(X) == 1 else dX - - def _bwd(self, dLdY, X, Z): - """Actual computation of gradient of the loss wrt. X, W, and b""" - W = np.rot90(self.parameters["W"], 2) - - s = self.stride - if self.stride > 1: - X = dilate(X, s - 1) - s = 1 - - fr, fc, in_ch, out_ch = W.shape - (fr, fc), p = self.kernel_shape, self.pad - n_ex, out_rows, out_cols, out_ch = dLdY.shape - - # pad X the first time - X_pad, p = pad2D(X, p, W.shape[:2], s) - n_ex, in_rows, in_cols, in_ch = X_pad.shape - pr1, pr2, pc1, pc2 = p - - # compute additional padding to produce the deconvolution - out_rows = s * (in_rows - 1) - pr1 - pr2 + fr - out_cols = s * (in_cols - 1) - pc1 - pc2 + fc - out_dim = (out_rows, out_cols) - - # add additional "deconvolution" padding - _p = calc_pad_dims_2D(X_pad.shape, out_dim, W.shape[:2], s, 0) - X_pad, _ = pad2D(X_pad, _p, W.shape[:2], s) - - # columnize W, X, and dLdY - dLdZ = dLdY * self.act_fn.grad(Z) - dLdZ, _ = pad2D(dLdZ, p, W.shape[:2], s) - - dLdZ_col = dLdZ.transpose(3, 1, 2, 0).reshape(out_ch, -1) - W_col = W.transpose(3, 2, 0, 1).reshape(out_ch, -1) - X_col, _ = im2col(X_pad, W.shape, 0, s, 0) - - # compute gradients via matrix multiplication and reshape - dB = dLdZ_col.sum(axis=1).reshape(1, 1, 1, -1) - dW = (dLdZ_col @ X_col.T).reshape(out_ch, in_ch, fr, fc).transpose(2, 3, 1, 0) - dW = np.rot90(dW, 2) - - # reshape columnized dX back into the same format as the input volume - dX_col = W_col.T @ dLdZ_col - - total_pad = tuple(i + j for i, j in zip(p, _p)) - dX = col2im(dX_col, X.shape, W.shape, total_pad, s, 0).transpose(0, 2, 3, 1) - dX = dX[:, :: self.stride, :: self.stride, :] - - return dX, dW, dB - - -####################################################################### -# Recurrent Layers # -####################################################################### - - -class RNNCell(LayerBase): - def __init__(self, n_out, act_fn="Tanh", kernel_initializer="glorot_uniform", name=None): - r""" - A single step of a vanilla (Elman) RNN. - - Notes - ----- - At timestep `t`, the vanilla RNN cell computes - - .. 
math:: - - \mathbf{Z}^{(t)} &= - \mathbf{W}_{ax} \mathbf{X}^{(t)} + \mathbf{b}_{ax} + - \mathbf{W}_{aa} \mathbf{A}^{(t-1)} + \mathbf{b}_{aa} \\ - \mathbf{A}^{(t)} &= f(\mathbf{Z}^{(t)}) - - where - - - :math:`\mathbf{X}^{(t)}` is the input at time `t` - - :math:`\mathbf{A}^{(t)}` is the hidden state at timestep `t` - - `f` is the layer activation function - - :math:`\mathbf{W}_{ax}` and :math:`\mathbf{b}_{ax}` are the weights - and bias for the input to hidden layer - - :math:`\mathbf{W}_{aa}` and :math:`\mathbf{b}_{aa}` are the weights - and biases for the hidden to hidden layer - - Parameters - ---------- - n_out : int - The dimension of a single hidden state / output on a given timestep - act_fn : str, :doc:`Activation ` object, or None - The activation function for computing ``A[t]``. Default is `'Tanh'`. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. - """ # noqa: E501 - super().__init__(name=name) - - self.kernel_initializer = kernel_initializer - self.n_in = None - self.n_out = n_out - self.n_timesteps = None - self.act_fn = ActivationInitializer(act_fn)() - self.parameters = {"Waa": None, "Wax": None, "ba": None, "bx": None} - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - self.X = [] - if not self.weights_set: - init_weights = WeightInitializer(str(self.act_fn), mode=self.kernel_initializer) - Wax = init_weights((self.n_in, self.n_out)) - Waa = init_weights((self.n_out, self.n_out)) - ba = np.zeros((self.n_out, 1)) - bx = np.zeros((self.n_out, 1)) - else: - Waa, ba, Wax, bx = self.get_weights() - - self.parameters = {"Waa": Waa, "ba": ba, "Wax": Wax, "bx": bx} - - self.gradients = { - "Waa": np.zeros_like(Waa), - "Wax": np.zeros_like(Wax), - "ba": np.zeros_like(ba), - "bx": np.zeros_like(bx), - } - - self.derived_variables = { - "A": [], - "Z": [], - "n_timesteps": 0, - "current_step": 0, - "dLdA_accumulator": None, - } - - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "RNNCell", - "kernel_initializer": self.kernel_initializer, - "n_in": self.n_in, - "n_out": self.n_out, - "act_fn": str(self.act_fn), - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, Xt): - """ - Compute the network output for a single timestep. - - Parameters - ---------- - Xt : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Input at timestep `t` consisting of `n_ex` examples each of - dimensionality `n_in`. - - Returns - ------- - At: :py:class:`ndarray ` of shape `(n_ex, n_out)` - The value of the hidden state at timestep `t` for each of the - `n_ex` examples. 
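The recurrence documented above reduces to two matrix products and a pointwise nonlinearity per timestep. A self-contained NumPy sketch of one Elman step, following the same `Z[t]` expression and the cell's convention of storing biases as column vectors; all dimensions and weights are illustrative.

```python
import numpy as np

rng = np.random.default_rng(0)
n_ex, n_in, n_out = 4, 5, 3

Xt = rng.normal(size=(n_ex, n_in))           # input at timestep t
A_prev = np.zeros((n_ex, n_out))             # hidden state from timestep t-1

Wax = rng.normal(size=(n_in, n_out))         # input-to-hidden weights
Waa = rng.normal(size=(n_out, n_out))        # hidden-to-hidden weights
ba = np.zeros((n_out, 1))                    # hidden-to-hidden bias (column vector)
bx = np.zeros((n_out, 1))                    # input-to-hidden bias (column vector)

Zt = A_prev @ Waa + ba.T + Xt @ Wax + bx.T   # pre-activation Z[t]
At = np.tanh(Zt)                             # new hidden state A[t]
print(At.shape)                              # (4, 3)
```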
- """ - if not self.is_initialized: - self.n_in = Xt.shape[1] - self._init_params() - - # increment timestep - self.derived_variables["n_timesteps"] += 1 - self.derived_variables["current_step"] += 1 - - # Retrieve parameters - ba = self.parameters["ba"] - bx = self.parameters["bx"] - Wax = self.parameters["Wax"] - Waa = self.parameters["Waa"] - - # initialize the hidden state to zero - As = self.derived_variables["A"] - if len(As) == 0: - n_ex, n_in = Xt.shape - A0 = np.zeros((n_ex, self.n_out)) - As.append(A0) - - # compute next hidden state - Zt = As[-1] @ Waa + ba.T + Xt @ Wax + bx.T - At = self.act_fn(Zt) - - self.derived_variables["Z"].append(Zt) - self.derived_variables["A"].append(At) - - # store intermediate variables - self.X.append(Xt) - return At - - def backward(self, dLdAt): - """ - Backprop for a single timestep. - - Parameters - ---------- - dLdAt : :py:class:`ndarray ` of shape `(n_ex, n_out)` - The gradient of the loss wrt. the layer outputs (ie., hidden - states) at timestep `t`. - - Returns - ------- - dLdXt : :py:class:`ndarray ` of shape `(n_ex, n_in)` - The gradient of the loss wrt. the layer inputs at timestep `t`. - """ - assert self.trainable, "Layer is frozen" - - # decrement current step - self.derived_variables["current_step"] -= 1 - - # extract context variables - Zs = self.derived_variables["Z"] - As = self.derived_variables["A"] - t = self.derived_variables["current_step"] - dA_acc = self.derived_variables["dLdA_accumulator"] - - # initialize accumulator - if dA_acc is None: - dA_acc = np.zeros_like(As[0]) - - # get network weights for gradient calcs - Wax = self.parameters["Wax"] - Waa = self.parameters["Waa"] - - # compute gradient components at timestep t - dA = dLdAt + dA_acc - dZ = self.act_fn.grad(Zs[t]) * dA - dXt = dZ @ Wax.T - - # update parameter gradients with signal from current step - self.gradients["Waa"] += As[t].T @ dZ - self.gradients["Wax"] += self.X[t].T @ dZ - self.gradients["ba"] += dZ.sum(axis=0, keepdims=True).T - self.gradients["bx"] += dZ.sum(axis=0, keepdims=True).T - - # update accumulator variable for hidden state - self.derived_variables["dLdA_accumulator"] = dZ @ Waa.T - return dXt - - def flush_gradients(self): - """Erase all the layer's derived variables and gradients.""" - assert self.trainable, "Layer is frozen" - - self.X = [] - for k, v in self.derived_variables.items(): - self.derived_variables[k] = [] - - self.derived_variables["n_timesteps"] = 0 - self.derived_variables["current_step"] = 0 - - # reset parameter gradients to 0 - for k, v in self.parameters.items(): - self.gradients[k] = np.zeros_like(v) - - -class LSTMCell(LayerBase): - def __init__( - self, - n_out, - act_fn="Tanh", - gate_fn="Sigmoid", - kernel_initializer="glorot_uniform", - name=None, - ): - """ - A single step of a long short-term memory (LSTM) RNN. 
- - Notes - ----- - Notation: - - - ``Z[t]`` is the input to each of the gates at timestep `t` - - ``A[t]`` is the value of the hidden state at timestep `t` - - ``Cc[t]`` is the value of the *candidate* cell/memory state at timestep `t` - - ``C[t]`` is the value of the *final* cell/memory state at timestep `t` - - ``Gf[t]`` is the output of the forget gate at timestep `t` - - ``Gu[t]`` is the output of the update gate at timestep `t` - - ``Go[t]`` is the output of the output gate at timestep `t` - - Equations:: - - Z[t] = stack([A[t-1], X[t]]) - Gf[t] = gate_fn(Wf @ Z[t] + bf) - Gu[t] = gate_fn(Wu @ Z[t] + bu) - Go[t] = gate_fn(Wo @ Z[t] + bo) - Cc[t] = act_fn(Wc @ Z[t] + bc) - C[t] = Gf[t] * C[t-1] + Gu[t] * Cc[t] - A[t] = Go[t] * act_fn(C[t]) - - where `@` indicates dot/matrix product, and '*' indicates elementwise - multiplication. - - Parameters - ---------- - n_out : int - The dimension of a single hidden state / output on a given timestep. - act_fn : str, :doc:`Activation ` object, or None - The activation function for computing ``A[t]``. Default is - `'Tanh'`. - gate_fn : str, :doc:`Activation ` object, or None - The gate function for computing the update, forget, and output - gates. Default is `'Sigmoid'`. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. - """ # noqa: E501 - super().__init__(name=name) - - self.kernel_initializer = kernel_initializer - self.n_in = None - self.n_out = n_out - self.n_timesteps = None - self.act_fn = ActivationInitializer(act_fn)() - self.gate_fn = ActivationInitializer(gate_fn)() - self.parameters = { - "Wf": None, - "Wu": None, - "Wc": None, - "Wo": None, - "bf": None, - "bu": None, - "bc": None, - "bo": None, - } - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - self.X = [] - if not self.weights_set: - init_weights_gate = WeightInitializer(str(self.gate_fn), mode=self.kernel_initializer) - init_weights_act = WeightInitializer(str(self.act_fn), mode=self.kernel_initializer) - - Wf = init_weights_gate((self.n_in + self.n_out, self.n_out)) - Wu = init_weights_gate((self.n_in + self.n_out, self.n_out)) - Wc = init_weights_act((self.n_in + self.n_out, self.n_out)) - Wo = init_weights_gate((self.n_in + self.n_out, self.n_out)) - - bf = np.zeros((1, self.n_out)) - bu = np.zeros((1, self.n_out)) - bc = np.zeros((1, self.n_out)) - bo = np.zeros((1, self.n_out)) - else: - Wf, bf, Wu, bu, Wc, bc, Wo, bo = self.get_weights() - - self.parameters = { - "Wf": Wf, - "bf": bf, - "Wu": Wu, - "bu": bu, - "Wc": Wc, - "bc": bc, - "Wo": Wo, - "bo": bo, - } - - self.gradients = { - "Wf": np.zeros_like(Wf), - "Wu": np.zeros_like(Wu), - "Wc": np.zeros_like(Wc), - "Wo": np.zeros_like(Wo), - "bf": np.zeros_like(bf), - "bu": np.zeros_like(bu), - "bc": np.zeros_like(bc), - "bo": np.zeros_like(bo), - } - - self.derived_variables = { - "C": [], - "A": [], - "Gf": [], - "Gu": [], - "Go": [], - "Gc": [], - "Cc": [], - "n_timesteps": 0, - "current_step": 0, - "dLdA_accumulator": None, - "dLdC_accumulator": None, - } - - self.is_initialized = True - self.weights_set = True - - def _get_params(self): - Wf = self.parameters["Wf"] - Wu = self.parameters["Wu"] - Wc = self.parameters["Wc"] - Wo = self.parameters["Wo"] - bf = self.parameters["bf"] - bu = self.parameters["bu"] - bc = self.parameters["bc"] - bo = self.parameters["bo"] - return Wf, Wu, Wc, Wo, bf, bu, bc, bo - - @property - def hyperparameters(self): - """Return a dictionary 
containing the layer hyperparameters.""" - return { - "layer": "LSTMCell", - "kernel_initializer": self.kernel_initializer, - "n_in": self.n_in, - "n_out": self.n_out, - "act_fn": str(self.act_fn), - "gate_fn": str(self.gate_fn), - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def forward(self, Xt): - """ - Compute the layer output for a single timestep. - - Parameters - ---------- - Xt : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Input at timestep t consisting of `n_ex` examples each of - dimensionality `n_in`. - - Returns - ------- - At: :py:class:`ndarray ` of shape `(n_ex, n_out)` - The value of the hidden state at timestep `t` for each of the `n_ex` - examples. - Ct: :py:class:`ndarray ` of shape `(n_ex, n_out)` - The value of the cell/memory state at timestep `t` for each of the - `n_ex` examples. - """ - if not self.is_initialized: - self.n_in = Xt.shape[1] - self._init_params() - - Wf, Wu, Wc, Wo, bf, bu, bc, bo = self._get_params() - - self.derived_variables["n_timesteps"] += 1 - self.derived_variables["current_step"] += 1 - - if len(self.derived_variables["A"]) == 0: - n_ex, n_in = Xt.shape - init = np.zeros((n_ex, self.n_out)) - self.derived_variables["A"].append(init) - self.derived_variables["C"].append(init) - - A_prev = self.derived_variables["A"][-1] - C_prev = self.derived_variables["C"][-1] - - # concatenate A_prev and Xt to create Zt - Zt = np.hstack([A_prev, Xt]) - - Gft = self.gate_fn(Zt @ Wf + bf) - Gut = self.gate_fn(Zt @ Wu + bu) - Got = self.gate_fn(Zt @ Wo + bo) - Cct = self.act_fn(Zt @ Wc + bc) - Ct = Gft * C_prev + Gut * Cct - At = Got * self.act_fn(Ct) - - # bookkeeping - self.X.append(Xt) - self.derived_variables["A"].append(At) - self.derived_variables["C"].append(Ct) - self.derived_variables["Gf"].append(Gft) - self.derived_variables["Gu"].append(Gut) - self.derived_variables["Go"].append(Got) - self.derived_variables["Cc"].append(Cct) - return At, Ct - - def backward(self, dLdAt): - """ - Backprop for a single timestep. - - Parameters - ---------- - dLdAt : :py:class:`ndarray ` of shape `(n_ex, n_out)` - The gradient of the loss wrt. the layer outputs (ie., hidden - states) at timestep `t`. - - Returns - ------- - dLdXt : :py:class:`ndarray ` of shape `(n_ex, n_in)` - The gradient of the loss wrt. the layer inputs at timestep `t`. 
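A compact NumPy rendering of the single LSTM step computed in `forward` above: sigmoid gates over the concatenated `[A_prev, X_t]`, a tanh candidate cell state, and the gated state update. Dimensions and random weights are illustrative only.

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.default_rng(0)
n_ex, n_in, n_out = 4, 5, 3

Xt = rng.normal(size=(n_ex, n_in))
A_prev = np.zeros((n_ex, n_out))
C_prev = np.zeros((n_ex, n_out))

# one weight matrix and bias per gate, acting on Z[t] = [A_prev, X_t]
Wf, Wu, Wc, Wo = (rng.normal(size=(n_in + n_out, n_out)) for _ in range(4))
bf, bu, bc, bo = (np.zeros((1, n_out)) for _ in range(4))

Zt = np.hstack([A_prev, Xt])
Gf = sigmoid(Zt @ Wf + bf)          # forget gate
Gu = sigmoid(Zt @ Wu + bu)          # update gate
Go = sigmoid(Zt @ Wo + bo)          # output gate
Cc = np.tanh(Zt @ Wc + bc)          # candidate cell state
Ct = Gf * C_prev + Gu * Cc          # new cell state C[t]
At = Go * np.tanh(Ct)               # new hidden state A[t]
print(At.shape, Ct.shape)           # (4, 3) (4, 3)
```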
- """ - assert self.trainable, "Layer is frozen" - - Wf, Wu, Wc, Wo, bf, bu, bc, bo = self._get_params() - - self.derived_variables["current_step"] -= 1 - t = self.derived_variables["current_step"] - - Got = self.derived_variables["Go"][t] - Gft = self.derived_variables["Gf"][t] - Gut = self.derived_variables["Gu"][t] - Cct = self.derived_variables["Cc"][t] - At = self.derived_variables["A"][t + 1] - Ct = self.derived_variables["C"][t + 1] - C_prev = self.derived_variables["C"][t] - A_prev = self.derived_variables["A"][t] - - Xt = self.X[t] - Zt = np.hstack([A_prev, Xt]) - - dA_acc = self.derived_variables["dLdA_accumulator"] - dC_acc = self.derived_variables["dLdC_accumulator"] - - # initialize accumulators - if dA_acc is None: - dA_acc = np.zeros_like(At) - - if dC_acc is None: - dC_acc = np.zeros_like(Ct) - - # Gradient calculations - # --------------------- - - dA = dLdAt + dA_acc - dC = dC_acc + dA * Got * self.act_fn.grad(Ct) - - # compute the input to the gate functions at timestep t - _Go = Zt @ Wo + bo - _Gf = Zt @ Wf + bf - _Gu = Zt @ Wu + bu - _Gc = Zt @ Wc + bc - - # compute gradients wrt the *input* to each gate - dGot = dA * self.act_fn(Ct) * self.gate_fn.grad(_Go) - dCct = dC * Gut * self.act_fn.grad(_Gc) - dGut = dC * Cct * self.gate_fn.grad(_Gu) - dGft = dC * C_prev * self.gate_fn.grad(_Gf) - - dZ = dGft @ Wf.T + dGut @ Wu.T + dCct @ Wc.T + dGot @ Wo.T - dXt = dZ[:, self.n_out :] - - self.gradients["Wc"] += Zt.T @ dCct - self.gradients["Wu"] += Zt.T @ dGut - self.gradients["Wf"] += Zt.T @ dGft - self.gradients["Wo"] += Zt.T @ dGot - self.gradients["bo"] += dGot.sum(axis=0, keepdims=True) - self.gradients["bu"] += dGut.sum(axis=0, keepdims=True) - self.gradients["bf"] += dGft.sum(axis=0, keepdims=True) - self.gradients["bc"] += dCct.sum(axis=0, keepdims=True) - - self.derived_variables["dLdA_accumulator"] = dZ[:, : self.n_out] - self.derived_variables["dLdC_accumulator"] = Gft * dC - return dXt - - def flush_gradients(self): - """Erase all the layer's derived variables and gradients.""" - assert self.trainable, "Layer is frozen" - - self.X = [] - for k, v in self.derived_variables.items(): - self.derived_variables[k] = [] - - self.derived_variables["n_timesteps"] = 0 - self.derived_variables["current_step"] = 0 - - # reset parameter gradients to 0 - for k, v in self.parameters.items(): - self.gradients[k] = np.zeros_like(v) - - -class RNN(LayerBase): - def __init__(self, n_out, act_fn="Tanh", kernel_initializer="glorot_uniform", name=None): - """ - A single vanilla (Elman)-RNN layer. - - Parameters - ---------- - n_out : int - The dimension of a single hidden state / output on a given - timestep. - act_fn : str, :doc:`Activation ` object, or None - The activation function for computing ``A[t]``. Default is - `'Tanh'`. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. 
- """ # noqa: E501 - super().__init__(name=name) - - self.kernel_initializer = kernel_initializer - self.n_in = None - self.n_out = n_out - self.n_timesteps = None - self.act_fn = ActivationInitializer(act_fn)() - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - self.cell = RNNCell( - n_in=self.n_in, - n_out=self.n_out, - act_fn=self.act_fn, - kernel_initializer=self.kernel_initializer, - ) - self.cell.set_optimizer() # FIXME - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "RNN", - "kernel_initializer": self.kernel_initializer, - "n_in": self.n_in, - "n_out": self.n_out, - "act_fn": str(self.act_fn), - "optimizer": self.cell.hyperparameters["optimizer"], - } - - def forward(self, X): - """ - Run a forward pass across all timesteps in the input. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in, n_t)` - Input consisting of `n_ex` examples each of dimensionality `n_in` - and extending for `n_t` timesteps. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, n_out, n_t)` - The value of the hidden state for each of the `n_ex` examples - across each of the `n_t` timesteps. - """ - if not self.is_initialized: - self.n_in = X.shape[1] - self._init_params() - - Y = [] - n_ex, n_in, n_t = X.shape - for t in range(n_t): - yt = self.cell.forward(X[:, :, t]) - Y.append(yt) - return np.dstack(Y) - - def backward(self, dLdA): - """ - Run a backward pass across all timesteps in the input. - - Parameters - ---------- - dLdA : :py:class:`ndarray ` of shape `(n_ex, n_out, n_t)` - The gradient of the loss with respect to the layer output for each - of the `n_ex` examples across all `n_t` timesteps. - - Returns - ------- - dLdX : :py:class:`ndarray ` of shape `(n_ex, n_in, n_t)` - The value of the hidden state for each of the `n_ex` examples - across each of the `n_t` timesteps. - """ - assert self.cell.trainable, "Layer is frozen" - dLdX = [] - n_ex, n_out, n_t = dLdA.shape - for t in reversed(range(n_t)): - dLdXt = self.cell.backward(dLdA[:, :, t]) - dLdX.insert(0, dLdXt) - dLdX = np.dstack(dLdX) - return dLdX - - @property - def derived_variables(self): - """ - Return a dictionary containing any intermediate variables computed - during the forward / backward passes. - """ - return self.cell.derived_variables - - @property - def gradients(self): - """ - Return a dictionary of the gradients computed during the backward - pass - """ - return self.cell.gradients - - @property - def parameters(self): - """Return a dictionary of the current layer parameters""" - return self.cell.parameters - - def set_params(self, summary_dict): - """ - Set the layer parameters from a dictionary of values. - - Parameters - ---------- - summary_dict : dict - A dictionary of layer parameters and hyperparameters. If a required - parameter or hyperparameter is not included within `summary_dict`, - this method will use the value in the current layer's - :meth:`summary` method. - - Returns - ------- - layer : :doc:`Layer ` object - The newly-initialized layer. - """ - self = super().set_params(summary_dict) - return self.cell.set_parameters(summary_dict) - - def freeze(self): - """ - Freeze the layer parameters at their current values so they can no - longer be updated. 
- """ - self.cell.freeze() - - def unfreeze(self): - """Unfreeze the layer parameters so they can be updated.""" - self.cell.unfreeze() - - def flush_gradients(self): - """Erase all the layer's derived variables and gradients.""" - self.cell.flush_gradients() - - def update(self): - """ - Update the layer parameters using the accrued gradients and layer - optimizer. Flush all gradients once the update is complete. - """ - self.cell.update() - self.flush_gradients() - - -class LSTM(LayerBase): - def __init__( - self, - n_out, - act_fn="Tanh", - gate_fn="Sigmoid", - kernel_initializer="glorot_uniform", - name=None, - ): - """ - A single long short-term memory (LSTM) RNN layer. - - Parameters - ---------- - n_out : int - The dimension of a single hidden state / output on a given timestep. - act_fn : str, :doc:`Activation ` object, or None - The activation function for computing ``A[t]``. Default is `'Tanh'`. - gate_fn : str, :doc:`Activation ` object, or None - The gate function for computing the update, forget, and output - gates. Default is `'Sigmoid'`. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is `'glorot_uniform'`. - """ # noqa: E501 - super().__init__(name=name) - - self.kernel_initializer = kernel_initializer - self.n_in = None - self.n_out = n_out - self.n_timesteps = None - self.act_fn = ActivationInitializer(act_fn)() - self.gate_fn = ActivationInitializer(gate_fn)() - self.is_initialized = False - self.weights_set = False - - def _init_params(self): - self.cell = LSTMCell( - n_in=self.n_in, - n_out=self.n_out, - act_fn=self.act_fn, - gate_fn=self.gate_fn, - kernel_initializer=self.kernel_initializer, - ) - ## FIXME: does LSTMCell need optimizer? - self.is_initialized = True - self.weights_set = True - - @property - def hyperparameters(self): - """Return a dictionary containing the layer hyperparameters.""" - return { - "layer": "LSTM", - "kernel_initializer": self.kernel_initializer, - "n_in": self.n_in, - "n_out": self.n_out, - "act_fn": str(self.act_fn), - "gate_fn": str(self.gate_fn), - "optimizer": self.cell.hyperparameters["optimizer"], - } - - def forward(self, X): - """ - Run a forward pass across all timesteps in the input. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in, n_t)` - Input consisting of `n_ex` examples each of dimensionality `n_in` - and extending for `n_t` timesteps. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, n_out, n_t)` - The value of the hidden state for each of the `n_ex` examples - across each of the `n_t` timesteps. - """ - if not self.is_initialized: - self.n_in = X.shape[1] - self._init_params() - - Y = [] - n_ex, n_in, n_t = X.shape - for t in range(n_t): - yt, _ = self.cell.forward(X[:, :, t]) - Y.append(yt) - return np.dstack(Y) - - def backward(self, dLdA): - """ - Run a backward pass across all timesteps in the input. - - Parameters - ---------- - dLdA : :py:class:`ndarray ` of shape `(n_ex, n_out, n_t)` - The gradient of the loss with respect to the layer output for each - of the `n_ex` examples across all `n_t` timesteps. - - Returns - ------- - dLdX : :py:class:`ndarray ` of shape (`n_ex`, `n_in`, `n_t`) - The value of the hidden state for each of the `n_ex` examples - across each of the `n_t` timesteps. 
- """ # noqa: E501 - assert self.cell.trainable, "Layer is frozen" - dLdX = [] - n_ex, n_out, n_t = dLdA.shape - for t in reversed(range(n_t)): - dLdXt, _ = self.cell.backward(dLdA[:, :, t]) - dLdX.insert(0, dLdXt) - dLdX = np.dstack(dLdX) - return dLdX - - @property - def derived_variables(self): - """ - Return a dictionary containing any intermediate variables computed - during the forward / backward passes. - """ - return self.cell.derived_variables - - @property - def gradients(self): - """ - Return a dictionary of the gradients computed during the backward - pass - """ - return self.cell.gradients - - @property - def parameters(self): - """Return a dictionary of the current layer parameters""" - return self.cell.parameters - - def freeze(self): - """ - Freeze the layer parameters at their current values so they can no - longer be updated. - """ - self.cell.freeze() - - def unfreeze(self): - """Unfreeze the layer parameters so they can be updated.""" - self.cell.unfreeze() - - def set_params(self, summary_dict): - """ - Set the layer parameters from a dictionary of values. - - Parameters - ---------- - summary_dict : dict - A dictionary of layer parameters and hyperparameters. If a required - parameter or hyperparameter is not included within `summary_dict`, - this method will use the value in the current layer's - :meth:`summary` method. - - Returns - ------- - layer : :doc:`Layer ` object - The newly-initialized layer. - """ - self = super().set_params(summary_dict) - return self.cell.set_parameters(summary_dict) - - def flush_gradients(self): - """Erase all the layer's derived variables and gradients.""" - self.cell.flush_gradients() - - def update(self): - """ - Update the layer parameters using the accrued gradients and layer - optimizer. Flush all gradients once the update is complete. - """ - self.cell.update() - self.flush_gradients() diff --git a/aitk/keras/losses/README.md b/aitk/keras/losses/README.md deleted file mode 100644 index 59e1008..0000000 --- a/aitk/keras/losses/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Losses - -The `losses.py` module implements several common loss functions, including: - -- Squared error -- Cross-entropy -- Variational lower-bound for binary VAE ([Kingma & Welling, 2014](https://arxiv.org/abs/1312.6114)) -- WGAN-GP loss for generator and critic ([Gulrajani et al., 2017](https://arxiv.org/pdf/1704.00028.pdf)) -- Noise contrastive estimation (NCE) loss ([Gutmann & - Hyvärinen, 2010](https://www.cs.helsinki.fi/u/ahyvarin/papers/Gutmann10AISTATS.pdf); [Minh & Teh, 2012](https://www.cs.toronto.edu/~amnih/papers/ncelm.pdf)) diff --git a/aitk/keras/losses/__init__.py b/aitk/keras/losses/__init__.py deleted file mode 100644 index 908ff51..0000000 --- a/aitk/keras/losses/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -Common neural network loss functions. - -This module implements loss objects that can be used during neural network -training. 
-""" - -from .losses import * diff --git a/aitk/keras/losses/losses.py b/aitk/keras/losses/losses.py deleted file mode 100644 index 23f7fc8..0000000 --- a/aitk/keras/losses/losses.py +++ /dev/null @@ -1,946 +0,0 @@ -from abc import ABC, abstractmethod - -import numpy as np - -from ..numpy_ml_utils.testing import is_binary, is_stochastic -from ..initializers import ( - WeightInitializer, - ActivationInitializer, - OptimizerInitializer, -) - - -class ObjectiveBase(ABC): - def __init__(self): - super().__init__() - self.name = "base_loss" - - @abstractmethod - def loss(self, y_true, y_pred): - pass - - @abstractmethod - def grad(self, y_true, y_pred, **kwargs): - pass - - -class MeanSquaredError(ObjectiveBase): - def __init__(self): - super().__init__() - self.name = "mean_squared_error" - - def loss(self, y, y_pred): - squared_error = np.square(y_pred - y) - mse = np.mean(squared_error) - return mse - - def __call__(self, y, y_pred): - return self.loss(y, y_pred) - - def grad(self, y, y_pred): - return 2 * (y_pred - y) - -class SquaredError(ObjectiveBase): - def __init__(self): - r""" - A squared-error / `L2` loss. - - Notes - ----- - For real-valued target **y** and predictions :math:`\hat{\mathbf{y}}`, the - squared error is - - .. math:: - \mathcal{L}(\mathbf{y}, \hat{\mathbf{y}}) - = 0.5 ||\hat{\mathbf{y}} - \mathbf{y}||_2^2 - """ - super().__init__() - self.name = "squared_error" - - def __call__(self, y, y_pred): - return self.loss(y, y_pred) - - def __str__(self): - return "SquaredError" - - @staticmethod - def loss(y, y_pred): - """ - Compute the squared error between `y` and `y_pred`. - - Parameters - ---------- - y : :py:class:`ndarray ` of shape (n, m) - Ground truth values for each of `n` examples - y_pred : :py:class:`ndarray ` of shape (n, m) - Predictions for the `n` examples in the batch. - - Returns - ------- - loss : float - The sum of the squared error across dimensions and examples. - """ - return 0.5 * np.linalg.norm(y_pred - y) ** 2 - - @staticmethod - def grad(y, y_pred, z, act_fn): - r""" - Gradient of the squared error loss with respect to the pre-nonlinearity - input, `z`. - - Notes - ----- - The current method computes the gradient :math:`\\frac{\partial - \mathcal{L}}{\partial \mathbf{z}}`, where - - .. math:: - - \mathcal{L}(\mathbf{z}) - &= \\text{squared_error}(\mathbf{y}, g(\mathbf{z})) \\\\ - g(\mathbf{z}) - &= \\text{act_fn}(\mathbf{z}) - - The gradient with respect to :math:`\mathbf{z}` is then - - .. math:: - - \\frac{\partial \mathcal{L}}{\partial \mathbf{z}} - = (g(\mathbf{z}) - \mathbf{y}) \left( - \\frac{\partial g}{\partial \mathbf{z}} \\right) - - Parameters - ---------- - y : :py:class:`ndarray ` of shape (n, m) - Ground truth values for each of `n` examples. - y_pred : :py:class:`ndarray ` of shape (n, m) - Predictions for the `n` examples in the batch. - act_fn : :doc:`Activation ` object - The activation function for the output layer of the network. - - Returns - ------- - grad : :py:class:`ndarray ` of shape (n, m) - The gradient of the squared error loss with respect to `z`. - """ - return (y_pred - y) * act_fn.grad(z) - - -class CrossEntropy(ObjectiveBase): - def __init__(self): - r""" - A cross-entropy loss. - - Notes - ----- - For a one-hot target **y** and predicted class probabilities - :math:`\hat{\mathbf{y}}`, the cross entropy is - - .. 
math:: - \mathcal{L}(\mathbf{y}, \hat{\mathbf{y}}) - = \sum_i y_i \log \hat{y}_i - """ - super().__init__() - self.name = "cross_entropy" - - def __call__(self, y, y_pred): - return self.loss(y, y_pred) - - def __str__(self): - return "CrossEntropy" - - @staticmethod - def loss(y, y_pred): - """ - Compute the cross-entropy (log) loss. - - Notes - ----- - This method returns the sum (not the average!) of the losses for each - sample. - - Parameters - ---------- - y : :py:class:`ndarray ` of shape (n, m) - Class labels (one-hot with `m` possible classes) for each of `n` - examples. - y_pred : :py:class:`ndarray ` of shape (n, m) - Probabilities of each of `m` classes for the `n` examples in the - batch. - - Returns - ------- - loss : float - The sum of the cross-entropy across classes and examples. - """ - is_binary(y) - is_stochastic(y_pred) - - # prevent taking the log of 0 - eps = np.finfo(float).eps - - # each example is associated with a single class; sum the negative log - # probability of the correct label over all samples in the batch. - # observe that we are taking advantage of the fact that y is one-hot - # encoded - cross_entropy = -np.sum(y * np.log(y_pred + eps)) - return cross_entropy - - @staticmethod - def grad(y, y_pred): - r""" - Compute the gradient of the cross entropy loss with regard to the - softmax input, `z`. - - Notes - ----- - The gradient for this method goes through both the cross-entropy loss - AND the softmax non-linearity to return :math:`\\frac{\partial - \mathcal{L}}{\partial \mathbf{z}}` (rather than :math:`\\frac{\partial - \mathcal{L}}{\partial \\text{softmax}(\mathbf{z})}`). - - In particular, let: - - .. math:: - - \mathcal{L}(\mathbf{z}) - = \\text{cross_entropy}(\\text{softmax}(\mathbf{z})). - - The current method computes: - - .. math:: - - \\frac{\partial \mathcal{L}}{\partial \mathbf{z}} - &= \\text{softmax}(\mathbf{z}) - \mathbf{y} \\\\ - &= \hat{\mathbf{y}} - \mathbf{y} - - Parameters - ---------- - y : :py:class:`ndarray ` of shape `(n, m)` - A one-hot encoding of the true class labels. Each row constitues a - training example, and each column is a different class. - y_pred: :py:class:`ndarray ` of shape `(n, m)` - The network predictions for the probability of each of `m` class - labels on each of `n` examples in a batch. - - Returns - ------- - grad : :py:class:`ndarray ` of shape (n, m) - The gradient of the cross-entropy loss with respect to the *input* - to the softmax function. - """ - is_binary(y) - is_stochastic(y_pred) - - # derivative of xe wrt z is y_pred - y_true, hence we can just - # subtract 1 from the probability of the correct class labels - grad = y_pred - y - - # [optional] scale the gradients by the number of examples in the batch - # n, m = y.shape - # grad /= n - return grad - - -class VAELoss(ObjectiveBase): - def __init__(self): - r""" - The variational lower bound for a variational autoencoder with Bernoulli - units. - - Notes - ----- - The VLB to the sum of the binary cross entropy between the true input and - the predicted output (the "reconstruction loss") and the KL divergence - between the learned variational distribution :math:`q` and the prior, - :math:`p`, assumed to be a unit Gaussian. - - .. math:: - - \\text{VAELoss} = - \\text{cross_entropy}(\mathbf{y}, \hat{\mathbf{y}}) - + \\mathbb{KL}[q \ || \ p] - - where :math:`\mathbb{KL}[q \ || \ p]` is the Kullback-Leibler - divergence between the distributions :math:`q` and :math:`p`. - - References - ---------- - .. [1] Kingma, D. P. & Welling, M. (2014). 
"Auto-encoding variational Bayes". - *arXiv preprint arXiv:1312.6114.* https://arxiv.org/pdf/1312.6114.pdf - """ - super().__init__() - self.name = "vae_loss" - - def __call__(self, y, y_pred, t_mean, t_log_var): - return self.loss(y, y_pred, t_mean, t_log_var) - - def __str__(self): - return "VAELoss" - - @staticmethod - def loss(y, y_pred, t_mean, t_log_var): - r""" - Variational lower bound for a Bernoulli VAE. - - Parameters - ---------- - y : :py:class:`ndarray ` of shape `(n_ex, N)` - The original images. - y_pred : :py:class:`ndarray ` of shape `(n_ex, N)` - The VAE reconstruction of the images. - t_mean: :py:class:`ndarray ` of shape `(n_ex, T)` - Mean of the variational distribution :math:`q(t \mid x)`. - t_log_var: :py:class:`ndarray ` of shape `(n_ex, T)` - Log of the variance vector of the variational distribution - :math:`q(t \mid x)`. - - Returns - ------- - loss : float - The VLB, averaged across the batch. - """ - # prevent nan on log(0) - eps = np.finfo(float).eps - y_pred = np.clip(y_pred, eps, 1 - eps) - - # reconstruction loss: binary cross-entropy - rec_loss = -np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred), axis=1) - - # KL divergence between the variational distribution q and the prior p, - # a unit gaussian - kl_loss = -0.5 * np.sum(1 + t_log_var - t_mean ** 2 - np.exp(t_log_var), axis=1) - loss = np.mean(kl_loss + rec_loss) - return loss - - @staticmethod - def grad(y, y_pred, t_mean, t_log_var): - """ - Compute the gradient of the VLB with regard to the network parameters. - - Parameters - ---------- - y : :py:class:`ndarray ` of shape `(n_ex, N)` - The original images. - y_pred : :py:class:`ndarray ` of shape `(n_ex, N)` - The VAE reconstruction of the images. - t_mean: :py:class:`ndarray ` of shape `(n_ex, T)` - Mean of the variational distribution :math:`q(t | x)`. - t_log_var: :py:class:`ndarray ` of shape `(n_ex, T)` - Log of the variance vector of the variational distribution - :math:`q(t | x)`. - - Returns - ------- - dY_pred : :py:class:`ndarray ` of shape `(n_ex, N)` - The gradient of the VLB with regard to `y_pred`. - dLogVar : :py:class:`ndarray ` of shape `(n_ex, T)` - The gradient of the VLB with regard to `t_log_var`. - dMean : :py:class:`ndarray ` of shape `(n_ex, T)` - The gradient of the VLB with regard to `t_mean`. - """ - N = y.shape[0] - eps = np.finfo(float).eps - y_pred = np.clip(y_pred, eps, 1 - eps) - - dY_pred = -y / (N * y_pred) - (y - 1) / (N - N * y_pred) - dLogVar = (np.exp(t_log_var) - 1) / (2 * N) - dMean = t_mean / N - return dY_pred, dLogVar, dMean - - -class WGAN_GPLoss(ObjectiveBase): - def __init__(self, lambda_=10): - r""" - The loss function for a Wasserstein GAN [*]_ [*]_ with gradient penalty. - - Notes - ----- - Assuming an optimal critic, minimizing this quantity wrt. the generator - parameters corresponds to minimizing the Wasserstein-1 (earth-mover) - distance between the fake and real data distributions. - - The formula for the WGAN-GP critic loss is - - .. math:: - - \\text{WGANLoss} - &= \sum_{x \in X_{real}} p(x) D(x) - - \sum_{x' \in X_{fake}} p(x') D(x') \\\\ - \\text{WGANLossGP} - &= \\text{WGANLoss} + \lambda - (||\\nabla_{X_{interp}} D(X_{interp})||_2 - 1)^2 - - where - - .. math:: - - X_{fake} &= \\text{Generator}(\mathbf{z}) \\\\ - X_{interp} &= \\alpha X_{real} + (1 - \\alpha) X_{fake} \\\\ - - and - - .. math:: - - \mathbf{z} &\sim \mathcal{N}(0, \mathbb{1}) \\\\ - \\alpha &\sim \\text{Uniform}(0, 1) - - References - ---------- - .. 
[*] Gulrajani, I., Ahmed, F., Arjovsky, M., Dumoulin, V., & - Courville, A. (2017) "Improved training of Wasserstein GANs" - *Advances in Neural Information Processing Systems, 31*: 5769-5779. - .. [*] Goodfellow, I. J, Abadie, P. A., Mirza, M., Xu, B., Farley, D. - W., Ozair, S., Courville, A., & Bengio, Y. (2014) "Generative - adversarial nets" *Advances in Neural Information Processing - Systems, 27*: 2672-2680. - - Parameters - ---------- - lambda_ : float - The gradient penalty coefficient. Default is 10. - """ - self.lambda_ = lambda_ - super().__init__() - self.name = "wgan_gp_loss" - - def __call__(self, Y_fake, module, Y_real=None, gradInterp=None): - """ - Computes the generator and critic loss using the WGAN-GP value - function. - - Parameters - ---------- - Y_fake : :py:class:`ndarray ` of shape `(n_ex,)` - The output of the critic for `X_fake`. - module : {'C', 'G'} - Whether to calculate the loss for the critic ('C') or the generator - ('G'). If calculating loss for the critic, `Y_real` and - `gradInterp` must not be None. - Y_real : :py:class:`ndarray ` of shape `(n_ex,)`, or None - The output of the critic for `X_real`. Default is None. - gradInterp : :py:class:`ndarray ` of shape `(n_ex, n_feats)`, or None - The gradient of the critic output for `X_interp` wrt. `X_interp`. - Default is None. - - Returns - ------- - loss : float - Depending on the setting for `module`, either the critic or - generator loss, averaged over examples in the minibatch. - """ - return self.loss(Y_fake, module, Y_real=Y_real, gradInterp=gradInterp) - - def __str__(self): - return "WGANLossGP(lambda_={})".format(self.lambda_) - - def loss(self, Y_fake, module, Y_real=None, gradInterp=None): - """ - Computes the generator and critic loss using the WGAN-GP value - function. - - Parameters - ---------- - Y_fake : :py:class:`ndarray ` of shape (n_ex,) - The output of the critic for `X_fake`. - module : {'C', 'G'} - Whether to calculate the loss for the critic ('C') or the generator - ('G'). If calculating loss for the critic, `Y_real` and - `gradInterp` must not be None. - Y_real : :py:class:`ndarray ` of shape `(n_ex,)` or None - The output of the critic for `X_real`. Default is None. - gradInterp : :py:class:`ndarray ` of shape `(n_ex, n_feats)` or None - The gradient of the critic output for `X_interp` wrt. `X_interp`. - Default is None. - - Returns - ------- - loss : float - Depending on the setting for `module`, either the critic or - generator loss, averaged over examples in the minibatch. - """ - # calc critic loss including gradient penalty - if module == "C": - X_interp_norm = np.linalg.norm(gradInterp, axis=1, keepdims=True) - gradient_penalty = (X_interp_norm - 1) ** 2 - loss = ( - Y_fake.mean() - Y_real.mean() + self.lambda_ * gradient_penalty.mean() - ) - - # calc generator loss - elif module == "G": - loss = -Y_fake.mean() - - else: - raise ValueError("Unrecognized module: {}".format(module)) - - return loss - - def grad(self, Y_fake, module, Y_real=None, gradInterp=None): - """ - Computes the gradient of the generator or critic loss with regard to - its inputs. - - Parameters - ---------- - Y_fake : :py:class:`ndarray ` of shape `(n_ex,)` - The output of the critic for `X_fake`. - module : {'C', 'G'} - Whether to calculate the gradient for the critic loss ('C') or the - generator loss ('G'). If calculating grads for the critic, `Y_real` - and `gradInterp` must not be None. - Y_real : :py:class:`ndarray ` of shape `(n_ex,)` or None - The output of the critic for `X_real`. Default is None. 
- gradInterp : :py:class:`ndarray ` of shape `(n_ex, n_feats)` or None - The gradient of the critic output on `X_interp` wrt. `X_interp`. - Default is None. - - Returns - ------- - grads : tuple - If `module` == 'C', returns a 3-tuple containing the gradient of - the critic loss with regard to (`Y_fake`, `Y_real`, `gradInterp`). - If `module` == 'G', returns the gradient of the generator with - regard to `Y_fake`. - """ - eps = np.finfo(float).eps - n_ex_fake = Y_fake.shape[0] - - # calc gradient of the critic loss - if module == "C": - n_ex_real = Y_real.shape[0] - - dY_fake = -1 / n_ex_fake * np.ones_like(Y_fake) - dY_real = 1 / n_ex_real * np.ones_like(Y_real) - - # differentiate through gradient penalty - X_interp_norm = np.linalg.norm(gradInterp, axis=1, keepdims=True) + eps - - dGradInterp = ( - (2 / n_ex_fake) - * self.lambda_ - * (X_interp_norm - 1) - * (gradInterp / X_interp_norm) - ) - grad = (dY_fake, dY_real, dGradInterp) - - # calc gradient of the generator loss - elif module == "G": - grad = -1 / n_ex_fake * np.ones_like(Y_fake) - - else: - raise ValueError("Unrecognized module: {}".format(module)) - return grad - - -class NCELoss(ObjectiveBase): - """ - """ - - def __init__( - self, - n_classes, - noise_sampler, - num_negative_samples, - optimizer=None, - init="glorot_uniform", - subtract_log_label_prob=True, - ): - r""" - A noise contrastive estimation (NCE) loss function. - - Notes - ----- - Noise contrastive estimation is a candidate sampling method often - used to reduce the computational challenge of training a softmax - layer on problems with a large number of output classes. It proceeds by - training a logistic regression model to discriminate between samples - from the true data distribution and samples from an artificial noise - distribution. - - It can be shown that as the ratio of negative samples to data samples - goes to infinity, the gradient of the NCE loss converges to the - original softmax gradient. - - For input data **X**, target labels `targets`, loss parameters **W** and - **b**, and noise samples `noise` sampled from the noise distribution `Q`, - the NCE loss is - - .. math:: - - \\text{NCE}(X, targets) = - \\text{cross_entropy}(\mathbf{y}_{targets}, \hat{\mathbf{y}}_{targets}) + - \\text{cross_entropy}(\mathbf{y}_{noise}, \hat{\mathbf{y}}_{noise}) - - where - - .. math:: - - \hat{\mathbf{y}}_{targets} - &= \sigma(\mathbf{W}[targets] \mathbf{X} + \mathbf{b}[targets] - \log Q(targets)) \\\\ - \hat{\mathbf{y}}_{noise} - &= \sigma(\mathbf{W}[noise] \mathbf{X} + \mathbf{b}[noise] - \log Q(noise)) - - In the above equations, :math:`\sigma` is the logistic sigmoid - function, and :math:`Q(x)` corresponds to the probability of the values - in `x` under `Q`. - - References - ---------- - .. [1] Gutmann, M. & Hyvarinen, A. (2010). Noise-contrastive - estimation: A new estimation principle for unnormalized statistical - models. *AISTATS, 13*: 297-304. - .. [2] Minh, A. & Teh, Y. W. (2012). A fast and simple algorithm for - training neural probabilistic language models. *ICML, 29*: 1751-1758. - - Parameters - ---------- - n_classes : int - The total number of output classes in the model. - noise_sampler : :class:`~numpy_ml.utils.data_structures.DiscreteSampler` instance - The negative sampler. Defines a distribution over all classes in - the dataset. - num_negative_samples : int - The number of negative samples to draw for each target / batch of - targets. 
- init : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is 'glorot_uniform'. - optimizer : str, :doc:`Optimizer ` object, or None - The optimization strategy to use when performing gradient updates - within the :meth:`update` method. If None, use the :class:`SGD - ` optimizer with - default parameters. Default is None. - subtract_log_label_prob : bool - Whether to subtract the log of the probability of each label under - the noise distribution from its respective logit. Set to False for - negative sampling, True for NCE. Default is True. - - Attributes - ---------- - gradients : dict - The accumulated parameter gradients. - parameters: dict - The loss parameter values. - hyperparameters: dict - The loss hyperparameter values. - derived_variables: dict - Useful intermediate values computed during the loss computation. - """ - super().__init__() - self.name = "nce_loss" - - self.init = init - self.n_in = None - self.trainable = True - self.n_classes = n_classes - self.noise_sampler = noise_sampler - self.num_negative_samples = num_negative_samples - self.act_fn = ActivationInitializer("Sigmoid")() - self.optimizer = OptimizerInitializer(optimizer)() - self.subtract_log_label_prob = subtract_log_label_prob - - self.is_initialized = False - - def _init_params(self): - init_weights = WeightInitializer(str(self.act_fn), mode=self.init) - - self.X = [] - b = np.zeros((1, self.n_classes)) - W = init_weights((self.n_classes, self.n_in)) - - self.parameters = {"W": W, "b": b} - - self.gradients = {"W": np.zeros_like(W), "b": np.zeros_like(b)} - - self.derived_variables = { - "y_pred": [], - "target": [], - "true_w": [], - "true_b": [], - "sampled_b": [], - "sampled_w": [], - "out_labels": [], - "target_logits": [], - "noise_samples": [], - "noise_logits": [], - } - - self.is_initialized = True - - @property - def hyperparameters(self): - return { - "id": "NCELoss", - "n_in": self.n_in, - "init": self.init, - "n_classes": self.n_classes, - "noise_sampler": self.noise_sampler, - "num_negative_samples": self.num_negative_samples, - "subtract_log_label_prob": self.subtract_log_label_prob, - "optimizer": { - "cache": self.optimizer.cache, - "hyperparameters": self.optimizer.hyperparameters, - }, - } - - def __call__(self, target, X, neg_samples=None, retain_derived=True): - return self.loss(target, X, neg_samples, retain_derived) - - def __str__(self): - keys = [ - "{}={}".format(k, v) - for k, v in self.hyperparameters.items() - if k not in ["id", "optimizer"] - ] + ["optimizer={}".format(self.optimizer)] - return "NCELoss({})".format(", ".join(keys)) - - def freeze(self): - """ - Freeze the loss parameters at their current values so they can no - longer be updated. - """ - self.trainable = False - - def unfreeze(self): - """Unfreeze the layer parameters so they can be updated.""" - self.trainable = True - - def flush_gradients(self): - """Erase all the layer's derived variables and gradients.""" - assert self.trainable, "NCELoss is frozen" - self.X = [] - for k, v in self.derived_variables.items(): - self.derived_variables[k] = [] - - for k, v in self.gradients.items(): - self.gradients[k] = np.zeros_like(v) - - def update(self, cur_loss=None): - """ - Update the loss parameters using the accrued gradients and optimizer. - Flush all gradients once the update is complete. 
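-
- In a typical training loop this method is invoked indirectly: a call to
- :meth:`loss` caches the forward-pass quantities, and a subsequent call to
- :meth:`grad` (with its default ``update_params=True``) accumulates the
- parameter gradients and then calls :meth:`update`. A rough sketch, assuming
- ``nce`` is an initialized `NCELoss` and ``X`` / ``target`` form a minibatch::
-
-     loss, y_pred = nce(target, X)  # forward pass; caches derived variables
-     dLdX = nce.grad()              # backward pass; also updates W and b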
- """ - assert self.trainable, "NCELoss is frozen" - self.optimizer.step() - for k, v in self.gradients.items(): - if k in self.parameters: - self.parameters[k] = self.optimizer(self.parameters[k], v, k, cur_loss) - self.flush_gradients() - - def loss(self, target, X, neg_samples=None, retain_derived=True): - """ - Compute the NCE loss for a collection of inputs and associated targets. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_c, n_in)` - Layer input. A minibatch of `n_ex` examples, where each example is - an `n_c` by `n_in` matrix (e.g., the matrix of `n_c` context - embeddings, each of dimensionality `n_in`, for a CBOW model). - target : :py:class:`ndarray ` of shape `(n_ex,)` - Integer indices of the target class(es) for each example in the - minibatch (e.g., the target word id for an example in a CBOW model). - neg_samples : :py:class:`ndarray ` of shape (`num_negative_samples`,) or None - An optional array of negative samples to use during the loss - calculation. These will be used instead of samples draw from - ``self.noise_sampler``. Default is None. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through with regard to this input. - Default is True. - - Returns - ------- - loss : float - The NCE loss summed over the minibatch and samples. - y_pred : :py:class:`ndarray ` of shape (`n_ex`, `n_c`) - The network predictions for the conditional probability of each - target given each context: entry (`i`, `j`) gives the predicted - probability of target `i` under context vector `j`. - """ - if not self.is_initialized: - self.n_in = X.shape[-1] - self._init_params() - - loss, Z_target, Z_neg, y_pred, y_true, noise_samples = self._loss( - X, target, neg_samples - ) - - # cache derived variables for gradient calculation - if retain_derived: - self.X.append(X) - - self.derived_variables["y_pred"].append(y_pred) - self.derived_variables["target"].append(target) - self.derived_variables["out_labels"].append(y_true) - self.derived_variables["target_logits"].append(Z_target) - self.derived_variables["noise_samples"].append(noise_samples) - self.derived_variables["noise_logits"].append(Z_neg) - - return loss, np.squeeze(y_pred[..., :1], -1) - - def _loss(self, X, target, neg_samples): - """Actual computation of NCE loss""" - fstr = "X must have shape (n_ex, n_c, n_in), but got {} dims instead" - assert X.ndim == 3, fstr.format(X.ndim) - - W = self.parameters["W"] - b = self.parameters["b"] - - # sample negative samples from the noise distribution - if neg_samples is None: - neg_samples = self.noise_sampler(self.num_negative_samples) - assert len(neg_samples) == self.num_negative_samples - - # get the probability of the negative sample class and the target - # class under the noise distribution - p_neg_samples = self.noise_sampler.probs[neg_samples] - p_target = np.atleast_2d(self.noise_sampler.probs[target]) - - # save the noise samples for debugging - noise_samples = (neg_samples, p_target, p_neg_samples) - - # compute the logit for the negative samples and target - Z_target = X @ W[target].T + b[0, target] - Z_neg = X @ W[neg_samples].T + b[0, neg_samples] - - # subtract the log probability of each label under the noise dist - if self.subtract_log_label_prob: - n, m = Z_target.shape[0], Z_neg.shape[0] - Z_target[range(n), ...] -= np.log(p_target) - Z_neg[range(m), ...] 
-= np.log(p_neg_samples) - - # only retain the probability of the target under its associated - # minibatch example - aa, _, cc = Z_target.shape - Z_target = Z_target[range(aa), :, range(cc)][..., None] - - # p_target = (n_ex, n_c, 1) - # p_neg = (n_ex, n_c, n_samples) - pred_p_target = self.act_fn(Z_target) - pred_p_neg = self.act_fn(Z_neg) - - # if we're in evaluation mode, ignore the negative samples - just - # return the binary cross entropy on the targets - y_pred = pred_p_target - if self.trainable: - # (n_ex, n_c, 1 + n_samples) (target is first column) - y_pred = np.concatenate((y_pred, pred_p_neg), axis=-1) - - n_targets = 1 - y_true = np.zeros_like(y_pred) - y_true[..., :n_targets] = 1 - - # binary cross entropy - eps = np.finfo(float).eps - np.clip(y_pred, eps, 1 - eps, y_pred) - loss = -np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)) - return loss, Z_target, Z_neg, y_pred, y_true, noise_samples - - def grad(self, retain_grads=True, update_params=True): - """ - Compute the gradient of the NCE loss with regard to the inputs, - weights, and biases. - - Parameters - ---------- - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - update_params : bool - Whether to perform a single step of gradient descent on the layer - weights and bias using the calculated gradients. If `retain_grads` - is False, this option is ignored and the parameter gradients are - not updated. Default is True. - - Returns - ------- - dLdX : :py:class:`ndarray ` of shape (`n_ex`, `n_in`) or list of arrays - The gradient of the loss with regard to the layer input(s) `X`. - """ - assert self.trainable, "NCE loss is frozen" - - dX = [] - for input_idx, x in enumerate(self.X): - dx, dw, db = self._grad(x, input_idx) - dX.append(dx) - - if retain_grads: - self.gradients["W"] += dw - self.gradients["b"] += db - - dX = dX[0] if len(self.X) == 1 else dX - - if retain_grads and update_params: - self.update() - - return dX - - def _grad(self, X, input_idx): - """Actual computation of gradient wrt. loss weights + input""" - W, b = self.parameters["W"], self.parameters["b"] - - y_pred = self.derived_variables["y_pred"][input_idx] - target = self.derived_variables["target"][input_idx] - y_true = self.derived_variables["out_labels"][input_idx] - Z_neg = self.derived_variables["noise_logits"][input_idx] - Z_target = self.derived_variables["target_logits"][input_idx] - neg_samples = self.derived_variables["noise_samples"][input_idx][0] - - # the number of target classes per minibatch example - n_targets = 1 - - # calculate the grad of the binary cross entropy wrt. 
the network - # predictions - preds, classes = y_pred.flatten(), y_true.flatten() - - dLdp_real = ((1 - classes) / (1 - preds)) - (classes / preds) - dLdp_real = dLdp_real.reshape(*y_pred.shape) - - # partition the gradients into target and negative sample portions - dLdy_pred_target = dLdp_real[..., :n_targets] - dLdy_pred_neg = dLdp_real[..., n_targets:] - - # compute gradients of the loss wrt the data and noise logits - dLdZ_target = dLdy_pred_target * self.act_fn.grad(Z_target) - dLdZ_neg = dLdy_pred_neg * self.act_fn.grad(Z_neg) - - # compute param gradients on target + negative samples - dB_neg = dLdZ_neg.sum(axis=(0, 1)) - dB_target = dLdZ_target.sum(axis=(1, 2)) - - dW_neg = (dLdZ_neg.transpose(0, 2, 1) @ X).sum(axis=0) - dW_target = (dLdZ_target.transpose(0, 2, 1) @ X).sum(axis=1) - - # TODO: can this be done with np.einsum instead? - dX_target = np.vstack( - [dLdZ_target[[ix]] @ W[[t]] for ix, t in enumerate(target)] - ) - dX_neg = dLdZ_neg @ W[neg_samples] - - hits = list(set(target).intersection(set(neg_samples))) - hit_ixs = [np.where(target == h)[0] for h in hits] - - # adjust param gradients if there's an accidental hit - if len(hits) != 0: - hit_ixs = np.concatenate(hit_ixs) - target = np.delete(target, hit_ixs) - dB_target = np.delete(dB_target, hit_ixs) - dW_target = np.delete(dW_target, hit_ixs, 0) - - dX = dX_target + dX_neg - - # use np.add.at to ensure that repeated indices in the target (or - # possibly in neg_samples if sampling is done with replacement) are - # properly accounted for - dB = np.zeros_like(b).flatten() - np.add.at(dB, target, dB_target) - np.add.at(dB, neg_samples, dB_neg) - dB = dB.reshape(*b.shape) - - dW = np.zeros_like(W) - np.add.at(dW, target, dW_target) - np.add.at(dW, neg_samples, dW_neg) - - return dX, dW, dB diff --git a/aitk/keras/metrics.py b/aitk/keras/metrics.py deleted file mode 100644 index 4bcf51c..0000000 --- a/aitk/keras/metrics.py +++ /dev/null @@ -1,71 +0,0 @@ -# -*- coding: utf-8 -*- -# ************************************************************** -# aitk.keras: A Python Keras model API -# -# Copyright (c) 2021 AITK Developers -# -# https://github.com/ArtificialIntelligenceToolkit/aitk.keras -# -# ************************************************************** - -""" -Metrics can be computed as a stateless function: - -metric(targets, outputs) - -or as a stateful subclass of Metric. 
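-
- A minimal usage sketch (illustrative only; the array values below are hypothetical):
-
-     import numpy as np
-
-     targets = np.array([[0.0, 1.0], [1.0, 0.0]])
-     outputs = np.array([[0.05, 0.93], [0.2, 0.1]])
-
-     # Stateful: accumulate across batches, then read the aggregate result.
-     metric = ToleranceAccuracy(tolerance=0.1)
-     metric.update_state(targets, outputs)
-     print(metric.result())                       # 0.5
-
-     # Stateless: a plain function of (targets, outputs).
-     print(tolerance_accuracy(targets, outputs))  # 0.5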
-""" - -import numpy as np -from abc import ABC, abstractmethod - -class Metric(ABC): - def __init__(self, name): - super().__init__() - self.name = name - - @abstractmethod - def reset_state(self): - raise NotImplementedError - - @abstractmethod - def update_state(self, targets, outputs): - raise NotImplementedError - - @abstractmethod - def result(self): - raise NotImplementedError - - def __str__(self): - return self.name - -class ToleranceAccuracy(Metric): - def __init__(self, tolerance): - super().__init__("tolerance_accuracy") - self.tolerance = tolerance - self.reset_state() - - def reset_state(self): - self.accurate = 0 - self.total = 0 - - def update_state(self, targets, outputs): - results = np.all( - np.less_equal(np.abs(targets - outputs), - self.tolerance), axis=-1) - self.accurate += sum(results) - self.total += len(results) - - def result(self): - return self.accurate / self.total - -def tolerance_accuracy(targets, outputs): - return np.mean( - np.all( - np.less_equal(np.abs(targets - outputs), - tolerance_accuracy.tolerance), - axis=-1), - axis=-1, - ) -# Needs the tolerance from somewhere: -tolerance_accuracy.tolerance = 0.1 diff --git a/aitk/keras/models/README.md b/aitk/keras/models/README.md deleted file mode 100644 index 1a15ce7..0000000 --- a/aitk/keras/models/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Models - -The models module implements popular full neural networks. It includes: - -- `vae.py`: A Bernoulli variational autoencoder ([Kingma & Welling, 2014](https://arxiv.org/abs/1312.6114)) -- `wgan_gp.py`: A Wasserstein generative adversarial network with gradient - penalty ([Gulrajani et al., 2017](https://arxiv.org/pdf/1704.00028.pdf); -[Goodfellow et al., 2014](https://papers.nips.cc/paper/5423-generative-adversarial-nets.pdf)) -- `w2v.py`: word2vec model with CBOW and skip-gram architectures and - training via noise contrastive estimation ([Mikolov et al., 2012](https://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality.pdf)) diff --git a/aitk/keras/models/__init__.py b/aitk/keras/models/__init__.py deleted file mode 100644 index af5d12c..0000000 --- a/aitk/keras/models/__init__.py +++ /dev/null @@ -1,540 +0,0 @@ -# -*- coding: utf-8 -*- -# ************************************************************** -# aitk.keras: A Python Keras model API -# -# Copyright (c) 2021 AITK Developers -# -# https://github.com/ArtificialIntelligenceToolkit/aitk.keras -# -# ************************************************************** - -from ..layers import Input, Activation, Concatenate -from ..losses import MeanSquaredError, CrossEntropy -from ..initializers import OptimizerInitializer -from ..callbacks import History -from ..utils import topological_sort - -import numpy as np -import time -import math -import numbers -import functools -import operator -from collections import defaultdict - -LOSS_FUNCTIONS = { - "mse": MeanSquaredError, - "mean_squared_error": MeanSquaredError, - "crossentropy": CrossEntropy, - # FIXME: add more error functions -} - -NAME_CACHE = {} - -def get_metric_name(metric): - if hasattr(metric, "name"): - return metric.name - elif hasattr(metric, "__name__"): - return metric.__name__ - else: - return str(metric) - - -class Model(): - def __init__(self, inputs=None, outputs=None, name=None): - self.stop_training = False - self.built = False - self.sequential = False - self.history = History() - self.name = self.make_name(name) - self.layers = [] - self.layer_map = {} - self._input_layers = None - 
self._output_layers = None - self.step = 0 - # Build a model graph from inputs to outputs: - if inputs is not None and outputs is not None: - if not isinstance(outputs, (list, tuple)): - outputs = [outputs] - queue = [] if inputs is None else inputs - if not isinstance(queue, (list, tuple)): - queue = [queue] - while len(queue) > 0: - layer = queue.pop(0) - if layer not in self.layers: - if layer.name in self.layer_map: - raise AttributeError("duplicate layer name: '%s'" % layer.name) - self.layers.append(layer) - self.layer_map[layer.name] = layer - if layer in outputs: - # Make sure no more layers: - layer.output_layers = [] - else: - queue.extend(layer.output_layers) - self.sequential = self.is_sequential() - self.build() - - def is_sequential(self): - return ((len(self.get_input_layers()) == 1) and - (len(self.get_output_layers()) == 1) and - (not any([isinstance(layer, Concatenate) - for layer in self.layers]))) - - def get_input_layers(self): - if self._input_layers is None: - return [layer for layer in self.layers if len(layer.input_layers) == 0] - else: - return self._input_layers - - def get_output_layers(self): - if self._output_layers is None: - return [layer for layer in self.layers if len(layer.output_layers) == 0] - else: - return self._output_layers - - def connect(self, in_layer, out_layer): - """ - Connect first layer to second layer. - """ - if in_layer not in out_layer.input_layers: - out_layer.input_layers.append(in_layer) - if out_layer not in in_layer.output_layers: - in_layer.output_layers.append(out_layer) - - def make_name(self, name): - if name is None: - class_name = self.__class__.__name__.lower() - count = NAME_CACHE.get(class_name, 0) - if count == 0: - new_name = class_name - else: - new_name = "%s_%s" % (class_name, count) - NAME_CACHE[class_name] = count + 1 - return new_name - else: - return name - - def summary(self): - if not self.built: - print(f'Model: "{self.name}" (unbuilt)') - else: - print(f'Model: "{self.name}"') - print('_' * 65) - print("Layer (type) Output Shape Param #") - print("=" * 65) - total_parameters = 0 - # FIXME: sum up other, non-trainable params - other_params = 0 - for i, layer in enumerate(topological_sort(self.get_input_layers())): - layer_name = ("%s (%s)" % (layer.name, layer.__class__.__name__))[:25] - output_shape = (None, layer.n_out) if isinstance(layer.n_out, numbers.Number) else layer.n_out - if self.built: - parameters = sum([np.prod(item.shape) for item in layer.parameters.values() if item is not None]) - total_parameters += parameters - print(f"{layer_name:25s} {str(output_shape)[:15]:>15s} {parameters:>20,}") - else: - print(f"{layer_name:25s} {str(output_shape)[:15]:>15s} {'(unbuilt)':>20}") - if i != len(self.layers) - 1: - print("_" * 65) - print("=" * 65) - if self.built: - print(f"Total params: {total_parameters:,}") - print(f"Trainable params: {total_parameters + other_params:,}") - print(f"Non-trainable params: {other_params:,}") - print("_" * 65) - - def build(self): - self._input_layers = [layer for layer in self.layers if len(layer.input_layers) == 0] - self._output_layers = [layer for layer in self.layers if len(layer.output_layers) == 0] - for layer in self.layers: - if not isinstance(layer, Input): - self.is_initialized = False - # now, let's force the layers to initialize: - inputs = self.build_inputs() - self.predict(inputs) - self.built = True - - def compile(self, optimizer, loss, metrics=None): - for layer in self.layers: - if not isinstance(layer, Input): - self.is_initialized = False - 
layer.optimizer = OptimizerInitializer(optimizer)() - loss_function = LOSS_FUNCTIONS[loss] - self.loss_function = loss_function() - self.metrics = metrics if metrics is not None else [] - self.build() - - def get_layer_output_shape(self, layer, n=1): - """ - Get the shape of the layer with a dataset - size of n. - """ - if isinstance(layer.n_out, numbers.Number): - shape = (n, layer.n_out) - else: - shape = tuple([n] + list(layer.n_out)) - return shape - - def get_layer_output_array(self, layer): - """ - Get an output array of a layer (dataset, n = 1). - """ - shape = self.get_layer_output_shape(layer) - output = np.ndarray(shape) - return output - - def build_inputs(self): - """ - Build a dataset of dummy inputs. - """ - if self.sequential: - inputs = self.get_layer_output_array(self.layers[0]) - else: - if len(self.get_input_layers()) > 1: - inputs = [self.get_layer_output_array(input) - for input in self._input_layers] - else: - inputs = self.get_layer_output_array(self._input_layers[0]) - return inputs - - def get_weights(self, flat=False): - """ - Get the weights from the model. - """ - array = [] - if flat: - for layer in self.layers: - if layer.has_trainable_params(): - for weight in layer.get_weights(): - if isinstance(weight, numbers.Number): - array.extend(weight) - else: - array.extend(weight.flatten()) - else: - for layer in self.layers: - if layer.has_trainable_params(): - array.extend(layer.get_weights()) - return array - - def copy_weights(self, model): - """ - Copy the weights from another model by layer name. - """ - for layer in model.layers: - weights = layer.get_weights() - self.layer_map[layer.name].set_weights(weights) - - def get_weights_by_name(self): - """ - Copy the weights from another model by layer name. - """ - return {layer.name: layer.get_weights() for layer in self.layers} - - def set_weights(self, weights): - """ - Set the weights in a network. - - Args: - weights: a list of pairs of weights and biases for each layer, - or a single (flat) array of values - """ - if len(weights) > 0 and isinstance(weights[0], numbers.Number): - # Flat - current = 0 - for layer in self.layers: - if layer.has_trainable_params(): - orig = layer.get_weights() - new_weights = [] - for item in orig: - if isinstance(item, numbers.Number): - total = 1 - new_weights.append(item) - else: - total = functools.reduce(operator.mul, item.shape, 1) - w = np.array(weights[current:current + total], dtype=float) - new_weights.append(w.reshape(item.shape)) - current += total - layer.set_weights(new_weights) - else: - i = 0 - for layer in self.layers: - if layer.has_trainable_params(): - orig = layer.get_weights() - count = len(orig) - layer.set_weights(weights[i:i+count]) - i += count - - def format_time(self, seconds): - """ - Format time for easy human reading. - """ - if seconds > 1: - return f"{seconds:.0f}s" - elif seconds * 1000 > 1: - return f"{seconds * 1000:.0f}ms" - else: - return f"{seconds * 1000000:.0f}µs" - - def fit(self, inputs, targets, batch_size=32, epochs=1, verbose="auto", callbacks=None, - initial_epoch=0, shuffle=True): - """ - The training loop for all models. 
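-
- A minimal usage sketch (illustrative only; ``model`` is assumed to be a
- compiled ``Model``/``Sequential`` instance, ``inputs`` and ``targets`` NumPy
- arrays of matching length, and the ``"sgd"`` optimizer string is assumed to
- be understood by ``OptimizerInitializer``)::
-
-     model.compile(optimizer="sgd", loss="mse")
-     history = model.fit(inputs, targets, batch_size=16, epochs=5, verbose=0)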
- """ - self.history = History() - self.stop_training = False - verbose = 1 if verbose == "auto" else verbose - callbacks = [] if callbacks is None else callbacks - callbacks.append(self.history) - inputs = np.array(inputs, dtype=float) - targets = np.array(targets, dtype=float) - self.flush_gradients() - for callback in callbacks: - callback.set_model(self) - callback.on_train_begin() - for epoch in range(initial_epoch, epochs): - if self.stop_training: - break - epoch_metric_values = {} - for metric in self.metrics: - if hasattr(metric, "reset_state"): - metric.reset_state() - else: - epoch_metric_values[get_metric_name(metric)] = 0 - - for callback in callbacks: - callback.on_epoch_begin(epoch) - - loss = 0 - total_batches = math.ceil(self.get_length_of_inputs(inputs) / batch_size) - if verbose: - print(f"Epoch {epoch+1}/{epochs}") - for batch, length, batch_data in self.enumerate_batches(inputs, targets, batch_size, shuffle): - start_time = time.monotonic() - batch_loss, batch_metric_values = self.train_batch(batch_data, batch, length, batch_size, callbacks) - loss += batch_loss - for metric in batch_metric_values: - # FIXME: Need to account for uneven batch sizes? - epoch_metric_values[metric] += batch_metric_values[metric] - end_time = time.monotonic() - self.step += length - if verbose: - logs = {} - ftime = self.format_time((end_time - start_time) / length) - for metric in self.metrics: - if hasattr(metric, "result"): - logs[metric.name] = metric.result() - else: - if get_metric_name(metric) in batch_metric_values: - logs[get_metric_name(metric)] = batch_metric_values[get_metric_name(metric)] - metrics = " - ".join(["%s: %.4f" % (metric, logs[metric]) for metric in batch_metric_values]) - if metrics: - metrics = " - " + metrics - # ideally update output here - logs = { - "loss": loss, - } - for metric in self.metrics: - if hasattr(metric, "result"): - logs[metric.name] = metric.result() - else: - if get_metric_name(metric) in epoch_metric_values: - logs[get_metric_name(metric)] = epoch_metric_values[get_metric_name(metric)] / total_batches - if verbose: - metrics = " - ".join(["%s: %.4f" % (metric, logs[metric]) for metric in logs]) - if metrics: - metrics = " - " + metrics - # Until we have output screen formatting; uses the last computed times, metrics - print(f"{batch + 1}/{total_batches} [==============================] - {end_time - start_time:.0f}s {ftime}/step{metrics}") - for callback in callbacks: - callback.on_epoch_end( - epoch, - logs - ) - if self.stop_training: - print("Training stopped early.") - for callback in callbacks: - callback.on_train_end() - return self.history - - def flush_gradients(self): - for layer in self.layers: - if layer.has_trainable_params(): - layer.flush_gradients() - - def enumerate_batches(self, inputs, targets, batch_size, shuffle): - indexes = np.arange(self.get_length_of_inputs(inputs)) - if shuffle: - # In place shuffle - np.random.shuffle(indexes) - current_row = 0 - batch = 0 - while (current_row * batch_size) < self.get_length_of_inputs(inputs): - batch_inputs = self.get_batch_inputs( - inputs, indexes, current_row, batch_size) - batch_targets = self.get_batch_targets( - targets, indexes, current_row, batch_size) - current_row += 1 - yield batch, self.get_length_of_inputs(batch_inputs), (batch_inputs, batch_targets) - batch += 1 - - def get_length_of_inputs(self, inputs): - if len(self.get_input_layers()) == 1: - return len(inputs) - else: - return len(inputs[0]) - - def get_batch_inputs(self, inputs, indexes, current_row, batch_size): 
- batch_indexes = indexes[current_row:current_row + batch_size] - if len(self.get_input_layers()) == 1: - return inputs[batch_indexes] - else: - return [np.array(inputs[i][batch_indexes]) - for i in range(len(self.get_input_layers()))] - - def get_batch_targets(self, targets, indexes, current_row, batch_size): - batch_indexes = indexes[current_row:current_row + batch_size] - if self.sequential: - # Numpy, one bank: - return targets[batch_indexes] - else: - return [np.array(targets[i][batch_indexes]) - for i in range(len(self.get_output_layers()))] - - def train_batch(self, dataset, batch, length, batch_size, callbacks): - """ - dataset = (inputs, targets) - batch = batch number (eg, step) - length = the actual size of the batch - batch_size = desired size of batch - """ - inputs, targets = dataset - # If the size of this batch is less than desired, scale it? - #scale = length / batch_size - scale = 1.0 - # Use predict to forward the activations, saving - # needed information: - outputs = self.predict(inputs, True) - # Compute the derivative with respect - # to this batch of the dataset: - batch_loss = 0 - batch_metric_values = defaultdict(int) - for callback in callbacks: - callback.on_train_batch_begin(batch) - results = 0 - # FIXME: If batch_size is different from others? Scale it? - if self.sequential: - dY_pred = self.loss_function.grad( - targets, - outputs, - ) - queue = [(self.get_output_layers()[0], dY_pred)] - while len(queue) > 0: - layer, dY_pred = queue.pop(0) - if not isinstance(layer, Input): - dY_pred = layer.backward(dY_pred) - for input_layer in layer.input_layers: - queue.append((input_layer, dY_pred)) - - batch_loss = self.loss_function(targets, outputs) * scale - for metric in self.metrics: - if hasattr(metric, "update_state"): - metric.update_state(targets, outputs) - else: - batch_metric_values[get_metric_name(metric)] = metric(targets, outputs) - else: - for out_n in range(len(self.get_output_layers())): - dY_pred = self.loss_function.grad( - targets[out_n], - outputs[out_n], - ) * scale - queue = [(self.get_output_layers()[out_n], dY_pred)] - while len(queue) > 0: - layer, dY_pred = queue.pop(0) - if not isinstance(layer, Input): - dY_pred = layer.backward(dY_pred) - for input_layer in layer.input_layers: - queue.append((input_layer, dY_pred)) - - batch_loss += self.loss_function(targets[out_n], outputs[out_n]) * scale - for metric in self.metrics: - if hasattr(metric, "update_state"): - metric.update_state(targets[out_n], outputs[out_n]) - else: - batch_metric_values[get_metric_name(metric)] += metric(targets, outputs) - - for callback in callbacks: - logs = {"batch_loss": batch_loss} - logs.update(batch_metric_values) - callback.on_train_batch_end(batch, logs) - self.update(batch_loss) - return batch_loss, batch_metric_values - - def update(self, batch_loss): - """ - Update the weights based on the batch_loss. - The weight delatas were computed in train_batch(). - """ - # FIXME? 
Need to pass the batch_loss to just the layers - # responsible for this loss (eg, in case of multiple - # output layers) - # FIXME: layers need to be able to accumulate delta changes - for layer in self.layers: - if not isinstance(layer, Input): - layer.update(batch_loss) - - def predict(self, inputs, retain_derived=False): - inputs = np.array(inputs, dtype=float) - results = [] - # First, load the outputs of the input layers: - if self.sequential: - outputs = {self._input_layers[0].name: inputs} - else: - if len(self._input_layers) > 1: - outputs = {self._input_layers[i].name: input for i, input in enumerate(inputs)} - else: - outputs = {self._input_layers[0].name: inputs} - - # Propagate in topological order: - for layer in topological_sort(self.get_input_layers()): - if not isinstance(layer, Input): - inputs = [outputs[in_layer.name] for in_layer in layer.input_layers] - if len(inputs) == 1: - outputs[layer.name] = layer.forward(inputs[0], retain_derived=retain_derived) - else: - outputs[layer.name] = layer.forward(inputs, retain_derived=retain_derived) - - for layer in self.get_output_layers(): - results.append(outputs[layer.name]) - if self.sequential: - return results[0] - else: - return results - -class Sequential(Model): - def __init__(self, layers=None, name="sequential"): - super().__init__(name=name) - self.sequential = True - if layers is not None: - for layer in layers: - self.add(layer) - self.build() - - def add(self, layer): - if layer.name in self.layer_map: - raise AttributeError("duplicate layer name: '%s'" % layer.name) - self.layer_map[layer.name] = layer - if len(self.layers) == 0: - if isinstance(layer, Input): - self.layers.append(layer) - else: - input_layer = Input(input_shape=layer.input_shape) - self.connect(input_layer, layer) - self.layers.append(input_layer) - self.layers.append(layer) - elif isinstance(layer, Activation): - self.layers[-1].act_fn = layer.activation - else: - input_layer = self.layers[-1] - self.connect(input_layer, layer) - self.layers.append(layer) - self.build() diff --git a/aitk/keras/models/vae.py b/aitk/keras/models/vae.py deleted file mode 100644 index e136355..0000000 --- a/aitk/keras/models/vae.py +++ /dev/null @@ -1,453 +0,0 @@ -from time import time -from collections import OrderedDict - -import numpy as np - -from ..losses import VAELoss -from ..utils import minibatch -from ..activations import ReLU, Affine, Sigmoid -from ..layers import Conv2D, Pool2D, Flatten, FullyConnected - - -class BernoulliVAE(object): - def __init__( - self, - T=5, - latent_dim=256, - enc_conv1_pad=0, - enc_conv2_pad=0, - enc_conv1_out_ch=32, - enc_conv2_out_ch=64, - enc_conv1_stride=1, - enc_pool1_stride=2, - enc_conv2_stride=1, - enc_pool2_stride=1, - enc_conv1_kernel_shape=(5, 5), - enc_pool1_kernel_shape=(2, 2), - enc_conv2_kernel_shape=(5, 5), - enc_pool2_kernel_shape=(2, 2), - optimizer="RMSProp(lr=0.0001)", - init="glorot_uniform", - ): - """ - A variational autoencoder (VAE) with 2D convolutional encoder and Bernoulli - input and output units. - - Notes - ----- - The VAE architecture is - - .. code-block:: text - - |-- t_mean ----| - X -> [Encoder] -| |--> [Sampler] -> [Decoder] -> X_recon - |-- t_log_var -| - - where ``[Encoder]`` is - - .. code-block:: text - - Conv1 -> ReLU -> MaxPool1 -> Conv2 -> ReLU -> - MaxPool2 -> Flatten -> FC1 -> ReLU -> FC2 - - ``[Decoder]`` is - - .. code-block:: text - - FC1 -> FC2 -> Sigmoid - - and ``[Sampler]`` draws a sample from the distribution - - .. 
math:: - - \mathcal{N}(\\text{t_mean}, \exp \left\{\\text{t_log_var}\\right\} I) - - using the reparameterization trick. - - Parameters - ---------- - T : int - The dimension of the variational parameter `t`. Default is 5. - enc_conv1_pad : int - The padding for the first convolutional layer of the encoder. Default is 0. - enc_conv1_stride : int - The stride for the first convolutional layer of the encoder. Default is 1. - enc_conv1_out_ch : int - The number of output channels for the first convolutional layer of - the encoder. Default is 32. - enc_conv1_kernel_shape : tuple - The number of rows and columns in each filter of the first - convolutional layer of the encoder. Default is (5, 5). - enc_pool1_kernel_shape : tuple - The number of rows and columns in the receptive field of the first - max pool layer of the encoder. Default is (2, 3). - enc_pool1_stride : int - The stride for the first MaxPool layer of the encoder. Default is - 2. - enc_conv2_pad : int - The padding for the second convolutional layer of the encoder. - Default is 0. - enc_conv2_out_ch : int - The number of output channels for the second convolutional layer of - the encoder. Default is 64. - enc_conv2_kernel_shape : tuple - The number of rows and columns in each filter of the second - convolutional layer of the encoder. Default is (5, 5). - enc_conv2_stride : int - The stride for the second convolutional layer of the encoder. - Default is 1. - enc_pool2_stride : int - The stride for the second MaxPool layer of the encoder. Default is - 1. - enc_pool2_kernel_shape : tuple - The number of rows and columns in the receptive field of the second - max pool layer of the encoder. Default is (2, 3). - latent_dim : int - The dimension of the output for the first FC layer of the encoder. - Default is 256. - optimizer : str or :doc:`Optimizer ` object or None - The optimization strategy to use when performing gradient updates. - If None, use the :class:`~numpy_ml.neural_nets.optimizers.SGD` - optimizer with default parameters. Default is "RMSProp(lr=0.0001)". - init : str - The weight initialization strategy. Valid entries are - {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform', - 'std_normal', 'trunc_normal'}. Default is 'glorot_uniform'. 
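-
- A minimal usage sketch (illustrative only; ``X_train`` is assumed to be a
- batch of binarized images with shape `(n_ex, in_rows, in_cols, in_ch)`)::
-
-     vae = BernoulliVAE(T=5, latent_dim=256)
-     vae.fit(X_train, n_epochs=2, batchsize=64, verbose=False)
-     X_recon = vae.forward(X_train[:10])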
- """ - self.T = T - self.init = init - self.loss = VAELoss() - self.optimizer = optimizer - self.latent_dim = latent_dim - self.enc_conv1_pad = enc_conv1_pad - self.enc_conv2_pad = enc_conv2_pad - self.enc_conv1_stride = enc_conv1_stride - self.enc_conv1_out_ch = enc_conv1_out_ch - self.enc_pool1_stride = enc_pool1_stride - self.enc_conv2_out_ch = enc_conv2_out_ch - self.enc_conv2_stride = enc_conv2_stride - self.enc_pool2_stride = enc_pool2_stride - self.enc_conv2_kernel_shape = enc_conv2_kernel_shape - self.enc_pool2_kernel_shape = enc_pool2_kernel_shape - self.enc_conv1_kernel_shape = enc_conv1_kernel_shape - self.enc_pool1_kernel_shape = enc_pool1_kernel_shape - - self._init_params() - - def _init_params(self): - self._dv = {} - self._build_encoder() - self._build_decoder() - - def _build_encoder(self): - """ - CNN encoder - - Conv1 -> ReLU -> MaxPool1 -> Conv2 -> ReLU -> MaxPool2 -> - Flatten -> FC1 -> ReLU -> FC2 - """ - self.encoder = OrderedDict() - self.encoder["Conv1"] = Conv2D( - act_fn=ReLU(), - init=self.init, - pad=self.enc_conv1_pad, - optimizer=self.optimizer, - out_ch=self.enc_conv1_out_ch, - stride=self.enc_conv1_stride, - kernel_shape=self.enc_conv1_kernel_shape, - ) - self.encoder["Pool1"] = Pool2D( - mode="max", - optimizer=self.optimizer, - stride=self.enc_pool1_stride, - kernel_shape=self.enc_pool1_kernel_shape, - ) - self.encoder["Conv2"] = Conv2D( - act_fn=ReLU(), - init=self.init, - pad=self.enc_conv2_pad, - optimizer=self.optimizer, - out_ch=self.enc_conv2_out_ch, - stride=self.enc_conv2_stride, - kernel_shape=self.enc_conv2_kernel_shape, - ) - self.encoder["Pool2"] = Pool2D( - mode="max", - optimizer=self.optimizer, - stride=self.enc_pool2_stride, - kernel_shape=self.enc_pool2_kernel_shape, - ) - self.encoder["Flatten3"] = Flatten(optimizer=self.optimizer) - self.encoder["FC4"] = FullyConnected( - n_out=self.latent_dim, act_fn=ReLU(), optimizer=self.optimizer - ) - self.encoder["FC5"] = FullyConnected( - n_out=self.T * 2, - optimizer=self.optimizer, - act_fn=Affine(slope=1, intercept=0), - init=self.init, - ) - - def _build_decoder(self): - """ - MLP decoder - - FC1 -> ReLU -> FC2 -> Sigmoid - """ - self.decoder = OrderedDict() - self.decoder["FC1"] = FullyConnected( - act_fn=ReLU(), - init=self.init, - n_out=self.latent_dim, - optimizer=self.optimizer, - ) - # NB. `n_out` is dependent on the dimensionality of X. 
we use a - # placeholder for now, and update it within the `forward` method - self.decoder["FC2"] = FullyConnected( - n_out=None, act_fn=Sigmoid(), optimizer=self.optimizer, init=self.init - ) - - @property - def parameters(self): - return { - "components": { - "encoder": {k: v.parameters for k, v in self.encoder.items()}, - "decoder": {k: v.parameters for k, v in self.decoder.items()}, - } - } - - @property - def hyperparameters(self): - return { - "layer": "BernoulliVAE", - "T": self.T, - "init": self.init, - "loss": str(self.loss), - "optimizer": self.optimizer, - "latent_dim": self.latent_dim, - "enc_conv1_pad": self.enc_conv1_pad, - "enc_conv2_pad": self.enc_conv2_pad, - "enc_conv1_in_ch": self.enc_conv1_in_ch, - "enc_conv1_stride": self.enc_conv1_stride, - "enc_conv1_out_ch": self.enc_conv1_out_ch, - "enc_pool1_stride": self.enc_pool1_stride, - "enc_conv2_out_ch": self.enc_conv2_out_ch, - "enc_conv2_stride": self.enc_conv2_stride, - "enc_pool2_stride": self.enc_pool2_stride, - "enc_conv2_kernel_shape": self.enc_conv2_kernel_shape, - "enc_pool2_kernel_shape": self.enc_pool2_kernel_shape, - "enc_conv1_kernel_shape": self.enc_conv1_kernel_shape, - "enc_pool1_kernel_shape": self.enc_pool1_kernel_shape, - "encoder_ids": list(self.encoder.keys()), - "decoder_ids": list(self.decoder.keys()), - "components": { - "encoder": {k: v.hyperparameters for k, v in self.encoder.items()}, - "decoder": {k: v.hyperparameters for k, v in self.decoder.items()}, - }, - } - - @property - def derived_variables(self): - dv = { - "noise": None, - "t_mean": None, - "t_log_var": None, - "dDecoder_FC1_in": None, - "dDecoder_t_mean": None, - "dEncoder_FC5_out": None, - "dDecoder_FC1_out": None, - "dEncoder_FC4_out": None, - "dEncoder_Pool2_out": None, - "dEncoder_Conv2_out": None, - "dEncoder_Pool1_out": None, - "dEncoder_Conv1_out": None, - "dDecoder_t_log_var": None, - "dEncoder_Flatten3_out": None, - "components": { - "encoder": {k: v.derived_variables for k, v in self.encoder.items()}, - "decoder": {k: v.derived_variables for k, v in self.decoder.items()}, - }, - } - dv.update(self._dv) - return dv - - @property - def gradients(self): - return { - "components": { - "encoder": {k: v.gradients for k, v in self.encoder.items()}, - "decoder": {k: v.gradients for k, v in self.decoder.items()}, - } - } - - def _sample(self, t_mean, t_log_var): - """ - Returns a sample from the distribution - - q(t | x) = N(t_mean, diag(exp(t_log_var))) - - using the reparameterization trick. - - Parameters - ---------- - t_mean : :py:class:`ndarray ` of shape `(n_ex, latent_dim)` - Mean of the desired distribution. - t_log_var : :py:class:`ndarray ` of shape `(n_ex, latent_dim)` - Log variance vector of the desired distribution. 
- - Returns - ------- - samples: :py:class:`ndarray ` of shape `(n_ex, latent_dim)` - """ - noise = np.random.normal(loc=0.0, scale=1.0, size=t_mean.shape) - samples = noise * np.exp(t_log_var) + t_mean - # save sampled noise for backward pass - self._dv["noise"] = noise - return samples - - def forward(self, X_train): - """VAE forward pass""" - if self.decoder["FC2"].n_out is None: - fc2 = self.decoder["FC2"] - self.decoder["FC2"] = fc2.set_params({"n_out": self.N}) - - # assume each image is represented as a flattened row vector, - n_ex, in_rows, N, in_ch = X_train.shape - - # encode the training batch to estimate the mean and variance of the - # variational distribution - out = X_train - for k, v in self.encoder.items(): - out = v.forward(out) - - # extract the mean and log variance of the variational distribution - # q(t | x) from the encoder output - t_mean = out[:, : self.T] - t_log_var = out[:, self.T :] - - # sample t from q(t | x) using reparamterization trick - t = self._sample(t_mean, t_log_var) - - # pass the sampled latent value, t, through the decoder - # to generate the average reconstruction - X_recon = t - for k, v in self.decoder.items(): - X_recon = v.forward(X_recon) - - self._dv["t_mean"] = t_mean - self._dv["t_log_var"] = t_log_var - return X_recon - - def backward(self, X_train, X_recon): - """VAE backward pass""" - n_ex = X_train.shape[0] - D, E = self.decoder, self.encoder - noise = self.derived_variables["noise"] - t_mean = self.derived_variables["t_mean"] - t_log_var = self.derived_variables["t_log_var"] - - # compute gradients through the VAE loss - dY_pred, dLogVar, dMean = self.loss.grad( - X_train.reshape(n_ex, -1), X_recon, t_mean, t_log_var - ) - - # backprop through the decoder - dDecoder_FC1_out = D["FC2"].backward(dY_pred) - dDecoder_FC1_in = D["FC1"].backward(dDecoder_FC1_out) - - # backprop through the sampler - dDecoder_t_log_var = dDecoder_FC1_in * (noise * np.exp(t_log_var)) - dDecoder_t_mean = dDecoder_FC1_in - - # backprop through the encoder - dEncoder_FC5_out = np.hstack( - [dDecoder_t_mean + dMean, dDecoder_t_log_var + dLogVar] - ) - dEncoder_FC4_out = E["FC5"].backward(dEncoder_FC5_out) - dEncoder_Flatten3_out = E["FC4"].backward(dEncoder_FC4_out) - dEncoder_Pool2_out = E["Flatten3"].backward(dEncoder_Flatten3_out) - dEncoder_Conv2_out = E["Pool2"].backward(dEncoder_Pool2_out) - dEncoder_Pool1_out = E["Conv2"].backward(dEncoder_Conv2_out) - dEncoder_Conv1_out = E["Pool1"].backward(dEncoder_Pool1_out) - dX = E["Conv1"].backward(dEncoder_Conv1_out) - - self._dv["dDecoder_t_mean"] = dDecoder_t_mean - self._dv["dDecoder_FC1_in"] = dDecoder_FC1_in - self._dv["dDecoder_FC1_out"] = dDecoder_FC1_out - self._dv["dEncoder_FC5_out"] = dEncoder_FC5_out - self._dv["dEncoder_FC4_out"] = dEncoder_FC4_out - self._dv["dDecoder_t_log_var"] = dDecoder_t_log_var - self._dv["dEncoder_Pool2_out"] = dEncoder_Pool2_out - self._dv["dEncoder_Conv2_out"] = dEncoder_Conv2_out - self._dv["dEncoder_Pool1_out"] = dEncoder_Pool1_out - self._dv["dEncoder_Conv1_out"] = dEncoder_Conv1_out - self._dv["dEncoder_Flatten3_out"] = dEncoder_Flatten3_out - return dX - - def update(self, cur_loss=None): - """Perform gradient updates""" - for k, v in reversed(list(self.decoder.items())): - v.update(cur_loss) - for k, v in reversed(list(self.encoder.items())): - v.update(cur_loss) - self.flush_gradients() - - def flush_gradients(self): - """Reset parameter gradients after update""" - for k, v in self.decoder.items(): - v.flush_gradients() - for k, v in self.encoder.items(): - 
v.flush_gradients() - - def fit(self, X_train, n_epochs=20, batchsize=128, verbose=True): - """ - Fit the VAE to a training dataset. - - Parameters - ---------- - X_train : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The input volume - n_epochs : int - The maximum number of training epochs to run. Default is 20. - batchsize : int - The desired number of examples in each training batch. Default is 128. - verbose : bool - Print batch information during training. Default is True. - """ - self.verbose = verbose - self.n_epochs = n_epochs - self.batchsize = batchsize - - _, self.in_rows, self.in_cols, self.in_ch = X_train.shape - self.N = self.in_rows * self.in_cols * self.in_ch - - prev_loss = np.inf - for i in range(n_epochs): - loss, estart = 0.0, time() - batch_generator, nb = minibatch(X_train, batchsize, shuffle=True) - - # TODO: parallelize inner loop - for j, b_ix in enumerate(batch_generator): - bsize, bstart = len(b_ix), time() - - X_batch = X_train[b_ix] - X_batch_col = X_train[b_ix].reshape(bsize, -1) - - X_recon = self.forward(X_batch) - t_mean = self.derived_variables["t_mean"] - t_log_var = self.derived_variables["t_log_var"] - - self.backward(X_batch, X_recon) - batch_loss = self.loss(X_batch_col, X_recon, t_mean, t_log_var) - loss += batch_loss - - self.update(batch_loss) - - if self.verbose: - fstr = "\t[Batch {}/{}] Train loss: {:.3f} ({:.1f}s/batch)" - print(fstr.format(j + 1, nb, batch_loss, time() - bstart)) - - loss /= nb - fstr = "[Epoch {}] Avg. loss: {:.3f} Delta: {:.3f} ({:.2f}m/epoch)" - print(fstr.format(i + 1, loss, prev_loss - loss, (time() - estart) / 60.0)) - prev_loss = loss diff --git a/aitk/keras/models/w2v.py b/aitk/keras/models/w2v.py deleted file mode 100644 index b14ae74..0000000 --- a/aitk/keras/models/w2v.py +++ /dev/null @@ -1,451 +0,0 @@ -from time import time - -import numpy as np - -from ..layers import Embedding -from ..losses import NCELoss - -from ..preprocessing.nlp import Vocabulary, tokenize_words -from ..numpy_ml_utils.data_structures import DiscreteSampler - - -class Word2Vec(object): - def __init__( - self, - context_len=5, - min_count=None, - skip_gram=False, - max_tokens=None, - embedding_dim=300, - filter_stopwords=True, - noise_dist_power=0.75, - kernel_initializer="glorot_uniform", - num_negative_samples=64, - optimizer="SGD(lr=0.1)", - ): - """ - A word2vec model supporting both continuous bag of words (CBOW) and - skip-gram architectures, with training via noise contrastive - estimation. - - Parameters - ---------- - context_len : int - The number of words to the left and right of the current word to - use as context during training. Larger values result in more - training examples and thus can lead to higher accuracy at the - expense of additional training time. Default is 5. - min_count : int or None - Minimum number of times a token must occur in order to be included - in vocab. If None, include all tokens from `corpus_fp` in vocab. - Default is None. - skip_gram : bool - Whether to train the skip-gram or CBOW model. The skip-gram model - is trained to predict the target word i given its surrounding - context, ``words[i - context:i]`` and ``words[i + 1:i + 1 + - context]`` as input. Default is False. - max_tokens : int or None - Only add the first `max_tokens` most frequent tokens that occur - more than `min_count` to the vocabulary. If None, add all tokens - that occur more than than `min_count`. Default is None. - embedding_dim : int - The number of dimensions in the final word embeddings. Default is - 300. 
- filter_stopwords : bool - Whether to remove stopwords before encoding the words in the - corpus. Default is True. - noise_dist_power : float - The power the unigram count is raised to when computing the noise - distribution for negative sampling. A value of 0 corresponds to a - uniform distribution over tokens, and a value of 1 corresponds to a - distribution proportional to the token unigram counts. Default is - 0.75. - kernel_initializer : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is 'glorot_uniform'. - num_negative_samples : int - The number of negative samples to draw from the noise distribution - for each positive training sample. If 0, use the hierarchical - softmax formulation of the model instead. Default is 64. - optimizer : str, :doc:`Optimizer ` object, or None - The optimization strategy to use when performing gradient updates - within the `update` method. If None, use the - :class:`~numpy_ml.neural_nets.optimizers.SGD` optimizer with - default parameters. Default is "SGD(lr=0.1)". - - Attributes - ---------- - parameters : dict - hyperparameters : dict - derived_variables : dict - gradients : dict - - Notes - ----- - The word2vec model is outlined in [1]. - - CBOW architecture:: - - w_{t-R} ----| - w_{t-R+1} ----| - ... --> Average --> Embedding layer --> [NCE Layer / HSoftmax] --> P(w_{t} | w_{...}) - w_{t+R-1} ----| - w_{t+R} ----| - - Skip-gram architecture:: - - |--> P(w_{t-R} | w_{t}) - |--> P(w_{t-R+1} | w_{t}) - w_{t} --> Embedding layer --> [NCE Layer / HSoftmax] --| ... - |--> P(w_{t+R-1} | w_{t}) - |--> P(w_{t+R} | w_{t}) - - where :math:`w_{i}` is the one-hot representation of the word at position - `i` within a sentence in the corpus and `R` is the length of the context - window on either side of the target word. - - References - ---------- - .. [1] Mikolov et al. (2013). "Distributed representations of words - and phrases and their compositionality," Proceedings of the 26th - International Conference on Neural Information Processing Systems.
- https://arxiv.org/pdf/1310.4546.pdf - """ - self.kernel_initializer = kernel_initializer - self.optimizer = optimizer - self.skip_gram = skip_gram - self.min_count = min_count - self.max_tokens = max_tokens - self.context_len = context_len - self.embedding_dim = embedding_dim - self.filter_stopwords = filter_stopwords - self.noise_dist_power = noise_dist_power - self.num_negative_samples = num_negative_samples - self.special_chars = set(["", "", ""]) - - def _init_params(self): - self._dv = {} - self._build_noise_distribution() - - self.embeddings = Embedding( - kernel_initializer=self.kernel_initializer, - vocab_size=self.vocab_size, - n_out=self.embedding_dim, - optimizer=self.optimizer, - pool=None if self.skip_gram else "mean", - ) - - self.loss = NCELoss( - kernel_initializer=self.kernel_initializer, - optimizer=self.optimizer, - n_classes=self.vocab_size, - subtract_log_label_prob=False, - noise_sampler=self._noise_sampler, - num_negative_samples=self.num_negative_samples, - ) - - @property - def parameters(self): - """Model parameters""" - param = {"components": {"embeddings": {}, "loss": {}}} - if hasattr(self, "embeddings"): - param["components"] = { - "embeddings": self.embeddings.parameters, - "loss": self.loss.parameters, - } - return param - - @property - def hyperparameters(self): - """Model hyperparameters""" - hp = { - "layer": "Word2Vec", - "kernel_initializer": self.kernel_initializer, - "skip_gram": self.skip_gram, - "optimizer": self.optimizer, - "max_tokens": self.max_tokens, - "context_len": self.context_len, - "embedding_dim": self.embedding_dim, - "noise_dist_power": self.noise_dist_power, - "filter_stopwords": self.filter_stopwords, - "num_negative_samples": self.num_negative_samples, - "vocab_size": self.vocab_size if hasattr(self, "vocab_size") else None, - "components": {"embeddings": {}, "loss": {}}, - } - - if hasattr(self, "embeddings"): - hp["components"] = { - "embeddings": self.embeddings.hyperparameters, - "loss": self.loss.hyperparameters, - } - return hp - - @property - def derived_variables(self): - """Variables computed during model operation""" - dv = {"components": {"embeddings": {}, "loss": {}}} - dv.update(self._dv) - - if hasattr(self, "embeddings"): - dv["components"] = { - "embeddings": self.embeddings.derived_variables, - "loss": self.loss.derived_variables, - } - return dv - - @property - def gradients(self): - """Model parameter gradients""" - grad = {"components": {"embeddings": {}, "loss": {}}} - if hasattr(self, "embeddings"): - grad["components"] = { - "embeddings": self.embeddings.gradients, - "loss": self.loss.gradients, - } - return grad - - def forward(self, X, targets, retain_derived=True): - """ - Evaluate the network on a single minibatch. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Layer input, representing a minibatch of `n_ex` examples, each - consisting of `n_in` integer word indices - targets : :py:class:`ndarray ` of shape `(n_ex,)` - Target word index for each example in the minibatch. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If `False`, this suggests the layer - will not be expected to backprop through wrt. this input. Default - True. - - Returns - ------- - loss : float - The loss associated with the current minibatch - y_pred : :py:class:`ndarray ` of shape `(n_ex,)` - The conditional probabilities of the words in `targets` given the - corresponding example / context in `X`. 
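The CBOW path above pools the context-word embeddings (the ``Embedding`` layer is built with ``pool="mean"``) before the NCE layer scores the target. A minimal pure-NumPy sketch of that pooling step, using a hypothetical embedding table ``W_emb`` and toy word indices (not the module's own code):

.. code-block:: python

    import numpy as np

    rng = np.random.default_rng(0)
    vocab_size, embedding_dim = 10, 4
    W_emb = rng.normal(size=(vocab_size, embedding_dim))  # hypothetical embedding table

    X = np.array([[2, 5, 7]])                # one example: three context-word indices
    context_avg = W_emb[X[0]].mean(axis=0)   # shape (embedding_dim,); this pooled vector is
                                             # what the NCE layer scores against the target word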
- """ - X_emb = self.embeddings.forward(X, retain_derived=True) - loss, y_pred = self.loss.loss(X_emb, targets.flatten(), retain_derived=True) - return loss, y_pred - - def backward(self): - """ - Compute the gradient of the loss wrt the current network parameters. - """ - dX_emb = self.loss.grad(retain_grads=True, update_params=False) - self.embeddings.backward(dX_emb) - - def update(self, cur_loss=None): - """Perform gradient updates""" - self.loss.update(cur_loss) - self.embeddings.update(cur_loss) - self.flush_gradients() - - def flush_gradients(self): - """Reset parameter gradients after update""" - self.loss.flush_gradients() - self.embeddings.flush_gradients() - - def get_embedding(self, word_ids): - """ - Retrieve the embeddings for a collection of word IDs. - - Parameters - ---------- - word_ids : :py:class:`ndarray ` of shape `(M,)` - An array of word IDs to retrieve embeddings for. - - Returns - ------- - embeddings : :py:class:`ndarray ` of shape `(M, n_out)` - The embedding vectors for each of the `M` word IDs. - """ - if isinstance(word_ids, list): - word_ids = np.array(word_ids) - return self.embeddings.lookup(word_ids) - - def _build_noise_distribution(self): - """ - Construct the noise distribution for use during negative sampling. - - For a word ``w`` in the corpus, the noise distribution is:: - - P_n(w) = Count(w) ** noise_dist_power / Z - - where ``Z`` is a normalizing constant, and `noise_dist_power` is a - hyperparameter of the model. Mikolov et al. report best performance - using a `noise_dist_power` of 0.75. - """ - if not hasattr(self, "vocab"): - raise ValueError("Must call `fit` before constructing noise distribution") - - probs = np.zeros(len(self.vocab)) - power = self.hyperparameters["noise_dist_power"] - - for ix, token in enumerate(self.vocab): - count = token.count - probs[ix] = count ** power - - probs /= np.sum(probs) - self._noise_sampler = DiscreteSampler(probs, log=False, with_replacement=False) - - def _train_epoch(self, corpus_fps, encoding): - total_loss = 0 - batch_generator = self.minibatcher(corpus_fps, encoding) - for ix, (X, target) in enumerate(batch_generator): - loss = self._train_batch(X, target) - total_loss += loss - if self.verbose: - smooth_loss = 0.99 * smooth_loss + 0.01 * loss if ix > 0 else loss - fstr = "[Batch {}] Loss: {:.5f} | Smoothed Loss: {:.5f}" - print(fstr.format(ix + 1, loss, smooth_loss)) - return total_loss / (ix + 1) - - def _train_batch(self, X, target): - loss, _ = self.forward(X, target) - self.backward() - self.update(loss) - return loss - - def minibatcher(self, corpus_fps, encoding): - """ - A minibatch generator for skip-gram and CBOW models. - - Parameters - ---------- - corpus_fps : str or list of strs - The filepath / list of filepaths to the document(s) to be encoded. - Each document is expected to be encoded as newline-separated - string of text, with adjacent tokens separated by a whitespace - character. - encoding : str - Specifies the text encoding for corpus. This value is passed - directly to Python's `open` builtin. Common entries are either - 'utf-8' (no header byte), or 'utf-8-sig' (header byte). - - Yields - ------ - X : list of length `batchsize` or :py:class:`ndarray ` of shape (`batchsize`, `n_in`) - The context IDs for a minibatch of `batchsize` examples. If - ``self.skip_gram`` is False, `X` will be a ragged list consisting - of `batchsize` variable-length lists. 
If ``self.skip_gram`` is - `True`, all sublists will be of the same length (`n_in`) and `X` - will be returned as a :py:class:`ndarray ` of shape (`batchsize`, `n_in`). - target : :py:class:`ndarray ` of shape (`batchsize`, 1) - The target IDs associated with each example in `X` - """ - batchsize = self.batchsize - X_mb, target_mb, mb_ready = [], [], False - - for d_ix, doc_fp in enumerate(corpus_fps): - with open(doc_fp, "r", encoding=encoding) as doc: - for line in doc: - words = tokenize_words( - line, lowercase=True, filter_stopwords=self.filter_stopwords - ) - word_ixs = self.vocab.words_to_indices( - self.vocab.filter(words, unk=False) - ) - for word_loc, word in enumerate(word_ixs): - # since more distant words are usually less related to - # the target word, we downweight them by sampling from - # them less frequently during training. - R = np.random.randint(1, self.context_len) - left = word_ixs[max(word_loc - R, 0) : word_loc] - right = word_ixs[word_loc + 1 : word_loc + 1 + R] - context = left + right - - if len(context) == 0: - continue - - # in the skip-gram architecture we use each of the - # surrounding context to predict `word` / avoid - # predicting negative samples - if self.skip_gram: - X_mb.extend([word] * len(context)) - target_mb.extend(context) - mb_ready = len(target_mb) >= batchsize - - # in the CBOW architecture we use the average of the - # context embeddings to predict the target `word` / avoid - # predicting the negative samples - else: - context = np.array(context) - X_mb.append(context) # X_mb will be a ragged array - target_mb.append(word) - mb_ready = len(X_mb) == batchsize - - if mb_ready: - mb_ready = False - X_batch, target_batch = X_mb.copy(), target_mb.copy() - X_mb, target_mb = [], [] - if self.skip_gram: - X_batch = np.array(X_batch)[:, None] - target_batch = np.array(target_batch)[:, None] - yield X_batch, target_batch - - # if we've reached the end of our final document and there are - # remaining examples, yield the stragglers as a partial minibatch - if len(X_mb) > 0: - if self.skip_gram: - X_mb = np.array(X_mb)[:, None] - target_mb = np.array(target_mb)[:, None] - yield X_mb, target_mb - - def fit( - self, corpus_fps, encoding="utf-8-sig", n_epochs=20, batchsize=128, verbose=True - ): - """ - Learn word2vec embeddings for the examples in `X_train`. - - Parameters - ---------- - corpus_fps : str or list of strs - The filepath / list of filepaths to the document(s) to be encoded. - Each document is expected to be encoded as newline-separated - string of text, with adjacent tokens separated by a whitespace - character. - encoding : str - Specifies the text encoding for corpus. Common entries are either - 'utf-8' (no header byte), or 'utf-8-sig' (header byte). Default - value is 'utf-8-sig'. - n_epochs : int - The maximum number of training epochs to run. Default is 20. - batchsize : int - The desired number of examples in each training batch. Default is - 128. - verbose : bool - Print batch information during training. Default is True. 
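The dynamic context window used by ``minibatcher`` above can be illustrated with a short pure-NumPy sketch (toy token ids, not the module's own code); because the radius ``R`` is redrawn for every position, distant neighbours enter the context less often:

.. code-block:: python

    import numpy as np

    rng = np.random.default_rng(0)
    word_ixs = [4, 17, 3, 9, 21]   # hypothetical token ids for one sentence
    context_len = 5

    for word_loc, word in enumerate(word_ixs):
        R = int(rng.integers(1, context_len))           # mirrors np.random.randint(1, context_len)
        left = word_ixs[max(word_loc - R, 0):word_loc]
        right = word_ixs[word_loc + 1:word_loc + 1 + R]
        context = left + right                          # CBOW input / skip-gram prediction targets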
- """ - self.verbose = verbose - self.n_epochs = n_epochs - self.batchsize = batchsize - - self.vocab = Vocabulary( - lowercase=True, - min_count=self.min_count, - max_tokens=self.max_tokens, - filter_stopwords=self.filter_stopwords, - ) - self.vocab.fit(corpus_fps, encoding=encoding) - self.vocab_size = len(self.vocab) - - # ignore special characters when training the model - for sp in self.special_chars: - self.vocab.counts[sp] = 0 - - # now that we know our vocabulary size, we can initialize the embeddings - self._init_params() - - prev_loss = np.inf - for i in range(n_epochs): - loss, estart = 0.0, time() - loss = self._train_epoch(corpus_fps, encoding) - - fstr = "[Epoch {}] Avg. loss: {:.3f} Delta: {:.3f} ({:.2f}m/epoch)" - print(fstr.format(i + 1, loss, prev_loss - loss, (time() - estart) / 60.0)) - prev_loss = loss diff --git a/aitk/keras/models/wgan_gp.py b/aitk/keras/models/wgan_gp.py deleted file mode 100644 index a48e194..0000000 --- a/aitk/keras/models/wgan_gp.py +++ /dev/null @@ -1,528 +0,0 @@ -from time import time -from collections import OrderedDict - -import numpy as np - -from ..utils import minibatch -from ..layers import Dense -from ..losses import WGAN_GPLoss - - -class WGAN_GP(object): - """ - A Wasserstein generative adversarial network (WGAN) architecture with - gradient penalty (GP). - - Notes - ----- - In contrast to a regular WGAN, WGAN-GP uses gradient penalty on the - generator rather than weight clipping to encourage the 1-Lipschitz - constraint: - - .. math:: - - | \\text{Generator}(\mathbf{x}_1) - \\text{Generator}(\mathbf{x}_2) | - \leq |\mathbf{x}_1 - \mathbf{x}_2 | \ \ \ \ \\forall \mathbf{x}_1, \mathbf{x}_2 - - In other words, the generator must have input gradients with a norm of at - most 1 under the :math:`\mathbf{X}_{real}` and :math:`\mathbf{X}_{fake}` - data distributions. - - To enforce this constraint, WGAN-GP penalizes the model if the generator - gradient norm moves away from a target norm of 1. See - :class:`~numpy_ml.neural_nets.losses.WGAN_GPLoss` for more details. - - In contrast to a standard WGAN, WGAN-GP avoids using BatchNorm in the - critic, as correlation between samples in a batch can impact the stability - of the gradient penalty. - - WGAP-GP architecture: - - .. code-block:: text - - X_real ------------------------| - >---> [Critic] --> Y_out - Z --> [Generator] --> X_fake --| - - where ``[Generator]`` is - - .. code-block:: text - - FC1 -> ReLU -> FC2 -> ReLU -> FC3 -> ReLU -> FC4 - - and ``[Critic]`` is - - .. code-block:: text - - FC1 -> ReLU -> FC2 -> ReLU -> FC3 -> ReLU -> FC4 - - and - - .. math:: - - Z \sim \mathcal{N}(0, 1) - """ - - def __init__( - self, - g_hidden=512, - kernel_initializer="he_uniform", - optimizer="RMSProp(lr=0.0001)", - debug=False, - ): - """ - Wasserstein generative adversarial network with gradient penalty. - - Parameters - ---------- - g_hidden : int - The number of units in the critic and generator hidden layers. - Default is 512. - kernel_initializer : str - The weight initialization strategy. Valid entries are - {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform', - 'std_normal', 'trunc_normal'}. Default is "he_uniform". - optimizer : str or :doc:`Optimizer ` object or None - The optimization strategy to use when performing gradient updates. - If None, use the :class:`~numpy_ml.neural_nets.optimizers.SGD` - optimizer with default parameters. Default is "RMSProp(lr=0.0001)". - debug : bool - Whether to store additional intermediate output within - ``self.derived_variables``. 
Default is False. - """ - self.kernel_initializer = kernel_initializer - self.debug = debug - self.g_hidden = g_hidden - self.optimizer = optimizer - - self.lambda_ = None - self.n_steps = None - self.batchsize = None - - self.is_initialized = False - - def _init_params(self): - self._dv = {} - self._gr = {} - self._build_critic() - self._build_generator() - self.is_initialized = True - - def _build_generator(self): - """ - FC1 -> ReLU -> FC2 -> ReLU -> FC3 -> ReLU -> FC4 - """ - self.generator = OrderedDict() - self.generator["FC1"] = Dense( - self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, kernel_initializer=self.kernel_initializer - ) - self.generator["FC2"] = Dense( - self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, kernel_initializer=self.kernel_initializer - ) - self.generator["FC3"] = Dense( - self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, kernel_initializer=self.kernel_initializer - ) - self.generator["FC4"] = Dense( - self.n_feats, - act_fn="Affine(slope=1, intercept=0)", - optimizer=self.optimizer, - kernel_initializer=self.kernel_initializer, - ) - - def _build_critic(self): - """ - FC1 -> ReLU -> FC2 -> ReLU -> FC3 -> ReLU -> FC4 - """ - self.critic = OrderedDict() - self.critic["FC1"] = Dense( - self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, kernel_initializer=self.kernel_initializer - ) - self.critic["FC2"] = Dense( - self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, kernel_initializer=self.kernel_initializer - ) - self.critic["FC3"] = Dense( - self.g_hidden, act_fn="ReLU", optimizer=self.optimizer, kernel_initializer=self.kernel_initializer - ) - self.critic["FC4"] = Dense( - 1, - act_fn="Affine(slope=1, intercept=0)", - optimizer=self.optimizer, - kernel_initializer=self.kernel_initializer, - ) - - @property - def hyperparameters(self): - return { - "kernel_initializer": self.kernel_initializer, - "lambda_": self.lambda_, - "g_hidden": self.g_hidden, - "n_steps": self.n_steps, - "optimizer": self.optimizer, - "batchsize": self.batchsize, - "c_updates_per_epoch": self.c_updates_per_epoch, - "components": { - "critic": {k: v.hyperparameters for k, v in self.critic.items()}, - "generator": {k: v.hyperparameters for k, v in self.generator.items()}, - }, - } - - @property - def parameters(self): - return { - "components": { - "critic": {k: v.parameters for k, v in self.critic.items()}, - "generator": {k: v.parameters for k, v in self.generator.items()}, - } - } - - @property - def derived_variables(self): - C = self.critic.items() - G = self.generator.items() - dv = { - "components": { - "critic": {k: v.derived_variables for k, v in C}, - "generator": {k: v.derived_variables for k, v in G}, - } - } - dv.update(self._dv) - return dv - - @property - def gradients(self): - grads = { - "dC_Y_fake": None, - "dC_Y_real": None, - "dG_Y_fake": None, - "dC_gradInterp": None, - "components": { - "critic": {k: v.gradients for k, v in self.critic.items()}, - "generator": {k: v.gradients for k, v in self.generator.items()}, - }, - } - grads.update(self._gr) - return grads - - def forward(self, X, module, retain_derived=True): - """ - Perform the forward pass for either the generator or the critic. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(batchsize, \*)` - Input data - module : {'C' or 'G'} - Whether to perform the forward pass for the critic ('C') or for the - generator ('G'). - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. 
If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - out : :py:class:`ndarray ` of shape `(batchsize, \*)` - The output of the final layer of the module. - Xs : dict - A dictionary with layer ids as keys and values corresponding to the - input to each intermediate layer during the forward pass. Useful - during debugging. - """ - if module == "G": - mod = self.generator - elif module == "C": - mod = self.critic - else: - raise ValueError("Unrecognized module name: {}".format(module)) - - Xs = {} - out, rd = X, retain_derived - for k, v in mod.items(): - Xs[k] = out - out = v.forward(out, retain_derived=rd) - return out, Xs - - def backward(self, grad, module, retain_grads=True): - """ - Perform the backward pass for either the generator or the critic. - - Parameters - ---------- - grad : :py:class:`ndarray ` of shape `(batchsize, \*)` or list of arrays - Gradient of the loss with respect to module output(s). - module : {'C' or 'G'} - Whether to perform the backward pass for the critic ('C') or for the - generator ('G'). - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is True. - - Returns - ------- - out : :py:class:`ndarray ` of shape `(batchsize, \*)` - The gradient of the loss with respect to the module input. - dXs : dict - A dictionary with layer ids as keys and values corresponding to the - input to each intermediate layer during the backward pass. Useful - during debugging. - """ - if module == "G": - mod = self.generator - elif module == "C": - mod = self.critic - else: - raise ValueError("Unrecognized module name: {}".format(module)) - - dXs = {} - out, rg = grad, retain_grads - for k, v in reversed(list(mod.items())): - dXs[k] = out - out = v.backward(out, retain_grads=rg) - return out, dXs - - def _dGradInterp(self, dLdGradInterp, dYi_outs): - """ - Compute the gradient penalty's contribution to the critic loss and - update the parameter gradients accordingly. - - Parameters - ---------- - dLdGradInterp : :py:class:`ndarray ` of shape `(batchsize, critic_in_dim)` - Gradient of `Y_interp` with respect to `X_interp`. - dYi_outs : dict - The intermediate outputs generated during the backward pass when - computing `dLdGradInterp`. - """ - dy = dLdGradInterp - for k, v in self.critic.items(): - X = v.X[-1] # layer input during forward pass - dy, dW, dB = v._bwd2(dy, X, dYi_outs[k][2]) - self.critic[k].gradients["W"] += dW - self.critic[k].gradients["b"] += dB - - def update_critic(self, X_real): - """ - Compute parameter gradients for the critic on a single minibatch. - - Parameters - ---------- - X_real : :py:class:`ndarray ` of shape `(batchsize, n_feats)` - Input data. - - Returns - ------- - C_loss : float - The critic loss on the current data. - """ - self.flush_gradients("C") - - n_ex = X_real.shape[0] - noise = np.random.randn(*X_real.shape) - - # generate and score the real and fake data - X_fake, Xf_outs = self.forward(noise, "G") - Y_real, Yr_outs = self.forward(X_real, "C") - Y_fake, Yf_outs = self.forward(X_fake, "C") - - # sample a random point on the linear interpolation between real and - # fake data and compute its score - alpha = np.random.rand(n_ex, 1) - X_interp = alpha * X_real + (1 - alpha) * X_fake - Y_interp, Yi_outs = self.forward(X_interp, "C") - - # compute the gradient of Y_interp wrt. 
X_interp - # Note that we don't save intermediate gradients here since this is not - # the real backward pass - dLdy = [0, 0, np.ones_like(Y_interp)] - (_, _, gradInterp), dYi_outs = self.backward(dLdy, "C", retain_grads=False) - - # calculate critic loss and differentiate with respect to each term - C_loss = self.loss(Y_fake, "C", Y_real, gradInterp) - dY_real, dY_fake, dGrad_interp = self.loss.grad(Y_fake, "C", Y_real, gradInterp) - - # compute `dY_real` and `dY_fake` contributions to critic loss, update - # param gradients accordingly - self.backward([dY_real, dY_fake, 0], "C") - - # compute `gradInterp`'s contribution to the critic loss, updating - # param gradients accordingly - self._dGradInterp(dGrad_interp, dYi_outs) - - # cache intermediate vars for the generator update - self._dv["alpha"] = alpha - self._dv["Y_fake"] = Y_fake - - # log additional intermediate values for debugging - if self.debug: - self._dv["G_fwd_X_fake"] = {} - self._dv["C_fwd_Y_real"] = {} - self._dv["C_fwd_Y_fake"] = {} - self._dv["C_fwd_Y_interp"] = {} - - N = len(self.critic.keys()) - N2 = len(self.generator.keys()) - - for i in range(N2): - self._dv["G_fwd_X_fake"]["FC" + str(i)] = Xf_outs["FC" + str(i + 1)] - - for i in range(N): - self._dv["C_fwd_Y_real"]["FC" + str(i)] = Yr_outs["FC" + str(i + 1)] - self._dv["C_fwd_Y_fake"]["FC" + str(i)] = Yf_outs["FC" + str(i + 1)] - self._dv["C_fwd_Y_interp"]["FC" + str(i)] = Yi_outs["FC" + str(i + 1)] - - self._dv["C_fwd_Y_real"]["FC" + str(N)] = Y_real - self._dv["C_fwd_Y_fake"]["FC" + str(N)] = Y_fake - self._dv["G_fwd_X_fake"]["FC" + str(N2)] = X_fake - self._dv["C_fwd_Y_interp"]["FC" + str(N)] = Y_interp - self._dv["C_dY_interp_wrt"] = {k: v[2] for k, v in dYi_outs.items()} - - self._dv["noise"] = noise - self._dv["X_fake"] = X_fake - self._dv["X_real"] = X_real - self._dv["Y_real"] = Y_real - self._dv["Y_fake"] = Y_fake - self._dv["C_loss"] = C_loss - self._dv["dY_real"] = dY_real - self._dv["dC_Y_fake"] = dY_fake - self._dv["X_interp"] = X_interp - self._dv["Y_interp"] = Y_interp - self._dv["gradInterp"] = gradInterp - self._dv["dGrad_interp"] = dGrad_interp - - return C_loss - - def update_generator(self, X_shape): - """ - Compute parameter gradients for the generator on a single minibatch. - - Parameters - ---------- - X_shape : tuple of `(batchsize, n_feats)` - Shape for the input batch. 
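The interpolation and penalty computed in ``update_critic`` above follow the WGAN-GP recipe: score a random point between each real/fake pair and penalise the critic when the norm of its input gradient drifts from 1. A pure-NumPy sketch of just that term (``gradient_penalty`` and the toy shapes are illustrative, not the module's API):

.. code-block:: python

    import numpy as np

    def gradient_penalty(grad_interp, lambda_=10.0):
        # grad_interp: dY_interp / dX_interp for each example, shape (n_ex, n_feats);
        # lambda_=10 is only an illustrative default; the module takes it as a `fit` argument
        norms = np.linalg.norm(grad_interp, axis=1)
        return lambda_ * np.mean((norms - 1.0) ** 2)

    rng = np.random.default_rng(0)
    n_ex, n_feats = 4, 3
    X_real, X_fake = rng.normal(size=(n_ex, n_feats)), rng.normal(size=(n_ex, n_feats))
    alpha = rng.random((n_ex, 1))
    X_interp = alpha * X_real + (1 - alpha) * X_fake   # the points at which the critic is scored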
- - Returns - ------- - G_loss : float - The generator loss on the fake data (generated during the critic - update) - """ - self.flush_gradients("G") - Y_fake = self.derived_variables["Y_fake"] - - n_ex, _ = Y_fake.shape - G_loss = -Y_fake.mean() - dG_loss = -np.ones_like(Y_fake) / n_ex - self.backward(dG_loss, "G") - - if self.debug: - self._dv["G_loss"] = G_loss - self._dv["dG_Y_fake"] = dG_loss - - return G_loss - - def flush_gradients(self, module): - """Reset parameter gradients to 0 after an update.""" - if module == "G": - mod = self.generator - elif module == "C": - mod = self.critic - else: - raise ValueError("Unrecognized module name: {}".format(module)) - - for k, v in mod.items(): - v.flush_gradients() - - def update(self, module, module_loss=None): - """Perform gradient updates and flush gradients upon completion""" - if module == "G": - mod = self.generator - elif module == "C": - mod = self.critic - else: - raise ValueError("Unrecognized module name: {}".format(module)) - - for k, v in reversed(list(mod.items())): - v.update(module_loss) - self.flush_gradients(module) - - def fit( - self, - X_real, - lambda_, - n_steps=1000, - batchsize=128, - c_updates_per_epoch=5, - verbose=True, - ): - """ - Fit WGAN_GP on a training dataset. - - Parameters - ---------- - X_real : :py:class:`ndarray ` of shape `(n_ex, n_feats)` - Training dataset - lambda_ : float - Gradient penalty coefficient for the critic loss - n_steps : int - The maximum number of generator updates to perform. Default is - 1000. - batchsize : int - Number of examples to use in each training minibatch. Default is - 128. - c_updates_per_epoch : int - The number of critic updates to perform at each generator update. - verbose : bool - Print loss values after each update. If False, only print loss - every 100 steps. Default is True. - """ - self.lambda_ = lambda_ - self.verbose = verbose - self.n_steps = n_steps - self.batchsize = batchsize - self.c_updates_per_epoch = c_updates_per_epoch - - # adjust output of the generator to match the dimensionality of X - if not self.is_initialized: - self.n_feats = X_real.shape[1] - self._init_params() - - # (re-)initialize loss - prev_C, prev_G = np.inf, np.inf - self.loss = WGAN_GPLoss(lambda_=self.lambda_) - - # training loop - NC, NG = self.c_updates_per_epoch, self.n_steps - for i in range(NG): - estart = time() - batch_generator, _ = minibatch(X_real, batchsize, shuffle=False) - - for j, b_ix in zip(range(NC), batch_generator): - bstart = time() - X_batch = X_real[b_ix] - C_loss = self.update_critic(X_batch) - - # for testing, don't perform gradient update so we can inspect each grad - if not self.debug: - self.update("C", C_loss) - - if self.verbose: - fstr = "\t[Critic batch {}] Critic loss: {:.3f} {:.3f}∆ ({:.1f}s/batch)" - print(fstr.format(j + 1, C_loss, prev_C - C_loss, time() - bstart)) - prev_C = C_loss - - # generator update - G_loss = self.update_generator(X_batch.shape) - - # for testing, don't perform gradient update so we can inspect each grad - if not self.debug: - self.update("G", G_loss) - - if i % 99 == 0: - fstr = "[Epoch {}] Gen. loss: {:.3f} Critic loss: {:.3f}" - print(fstr.format(i + 1, G_loss, C_loss)) - - elif self.verbose: - fstr = "[Epoch {}] Gen. 
loss: {:.3f} {:.3f}∆ ({:.1f}s/epoch)" - print(fstr.format(i + 1, G_loss, prev_G - G_loss, time() - estart)) - prev_G = G_loss diff --git a/aitk/keras/modules/README.md b/aitk/keras/modules/README.md deleted file mode 100644 index 8590b6b..0000000 --- a/aitk/keras/modules/README.md +++ /dev/null @@ -1,10 +0,0 @@ -# Modules - -The `modules.py` module implements common multi-layer blocks that appear across -many modern deep networks. It includes: - -- Bidirectional LSTMs ([Schuster & Paliwal, 1997](https://pdfs.semanticscholar.org/4b80/89bc9b49f84de43acc2eb8900035f7d492b2.pdf)) -- ResNet-style "identity" (i.e., `same`-convolution) residual blocks ([He et al., 2015](https://arxiv.org/pdf/1512.03385.pdf)) -- ResNet-style "convolutional" (i.e., parametric) residual blocks ([He et al., 2015](https://arxiv.org/pdf/1512.03385.pdf)) -- WaveNet-style residual block with dilated causal convolutions ([van den Oord et al., 2016](https://arxiv.org/pdf/1609.03499.pdf)) -- Transformer-style multi-headed dot-product attention ([Vaswani et al., 2017](https://arxiv.org/pdf/1706.03762.pdf)) diff --git a/aitk/keras/modules/__init__.py b/aitk/keras/modules/__init__.py deleted file mode 100644 index 270dceb..0000000 --- a/aitk/keras/modules/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .modules import * diff --git a/aitk/keras/modules/modules.py b/aitk/keras/modules/modules.py deleted file mode 100644 index cc31ea7..0000000 --- a/aitk/keras/modules/modules.py +++ /dev/null @@ -1,1427 +0,0 @@ -from abc import ABC, abstractmethod - -import re -import numpy as np - -from ..wrappers import Dropout -from ..utils import calc_pad_dims_2D -from ..activations import Tanh, Sigmoid, ReLU, LeakyReLU, Affine -from ..layers import ( - DotProductAttention, - Dense, - BatchNorm2D, - Conv1D, - Conv2D, - Multiply, - LSTMCell, - Add, -) - - -class ModuleBase(ABC): - def __init__(self): - self.X = None - self.trainable = True - - super().__init__() - - @abstractmethod - def _init_params(self, **kwargs): - raise NotImplementedError - - @abstractmethod - def forward(self, z, **kwargs): - raise NotImplementedError - - @abstractmethod - def backward(self, out, **kwargs): - raise NotImplementedError - - @property - def components(self): - comps = [] - for c in self.hyperparameters["component_ids"]: - if hasattr(self, c): - comps.append(getattr(self, c)) - return comps - - def freeze(self): - self.trainable = False - for c in self.components: - c.freeze() - - def unfreeze(self): - self.trainable = True - for c in self.components: - c.unfreeze() - - def update(self, cur_loss=None): - assert self.trainable, "Layer is frozen" - for c in self.components: - c.update(cur_loss) - self.flush_gradients() - - def flush_gradients(self): - assert self.trainable, "Layer is frozen" - - self.X = [] - self._dv = {} - for c in self.components: - for k, v in c.derived_variables.items(): - c.derived_variables[k] = None - - for k, v in c.gradients.items(): - c.gradients[k] = np.zeros_like(v) - - def set_params(self, summary_dict): - cids = self.hyperparameters["component_ids"] - for k, v in summary_dict["parameters"].items(): - if k == "components": - for c, cd in summary_dict["parameters"][k].items(): - if c in cids: - getattr(self, c).set_params(cd) - - elif k in self.parameters: - self.parameters[k] = v - - for k, v in summary_dict["hyperparameters"].items(): - if k == "components": - for c, cd in summary_dict["hyperparameters"][k].items(): - if c in cids: - getattr(self, c).set_params(cd) - - if k in self.hyperparameters: - if k == "act_fn" and v == 
"ReLU": - self.hyperparameters[k] = ReLU() - elif v == "act_fn" and v == "Sigmoid": - self.hyperparameters[k] = Sigmoid() - elif v == "act_fn" and v == "Tanh": - self.hyperparameters[k] = Tanh() - elif v == "act_fn" and "Affine" in v: - r = r"Affine\(slope=(.*), intercept=(.*)\)" - slope, intercept = re.match(r, v).groups() - self.hyperparameters[k] = Affine(float(slope), float(intercept)) - elif v == "act_fn" and "Leaky ReLU" in v: - r = r"Leaky ReLU\(alpha=(.*)\)" - alpha = re.match(r, v).groups()[0] - self.hyperparameters[k] = LeakyReLU(float(alpha)) - else: - self.hyperparameters[k] = v - - def summary(self): - return { - "parameters": self.parameters, - "layer": self.hyperparameters["layer"], - "hyperparameters": self.hyperparameters, - } - - -class WavenetResidualModule(ModuleBase): - def __init__( - self, - ch_residual, - ch_dilation, - dilation, - kernel_width, - optimizer=None, - init="glorot_uniform", - ): - """ - A WaveNet-like residual block with causal dilated convolutions. - - .. code-block:: text - - *Skip path in* >-------------------------------------------> + ---> *Skip path out* - Causal |--> Tanh --| | - *Main |--> Dilated Conv1D -| * --> 1x1 Conv1D --| - path >--| |--> Sigm --| | - in* |-------------------------------------------------> + ---> *Main path out* - *Residual path* - - On the final block, the output of the skip path is further processed to - produce the network predictions. - - References - ---------- - .. [1] van den Oord et al. (2016). "Wavenet: a generative model for raw - audio". https://arxiv.org/pdf/1609.03499.pdf - - Parameters - ---------- - ch_residual : int - The number of output channels for the 1x1 - :class:`~numpy_ml.neural_nets.layers.Conv1D` layer in the main path. - ch_dilation : int - The number of output channels for the causal dilated - :class:`~numpy_ml.neural_nets.layers.Conv1D` layer in the main path. - dilation : int - The dilation rate for the causal dilated - :class:`~numpy_ml.neural_nets.layers.Conv1D` layer in the main path. - kernel_width : int - The width of the causal dilated - :class:`~numpy_ml.neural_nets.layers.Conv1D` kernel in the main - path. - init : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is 'glorot_uniform'. - optimizer : str or :doc:`Optimizer ` object or None - The optimization strategy to use when performing gradient updates - within the :meth:`update` method. If None, use the - :class:`~numpy_ml.neural_nets.optimizers.SGD` optimizer with default - parameters. Default is None. 
- """ - super().__init__() - - self.init = init - self.dilation = dilation - self.optimizer = optimizer - self.ch_residual = ch_residual - self.ch_dilation = ch_dilation - self.kernel_width = kernel_width - - self._init_params() - - def _init_params(self): - self._dv = {} - - self.conv_dilation = Conv1D( - stride=1, - pad="causal", - init=self.init, - kernel_width=2, - dilation=self.dilation, - out_ch=self.ch_dilation, - optimizer=self.optimizer, - act_fn=Affine(slope=1, intercept=0), - ) - - self.tanh = Tanh() - self.sigm = Sigmoid() - self.multiply_gate = Multiply(act_fn=Affine(slope=1, intercept=0)) - - self.conv_1x1 = Conv1D( - stride=1, - pad="same", - dilation=0, - init=self.init, - kernel_width=1, - out_ch=self.ch_residual, - optimizer=self.optimizer, - act_fn=Affine(slope=1, intercept=0), - ) - - self.add_residual = Add(act_fn=Affine(slope=1, intercept=0)) - self.add_skip = Add(act_fn=Affine(slope=1, intercept=0)) - - @property - def parameters(self): - """A dictionary of the module parameters.""" - return { - "components": { - "conv_1x1": self.conv_1x1.parameters, - "add_skip": self.add_skip.parameters, - "add_residual": self.add_residual.parameters, - "conv_dilation": self.conv_dilation.parameters, - "multiply_gate": self.multiply_gate.parameters, - } - } - - @property - def hyperparameters(self): - """A dictionary of the module hyperparameters""" - return { - "layer": "WavenetResidualModule", - "init": self.init, - "dilation": self.dilation, - "optimizer": self.optimizer, - "ch_residual": self.ch_residual, - "ch_dilation": self.ch_dilation, - "kernel_width": self.kernel_width, - "component_ids": [ - "conv_1x1", - "add_skip", - "add_residual", - "conv_dilation", - "multiply_gate", - ], - "components": { - "conv_1x1": self.conv_1x1.hyperparameters, - "add_skip": self.add_skip.hyperparameters, - "add_residual": self.add_residual.hyperparameters, - "conv_dilation": self.conv_dilation.hyperparameters, - "multiply_gate": self.multiply_gate.hyperparameters, - }, - } - - @property - def derived_variables(self): - """A dictionary of intermediate values computed during the - forward/backward passes.""" - dv = { - "conv_1x1_out": None, - "conv_dilation_out": None, - "multiply_gate_out": None, - "components": { - "conv_1x1": self.conv_1x1.derived_variables, - "add_skip": self.add_skip.derived_variables, - "add_residual": self.add_residual.derived_variables, - "conv_dilation": self.conv_dilation.derived_variables, - "multiply_gate": self.multiply_gate.derived_variables, - }, - } - dv.update(self._dv) - return dv - - @property - def gradients(self): - """A dictionary of the module parameter gradients.""" - return { - "components": { - "conv_1x1": self.conv_1x1.gradients, - "add_skip": self.add_skip.gradients, - "add_residual": self.add_residual.gradients, - "conv_dilation": self.conv_dilation.gradients, - "multiply_gate": self.multiply_gate.gradients, - } - } - - def forward(self, X_main, X_skip=None): - """ - Compute the module output on a single minibatch. - - Parameters - ---------- - X_main : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The input volume consisting of `n_ex` examples, each with dimension - (`in_rows`, `in_cols`, `in_ch`). - X_skip : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)`, or None - The output of the preceding skip-connection if this is not the - first module in the network. - - Returns - ------- - Y_main : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - The output of the main pathway. 
- Y_skip : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - The output of the skip-connection pathway. - """ - self.X_main, self.X_skip = X_main, X_skip - conv_dilation_out = self.conv_dilation.forward(X_main) - - tanh_gate = self.tanh.fn(conv_dilation_out) - sigm_gate = self.sigm.fn(conv_dilation_out) - - multiply_gate_out = self.multiply_gate.forward([tanh_gate, sigm_gate]) - conv_1x1_out = self.conv_1x1.forward(multiply_gate_out) - - # if this is the first wavenet block, initialize the "previous" skip - # connection sum to 0 - self.X_skip = np.zeros_like(conv_1x1_out) if X_skip is None else X_skip - - Y_skip = self.add_skip.forward([X_skip, conv_1x1_out]) - Y_main = self.add_residual.forward([X_main, conv_1x1_out]) - - self._dv["tanh_out"] = tanh_gate - self._dv["sigm_out"] = sigm_gate - self._dv["conv_dilation_out"] = conv_dilation_out - self._dv["multiply_gate_out"] = multiply_gate_out - self._dv["conv_1x1_out"] = conv_1x1_out - return Y_main, Y_skip - - def backward(self, dY_skip, dY_main=None): - dX_skip, dConv_1x1_out = self.add_skip.backward(dY_skip) - - # if this is the last wavenet block, dY_main will be None. if not, - # calculate the error contribution from dY_main and add it to the - # contribution from the skip path - dX_main = np.zeros_like(self.X_main) - if dY_main is not None: - dX_main, dConv_1x1_main = self.add_residual.backward(dY_main) - dConv_1x1_out += dConv_1x1_main - - dMultiply_out = self.conv_1x1.backward(dConv_1x1_out) - dTanh_out, dSigm_out = self.multiply_gate.backward(dMultiply_out) - - conv_dilation_out = self.derived_variables["conv_dilation_out"] - dTanh_in = dTanh_out * self.tanh.grad(conv_dilation_out) - dSigm_in = dSigm_out * self.sigm.grad(conv_dilation_out) - dDilation_out = dTanh_in + dSigm_in - - conv_back = self.conv_dilation.backward(dDilation_out) - dX_main += conv_back - - self._dv["dLdTanh"] = dTanh_out - self._dv["dLdSigmoid"] = dSigm_out - self._dv["dLdConv_1x1"] = dConv_1x1_out - self._dv["dLdMultiply"] = dMultiply_out - self._dv["dLdConv_dilation"] = dDilation_out - return dX_main, dX_skip - - -class SkipConnectionIdentityModule(ModuleBase): - def __init__( - self, - out_ch, - kernel_shape1, - kernel_shape2, - stride1=1, - stride2=1, - act_fn=None, - epsilon=1e-5, - momentum=0.9, - optimizer=None, - init="glorot_uniform", - ): - """ - A ResNet-like "identity" shortcut module. - - Notes - ----- - The identity module enforces `same` padding during each convolution to - ensure module output has same dims as its input. - - .. code-block:: text - - X -> Conv2D -> Act_fn -> BatchNorm2D -> Conv2D -> BatchNorm2D -> + -> Act_fn - \______________________________________________________________/ - - References - ---------- - .. [1] He et al. (2015). "Deep residual learning for image - recognition." https://arxiv.org/pdf/1512.03385.pdf - - Parameters - ---------- - out_ch : int - The number of filters/kernels to compute in the first convolutional - layer. - kernel_shape1 : 2-tuple - The dimension of a single 2D filter/kernel in the first - convolutional layer. - kernel_shape2 : 2-tuple - The dimension of a single 2D filter/kernel in the second - convolutional layer. - stride1 : int - The stride/hop of the convolution kernels in the first - convolutional layer. Default is 1. - stride2 : int - The stride/hop of the convolution kernels in the second - convolutional layer. Default is 1. - act_fn : :doc:`Activation ` object or None - The activation function for computing Y[t]. If None, use the - identity :math:`f(x) = x` by default. 
Default is None. - epsilon : float - A small smoothing constant to use during - :class:`~numpy_ml.neural_nets.layers.BatchNorm2D` computation to - avoid divide-by-zero errors. Default is 1e-5. - momentum : float - The momentum term for the running mean/running std calculations in - the :class:`~numpy_ml.neural_nets.layers.BatchNorm2D` layers. The - closer this is to 1, the less weight will be given to the mean/std - of the current batch (i.e., higher smoothing). Default is 0.9. - optimizer : str or :doc:`Optimizer ` object or None - The optimization strategy to use when performing gradient updates - within the :meth:`update` method. If None, use the - :class:`~numpy_ml.neural_nets.optimizers.SGD` optimizer with - default parameters. Default is None. - init : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is 'glorot_uniform'. - """ - super().__init__() - - self.init = init - self.in_ch = None - self.out_ch = out_ch - self.epsilon = epsilon - self.stride1 = stride1 - self.stride2 = stride2 - self.optimizer = optimizer - self.momentum = momentum - self.kernel_shape1 = kernel_shape1 - self.kernel_shape2 = kernel_shape2 - self.act_fn = Affine(slope=1, intercept=0) if act_fn is None else act_fn - - self._init_params() - - def _init_params(self): - self._dv = {} - - self.conv1 = Conv2D( - pad="same", - init=self.init, - out_ch=self.out_ch, - act_fn=self.act_fn, - stride=self.stride1, - optimizer=self.optimizer, - kernel_shape=self.kernel_shape1, - ) - # we can't initialize `conv2` without X's dimensions; see `forward` - # for further details - self.batchnorm1 = BatchNorm2D(epsilon=self.epsilon, momentum=self.momentum) - self.batchnorm2 = BatchNorm2D(epsilon=self.epsilon, momentum=self.momentum) - self.add3 = Add(self.act_fn) - - def _init_conv2(self): - self.conv2 = Conv2D( - pad="same", - init=self.init, - out_ch=self.in_ch, - stride=self.stride2, - optimizer=self.optimizer, - kernel_shape=self.kernel_shape2, - act_fn=Affine(slope=1, intercept=0), - ) - - @property - def parameters(self): - """A dictionary of the module parameters.""" - return { - "components": { - "add3": self.add3.parameters, - "conv1": self.conv1.parameters, - "conv2": self.conv2.parameters, - "batchnorm1": self.batchnorm1.parameters, - "batchnorm2": self.batchnorm2.parameters, - } - } - - @property - def hyperparameters(self): - """A dictionary of the module hyperparameters.""" - return { - "layer": "SkipConnectionIdentityModule", - "init": self.init, - "in_ch": self.in_ch, - "out_ch": self.out_ch, - "epsilon": self.epsilon, - "stride1": self.stride1, - "stride2": self.stride2, - "momentum": self.momentum, - "optimizer": self.optimizer, - "act_fn": str(self.act_fn), - "kernel_shape1": self.kernel_shape1, - "kernel_shape2": self.kernel_shape2, - "component_ids": ["conv1", "batchnorm1", "conv2", "batchnorm2", "add3"], - "components": { - "add3": self.add3.hyperparameters, - "conv1": self.conv1.hyperparameters, - "conv2": self.conv2.hyperparameters, - "batchnorm1": self.batchnorm1.hyperparameters, - "batchnorm2": self.batchnorm2.hyperparameters, - }, - } - - @property - def derived_variables(self): - """A dictionary of intermediate values computed during the - forward/backward passes.""" - dv = { - "conv1_out": None, - "conv2_out": None, - "batchnorm1_out": None, - "batchnorm2_out": None, - "components": { - "add3": self.add3.derived_variables, - "conv1": self.conv1.derived_variables, - "conv2": self.conv2.derived_variables, - "batchnorm1": 
self.batchnorm1.derived_variables, - "batchnorm2": self.batchnorm2.derived_variables, - }, - } - dv.update(self._dv) - return dv - - @property - def gradients(self): - """A dictionary of the accumulated module parameter gradients.""" - return { - "components": { - "add3": self.add3.gradients, - "conv1": self.conv1.gradients, - "conv2": self.conv2.gradients, - "batchnorm1": self.batchnorm1.gradients, - "batchnorm2": self.batchnorm2.gradients, - } - } - - def forward(self, X, retain_derived=True): - """ - Compute the module output given input volume `X`. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape (n_ex, in_rows, in_cols, in_ch) - The input volume consisting of `n_ex` examples, each with dimension - (`in_rows`, `in_cols`, `in_ch`). - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape (n_ex, out_rows, out_cols, out_ch) - The module output volume. - """ - if not hasattr(self, "conv2"): - self.in_ch = X.shape[3] - self._init_conv2() - - conv1_out = self.conv1.forward(X, retain_derived) - bn1_out = self.batchnorm1.forward(conv1_out, retain_derived) - conv2_out = self.conv2.forward(bn1_out, retain_derived) - bn2_out = self.batchnorm2.forward(conv2_out, retain_derived) - Y = self.add3.forward([X, bn2_out], retain_derived) - - if retain_derived: - self._dv["conv1_out"] = conv1_out - self._dv["conv2_out"] = conv2_out - self._dv["batchnorm1_out"] = bn1_out - self._dv["batchnorm2_out"] = bn2_out - return Y - - def backward(self, dLdY, retain_grads=True): - """ - Compute the gradient of the loss with respect to the layer parameters. - - Parameters - ---------- - dLdy : :py:class:`ndarray ` of shape (`n_ex, out_rows, out_cols, out_ch`) or list of arrays - The gradient(s) of the loss with respect to the module output(s). - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape (n_ex, in_rows, in_cols, in_ch) - The gradient of the loss with respect to the module input volume. - """ - dX, dBn2_out = self.add3.backward(dLdY, retain_grads) - dConv2_out = self.batchnorm2.backward(dBn2_out, retain_grads) - dBn1_out = self.conv2.backward(dConv2_out, retain_grads) - dConv1_out = self.batchnorm1.backward(dBn1_out, retain_grads) - dX += self.conv1.backward(dConv1_out, retain_grads) - - self._dv["dLdAdd3_X"] = dX - self._dv["dLdBn2"] = dBn2_out - self._dv["dLdBn1"] = dBn1_out - self._dv["dLdConv2"] = dConv2_out - self._dv["dLdConv1"] = dConv1_out - return dX - - -class SkipConnectionConvModule(ModuleBase): - def __init__( - self, - out_ch1, - out_ch2, - kernel_shape1, - kernel_shape2, - kernel_shape_skip, - pad1=0, - pad2=0, - stride1=1, - stride2=1, - act_fn=None, - epsilon=1e-5, - momentum=0.9, - stride_skip=1, - optimizer=None, - init="glorot_uniform", - ): - """ - A ResNet-like "convolution" shortcut module. - - Notes - ----- - In contrast to :class:`SkipConnectionIdentityModule`, the additional - `conv2d_skip` and `batchnorm_skip` layers in the shortcut path allow - adjusting the dimensions of `X` to match the output of the main set of - convolutions. - - .. 
code-block:: text - - X -> Conv2D -> Act_fn -> BatchNorm2D -> Conv2D -> BatchNorm2D -> + -> Act_fn - \_____________________ Conv2D -> Batchnorm2D __________________/ - - References - ---------- - .. [1] He et al. (2015). "Deep residual learning for image - recognition." https://arxiv.org/pdf/1512.03385.pdf - - Parameters - ---------- - out_ch1 : int - The number of filters/kernels to compute in the first convolutional - layer. - out_ch2 : int - The number of filters/kernels to compute in the second - convolutional layer. - kernel_shape1 : 2-tuple - The dimension of a single 2D filter/kernel in the first - convolutional layer. - kernel_shape2 : 2-tuple - The dimension of a single 2D filter/kernel in the second - convolutional layer. - kernel_shape_skip : 2-tuple - The dimension of a single 2D filter/kernel in the "skip" - convolutional layer. - stride1 : int - The stride/hop of the convolution kernels in the first - convolutional layer. Default is 1. - stride2 : int - The stride/hop of the convolution kernels in the second - convolutional layer. Default is 1. - stride_skip : int - The stride/hop of the convolution kernels in the "skip" - convolutional layer. Default is 1. - pad1 : int, tuple, or 'same' - The number of rows/columns of 0's to pad the input to the first - convolutional layer with. Default is 0. - pad2 : int, tuple, or 'same' - The number of rows/columns of 0's to pad the input to the second - convolutional layer with. Default is 0. - act_fn : :doc:`Activation ` object or None - The activation function for computing ``Y[t]``. If None, use the - identity :math:`f(x) = x` by default. Default is None. - epsilon : float - A small smoothing constant to use during - :class:`~numpy_ml.neural_nets.layers.BatchNorm2D` computation to - avoid divide-by-zero errors. Default is 1e-5. - momentum : float - The momentum term for the running mean/running std calculations in - the :class:`~numpy_ml.neural_nets.layers.BatchNorm2D` layers. The - closer this is to 1, the less weight will be given to the mean/std - of the current batch (i.e., higher smoothing). Default is 0.9. - init : str - The weight initialization strategy. Valid entries are - {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'}. - optimizer : str or :doc:`Optimizer ` object - The optimization strategy to use when performing gradient updates - within the :class:`update` method. If None, use the - :class:`~numpy_ml.neural_nets.optimizers.SGD` optimizer with - default parameters. Default is None. 
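To size the skip convolution so its output matches the main path, ``_calc_skip_padding`` below uses the standard convolution output-size arithmetic, ``out = floor(1 + (in + pad_total - kernel) / stride)``. A small sketch of that formula (the helper name and numbers are illustrative only):

.. code-block:: python

    import math

    def conv_out_dim(in_dim, kernel, stride, pad_before, pad_after):
        return math.floor(1 + (in_dim + pad_before + pad_after - kernel) / stride)

    # a 32-wide input with a 3-wide kernel, stride 1, and padding 1 on each side keeps its size
    assert conv_out_dim(32, kernel=3, stride=1, pad_before=1, pad_after=1) == 32
    # stride 2 roughly halves it
    assert conv_out_dim(32, kernel=3, stride=2, pad_before=1, pad_after=1) == 16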
- """ - super().__init__() - - self.init = init - self.pad1 = pad1 - self.pad2 = pad2 - self.in_ch = None - self.out_ch1 = out_ch1 - self.out_ch2 = out_ch2 - self.epsilon = epsilon - self.stride1 = stride1 - self.stride2 = stride2 - self.momentum = momentum - self.optimizer = optimizer - self.stride_skip = stride_skip - self.kernel_shape1 = kernel_shape1 - self.kernel_shape2 = kernel_shape2 - self.kernel_shape_skip = kernel_shape_skip - self.act_fn = Affine(slope=1, intercept=0) if act_fn is None else act_fn - - self._init_params() - - def _init_params(self, X=None): - self._dv = {} - self.conv1 = Conv2D( - pad=self.pad1, - init=self.init, - act_fn=self.act_fn, - out_ch=self.out_ch1, - stride=self.stride1, - optimizer=self.optimizer, - kernel_shape=self.kernel_shape1, - ) - self.conv2 = Conv2D( - pad=self.pad2, - init=self.init, - out_ch=self.out_ch2, - stride=self.stride2, - optimizer=self.optimizer, - kernel_shape=self.kernel_shape2, - act_fn=Affine(slope=1, intercept=0), - ) - # we can't initialize `conv_skip` without X's dimensions; see `forward` - # for further details - self.batchnorm1 = BatchNorm2D(epsilon=self.epsilon, momentum=self.momentum) - self.batchnorm2 = BatchNorm2D(epsilon=self.epsilon, momentum=self.momentum) - self.batchnorm_skip = BatchNorm2D(epsilon=self.epsilon, momentum=self.momentum) - self.add3 = Add(self.act_fn) - - def _calc_skip_padding(self, X): - pads = [] - for p in [self.pad1, self.pad2]: - if isinstance(p, int): - pads.append((p, p, p, p)) - elif isinstance(p, tuple) and len(p) == 2: - pads.append((p[0], p[0], p[1], p[1])) - self.pad1, self.pad2 = pads - - # compute the dimensions of the convolution1 output - s1 = self.stride1 - fr1, fc1 = self.kernel_shape1 - _, in_rows, in_cols, _ = X.shape - pr11, pr12, pc11, pc12 = self.pad1 - - out_rows1 = np.floor(1 + (in_rows + pr11 + pr12 - fr1) / s1).astype(int) - out_cols1 = np.floor(1 + (in_cols + pc11 + pc12 - fc1) / s1).astype(int) - - # compute the dimensions of the convolution2 output - s2 = self.stride2 - fr2, fc2 = self.kernel_shape2 - pr21, pr22, pc21, pc22 = self.pad2 - - out_rows2 = np.floor(1 + (out_rows1 + pr21 + pr22 - fr2) / s2).astype(int) - out_cols2 = np.floor(1 + (out_cols1 + pc21 + pc22 - fc2) / s2).astype(int) - - # finally, compute the appropriate padding dims for the skip convolution - desired_dims = (out_rows2, out_cols2) - self.pad_skip = calc_pad_dims_2D( - X.shape, - desired_dims, - stride=self.stride_skip, - kernel_shape=self.kernel_shape_skip, - ) - - def _init_conv_skip(self, X): - self._calc_skip_padding(X) - self.conv_skip = Conv2D( - init=self.init, - pad=self.pad_skip, - out_ch=self.out_ch2, - stride=self.stride_skip, - kernel_shape=self.kernel_shape_skip, - act_fn=Affine(slope=1, intercept=0), - optimizer=self.optimizer, - ) - - @property - def parameters(self): - """A dictionary of the module parameters.""" - return { - "components": { - "add3": self.add3.parameters, - "conv1": self.conv1.parameters, - "conv2": self.conv2.parameters, - "conv_skip": self.conv_skip.parameters - if hasattr(self, "conv_skip") - else None, - "batchnorm1": self.batchnorm1.parameters, - "batchnorm2": self.batchnorm2.parameters, - "batchnorm_skip": self.batchnorm_skip.parameters, - } - } - - @property - def hyperparameters(self): - """A dictionary of the module hyperparameters.""" - return { - "layer": "SkipConnectionConvModule", - "init": self.init, - "pad1": self.pad1, - "pad2": self.pad2, - "in_ch": self.in_ch, - "out_ch1": self.out_ch1, - "out_ch2": self.out_ch2, - "epsilon": self.epsilon, - 
"stride1": self.stride1, - "stride2": self.stride2, - "momentum": self.momentum, - "act_fn": str(self.act_fn), - "stride_skip": self.stride_skip, - "kernel_shape1": self.kernel_shape1, - "kernel_shape2": self.kernel_shape2, - "kernel_shape_skip": self.kernel_shape_skip, - "pad_skip": self.pad_skip if hasattr(self, "pad_skip") else None, - "component_ids": [ - "add3", - "conv1", - "conv2", - "conv_skip", - "batchnorm1", - "batchnorm2", - "batchnorm_skip", - ], - "components": { - "add3": self.add3.hyperparameters, - "conv1": self.conv1.hyperparameters, - "conv2": self.conv2.hyperparameters, - "conv_skip": self.conv_skip.hyperparameters - if hasattr(self, "conv_skip") - else None, - "batchnorm1": self.batchnorm1.hyperparameters, - "batchnorm2": self.batchnorm2.hyperparameters, - "batchnorm_skip": self.batchnorm_skip.hyperparameters, - }, - } - - @property - def derived_variables(self): - """A dictionary of intermediate values computed during the - forward/backward passes.""" - dv = { - "conv1_out": None, - "conv2_out": None, - "conv_skip_out": None, - "batchnorm1_out": None, - "batchnorm2_out": None, - "batchnorm_skip_out": None, - "components": { - "add3": self.add3.derived_variables, - "conv1": self.conv1.derived_variables, - "conv2": self.conv2.derived_variables, - "conv_skip": self.conv_skip.derived_variables - if hasattr(self, "conv_skip") - else None, - "batchnorm1": self.batchnorm1.derived_variables, - "batchnorm2": self.batchnorm2.derived_variables, - "batchnorm_skip": self.batchnorm_skip.derived_variables, - }, - } - dv.update(self._dv) - return dv - - @property - def gradients(self): - """A dictionary of the accumulated module parameter gradients.""" - return { - "components": { - "add3": self.add3.gradients, - "conv1": self.conv1.gradients, - "conv2": self.conv2.gradients, - "conv_skip": self.conv_skip.gradients - if hasattr(self, "conv_skip") - else None, - "batchnorm1": self.batchnorm1.gradients, - "batchnorm2": self.batchnorm2.gradients, - "batchnorm_skip": self.batchnorm_skip.gradients, - } - } - - def forward(self, X, retain_derived=True): - """ - Compute the layer output given input volume `X`. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The input volume consisting of `n_ex` examples, each with dimension - (`in_rows`, `in_cols`, `in_ch`). - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - The module output volume. 
- """ - # now that we have the input dims for X we can initialize the proper - # padding in the `conv_skip` layer - if not hasattr(self, "conv_skip"): - self._init_conv_skip(X) - self.in_ch = X.shape[3] - - conv1_out = self.conv1.forward(X, retain_derived) - bn1_out = self.batchnorm1.forward(conv1_out, retain_derived) - conv2_out = self.conv2.forward(bn1_out, retain_derived) - bn2_out = self.batchnorm2.forward(conv2_out, retain_derived) - conv_skip_out = self.conv_skip.forward(X, retain_derived) - bn_skip_out = self.batchnorm_skip.forward(conv_skip_out, retain_derived) - Y = self.add3.forward([bn_skip_out, bn2_out], retain_derived) - - if retain_derived: - self._dv["conv1_out"] = conv1_out - self._dv["conv2_out"] = conv2_out - self._dv["batchnorm1_out"] = bn1_out - self._dv["batchnorm2_out"] = bn2_out - self._dv["conv_skip_out"] = conv_skip_out - self._dv["batchnorm_skip_out"] = bn_skip_out - return Y - - def backward(self, dLdY, retain_grads=True): - """ - Compute the gradient of the loss with respect to the module parameters. - - Parameters - ---------- - dLdy : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - or list of arrays - The gradient(s) of the loss with respect to the module output(s). - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dX : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The gradient of the loss with respect to the module input volume. - """ - dBnskip_out, dBn2_out = self.add3.backward(dLdY) - dConvskip_out = self.batchnorm_skip.backward(dBnskip_out) - dX = self.conv_skip.backward(dConvskip_out) - - dConv2_out = self.batchnorm2.backward(dBn2_out) - dBn1_out = self.conv2.backward(dConv2_out) - dConv1_out = self.batchnorm1.backward(dBn1_out) - dX += self.conv1.backward(dConv1_out) - - if retain_grads: - self._dv["dLdAdd3_X"] = dX - self._dv["dLdBn1"] = dBn1_out - self._dv["dLdBn2"] = dBn2_out - self._dv["dLdConv1"] = dConv1_out - self._dv["dLdConv2"] = dConv2_out - self._dv["dLdBnSkip"] = dBnskip_out - self._dv["dLdConvSkip"] = dConvskip_out - return dX - - -class BidirectionalLSTM(ModuleBase): - def __init__( - self, - n_out, - act_fn=None, - gate_fn=None, - merge_mode="concat", - init="glorot_uniform", - optimizer=None, - ): - """ - A single bidirectional long short-term memory (LSTM) layer. - - Parameters - ---------- - n_out : int - The dimension of a single hidden state / output on a given timestep - act_fn : :doc:`Activation ` object or None - The activation function for computing ``A[t]``. If not specified, - use :class:`~numpy_ml.neural_nets.activations.Tanh` by default. - gate_fn : :doc:`Activation ` object or None - The gate function for computing the update, forget, and output - gates. If not specified, use - :class:`~numpy_ml.neural_nets.activations.Sigmoid` by default. - merge_mode : {"sum", "multiply", "concat", "average"} - Mode by which outputs of the forward and backward LSTMs will be - combined. Default is 'concat'. - optimizer : str or :doc:`Optimizer ` object or None - The optimization strategy to use when performing gradient updates - within the `update` method. If None, use the - :class:`~numpy_ml.neural_nets.optimizers.SGD` optimizer with - default parameters. Default is None. - init : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is 'glorot_uniform'. 
- """ - super().__init__() - - self.init = init - self.n_in = None - self.n_out = n_out - self.optimizer = optimizer - self.merge_mode = merge_mode - self.act_fn = Tanh() if act_fn is None else act_fn - self.gate_fn = Sigmoid() if gate_fn is None else gate_fn - self._init_params() - - def _init_params(self): - self.cell_fwd = LSTMCell( - init=self.init, - n_out=self.n_out, - act_fn=self.act_fn, - gate_fn=self.gate_fn, - optimizer=self.optimizer, - ) - self.cell_bwd = LSTMCell( - init=self.init, - n_out=self.n_out, - act_fn=self.act_fn, - gate_fn=self.gate_fn, - optimizer=self.optimizer, - ) - - def forward(self, X): - """ - Run a forward pass across all timesteps in the input. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in, n_t)` - Input consisting of `n_ex` examples each of dimensionality `n_in` - and extending for `n_t` timesteps. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, n_out, n_t)` - The value of the hidden state for each of the `n_ex` examples - across each of the `n_t` timesteps. - """ - Y_fwd, Y_bwd, Y = [], [], [] - n_ex, self.n_in, n_t = X.shape - - # forward LSTM - for t in range(n_t): - yt, ct = self.cell_fwd.forward(X[:, :, t]) - Y_fwd.append(yt) - - # backward LSTM - for t in reversed(range(n_t)): - yt, ct = self.cell_bwd.forward(X[:, :, t]) - Y_bwd.insert(0, yt) - - # merge forward and backward states - for t in range(n_t): - if self.merge_mode == "concat": - Y.append(np.concatenate([Y_fwd[t], Y_bwd[t]], axis=1)) - elif self.merge_mode == "sum": - Y.append(Y_fwd[t] + Y_bwd[t]) - elif self.merge_mode == "average": - Y.append((Y_fwd[t] + Y_bwd[t]) / 2) - elif self.merge_mode == "multiply": - Y.append(Y_fwd[t] * Y_bwd[t]) - - self.Y_fwd, self.Y_bwd = Y_fwd, Y_bwd - return np.dstack(Y) - - def backward(self, dLdA): - """ - Run a backward pass across all timesteps in the input. - - Parameters - ---------- - dLdA : :py:class:`ndarray ` of shape `(n_ex, n_out, n_t)` - The gradient of the loss with respect to the layer output for each - of the `n_ex` examples across all `n_t` timesteps. - - Returns - ------- - dLdX : :py:class:`ndarray ` of shape `(n_ex, n_in, n_t)` - The value of the hidden state for each of the `n_ex` examples - across each of the `n_t` timesteps. 
- """ - assert self.trainable, "Layer is frozen" - - n_ex, n_out, n_t = dLdA.shape - dLdX_f, dLdX_b, dLdX = [], [], [] - - # forward LSTM - for t in reversed(range(n_t)): - if self.merge_mode == "concat": - dLdXt_f = self.cell_fwd.backward(dLdA[:, : self.n_out, t]) - elif self.merge_mode == "sum": - dLdXt_f = self.cell_fwd.backward(dLdA[:, :, t]) - elif self.merge_mode == "multiplty": - dLdXt_f = self.cell_fwd.backward(dLdA[:, :, t] * self.Y_bwd[t]) - elif self.merge_mode == "average": - dLdXt_f = self.cell_fwd.backward(dLdA[:, :, t] * 0.5) - dLdX_f.insert(0, dLdXt_f) - - # backward LSTM - for t in range(n_t): - if self.merge_mode == "concat": - dLdXt_b = self.cell_bwd.backward(dLdA[:, self.n_out :, t]) - elif self.merge_mode == "sum": - dLdXt_b = self.cell_bwd.backward(dLdA[:, :, t]) - elif self.merge_mode == "multiplty": - dLdXt_b = self.cell_bwd.backward(dLdA[:, :, t] * self.Y_fwd[t]) - elif self.merge_mode == "average": - dLdXt_b = self.cell_bwd.backward(dLdA[:, :, t] * 0.5) - dLdX_b.append(dLdXt_b) - - for t in range(n_t): - dLdX.append(dLdX_f[t] + dLdX_b[t]) - - return np.dstack(dLdX) - - @property - def derived_variables(self): - """A dictionary of intermediate values computed during the - forward/backward passes.""" - return { - "components": { - "cell_fwd": self.cell_fwd.derived_variables, - "cell_bwd": self.cell_bwd.derived_variables, - } - } - - @property - def gradients(self): - """A dictionary of the accumulated module parameter gradients.""" - return { - "components": { - "cell_fwd": self.cell_fwd.gradients, - "cell_bwd": self.cell_bwd.gradients, - } - } - - @property - def parameters(self): - """A dictionary of the module parameters.""" - return { - "components": { - "cell_fwd": self.cell_fwd.parameters, - "cell_bwd": self.cell_bwd.parameters, - } - } - - @property - def hyperparameters(self): - """A dictionary of the module hyperparameters.""" - return { - "layer": "BidirectionalLSTM", - "init": self.init, - "n_in": self.n_in, - "n_out": self.n_out, - "act_fn": str(self.act_fn), - "optimizer": self.optimizer, - "merge_mode": self.merge_mode, - "component_ids": ["cell_fwd", "cell_bwd"], - "components": { - "cell_fwd": self.cell_fwd.hyperparameters, - "cell_bwd": self.cell_bwd.hyperparameters, - }, - } - - -class MultiHeadedAttentionModule(ModuleBase): - def __init__(self, n_heads=8, dropout_p=0, init="glorot_uniform", optimizer=None): - """ - A mutli-headed attention module. - - Notes - ----- - Multi-head attention allows a model to jointly attend to information from - different representation subspaces at different positions. With a - single head, this information would get averaged away when the - attention weights are combined with the value - - .. math:: - - \\text{MultiHead}(\mathbf{Q}, \mathbf{K}, \mathbf{V}) - = [\\text{head}_1; ...; \\text{head}_h] \\mathbf{W}^{(O)} - - where - - .. math:: - - \\text{head}_i = \\text{SDP_attention}( - \mathbf{Q W}_i^{(Q)}, \mathbf{K W}_i^{(K)}, \mathbf{V W}_i^{(V)}) - - and the projection weights are parameter matrices: - - .. math:: - - \mathbf{W}_i^{(Q)} &\in - \mathbb{R}^{(\\text{kqv_dim} \ \\times \ \\text{latent_dim})} \\\\ - \mathbf{W}_i^{(K)} &\in - \mathbb{R}^{(\\text{kqv_dim} \ \\times \ \\text{latent_dim})} \\\\ - \mathbf{W}_i^{(V)} &\in - \mathbb{R}^{(\\text{kqv_dim} \ \\times \ \\text{latent_dim})} \\\\ - \mathbf{W}^{(O)} &\in - \mathbb{R}^{(\\text{n_heads} \cdot \\text{latent_dim} \ \\times \ \\text{kqv_dim})} - - Importantly, the current module explicitly assumes that - - .. 
math:: - - \\text{kqv_dim} = \\text{dim(query)} = \\text{dim(keys)} = \\text{dim(values)} - - and that - - .. math:: - - \\text{latent_dim} = \\text{kqv_dim / n_heads} - - **[MH Attention Head h]**: - - .. code-block:: text - - K --> W_h^(K) ------\\ - V --> W_h^(V) ------- > DP_Attention --> head_h - Q --> W_h^(Q) ------/ - - The full **[MultiHeadedAttentionModule]** then becomes - - .. code-block:: text - - ----------------- - K --> | [Attn Head 1] | --> head_1 --\\ - V --> | [Attn Head 2] | --> head_2 --\\ - Q --> | ... | ... --> Concat --> W^(O) --> MH_out - | [Attn Head Z] | --> head_Z --/ - ----------------- - - Due to the reduced dimension of each head, the total computational cost - is similar to that of a single attention head with full (i.e., kqv_dim) - dimensionality. - - Parameters - ---------- - n_heads : int - The number of simultaneous attention heads to use. Note that the - larger `n_heads`, the smaller the dimensionality of any single - head, since ``latent_dim = kqv_dim / n_heads``. Default is 8. - dropout_p : float in [0, 1) - The dropout propbability during training, applied to the output of - the softmax in each dot-product attention head. If 0, no dropout is - applied. Default is 0. - init : {'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'} - The weight initialization strategy. Default is 'glorot_uniform'. - optimizer : str, :doc:`Optimizer ` object, or None - The optimization strategy to use when performing gradient updates - within the :meth:`update` method. If None, use the - :class:`~numpy_ml.neural_nets.optimizers.SGD` optimizer with default - parameters. Default is None. - """ - self.init = init - self.kqv_dim = None - self.projections = {} - self.n_heads = n_heads - self.optimizer = optimizer - self.dropout_p = dropout_p - self.is_initialized = False - - def _init_params(self): - self._dv = {} - - # assume dim(keys) = dim(query) = dim(values) - assert self.kqv_dim % self.n_heads == 0 - self.latent_dim = self.kqv_dim // self.n_heads - - self.attention = DotProductAttention(scale=True, dropout_p=self.dropout_p) - self.projections = { - k: Dropout( - FullyConnected( - init=self.init, - n_out=self.kqv_dim, - optimizer=self.optimizer, - act_fn="Affine(slope=1, intercept=0)", - ), - self.dropout_p, - ) - for k in ["Q", "K", "V", "O"] - } - - self.is_initialized = True - - def forward(self, Q, K, V): - if not self.is_initialized: - self.kqv_dim = Q.shape[-1] - self._init_params() - - # project queries, keys, and values into the `latent_dim`-dimensional subspace - n_ex = Q.shape[0] - for k, x in zip(["Q", "K", "V"], [Q, K, V]): - proj = self.projections[k].forward(x) - proj = proj.reshape(n_ex, -1, self.n_heads, self.latent_dim).swapaxes(1, 2) - self._dv["{}_proj".format(k)] = proj - - dv = self.derived_variables - Q_proj, K_proj, V_proj = dv["Q_proj"], dv["K_proj"], dv["V_proj"] - - # apply scaled dot-product attention to the projected vectors - attn = self.attention - attn_out = attn.forward(Q_proj, K_proj, V_proj) - self._dv["attention_weights"] = attn.derived_variables["attention_weights"] - - # concatenate the different heads using `reshape` to create an - # `kqv_dim`-dim vector - attn_out = attn_out.swapaxes(1, 2).reshape(n_ex, self.kqv_dim) - self._dv["attention_out"] = attn_out.reshape(n_ex, -1, self.kqv_dim) - - # apply the final output projection - Y = self.projections["O"].forward(attn_out) - Y = Y.reshape(n_ex, -1, self.kqv_dim) - return Y - - def backward(self, dLdy): - n_ex = dLdy.shape[0] - dLdy = dLdy.reshape(n_ex, self.kqv_dim) - dLdX = 
self.projections["O"].backward(dLdy) - dLdX = dLdX.reshape(n_ex, self.n_heads, -1, self.latent_dim) - - dLdQ_proj, dLdK_proj, dLdV_proj = self.attention.backward(dLdX) - - self._dv["dQ_proj"] = dLdQ_proj - self._dv["dK_proj"] = dLdK_proj - self._dv["dV_proj"] = dLdV_proj - - dLdQ_proj = dLdQ_proj.reshape(n_ex, self.kqv_dim) - dLdK_proj = dLdK_proj.reshape(n_ex, self.kqv_dim) - dLdV_proj = dLdV_proj.reshape(n_ex, self.kqv_dim) - - dLdQ = self.projections["Q"].backward(dLdQ_proj) - dLdK = self.projections["K"].backward(dLdK_proj) - dLdV = self.projections["V"].backward(dLdV_proj) - return dLdQ, dLdK, dLdV - - @property - def derived_variables(self): - """A dictionary of intermediate values computed during the - forward/backward passes.""" - dv = { - "Q_proj": None, - "K_proj": None, - "V_proj": None, - "components": { - "Q": self.projections["Q"].derived_variables, - "K": self.projections["K"].derived_variables, - "V": self.projections["V"].derived_variables, - "O": self.projections["O"].derived_variables, - "attention": self.attention.derived_variables, - }, - } - dv.update(self._dv) - return dv - - @property - def gradients(self): - """A dictionary of the accumulated module parameter gradients.""" - return { - "components": { - "Q": self.projections["Q"].gradients, - "K": self.projections["K"].gradients, - "V": self.projections["V"].gradients, - "O": self.projections["O"].gradients, - "attention": self.attention.gradients, - } - } - - @property - def parameters(self): - """A dictionary of the module parameters.""" - return { - "components": { - "Q": self.projections["Q"].parameters, - "K": self.projections["K"].parameters, - "V": self.projections["V"].parameters, - "O": self.projections["O"].parameters, - "attention": self.attention.parameters, - } - } - - @property - def hyperparameters(self): - """A dictionary of the module hyperparameters.""" - return { - "layer": "MultiHeadedAttentionModule", - "init": self.init, - "kqv_dim": self.kqv_dim, - "latent_dim": self.latent_dim, - "n_heads": self.n_heads, - "dropout_p": self.dropout_p, - "component_ids": ["attention", "Q", "K", "V", "O"], - "components": { - "Q": self.projections["Q"].hyperparameters, - "K": self.projections["K"].hyperparameters, - "V": self.projections["V"].hyperparameters, - "O": self.projections["O"].hyperparameters, - "attention": self.attention.hyperparameters, - }, - } diff --git a/aitk/keras/numpy_ml_utils/README.md b/aitk/keras/numpy_ml_utils/README.md deleted file mode 100644 index a50b58b..0000000 --- a/aitk/keras/numpy_ml_utils/README.md +++ /dev/null @@ -1,38 +0,0 @@ -# Utilities - -The utilities module implements a number of useful functions and objects that -power other ML algorithms across the repo. 
- -- `data_structures.py` implements a few useful data structures - - A max- and min-heap ordered priority queue - - A [ball tree](https://en.wikipedia.org/wiki/Ball_tree) with the KNS1 algorithm ([Omohundro, 1989](http://ftp.icsi.berkeley.edu/ftp/pub/techreports/1989/tr-89-063.pdf); [Moore & Gray, 2006](http://people.ee.duke.edu/~lcarin/liu06a.pdf)) - - A discrete sampler implementing Vose's algorithm for the [alias method](https://en.wikipedia.org/wiki/Alias_method) ([Walker, 1977](https://dl.acm.org/citation.cfm?id=355749); [Vose, 1991](https://pdfs.semanticscholar.org/f65b/cde1fcf82e05388b31de80cba10bf65acc07.pdf)) - -- `kernels.py` implements several general-purpose similarity kernels - - Linear kernel - - Polynomial kernel - - Radial basis function kernel - -- `distance_metrics.py` implements common distance metrics - - Euclidean (L2) distance - - Manhattan (L1) distance - - Chebyshev (L-infinity) distance - - Minkowski-p distance - - Hamming distance - -- `graphs.py` implements simple data structures and algorithms for graph - processing. - - Undirected + directed graph objects allowing for probabilistic edge weights - - Graph generators (Erdos-Renyi, random DAGs) - - Topological sorting for DAGs - - Cycle detection - - Simple path-finding - -- `windows.py` implements several common windowing functions - - Hann - - Hamming - - Blackman-Harris - - Generalized cosine - -- `testing.py` implements helper functions that prove useful when writing unit - tests, including data generators and various assert statements diff --git a/aitk/keras/numpy_ml_utils/__init__.py b/aitk/keras/numpy_ml_utils/__init__.py deleted file mode 100644 index c90b4df..0000000 --- a/aitk/keras/numpy_ml_utils/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -from . import testing -from . import data_structures -from . import distance_metrics -from . import kernels -from . import windows -from . 
import graphs diff --git a/aitk/keras/numpy_ml_utils/data_structures.py b/aitk/keras/numpy_ml_utils/data_structures.py deleted file mode 100644 index 4a1ea31..0000000 --- a/aitk/keras/numpy_ml_utils/data_structures.py +++ /dev/null @@ -1,522 +0,0 @@ -import heapq -from copy import copy -from collections.abc import Hashable - -import numpy as np - -from .distance_metrics import euclidean - -####################################################################### -# Priority Queue # -####################################################################### - - -class PQNode(object): - def __init__(self, key, val, priority, entry_id, **kwargs): - """A generic node object for holding entries in :class:`PriorityQueue`""" - self.key = key - self.val = val - self.entry_id = entry_id - self.priority = priority - - def __repr__(self): - fstr = "PQNode(key={}, val={}, priority={}, entry_id={})" - return fstr.format(self.key, self.val, self.priority, self.entry_id) - - def to_dict(self): - """Return a dictionary representation of the node's contents""" - d = self.__dict__ - d["id"] = "PQNode" - return d - - def __gt__(self, other): - if not isinstance(other, PQNode): - return -1 - if self.priority == other.priority: - return self.entry_id > other.entry_id - return self.priority > other.priority - - def __ge__(self, other): - if not isinstance(other, PQNode): - return -1 - return self.priority >= other.priority - - def __lt__(self, other): - if not isinstance(other, PQNode): - return -1 - if self.priority == other.priority: - return self.entry_id < other.entry_id - return self.priority < other.priority - - def __le__(self, other): - if not isinstance(other, PQNode): - return -1 - return self.priority <= other.priority - - -class PriorityQueue: - def __init__(self, capacity, heap_order="max"): - """ - A priority queue implementation using a binary heap. - - Notes - ----- - A priority queue is a data structure useful for storing the top - `capacity` largest or smallest elements in a collection of values. As a - result of using a binary heap, ``PriorityQueue`` offers `O(log N)` - :meth:`push` and :meth:`pop` operations. - - Parameters - ---------- - capacity: int - The maximum number of items that can be held in the queue. - heap_order: {"max", "min"} - Whether the priority queue should retain the items with the - `capacity` smallest (`heap_order` = 'min') or `capacity` largest - (`heap_order` = 'max') priorities. - """ - assert heap_order in ["max", "min"], "heap_order must be either 'max' or 'min'" - self.capacity = capacity - self.heap_order = heap_order - - self._pq = [] - self._count = 0 - self._entry_counter = 0 - - def __repr__(self): - fstr = "PriorityQueue(capacity={}, heap_order={}) with {} items" - return fstr.format(self.capacity, self.heap_order, self._count) - - def __len__(self): - return self._count - - def __iter__(self): - return iter(self._pq) - - def push(self, key, priority, val=None): - """ - Add a new (key, value) pair with priority `priority` to the queue. - - Notes - ----- - If the queue is at capacity and `priority` exceeds the priority of the - item with the largest/smallest priority currently in the queue, replace - the current queue item with (`key`, `val`). - - Parameters - ---------- - key : hashable object - The key to insert into the queue. - priority : comparable - The priority for the `key`, `val` pair. - val : object - The value associated with `key`. Default is None. 
- """ - if self.heap_order == "max": - priority = -1 * priority - - item = PQNode(key=key, val=val, priority=priority, entry_id=self._entry_counter) - heapq.heappush(self._pq, item) - - self._count += 1 - self._entry_counter += 1 - - while self._count > self.capacity: - self.pop() - - def pop(self): - """ - Remove the item with the largest/smallest (depending on - ``self.heap_order``) priority from the queue and return it. - - Notes - ----- - In contrast to :meth:`peek`, this operation is `O(log N)`. - - Returns - ------- - item : :class:`PQNode` instance or None - Item with the largest/smallest priority, depending on - ``self.heap_order``. - """ - item = heapq.heappop(self._pq).to_dict() - if self.heap_order == "max": - item["priority"] = -1 * item["priority"] - self._count -= 1 - return item - - def peek(self): - """ - Return the item with the largest/smallest (depending on - ``self.heap_order``) priority *without* removing it from the queue. - - Notes - ----- - In contrast to :meth:`pop`, this operation is O(1). - - Returns - ------- - item : :class:`PQNode` instance or None - Item with the largest/smallest priority, depending on - ``self.heap_order``. - """ - item = None - if self._count > 0: - item = copy(self._pq[0].to_dict()) - if self.heap_order == "max": - item["priority"] = -1 * item["priority"] - return item - - -####################################################################### -# Ball Tree # -####################################################################### - - -class BallTreeNode: - def __init__(self, centroid=None, X=None, y=None): - self.left = None - self.right = None - self.radius = None - self.is_leaf = False - - self.data = X - self.targets = y - self.centroid = centroid - - def __repr__(self): - fstr = "BallTreeNode(centroid={}, is_leaf={})" - return fstr.format(self.centroid, self.is_leaf) - - def to_dict(self): - d = self.__dict__ - d["id"] = "BallTreeNode" - return d - - -class BallTree: - def __init__(self, leaf_size=40, metric=None): - """ - A ball tree data structure. - - Notes - ----- - A ball tree is a binary tree in which every node defines a - `D`-dimensional hypersphere ("ball") containing a subset of the points - to be searched. Each internal node of the tree partitions the data - points into two disjoint sets which are associated with different - balls. While the balls themselves may intersect, each point is assigned - to one or the other ball in the partition according to its distance - from the ball's center. Each leaf node in the tree defines a ball and - enumerates all data points inside that ball. - - Parameters - ---------- - leaf_size : int - The maximum number of datapoints at each leaf. Default is 40. - metric : :doc:`Distance metric ` or None - The distance metric to use for computing nearest neighbors. If - None, use the :func:`~numpy_ml.utils.distance_metrics.euclidean` - metric. Default is None. - - References - ---------- - .. [1] Omohundro, S. M. (1989). "Five balltree construction algorithms". *ICSI - Technical Report TR-89-063*. - .. [2] Liu, T., Moore, A., & Gray A. (2006). "New algorithms for efficient - high-dimensional nonparametric classification". *J. Mach. Learn. Res., - 7*, 1135-1158. - """ - self.root = None - self.leaf_size = leaf_size - self.metric = metric if metric is not None else euclidean - - def fit(self, X, y=None): - """ - Build a ball tree recursively using the O(M log N) `k`-d construction - algorithm. 
- - Notes - ----- - Recursively divides data into nodes defined by a centroid `C` and radius - `r` such that each point below the node lies within the hyper-sphere - defined by `C` and `r`. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(N, M)` - An array of `N` examples each with `M` features. - y : :py:class:`ndarray ` of shape `(N, *)` or None - An array of target values / labels associated with the entries in - `X`. Default is None. - """ - centroid, left_X, left_y, right_X, right_y = self._split(X, y) - self.root = BallTreeNode(centroid=centroid) - self.root.radius = np.max([self.metric(centroid, x) for x in X]) - self.root.left = self._build_tree(left_X, left_y) - self.root.right = self._build_tree(right_X, right_y) - - def _build_tree(self, X, y): - centroid, left_X, left_y, right_X, right_y = self._split(X, y) - - if X.shape[0] <= self.leaf_size: - leaf = BallTreeNode(centroid=centroid, X=X, y=y) - leaf.radius = np.max([self.metric(centroid, x) for x in X]) - leaf.is_leaf = True - return leaf - - node = BallTreeNode(centroid=centroid) - node.radius = np.max([self.metric(centroid, x) for x in X]) - node.left = self._build_tree(left_X, left_y) - node.right = self._build_tree(right_X, right_y) - return node - - def _split(self, X, y=None): - # find the dimension with greatest variance - split_dim = np.argmax(np.var(X, axis=0)) - - # sort X and y along split_dim - sort_ixs = np.argsort(X[:, split_dim]) - X, y = X[sort_ixs], y[sort_ixs] if y is not None else None - - # divide at median value of split_dim - med_ix = X.shape[0] // 2 - centroid = X[med_ix] # , split_dim - - # split data into two halves at the centroid (median always appears on - # the right split) - left_X, left_y = X[:med_ix], y[:med_ix] if y is not None else None - right_X, right_y = X[med_ix:], y[med_ix:] if y is not None else None - return centroid, left_X, left_y, right_X, right_y - - def nearest_neighbors(self, k, x): - """ - Find the `k` nearest neighbors in the ball tree to a query vector `x` - using the KNS1 algorithm. - - Parameters - ---------- - k : int - The number of closest points in `X` to return - x : :py:class:`ndarray ` of shape `(1, M)` - The query vector. - - Returns - ------- - nearest : list of :class:`PQNode` s of length `k` - List of the `k` points in `X` to closest to the query vector. The - ``key`` attribute of each :class:`PQNode` contains the point itself, the - ``val`` attribute contains its target, and the ``distance`` - attribute contains its distance to the query vector. 
- """ - # maintain a max-first priority queue with priority = distance to x - PQ = PriorityQueue(capacity=k, heap_order="max") - nearest = self._knn(k, x, PQ, self.root) - for n in nearest: - n.distance = self.metric(x, n.key) - return nearest - - def _knn(self, k, x, PQ, root): - dist = self.metric - dist_to_ball = dist(x, root.centroid) - root.radius - dist_to_farthest_neighbor = dist(x, PQ.peek()["key"]) if len(PQ) > 0 else np.inf - - if dist_to_ball >= dist_to_farthest_neighbor and len(PQ) == k: - return PQ - if root.is_leaf: - targets = [None] * len(root.data) if root.targets is None else root.targets - for point, target in zip(root.data, targets): - dist_to_x = dist(x, point) - if len(PQ) == k and dist_to_x < dist_to_farthest_neighbor: - PQ.push(key=point, val=target, priority=dist_to_x) - else: - PQ.push(key=point, val=target, priority=dist_to_x) - else: - l_closest = dist(x, root.left.centroid) < dist(x, root.right.centroid) - PQ = self._knn(k, x, PQ, root.left if l_closest else root.right) - PQ = self._knn(k, x, PQ, root.right if l_closest else root.left) - return PQ - - -####################################################################### -# Multinomial Sampler # -####################################################################### - - -class DiscreteSampler: - def __init__(self, probs, log=False, with_replacement=True): - """ - Sample from an arbitrary multinomial PMF over the first `N` nonnegative - integers using Vose's algorithm for the alias method. - - Notes - ----- - Vose's algorithm takes `O(n)` time to initialize, requires `O(n)` memory, - and generates samples in constant time. - - References - ---------- - .. [1] Walker, A. J. (1977) "An efficient method for generating discrete - random variables with general distributions". *ACM Transactions on - Mathematical Software, 3(3)*, 253-256. - - .. [2] Vose, M. D. (1991) "A linear algorithm for generating random numbers - with a given distribution". *IEEE Trans. Softw. Eng., 9*, 972-974. - - .. [3] Schwarz, K (2011) "Darts, dice, and coins: sampling from a discrete - distribution". http://www.keithschwarz.com/darts-dice-coins/ - - Parameters - ---------- - probs : :py:class:`ndarray ` of length `(N,)` - A list of probabilities of the `N` outcomes in the sample space. - `probs[i]` returns the probability of outcome `i`. - log : bool - Whether the probabilities in `probs` are in logspace. Default is - False. - with_replacement : bool - Whether to generate samples with or without replacement. Default is - True. 
- """ - if not isinstance(probs, np.ndarray): - probs = np.array(probs) - - self.log = log - self.N = len(probs) - self.probs = probs - self.with_replacement = with_replacement - - alias = np.zeros(self.N) - prob = np.zeros(self.N) - scaled_probs = self.probs + np.log(self.N) if log else self.probs * self.N - - selector = scaled_probs < 0 if log else scaled_probs < 1 - small, large = np.where(selector)[0].tolist(), np.where(~selector)[0].tolist() - - while len(small) and len(large): - l, g = small.pop(), large.pop() - - alias[l] = g - prob[l] = scaled_probs[l] - - if log: - pg = np.log(np.exp(scaled_probs[g]) + np.exp(scaled_probs[l]) - 1) - else: - pg = scaled_probs[g] + scaled_probs[l] - 1 - - scaled_probs[g] = pg - to_small = pg < 0 if log else pg < 1 - if to_small: - small.append(g) - else: - large.append(g) - - while len(large): - prob[large.pop()] = 0 if log else 1 - - while len(small): - prob[small.pop()] = 0 if log else 1 - - self.prob_table = prob - self.alias_table = alias - - def __call__(self, n_samples=1): - """ - Generate random draws from the `probs` distribution over integers in - [0, N). - - Parameters - ---------- - n_samples: int - The number of samples to generate. Default is 1. - - Returns - ------- - sample : :py:class:`ndarray ` of shape `(n_samples,)` - A collection of draws from the distribution defined by `probs`. - Each sample is an int in the range `[0, N)`. - """ - return self.sample(n_samples) - - def sample(self, n_samples=1): - """ - Generate random draws from the `probs` distribution over integers in - [0, N). - - Parameters - ---------- - n_samples: int - The number of samples to generate. Default is 1. - - Returns - ------- - sample : :py:class:`ndarray ` of shape `(n_samples,)` - A collection of draws from the distribution defined by `probs`. - Each sample is an int in the range `[0, N)`. - """ - ixs = np.random.randint(0, self.N, n_samples) - p = np.exp(self.prob_table[ixs]) if self.log else self.prob_table[ixs] - flips = np.random.binomial(1, p) - samples = [ix if f else self.alias_table[ix] for ix, f in zip(ixs, flips)] - - # do recursive rejection sampling to sample without replacement - if not self.with_replacement: - unique = list(set(samples)) - while len(samples) != len(unique): - n_new = len(samples) - len(unique) - samples = unique + self.sample(n_new).tolist() - unique = list(set(samples)) - - return np.array(samples, dtype=int) - - -####################################################################### -# Dict # -####################################################################### - - -class Dict(dict): - def __init__(self, encoder=None): - """ - A dictionary subclass which returns the key value if it is not in the - dict. - - Parameters - ---------- - encoder : function or None - A function which is applied to a key before adding / retrieving it - from the dictionary. If None, the function defaults to the - identity. Default is None. 
- """ - super(Dict, self).__init__() - self._encoder = encoder - self._id_max = 0 - - def __setitem__(self, key, value): - if self._encoder is not None: - key = self._encoder(key) - elif not isinstance(key, Hashable): - key = tuple(key) - super(Dict, self).__setitem__(key, value) - - def _encode_key(self, key): - D = super(Dict, self) - enc_key = self._encoder(key) - if D.__contains__(enc_key): - val = D.__getitem__(enc_key) - else: - val = self._id_max - D.__setitem__(enc_key, val) - self._id_max += 1 - return val - - def __getitem__(self, key): - self._key = copy.deepcopy(key) - if self._encoder is not None: - return self._encode_key(key) - elif not isinstance(key, Hashable): - key = tuple(key) - return super(Dict, self).__getitem__(key) - - def __missing__(self, key): - return self._key diff --git a/aitk/keras/numpy_ml_utils/distance_metrics.py b/aitk/keras/numpy_ml_utils/distance_metrics.py deleted file mode 100644 index 8c51e6c..0000000 --- a/aitk/keras/numpy_ml_utils/distance_metrics.py +++ /dev/null @@ -1,132 +0,0 @@ -import numpy as np - - -def euclidean(x, y): - """ - Compute the Euclidean (`L2`) distance between two real vectors - - Notes - ----- - The Euclidean distance between two vectors **x** and **y** is - - .. math:: - - d(\mathbf{x}, \mathbf{y}) = \sqrt{ \sum_i (x_i - y_i)^2 } - - Parameters - ---------- - x,y : :py:class:`ndarray ` s of shape `(N,)` - The two vectors to compute the distance between - - Returns - ------- - d : float - The L2 distance between **x** and **y**. - """ - return np.sqrt(np.sum((x - y) ** 2)) - - -def manhattan(x, y): - """ - Compute the Manhattan (`L1`) distance between two real vectors - - Notes - ----- - The Manhattan distance between two vectors **x** and **y** is - - .. math:: - - d(\mathbf{x}, \mathbf{y}) = \sum_i |x_i - y_i| - - Parameters - ---------- - x,y : :py:class:`ndarray ` s of shape `(N,)` - The two vectors to compute the distance between - - Returns - ------- - d : float - The L1 distance between **x** and **y**. - """ - return np.sum(np.abs(x - y)) - - -def chebyshev(x, y): - """ - Compute the Chebyshev (:math:`L_\infty`) distance between two real vectors - - Notes - ----- - The Chebyshev distance between two vectors **x** and **y** is - - .. math:: - - d(\mathbf{x}, \mathbf{y}) = \max_i |x_i - y_i| - - Parameters - ---------- - x,y : :py:class:`ndarray ` s of shape `(N,)` - The two vectors to compute the distance between - - Returns - ------- - d : float - The Chebyshev distance between **x** and **y**. - """ - return np.max(np.abs(x - y)) - - -def minkowski(x, y, p): - """ - Compute the Minkowski-`p` distance between two real vectors. - - Notes - ----- - The Minkowski-`p` distance between two vectors **x** and **y** is - - .. math:: - - d(\mathbf{x}, \mathbf{y}) = \left( \sum_i |x_i - y_i|^p \\right)^{1/p} - - Parameters - ---------- - x,y : :py:class:`ndarray ` s of shape `(N,)` - The two vectors to compute the distance between - p : float > 1 - The parameter of the distance function. When `p = 1`, this is the `L1` - distance, and when `p=2`, this is the `L2` distance. For `p < 1`, - Minkowski-`p` does not satisfy the triangle inequality and hence is not - a valid distance metric. - - Returns - ------- - d : float - The Minkowski-`p` distance between **x** and **y**. - """ - return np.sum(np.abs(x - y) ** p) ** (1 / p) - - -def hamming(x, y): - """ - Compute the Hamming distance between two integer-valued vectors. - - Notes - ----- - The Hamming distance between two vectors **x** and **y** is - - .. 
math:: - - d(\mathbf{x}, \mathbf{y}) = \\frac{1}{N} \sum_i \mathbb{1}_{x_i \\neq y_i} - - Parameters - ---------- - x,y : :py:class:`ndarray ` s of shape `(N,)` - The two vectors to compute the distance between. Both vectors should be - integer-valued. - - Returns - ------- - d : float - The Hamming distance between **x** and **y**. - """ - return np.sum(x != y) / len(x) diff --git a/aitk/keras/numpy_ml_utils/graphs.py b/aitk/keras/numpy_ml_utils/graphs.py deleted file mode 100644 index c65f5f3..0000000 --- a/aitk/keras/numpy_ml_utils/graphs.py +++ /dev/null @@ -1,363 +0,0 @@ -from abc import ABC, abstractmethod -from collections import defaultdict -from itertools import combinations, permutations - -import numpy as np - -####################################################################### -# Graph Components # -####################################################################### - - -class Edge(object): - def __init__(self, fr, to, w=None): - """ - A generic directed edge object. - - Parameters - ---------- - fr: int - The id of the vertex the edge goes from - to: int - The id of the vertex the edge goes to - w: float, :class:`Object` instance, or None - The edge weight, if applicable. If weight is an arbitrary Object it - must have a method called 'sample' which takes no arguments and - returns a random sample from the weight distribution. If `w` is - None, no weight is assumed. Default is None. - """ - self.fr = fr - self.to = to - self._w = w - - def __repr__(self): - return "{} -> {}, weight: {}".format(self.fr, self.to, self._w) - - @property - def weight(self): - return self._w.sample() if hasattr(self._w, "sample") else self._w - - def reverse(self): - """Reverse the edge direction""" - return Edge(self.to, self.fr, self._w) - - -####################################################################### -# Graph Types # -####################################################################### - - -class Graph(ABC): - def __init__(self, V, E): - self._I2V = {i: v for i, v in zip(range(len(V)), V)} - self._V2I = {v: i for i, v in zip(range(len(V)), V)} - self._G = {i: set() for i in range(len(V))} - self._V = V - self._E = E - - self._build_adjacency_list() - - def __getitem__(self, v_i): - return self.get_neighbors(v_i) - - def get_index(self, v): - """Get the internal index for a given vertex""" - return self._V2I[v] - - def get_vertex(self, v_i): - """Get the original vertex from a given internal index""" - return self._I2V[v_i] - - @property - def vertices(self): - return self._V - - @property - def indices(self): - return list(range(len(self.vertices))) - - @property - def edges(self): - return self._E - - def get_neighbors(self, v_i): - """ - Return the internal indices of the vertices reachable from the vertex - with index `v_i`. - """ - return [self._V2I[e.to] for e in self._G[v_i]] - - def to_matrix(self): - """Return an adjacency matrix representation of the graph""" - adj_mat = np.zeros((len(self._V), len(self._V))) - for e in self.edges: - fr, to = self._V2I[e.fr], self._V2I[e.to] - adj_mat[fr, to] = 1 if e.weight is None else e.weight - return adj_mat - - def to_adj_dict(self): - """Return an adjacency dictionary representation of the graph""" - adj_dict = defaultdict(lambda: list()) - for e in self.edges: - adj_dict[e.fr].append(e) - return adj_dict - - def path_exists(self, s_i, e_i): - """ - Check whether a path exists from vertex index `s_i` to `e_i`.
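A quick worked example for the distance metrics removed above (old import path; the two vectors are chosen so the answers are easy to verify by hand):

    import numpy as np
    from aitk.keras.numpy_ml_utils.distance_metrics import euclidean, manhattan, chebyshev, minkowski

    x = np.array([0.0, 3.0])
    y = np.array([4.0, 0.0])

    print(euclidean(x, y))        # 5.0 = sqrt(4**2 + 3**2)
    print(manhattan(x, y))        # 7.0 = 4 + 3
    print(chebyshev(x, y))        # 4.0 = max(4, 3)
    print(minkowski(x, y, p=2))   # 5.0 (Minkowski with p = 2 is the L2 distance)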
- - Parameters - ---------- - s_i: Int - The interal index of the start vertex - e_i: Int - The internal index of the end vertex - - Returns - ------- - path_exists : Boolean - Whether or not a valid path exists between `s_i` and `e_i`. - """ - queue = [(s_i, [s_i])] - while len(queue): - c_i, path = queue.pop(0) - nbrs_not_on_path = set(self.get_neighbors(c_i)) - set(path) - - for n_i in nbrs_not_on_path: - queue.append((n_i, path + [n_i])) - if n_i == e_i: - return True - return False - - def all_paths(self, s_i, e_i): - """ - Find all simple paths between `s_i` and `e_i` in the graph. - - Notes - ----- - Uses breadth-first search. Ignores all paths with repeated vertices. - - Parameters - ---------- - s_i: Int - The interal index of the start vertex - e_i: Int - The internal index of the end vertex - - Returns - ------- - complete_paths : list of lists - A list of all paths from `s_i` to `e_i`. Each path is represented - as a list of interal vertex indices. - """ - complete_paths = [] - queue = [(s_i, [s_i])] - - while len(queue): - c_i, path = queue.pop(0) - nbrs_not_on_path = set(self.get_neighbors(c_i)) - set(path) - - for n_i in nbrs_not_on_path: - if n_i == e_i: - complete_paths.append(path + [n_i]) - else: - queue.append((n_i, path + [n_i])) - - return complete_paths - - @abstractmethod - def _build_adjacency_list(self): - pass - - -class DiGraph(Graph): - def __init__(self, V, E): - """ - A generic directed graph object. - - Parameters - ---------- - V : list - A list of vertex IDs. - E : list of :class:`Edge ` objects - A list of directed edges connecting pairs of vertices in ``V``. - """ - super().__init__(V, E) - self.is_directed = True - self._topological_ordering = [] - - def _build_adjacency_list(self): - """Encode directed graph as an adjancency list""" - # assumes no parallel edges - for e in self.edges: - fr_i = self._V2I[e.fr] - self._G[fr_i].add(e) - - def reverse(self): - """Reverse the direction of all edges in the graph""" - return DiGraph(self.vertices, [e.reverse() for e in self.edges]) - - def topological_ordering(self): - """ - Returns a (non-unique) topological sort / linearization of the nodes - IFF the graph is acyclic, otherwise returns None. - - Notes - ----- - A topological sort is an ordering on the nodes in `G` such that for every - directed edge :math:`u \\rightarrow v` in the graph, `u` appears before - `v` in the ordering. The topological ordering is produced by ordering - the nodes in `G` by their DFS "last visit time," from greatest to - smallest. - - This implementation follows a recursive, DFS-based approach [1]_ which - may break if the graph is very large. For an iterative version, see - Khan's algorithm [2]_ . - - References - ---------- - .. [1] Tarjan, R. (1976), Edge-disjoint spanning trees and depth-first - search, *Acta Informatica, 6 (2)*: 171–185. - .. [2] Kahn, A. (1962), Topological sorting of large networks, - *Communications of the ACM, 5 (11)*: 558–562. - - Returns - ------- - ordering : list or None - A topoligical ordering of the vertex indices if the graph is a DAG, - otherwise None. - """ - ordering = [] - visited = set() - - def dfs(v_i, path=None): - """A simple DFS helper routine""" - path = set([v_i]) if path is None else path - for nbr_i in self.get_neighbors(v_i): - if nbr_i in path: - return True # cycle detected! 
- elif nbr_i not in visited: - visited.add(nbr_i) - path.add(nbr_i) - is_cyclic = dfs(nbr_i, path) - if is_cyclic: - return True - - # insert to the beginning of the ordering - ordering.insert(0, v_i) - path -= set([v_i]) - return False - - for s_i in self.indices: - if s_i not in visited: - visited.add(s_i) - is_cyclic = dfs(s_i) - - if is_cyclic: - return None - - return ordering - - def is_acyclic(self): - """Check whether the graph contains cycles""" - return self.topological_ordering() is not None - - -class UndirectedGraph(Graph): - def __init__(self, V, E): - """ - A generic undirected graph object. - - Parameters - ---------- - V : list - A list of vertex IDs. - E : list of :class:`Edge ` objects - A list of edges connecting pairs of vertices in ``V``. For any edge - connecting vertex `u` to vertex `v`, :class:`UndirectedGraph - ` will assume that there - exists a corresponding edge connecting `v` to `u`, even if this is - not present in `E`. - """ - super().__init__(V, E) - self.is_directed = False - - def _build_adjacency_list(self): - """Encode undirected, unweighted graph as an adjancency list""" - # assumes no parallel edges - # each edge appears twice as (u,v) and (v,u) - for e in self.edges: - fr_i = self._V2I[e.fr] - to_i = self._V2I[e.to] - - self._G[fr_i].add(e) - self._G[to_i].add(e.reverse()) - - -####################################################################### -# Graph Generators # -####################################################################### - - -def random_unweighted_graph(n_vertices, edge_prob=0.5, directed=False): - """ - Generate an unweighted Erdős-Rényi random graph [*]_. - - References - ---------- - .. [*] Erdős, P. and Rényi, A. (1959). On Random Graphs, *Publ. Math. 6*, 290. - - Parameters - ---------- - n_vertices : int - The number of vertices in the graph. - edge_prob : float in [0, 1] - The probability of forming an edge between two vertices. Default is - 0.5. - directed : bool - Whether the edges in the graph should be directed. Default is False. - - Returns - ------- - G : :class:`Graph` instance - The resulting random graph. - """ - vertices = list(range(n_vertices)) - candidates = permutations(vertices, 2) if directed else combinations(vertices, 2) - - edges = [] - for (fr, to) in candidates: - if np.random.rand() <= edge_prob: - edges.append(Edge(fr, to)) - - return DiGraph(vertices, edges) if directed else UndirectedGraph(vertices, edges) - - -def random_DAG(n_vertices, edge_prob=0.5): - """ - Create a 'random' unweighted directed acyclic graph by pruning all the - backward connections from a random graph. - - Parameters - ---------- - n_vertices : int - The number of vertices in the graph. - edge_prob : float in [0, 1] - The probability of forming an edge between two vertices in the - underlying random graph, before edge pruning. Default is 0.5. - - Returns - ------- - G : :class:`Graph` instance - The resulting DAG. 
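A usage sketch for the graph utilities above (old import path; the DAG is a hypothetical four-vertex example):

    from aitk.keras.numpy_ml_utils.graphs import Edge, DiGraph

    # a small DAG: 0 -> 1 -> 3 and 0 -> 2 -> 3
    V = [0, 1, 2, 3]
    E = [Edge(0, 1), Edge(0, 2), Edge(1, 3), Edge(2, 3)]
    G = DiGraph(V, E)

    print(G.is_acyclic())             # True
    print(G.topological_ordering())   # e.g. [0, 2, 1, 3]; None would signal a cycle
    print(G.path_exists(0, 3))        # True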
- """ - G = random_unweighted_graph(n_vertices, edge_prob, directed=True) - - # prune edges to remove backwards connections between vertices - G = DiGraph(G.vertices, [e for e in G.edges if e.fr < e.to]) - - # if we pruned away all the edges, generate a new graph - while not len(G.edges): - G = random_unweighted_graph(n_vertices, edge_prob, directed=True) - G = DiGraph(G.vertices, [e for e in G.edges if e.fr < e.to]) - return G diff --git a/aitk/keras/numpy_ml_utils/kernels.py b/aitk/keras/numpy_ml_utils/kernels.py deleted file mode 100644 index f346d61..0000000 --- a/aitk/keras/numpy_ml_utils/kernels.py +++ /dev/null @@ -1,344 +0,0 @@ -import re -from abc import ABC, abstractmethod - -import numpy as np - - -class KernelBase(ABC): - def __init__(self): - super().__init__() - self.parameters = {} - self.hyperparameters = {} - - @abstractmethod - def _kernel(self, X, Y): - raise NotImplementedError - - def __call__(self, X, Y=None): - """Refer to documentation for the `_kernel` method""" - return self._kernel(X, Y) - - def __str__(self): - P, H = self.parameters, self.hyperparameters - p_str = ", ".join(["{}={}".format(k, v) for k, v in P.items()]) - return "{}({})".format(H["id"], p_str) - - def summary(self): - """Return the dictionary of model parameters, hyperparameters, and ID""" - return { - "id": self.hyperparameters["id"], - "parameters": self.parameters, - "hyperparameters": self.hyperparameters, - } - - def set_params(self, summary_dict): - """ - Set the model parameters and hyperparameters using the settings in - `summary_dict`. - - Parameters - ---------- - summary_dict : dict - A dictionary with keys 'parameters' and 'hyperparameters', - structured as would be returned by the :meth:`summary` method. If - a particular (hyper)parameter is not included in this dict, the - current value will be used. - - Returns - ------- - new_kernel : :doc:`Kernel ` instance - A kernel with parameters and hyperparameters adjusted to those - specified in `summary_dict`. - """ - kr, sd = self, summary_dict - - # collapse `parameters` and `hyperparameters` nested dicts into a single - # merged dictionary - flatten_keys = ["parameters", "hyperparameters"] - for k in flatten_keys: - if k in sd: - entry = sd[k] - sd.update(entry) - del sd[k] - - for k, v in sd.items(): - if k in self.parameters: - kr.parameters[k] = v - if k in self.hyperparameters: - kr.hyperparameters[k] = v - return kr - - -class LinearKernel(KernelBase): - def __init__(self, c0=0): - """ - The linear (i.e., dot-product) kernel. - - Notes - ----- - For input vectors :math:`\mathbf{x}` and :math:`\mathbf{y}`, the linear - kernel is: - - .. math:: - - k(\mathbf{x}, \mathbf{y}) = \mathbf{x}^\\top \mathbf{y} + c_0 - - Parameters - ---------- - c0 : float - An "inhomogeneity" parameter. When `c0` = 0, the kernel is said to be - homogenous. Default is 1. - """ - super().__init__() - self.hyperparameters = {"id": "LinearKernel"} - self.parameters = {"c0": c0} - - def _kernel(self, X, Y=None): - """ - Compute the linear kernel (i.e., dot-product) between all pairs of rows in - `X` and `Y`. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(N, C)` - Collection of `N` input vectors - Y : :py:class:`ndarray ` of shape `(M, C)` or None - Collection of `M` input vectors. If None, assume `Y` = `X`. - Default is None. - - Returns - ------- - out : :py:class:`ndarray ` of shape `(N, M)` - Similarity between `X` and `Y`, where index (`i`, `j`) gives - :math:`k(x_i, y_j)`. 
- """ - X, Y = kernel_checks(X, Y) - return X @ Y.T + self.parameters["c0"] - - -class PolynomialKernel(KernelBase): - def __init__(self, d=3, gamma=None, c0=1): - """ - The degree-`d` polynomial kernel. - - Notes - ----- - For input vectors :math:`\mathbf{x}` and :math:`\mathbf{y}`, the polynomial - kernel is: - - .. math:: - - k(\mathbf{x}, \mathbf{y}) = (\gamma \mathbf{x}^\\top \mathbf{y} + c_0)^d - - In contrast to the linear kernel, the polynomial kernel also computes - similarities *across* dimensions of the **x** and **y** vectors, - allowing it to account for interactions between features. As an - instance of the dot product family of kernels, the polynomial kernel is - invariant to a rotation of the coordinates about the origin, but *not* - to translations. - - Parameters - ---------- - d : int - Degree of the polynomial kernel. Default is 3. - gamma : float or None - A scaling parameter for the dot product between `x` and `y`, - determining the amount of smoothing/resonlution of the kernel. - Larger values result in greater smoothing. If None, defaults to 1 / - `C`. Sometimes referred to as the kernel bandwidth. Default is - None. - c0 : float - Parameter trading off the influence of higher-order versus lower-order - terms in the polynomial. If `c0` = 0, the kernel is said to be - homogenous. Default is 1. - """ - super().__init__() - self.hyperparameters = {"id": "PolynomialKernel"} - self.parameters = {"d": d, "c0": c0, "gamma": gamma} - - def _kernel(self, X, Y=None): - """ - Compute the degree-`d` polynomial kernel between all pairs of rows in `X` - and `Y`. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(N, C)` - Collection of `N` input vectors - Y : :py:class:`ndarray ` of shape `(M, C)` or None - Collection of `M` input vectors. If None, assume `Y = X`. Default - is None. - - Returns - ------- - out : :py:class:`ndarray ` of shape `(N, M)` - Similarity between `X` and `Y` where index (`i`, `j`) gives - :math:`k(x_i, y_j)` (i.e., the kernel's Gram-matrix). - """ - P = self.parameters - X, Y = kernel_checks(X, Y) - gamma = 1 / X.shape[1] if P["gamma"] is None else P["gamma"] - return (gamma * (X @ Y.T) + P["c0"]) ** P["d"] - - -class RBFKernel(KernelBase): - def __init__(self, sigma=None): - """ - Radial basis function (RBF) / squared exponential kernel. - - Notes - ----- - For input vectors :math:`\mathbf{x}` and :math:`\mathbf{y}`, the radial - basis function kernel is: - - .. math:: - - k(\mathbf{x}, \mathbf{y}) = \exp \left\{ -0.5 - \left\lVert \\frac{\mathbf{x} - - \mathbf{y}}{\sigma} \\right\\rVert_2^2 \\right\} - - The RBF kernel decreases with distance and ranges between zero (in the - limit) to one (when **x** = **y**). Notably, the implied feature space - of the kernel has an infinite number of dimensions. - - Parameters - ---------- - sigma : float or array of shape `(C,)` or None - A scaling parameter for the vectors **x** and **y**, producing an - isotropic kernel if a float, or an anistropic kernel if an array of - length `C`. Larger values result in higher resolution / greater - smoothing. If None, defaults to :math:`\sqrt(C / 2)`. Sometimes - referred to as the kernel 'bandwidth'. Default is None. - """ - super().__init__() - self.hyperparameters = {"id": "RBFKernel"} - self.parameters = {"sigma": sigma} - - def _kernel(self, X, Y=None): - """ - Computes the radial basis function (RBF) kernel between all pairs of - rows in `X` and `Y`. 
- - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(N, C)` - Collection of `N` input vectors, each with dimension `C`. - Y : :py:class:`ndarray ` of shape `(M, C)` - Collection of `M` input vectors. If None, assume `Y` = `X`. Default - is None. - - Returns - ------- - out : :py:class:`ndarray ` of shape `(N, M)` - Similarity between `X` and `Y` where index (i, j) gives :math:`k(x_i, y_j)`. - """ - P = self.parameters - X, Y = kernel_checks(X, Y) - sigma = np.sqrt(X.shape[1] / 2) if P["sigma"] is None else P["sigma"] - return np.exp(-0.5 * pairwise_l2_distances(X / sigma, Y / sigma) ** 2) - - -class KernelInitializer(object): - def __init__(self, param=None): - """ - A class for initializing learning rate schedulers. Valid inputs are: - (a) __str__ representations of `KernelBase` instances - (b) `KernelBase` instances - (c) Parameter dicts (e.g., as produced via the :meth:`summary` method in - `KernelBase` instances) - - If `param` is None, return `LinearKernel`. - """ - self.param = param - - def __call__(self): - param = self.param - if param is None: - kernel = LinearKernel() - elif isinstance(param, KernelBase): - kernel = param - elif isinstance(param, str): - kernel = self.init_from_str() - elif isinstance(param, dict): - kernel = self.init_from_dict() - return kernel - - def init_from_str(self): - r = r"([a-zA-Z0-9]*)=([^,)]*)" - kr_str = self.param.lower() - kwargs = dict([(i, eval(j)) for (i, j) in re.findall(r, self.param)]) - - if "linear" in kr_str: - kernel = LinearKernel(**kwargs) - elif "polynomial" in kr_str: - kernel = PolynomialKernel(**kwargs) - elif "rbf" in kr_str: - kernel = RBFKernel(**kwargs) - else: - raise NotImplementedError("{}".format(kr_str)) - return kernel - - def init_from_dict(self): - S = self.param - sc = S["hyperparameters"] if "hyperparameters" in S else None - - if sc is None: - raise ValueError("Must have `hyperparameters` key: {}".format(S)) - - if sc and sc["id"] == "LinearKernel": - scheduler = LinearKernel().set_params(S) - elif sc and sc["id"] == "PolynomialKernel": - scheduler = PolynomialKernel().set_params(S) - elif sc and sc["id"] == "RBFKernel": - scheduler = RBFKernel().set_params(S) - elif sc: - raise NotImplementedError("{}".format(sc["id"])) - return scheduler - - -def kernel_checks(X, Y): - X = X.reshape(-1, 1) if X.ndim == 1 else X - Y = X if Y is None else Y - Y = Y.reshape(-1, 1) if Y.ndim == 1 else Y - - assert X.ndim == 2, "X must have 2 dimensions, but got {}".format(X.ndim) - assert Y.ndim == 2, "Y must have 2 dimensions, but got {}".format(Y.ndim) - assert X.shape[1] == Y.shape[1], "X and Y must have the same number of columns" - return X, Y - - -def pairwise_l2_distances(X, Y): - """ - A fast, vectorized way to compute pairwise l2 distances between rows in `X` - and `Y`. - - Notes - ----- - An entry of the pairwise Euclidean distance matrix for two vectors is - - .. math:: - - d[i, j] &= \sqrt{(x_i - y_i) @ (x_i - y_i)} \\\\ - &= \sqrt{sum (x_i - y_j)^2} \\\\ - &= \sqrt{sum (x_i)^2 - 2 x_i y_j + (y_j)^2} - - The code below computes the the third line using numpy broadcasting - fanciness to avoid any for loops. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(N, C)` - Collection of `N` input vectors - Y : :py:class:`ndarray ` of shape `(M, C)` - Collection of `M` input vectors. If None, assume `Y` = `X`. Default is - None. - - Returns - ------- - dists : :py:class:`ndarray ` of shape `(N, M)` - Pairwise distance matrix. Entry (i, j) contains the `L2` distance between - :math:`x_i` and :math:`y_j`. 
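A usage sketch for the kernels above (old import path; random data, so only shapes and basic Gram-matrix properties are checked):

    import numpy as np
    from aitk.keras.numpy_ml_utils.kernels import LinearKernel, RBFKernel

    X = np.random.rand(5, 3)
    Y = np.random.rand(4, 3)

    K_lin = LinearKernel()(X, Y)       # equivalent to X @ Y.T
    K_rbf = RBFKernel(sigma=1.0)(X)    # Y defaults to X, giving the (5, 5) Gram matrix

    print(K_lin.shape)                       # (5, 4)
    print(np.allclose(K_rbf, K_rbf.T))       # True: the Gram matrix is symmetric
    print(np.allclose(np.diag(K_rbf), 1.0))  # True: k(x, x) = 1 for the RBF kernel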
- """ - D = -2 * X @ Y.T + np.sum(Y ** 2, axis=1) + np.sum(X ** 2, axis=1)[:, np.newaxis] - D[D < 0] = 0 # clip any value less than 0 (a result of numerical imprecision) - return np.sqrt(D) diff --git a/aitk/keras/numpy_ml_utils/testing.py b/aitk/keras/numpy_ml_utils/testing.py deleted file mode 100644 index 67f3111..0000000 --- a/aitk/keras/numpy_ml_utils/testing.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Utilities for writing unit tests""" -import numbers -import numpy as np - -MSG_CACHE = set() - -def warn_once(msg): - if msg not in MSG_CACHE: - print(msg) - MSG_CACHE.add(msg) - -####################################################################### -# Assertions # -####################################################################### - - -def is_symmetric(X): - """Check that an array `X` is symmetric along its main diagonal""" - return np.allclose(X, X.T) - - -def is_symmetric_positive_definite(X): - """Check that a matrix `X` is a symmetric and positive-definite.""" - if is_symmetric(X): - try: - # if matrix is symmetric, check whether the Cholesky decomposition - # (defined only for symmetric/Hermitian positive definite matrices) - # exists - np.linalg.cholesky(X) - return True - except np.linalg.LinAlgError: - return False - return False - - -def is_stochastic(X): - """True if `X` contains probabilities that sum to 1 along the columns""" - msg = "Array should be stochastic along the columns" - assert len(X[X < 0]) == len(X[X > 1]) == 0, msg - if not np.allclose(np.sum(X, axis=1), np.ones(X.shape[0])): - warn_once("WARNING: %s; are you using the correct activation function?" % msg) - return True - - -def is_number(a): - """Check that a value `a` is numeric""" - return isinstance(a, numbers.Number) - - -def is_one_hot(x): - """Return True if array `x` is a binary array with a single 1""" - msg = "Matrix should be one-hot binary" - assert np.array_equal(x, x.astype(bool)), msg - assert np.allclose(np.sum(x, axis=1), np.ones(x.shape[0])), msg - return True - - -def is_binary(x): - """Return True if array `x` consists only of binary values""" - msg = "Matrix must be binary" - assert np.array_equal(x, x.astype(bool)), msg - return True - - -####################################################################### -# Data Generators # -####################################################################### - - -def random_one_hot_matrix(n_examples, n_classes): - """Create a random one-hot matrix of shape (`n_examples`, `n_classes`)""" - X = np.eye(n_classes) - X = X[np.random.choice(n_classes, n_examples)] - return X - - -def random_stochastic_matrix(n_examples, n_classes): - """Create a random stochastic matrix of shape (`n_examples`, `n_classes`)""" - X = np.random.rand(n_examples, n_classes) - X /= X.sum(axis=1, keepdims=True) - return X - - -def random_tensor(shape, standardize=False): - """ - Create a random real-valued tensor of shape `shape`. If `standardize` is - True, ensure each column has mean 0 and std 1. - """ - offset = np.random.randint(-300, 300, shape) - X = np.random.rand(*shape) + offset - - if standardize: - eps = np.finfo(float).eps - X = (X - X.mean(axis=0)) / (X.std(axis=0) + eps) - return X - - -def random_binary_tensor(shape, sparsity=0.5): - """ - Create a random binary tensor of shape `shape`. `sparsity` is a value - between 0 and 1 controlling the ratio of 0s to 1s in the output tensor. 
- """ - return (np.random.rand(*shape) >= (1 - sparsity)).astype(float) - - -def random_paragraph(n_words, vocab=None): - """ - Generate a random paragraph consisting of `n_words` words. If `vocab` is - not None, words will be drawn at random from this list. Otherwise, words - will be sampled uniformly from a collection of 26 Latin words. - """ - if vocab is None: - vocab = [ - "at", - "stet", - "accusam", - "aliquyam", - "clita", - "lorem", - "ipsum", - "dolor", - "dolore", - "dolores", - "sit", - "amet", - "consetetur", - "sadipscing", - "elitr", - "sed", - "diam", - "nonumy", - "eirmod", - "duo", - "ea", - "eos", - "erat", - "est", - "et", - "gubergren", - ] - return [np.random.choice(vocab) for _ in range(n_words)] - - -####################################################################### -# Custom Warnings # -####################################################################### - - -class DependencyWarning(RuntimeWarning): - pass diff --git a/aitk/keras/numpy_ml_utils/windows.py b/aitk/keras/numpy_ml_utils/windows.py deleted file mode 100644 index cd3132f..0000000 --- a/aitk/keras/numpy_ml_utils/windows.py +++ /dev/null @@ -1,156 +0,0 @@ -import numpy as np - - -def blackman_harris(window_len, symmetric=False): - """ - The Blackman-Harris window. - - Notes - ----- - The Blackman-Harris window is an instance of the more general class of - cosine-sum windows where `K=3`. Additional coefficients extend the Hamming - window to further minimize the magnitude of the nearest side-lobe in the - frequency response. - - .. math:: - \\text{bh}(n) = a_0 - a_1 \cos\left(\\frac{2 \pi n}{N}\\right) + - a_2 \cos\left(\\frac{4 \pi n }{N}\\right) - - a_3 \cos\left(\\frac{6 \pi n}{N}\\right) - - where `N` = `window_len` - 1, :math:`a_0` = 0.35875, :math:`a_1` = 0.48829, - :math:`a_2` = 0.14128, and :math:`a_3` = 0.01168. - - Parameters - ---------- - window_len : int - The length of the window in samples. Should be equal to the - `frame_width` if applying to a windowed signal. - symmetric : bool - If False, create a 'periodic' window that can be used in with an FFT / - in spectral analysis. If True, generate a symmetric window that can be - used in, e.g., filter design. Default is False. - - Returns - ------- - window : :py:class:`ndarray ` of shape `(window_len,)` - The window - """ - return generalized_cosine( - window_len, [0.35875, 0.48829, 0.14128, 0.01168], symmetric - ) - - -def hamming(window_len, symmetric=False): - """ - The Hamming window. - - Notes - ----- - The Hamming window is an instance of the more general class of cosine-sum - windows where `K=1` and :math:`a_0 = 0.54`. Coefficients selected to - minimize the magnitude of the nearest side-lobe in the frequency response. - - .. math:: - - \\text{hamming}(n) = 0.54 - - 0.46 \cos\left(\\frac{2 \pi n}{\\text{window_len} - 1}\\right) - - Parameters - ---------- - window_len : int - The length of the window in samples. Should be equal to the - `frame_width` if applying to a windowed signal. - symmetric : bool - If False, create a 'periodic' window that can be used in with an FFT / - in spectral analysis. If True, generate a symmetric window that can be - used in, e.g., filter design. Default is False. - - Returns - ------- - window : :py:class:`ndarray ` of shape `(window_len,)` - The window - """ - return generalized_cosine(window_len, [0.54, 1 - 0.54], symmetric) - - -def hann(window_len, symmetric=False): - """ - The Hann window. 
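In its symmetric form, the Hamming window above reduces to the familiar two-term cosine sum; a standalone check (toy window length) against NumPy's reference implementation::

    import numpy as np

    N = 16
    n = np.arange(N)

    # hamming(n) = 0.54 - 0.46 * cos(2*pi*n / (N - 1)), symmetric case
    w = 0.54 - 0.46 * np.cos(2 * np.pi * n / (N - 1))

    assert np.allclose(w, np.hamming(N))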
- - Notes - ----- - The Hann window is an instance of the more general class of cosine-sum - windows where `K=1` and :math:`a_0` = 0.5. Unlike the Hamming window, the - end points of the Hann window touch zero. - - .. math:: - - \\text{hann}(n) = 0.5 - 0.5 \cos\left(\\frac{2 \pi n}{\\text{window_len} - 1}\\right) - - Parameters - ---------- - window_len : int - The length of the window in samples. Should be equal to the - `frame_width` if applying to a windowed signal. - symmetric : bool - If False, create a 'periodic' window that can be used in with an FFT / - in spectral analysis. If True, generate a symmetric window that can be - used in, e.g., filter design. Default is False. - - Returns - ------- - window : :py:class:`ndarray ` of shape `(window_len,)` - The window - """ - return generalized_cosine(window_len, [0.5, 0.5], symmetric) - - -def generalized_cosine(window_len, coefs, symmetric=False): - """ - The generalized cosine family of window functions. - - Notes - ----- - The generalized cosine window is a simple weighted sum of cosine terms. - - For :math:`n \in \{0, \ldots, \\text{window_len} \}`: - - .. math:: - - \\text{GCW}(n) = \sum_{k=0}^K (-1)^k a_k \cos\left(\\frac{2 \pi k n}{\\text{window_len}}\\right) - - Parameters - ---------- - window_len : int - The length of the window in samples. Should be equal to the - `frame_width` if applying to a windowed signal. - coefs: list of floats - The :math:`a_k` coefficient values - symmetric : bool - If False, create a 'periodic' window that can be used in with an FFT / - in spectral analysis. If True, generate a symmetric window that can be - used in, e.g., filter design. Default is False. - - Returns - ------- - window : :py:class:`ndarray ` of shape `(window_len,)` - The window - """ - window_len += 1 if not symmetric else 0 - entries = np.linspace(-np.pi, np.pi, window_len) # (-1)^k * 2pi*n / window_len - window = np.sum([ak * np.cos(k * entries) for k, ak in enumerate(coefs)], axis=0) - return window[:-1] if not symmetric else window - - -class WindowInitializer: - def __call__(self, window): - if window == "hamming": - return hamming - elif window == "blackman_harris": - return blackman_harris - elif window == "hann": - return hann - elif window == "generalized_cosine": - return generalized_cosine - else: - raise NotImplementedError("{}".format(window)) diff --git a/aitk/keras/optimizers/README.md b/aitk/keras/optimizers/README.md deleted file mode 100644 index fa815cb..0000000 --- a/aitk/keras/optimizers/README.md +++ /dev/null @@ -1,8 +0,0 @@ -# Optimizers - -The `optimizers.py` module implements common modifications to stochastic gradient descent. 
It includes: - -- SGD with momentum ([Rummelhart, Hinton, & Williams, 1986](https://www.cs.princeton.edu/courses/archive/spring18/cos495/res/backprop_old.pdf)) -- AdaGrad ([Duchi, Hazan, & Singer, 2011](http://jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)) -- RMSProp ([Tieleman & Hinton, 2012](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)) -- Adam ([Kingma & Ba, 2015](https://arxiv.org/pdf/1412.6980v8.pdf)) diff --git a/aitk/keras/optimizers/__init__.py b/aitk/keras/optimizers/__init__.py deleted file mode 100644 index acd7379..0000000 --- a/aitk/keras/optimizers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .optimizers import * diff --git a/aitk/keras/optimizers/optimizers.py b/aitk/keras/optimizers/optimizers.py deleted file mode 100644 index 6651e64..0000000 --- a/aitk/keras/optimizers/optimizers.py +++ /dev/null @@ -1,498 +0,0 @@ -from copy import deepcopy -from abc import ABC, abstractmethod - -import numpy as np -from numpy.linalg import norm - - -class OptimizerBase(ABC): - def __init__(self, learning_rate, scheduler=None): - """ - An abstract base class for all Optimizer objects. - - This should never be used directly. - """ - from ..initializers import SchedulerInitializer - - self.cache = {} - self.cur_step = 0 - self.hyperparameters = {} - self.lr_scheduler = SchedulerInitializer(scheduler, lr=learning_rate)() - - def __call__(self, param, param_grad, param_name, cur_loss=None): - return self.update(param, param_grad, param_name, cur_loss) - - def step(self): - """Increment the optimizer step counter by 1""" - self.cur_step += 1 - - def reset_step(self): - """Reset the step counter to zero""" - self.cur_step = 0 - - def copy(self): - """Return a copy of the optimizer object""" - return deepcopy(self) - - def set_params(self, hparam_dict=None, cache_dict=None): - """Set the parameters of the optimizer object from a dictionary""" - from ..initializers import SchedulerInitializer - - if hparam_dict is not None: - for k, v in hparam_dict.items(): - if k in self.hyperparameters: - self.hyperparameters[k] = v - if k == "lr_scheduler": - self.lr_scheduler = SchedulerInitializer(v, lr=None)() - - if cache_dict is not None: - for k, v in cache_dict.items(): - if k in self.cache: - self.cache[k] = v - - @abstractmethod - def update(self, param, param_grad, param_name, cur_loss=None): - raise NotImplementedError - - -class SGD(OptimizerBase): - def __init__( - self, learning_rate=0.01, momentum=0.0, clip_norm=None, lr_scheduler=None, **kwargs - ): - """ - A stochastic gradient descent optimizer. - - Notes - ----- - For model parameters :math:`\\theta`, averaged parameter gradients - :math:`\\nabla_{\\theta} \mathcal{L}`, and learning rate :math:`\eta`, - the SGD update at timestep `t` is - - .. math:: - - \\text{update}^{(t)} - &= \\text{momentum} \cdot \\text{update}^{(t-1)} + \eta^{(t)} \\nabla_{\\theta} \mathcal{L}\\\\ - \\theta^{(t+1)} - &\leftarrow \\theta^{(t)} - \\text{update}^{(t)} - - Parameters - ---------- - learning_rate : float - Learning rate for SGD. If scheduler is not None, this is used as - the starting learning rate. Default is 0.01. - momentum : float in range [0, 1] - The fraction of the previous update to add to the current update. - If 0, no momentum is applied. Default is 0. - clip_norm : float - If not None, all param gradients are scaled to have maximum l2 norm of - `clip_norm` before computing update. Default is None. - lr_scheduler : str, :doc:`Scheduler ` object, or None - The learning rate scheduler. 
If None, use a constant learning - rate equal to `learning_rate`. Default is None. - """ - if "lr" in kwargs: - learning_rate = kwargs["lr"] - print("UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.") - - super().__init__(learning_rate, lr_scheduler) - - self.hyperparameters = { - "id": "SGD", - "learning_rate": learning_rate, - "momentum": momentum, - "clip_norm": clip_norm, - "lr_scheduler": str(self.lr_scheduler), - } - - def __str__(self): - H = self.hyperparameters - learning_rate, mm, cn, sc = H["learning_rate"], H["momentum"], H["clip_norm"], H["lr_scheduler"] - return "SGD(learning_rate={}, momentum={}, clip_norm={}, lr_scheduler={})".format( - learning_rate, mm, cn, sc - ) - - def update(self, param, param_grad, param_name, cur_loss=None): - """ - Compute the SGD update for a given parameter - - Parameters - ---------- - param : :py:class:`ndarray ` of shape (n, m) - The value of the parameter to be updated. - param_grad : :py:class:`ndarray ` of shape (n, m) - The gradient of the loss function with respect to `param_name`. - param_name : str - The name of the parameter. - cur_loss : float - The training or validation loss for the current minibatch. Used for - learning rate scheduling e.g., by - :class:`~numpy_ml.neural_nets.schedulers.KingScheduler`. - Default is None. - - Returns - ------- - updated_params : :py:class:`ndarray ` of shape (n, m) - The value of `param` after applying the momentum update. - """ - C = self.cache - H = self.hyperparameters - momentum, clip_norm = H["momentum"], H["clip_norm"] - learning_rate = self.lr_scheduler(self.cur_step, cur_loss) - - if param_name not in C: - C[param_name] = np.zeros_like(param_grad) - - # scale gradient to avoid explosion - t = np.inf if clip_norm is None else clip_norm - if norm(param_grad) > t: - param_grad = param_grad * t / norm(param_grad) - - update = momentum * C[param_name] + learning_rate * param_grad - self.cache[param_name] = update - return param - update - - -####################################################################### -# Adaptive Gradient Methods # -####################################################################### - - -class AdaGrad(OptimizerBase): - def __init__(self, learning_rate=0.01, eps=1e-7, clip_norm=None, lr_scheduler=None, **kwargs): - """ - An AdaGrad optimizer. - - Notes - ----- - Weights that receive large gradients will have their effective learning - rate reduced, while weights that receive small or infrequent updates - will have their effective learning rate increased. - - Equations:: - - cache[t] = cache[t-1] + grad[t] ** 2 - update[t] = learning_rate * grad[t] / (np.sqrt(cache[t]) + eps) - param[t+1] = param[t] - update[t] - - Note that the ``**`` and `/` operations are elementwise - - "A downside of Adagrad ... is that the monotonic learning rate usually - proves too aggressive and stops learning too early." [1] - - References - ---------- - .. [1] Karpathy, A. "CS231n: Convolutional neural networks for visual - recognition" https://cs231n.github.io/neural-networks-3/ - - Parameters - ---------- - learning_rate : float - Global learning rate - eps : float - Smoothing term to avoid divide-by-zero errors in the update calc. - Default is 1e-7. - clip_norm : float or None - If not None, all param gradients are scaled to have maximum `L2` norm of - `clip_norm` before computing update. Default is None. - lr_scheduler : str or :doc:`Scheduler ` object or None - The learning rate scheduler. If None, use a constant learning - rate equal to `learning_rate`. 
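A standalone one-step trace (toy numbers) of the momentum and clip-norm logic in ``SGD.update`` above::

    import numpy as np
    from numpy.linalg import norm

    lr, momentum, clip_norm = 0.1, 0.9, 1.0
    param = np.array([1.0, -2.0])
    grad = np.array([3.0, 4.0])               # ||grad|| = 5 > clip_norm
    prev_update = np.zeros_like(param)

    # rescale the gradient so its l2 norm is at most clip_norm
    if norm(grad) > clip_norm:
        grad = grad * clip_norm / norm(grad)  # -> [0.6, 0.8]

    # update[t] = momentum * update[t-1] + lr * grad[t]
    update = momentum * prev_update + lr * grad
    param = param - update
    print(param)                              # [ 0.94 -2.08]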
Default is None. - """ - if "lr" in kwargs: - learning_rate = kwargs["lr"] - print("UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.") - - super().__init__(learning_rate, lr_scheduler) - - self.cache = {} - self.hyperparameters = { - "id": "AdaGrad", - "learning_rate": learning_rate, - "eps": eps, - "clip_norm": clip_norm, - "lr_scheduler": str(self.lr_scheduler), - } - - def __str__(self): - H = self.hyperparameters - learning_rate, eps, cn, sc = H["learning_rate"], H["eps"], H["clip_norm"], H["lr_scheduler"] - return "AdaGrad(learning_rate={}, eps={}, clip_norm={}, lr_scheduler={})".format( - learning_rate, eps, cn, sc - ) - - def update(self, param, param_grad, param_name, cur_loss=None): - """ - Compute the AdaGrad update for a given parameter. - - Notes - ----- - Adjusts the learning rate of each weight based on the magnitudes of its - gradients (big gradient -> small learning_rate, small gradient -> big learning_rate). - - Parameters - ---------- - param : :py:class:`ndarray ` of shape (n, m) - The value of the parameter to be updated - param_grad : :py:class:`ndarray ` of shape (n, m) - The gradient of the loss function with respect to `param_name` - param_name : str - The name of the parameter - cur_loss : float or None - The training or validation loss for the current minibatch. Used for - learning rate scheduling e.g., by - :class:`~numpy_ml.neural_nets.schedulers.KingScheduler`. - Default is None. - - Returns - ------- - updated_params : :py:class:`ndarray ` of shape (n, m) - The value of `param` after applying the AdaGrad update - """ - C = self.cache - H = self.hyperparameters - eps, clip_norm = H["eps"], H["clip_norm"] - learning_rate = self.lr_scheduler(self.cur_step, cur_loss) - - if param_name not in C: - C[param_name] = np.zeros_like(param_grad) - - # scale gradient to avoid explosion - t = np.inf if clip_norm is None else clip_norm - if norm(param_grad) > t: - param_grad = param_grad * t / norm(param_grad) - - C[param_name] += param_grad ** 2 - update = learning_rate * param_grad / (np.sqrt(C[param_name]) + eps) - self.cache = C - return param - update - - -class RMSProp(OptimizerBase): - def __init__( - self, learning_rate=0.001, decay=0.9, eps=1e-7, clip_norm=None, lr_scheduler=None, **kwargs - ): - """ - RMSProp optimizer. - - Notes - ----- - RMSProp was proposed as a refinement of :class:`AdaGrad` to reduce its - aggressive, monotonically decreasing learning rate. - - RMSProp uses a *decaying average* of the previous squared gradients - (second moment) rather than just the immediately preceding squared - gradient for its `previous_update` value. - - Equations:: - - cache[t] = decay * cache[t-1] + (1 - decay) * grad[t] ** 2 - update[t] = learning_rate * grad[t] / (np.sqrt(cache[t]) + eps) - param[t+1] = param[t] - update[t] - - Note that the ``**`` and ``/`` operations are elementwise. - - Parameters - ---------- - learning_rate : float - Learning rate for update. Default is 0.001. - decay : float in [0, 1] - Rate of decay for the moving average. Typical values are [0.9, - 0.99, 0.999]. Default is 0.9. - eps : float - Constant term to avoid divide-by-zero errors during the update calc. Default is 1e-7. - clip_norm : float or None - If not None, all param gradients are scaled to have maximum l2 norm of - `clip_norm` before computing update. Default is None. - lr_scheduler : str or :doc:`Scheduler ` object or None - The learning rate scheduler. If None, use a constant learning - rate equal to `learning_rate`. Default is None. 
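To make the contrast between the two caches concrete, a toy run (constant gradient, made-up settings) comparing AdaGrad's running sum with RMSProp's decaying average::

    import numpy as np

    lr, eps, decay = 0.1, 1e-7, 0.9
    ada_cache, rms_cache = 0.0, 0.0

    for step in range(1, 11):
        g = 1.0                                                # constant toy gradient
        ada_cache += g ** 2                                    # AdaGrad: monotone sum
        rms_cache = decay * rms_cache + (1 - decay) * g ** 2   # RMSProp: decaying average
        ada_step = lr * g / (np.sqrt(ada_cache) + eps)
        rms_step = lr * g / (np.sqrt(rms_cache) + eps)

    # After 10 steps AdaGrad's step has shrunk to lr / sqrt(10) ~= 0.032, while
    # RMSProp's step has settled just above lr as its decaying average saturates.
    print(round(ada_step, 4), round(rms_step, 4))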
- """ - if "lr" in kwargs: - learning_rate = kwargs["lr"] - print("UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.") - - super().__init__(learning_rate, lr_scheduler) - - self.cache = {} - self.hyperparameters = { - "id": "RMSProp", - "learning_rate": learning_rate, - "eps": eps, - "decay": decay, - "clip_norm": clip_norm, - "lr_scheduler": str(self.lr_scheduler), - } - - def __str__(self): - H = self.hyperparameters - sc = H["lr_scheduler"] - learning_rate, eps, dc, cn = H["learning_rate"], H["eps"], H["decay"], H["clip_norm"] - return "RMSProp(learning_rate={}, eps={}, decay={}, clip_norm={}, lr_scheduler={})".format( - learning_rate, eps, dc, cn, sc - ) - - def update(self, param, param_grad, param_name, cur_loss=None): - """ - Compute the RMSProp update for a given parameter. - - Parameters - ---------- - param : :py:class:`ndarray ` of shape (n, m) - The value of the parameter to be updated - param_grad : :py:class:`ndarray ` of shape (n, m) - The gradient of the loss function with respect to `param_name` - param_name : str - The name of the parameter - cur_loss : float or None - The training or validation loss for the current minibatch. Used for - learning rate scheduling e.g., by - :class:`~numpy_ml.neural_nets.schedulers.KingScheduler`. - Default is None. - - Returns - ------- - updated_params : :py:class:`ndarray ` of shape (n, m) - The value of `param` after applying the RMSProp update. - """ - C = self.cache - H = self.hyperparameters - eps, decay, clip_norm = H["eps"], H["decay"], H["clip_norm"] - learning_rate = self.lr_scheduler(self.cur_step, cur_loss) - - if param_name not in C: - C[param_name] = np.zeros_like(param_grad) - - # scale gradient to avoid explosion - t = np.inf if clip_norm is None else clip_norm - if norm(param_grad) > t: - param_grad = param_grad * t / norm(param_grad) - - C[param_name] = decay * C[param_name] + (1 - decay) * param_grad ** 2 - update = learning_rate * param_grad / (np.sqrt(C[param_name]) + eps) - self.cache = C - return param - update - - -class Adam(OptimizerBase): - def __init__( - self, - learning_rate=0.001, - decay1=0.9, - decay2=0.999, - eps=1e-7, - clip_norm=None, - lr_scheduler=None, - **kwargs - ): - """ - Adam (adaptive moment estimation) optimization algorithm. - - Notes - ----- - Designed to combine the advantages of :class:`AdaGrad`, which works - well with sparse gradients, and :class:`RMSProp`, which works well in - online and non-stationary settings. - - Parameters - ---------- - learning_rate : float - Learning rate for update. This parameter is ignored if using - :class:`~numpy_ml.neural_nets.schedulers.NoamScheduler`. - Default is 0.001. - decay1 : float - The rate of decay to use for in running estimate of the first - moment (mean) of the gradient. Default is 0.9. - decay2 : float - The rate of decay to use for in running estimate of the second - moment (variance) of the gradient. Default is 0.999. - eps : float - Constant term to avoid divide-by-zero errors during the update - calc. Default is 1e-7. - clip_norm : float - If not None, all param gradients are scaled to have maximum l2 norm of - `clip_norm` before computing update. Default is None. - lr_scheduler : str, or :doc:`Scheduler ` object, or None - The learning rate scheduler. If None, use a constant learning rate - equal to `learning_rate`. Default is None. 
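The ``update`` method that follows divides the running moment estimates by ``1 - decay ** t``; a one-step numeric sketch (toy values) of that bias correction::

    import numpy as np

    lr, d1, d2, eps = 0.001, 0.9, 0.999, 1e-7
    grad = np.array([0.5])
    mean, var, t = np.zeros(1), np.zeros(1), 1

    mean = d1 * mean + (1 - d1) * grad        # running first moment
    var = d2 * var + (1 - d2) * grad ** 2     # running second moment

    # without correction, the first-step estimates are shrunk toward zero
    m_hat = mean / (1 - d1 ** t)              # == grad on step 1
    v_hat = var / (1 - d2 ** t)               # == grad ** 2 on step 1

    update = lr * m_hat / (np.sqrt(v_hat) + eps)
    print(update)                             # ~[0.001]: the first step has magnitude ~lr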
- """ - if "lr" in kwargs: - learning_rate = kwargs["lr"] - print("UserWarning: The `lr` argument is deprecated, use `learning_rate` instead.") - - super().__init__(learning_rate, lr_scheduler) - - self.cache = {} - self.hyperparameters = { - "id": "Adam", - "learning_rate": learning_rate, - "eps": eps, - "decay1": decay1, - "decay2": decay2, - "clip_norm": clip_norm, - "lr_scheduler": str(self.lr_scheduler), - } - - def __str__(self): - H = self.hyperparameters - learning_rate, d1, d2 = H["learning_rate"], H["decay1"], H["decay2"] - eps, cn, sc = H["eps"], H["clip_norm"], H["lr_scheduler"] - return "Adam(learning_rate={}, decay1={}, decay2={}, eps={}, clip_norm={}, lr_scheduler={})".format( - learning_rate, d1, d2, eps, cn, sc - ) - - def update(self, param, param_grad, param_name, cur_loss=None): - """ - Compute the Adam update for a given parameter. - - Parameters - ---------- - param : :py:class:`ndarray ` of shape (n, m) - The value of the parameter to be updated. - param_grad : :py:class:`ndarray ` of shape (n, m) - The gradient of the loss function with respect to `param_name`. - param_name : str - The name of the parameter. - cur_loss : float - The training or validation loss for the current minibatch. Used for - learning rate scheduling e.g., by - :class:`~numpy_ml.neural_nets.schedulers.KingScheduler`. Default is - None. - - Returns - ------- - updated_params : :py:class:`ndarray ` of shape (n, m) - The value of `param` after applying the Adam update. - """ - C = self.cache - H = self.hyperparameters - d1, d2 = H["decay1"], H["decay2"] - eps, clip_norm = H["eps"], H["clip_norm"] - learning_rate = self.lr_scheduler(self.cur_step, cur_loss) - - if param_name not in C: - C[param_name] = { - "t": 0, - "mean": np.zeros_like(param_grad), - "var": np.zeros_like(param_grad), - } - - # scale gradient to avoid explosion - t = np.inf if clip_norm is None else clip_norm - if norm(param_grad) > t: - param_grad = param_grad * t / norm(param_grad) - - t = C[param_name]["t"] + 1 - var = C[param_name]["var"] - mean = C[param_name]["mean"] - - # update cache - C[param_name]["t"] = t - C[param_name]["var"] = d2 * var + (1 - d2) * param_grad ** 2 - C[param_name]["mean"] = d1 * mean + (1 - d1) * param_grad - self.cache = C - - # calc unbiased moment estimates and Adam update - v_hat = C[param_name]["var"] / (1 - d2 ** t) - m_hat = C[param_name]["mean"] / (1 - d1 ** t) - update = learning_rate * m_hat / (np.sqrt(v_hat) + eps) - return param - update diff --git a/aitk/keras/preprocessing/README.md b/aitk/keras/preprocessing/README.md deleted file mode 100644 index b0f90d7..0000000 --- a/aitk/keras/preprocessing/README.md +++ /dev/null @@ -1,24 +0,0 @@ -# Preprocessing -The preprocessing module implements common data preprocessing routines. - -- `nlp.py`: Routines and objects for handling text data. - - n-gram generators - - Word and character tokenization - - Punctuation and stop-word removal - - Vocabulary / unigram count objects - - [Huffman tree](https://en.wikipedia.org/wiki/Huffman_coding) encoding / decoding - - Term frequency-inverse document frequency ([tf-idf](https://en.wikipedia.org/wiki/Tf%E2%80%93idf)) encoding - -- `dsp.py`: Routines for handling audio and image data. 
- - Signal windowing - - Signal autocorrelation - - Discrete Fourier transform - - Discrete cosine transform (type II) - - Signal resampling via (bi-)linear interpolation and nearest neighbor - - Mel-frequency cepstral coefficients (MFCCs) ([Mermelstein, 1976](https://files.eric.ed.gov/fulltext/ED128870.pdf#page=93); [Davis & Mermelstein, 1980](https://pdfs.semanticscholar.org/24b8/7a58511919cc867a71f0b58328694dd494b3.pdf)) - -- `general.py`: General data preprocessing objects and functions. - - Feature hashing ([Moody, 1989](http://papers.nips.cc/paper/175-fast-learning-in-multi-resolution-hierarchies.pdf)) - - Mini-batch generators - - One-hot encoding / decoding - - Feature standardization diff --git a/aitk/keras/preprocessing/__init__.py b/aitk/keras/preprocessing/__init__.py deleted file mode 100644 index 021db2c..0000000 --- a/aitk/keras/preprocessing/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from . import general -from . import nlp -from . import dsp diff --git a/aitk/keras/preprocessing/dsp.py b/aitk/keras/preprocessing/dsp.py deleted file mode 100644 index 77f3c40..0000000 --- a/aitk/keras/preprocessing/dsp.py +++ /dev/null @@ -1,848 +0,0 @@ -import numpy as np -from numpy.lib.stride_tricks import as_strided - -from ..utils.windows import WindowInitializer - -####################################################################### -# Signal Resampling # -####################################################################### - - -def batch_resample(X, new_dim, mode="bilinear"): - """ - Resample each image (or similar grid-based 2D signal) in a batch to - `new_dim` using the specified resampling strategy. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_channels)` - An input image volume - new_dim : 2-tuple of `(out_rows, out_cols)` - The dimension to resample each image to - mode : {'bilinear', 'neighbor'} - The resampling strategy to employ. Default is 'bilinear'. - - Returns - ------- - resampled : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, in_channels)` - The resampled image volume. - """ - if mode == "bilinear": - interpolate = bilinear_interpolate - elif mode == "neighbor": - interpolate = nn_interpolate_2D - else: - raise NotImplementedError("Unrecognized resampling mode: {}".format(mode)) - - out_rows, out_cols = new_dim - n_ex, in_rows, in_cols, n_in = X.shape - - # compute coordinates to resample - x = np.tile(np.linspace(0, in_cols - 2, out_cols), out_rows) - y = np.repeat(np.linspace(0, in_rows - 2, out_rows), out_cols) - - # resample each image - resampled = [] - for i in range(n_ex): - r = interpolate(X[i, ...], x, y) - r = r.reshape(out_rows, out_cols, n_in) - resampled.append(r) - return np.dstack(resampled) - - -def nn_interpolate_2D(X, x, y): - """ - Estimates of the pixel values at the coordinates (x, y) in `X` using a - nearest neighbor interpolation strategy. - - Notes - ----- - Assumes the current entries in `X` reflect equally-spaced samples from a 2D - integer grid. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(in_rows, in_cols, in_channels)` - An input image sampled along a grid of `in_rows` by `in_cols`. 
- x : list of length `k` - A list of x-coordinates for the samples we wish to generate - y : list of length `k` - A list of y-coordinates for the samples we wish to generate - - Returns - ------- - samples : :py:class:`ndarray ` of shape `(k, in_channels)` - The samples for each (x,y) coordinate computed via nearest neighbor - interpolation - """ - nx, ny = np.around(x), np.around(y) - nx = np.clip(nx, 0, X.shape[1] - 1).astype(int) - ny = np.clip(ny, 0, X.shape[0] - 1).astype(int) - return X[ny, nx, :] - - -def nn_interpolate_1D(X, t): - """ - Estimates of the signal values at `X[t]` using a nearest neighbor - interpolation strategy. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(in_length, in_channels)` - An input image sampled along an integer `in_length` - t : list of length `k` - A list of coordinates for the samples we wish to generate - - Returns - ------- - samples : :py:class:`ndarray ` of shape `(k, in_channels)` - The samples for each (x,y) coordinate computed via nearest neighbor - interpolation - """ - nt = np.clip(np.around(t), 0, X.shape[0] - 1).astype(int) - return X[nt, :] - - -def bilinear_interpolate(X, x, y): - """ - Estimates of the pixel values at the coordinates (x, y) in `X` via bilinear - interpolation. - - Notes - ----- - Assumes the current entries in X reflect equally-spaced - samples from a 2D integer grid. - - Modified from https://bit.ly/2NMb1Dr - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(in_rows, in_cols, in_channels)` - An input image sampled along a grid of `in_rows` by `in_cols`. - x : list of length `k` - A list of x-coordinates for the samples we wish to generate - y : list of length `k` - A list of y-coordinates for the samples we wish to generate - - Returns - ------- - samples : list of length `(k, in_channels)` - The samples for each (x,y) coordinate computed via bilinear - interpolation - """ - x0 = np.floor(x).astype(int) - y0 = np.floor(y).astype(int) - x1 = x0 + 1 - y1 = y0 + 1 - - x0 = np.clip(x0, 0, X.shape[1] - 1) - y0 = np.clip(y0, 0, X.shape[0] - 1) - x1 = np.clip(x1, 0, X.shape[1] - 1) - y1 = np.clip(y1, 0, X.shape[0] - 1) - - Ia = X[y0, x0, :].T - Ib = X[y1, x0, :].T - Ic = X[y0, x1, :].T - Id = X[y1, x1, :].T - - wa = (x1 - x) * (y1 - y) - wb = (x1 - x) * (y - y0) - wc = (x - x0) * (y1 - y) - wd = (x - x0) * (y - y0) - - return (Ia * wa).T + (Ib * wb).T + (Ic * wc).T + (Id * wd).T - - -####################################################################### -# Fourier Decomposition # -####################################################################### - - -def DCT(frame, orthonormal=True): - """ - A naive :math:`O(N^2)` implementation of the 1D discrete cosine transform-II - (DCT-II). - - Notes - ----- - For a signal :math:`\mathbf{x} = [x_1, \ldots, x_N]` consisting of `N` - samples, the `k` th DCT coefficient, :math:`c_k`, is - - .. math:: - - c_k = 2 \sum_{n=0}^{N-1} x_n \cos(\pi k (2 n + 1) / (2 N)) - - where `k` ranges from :math:`0, \ldots, N-1`. - - The DCT is highly similar to the DFT -- whereas in a DFT the basis - functions are sinusoids, in a DCT they are restricted solely to cosines. A - signal's DCT representation tends to have more of its energy concentrated - in a smaller number of coefficients when compared to the DFT, and is thus - commonly used for signal compression. [1] - - .. [1] Smoother signals can be accurately approximated using fewer DFT / DCT - coefficients, resulting in a higher compression ratio. 
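A numeric spot-check (toy 2 x 2 image) of the corner weights used in ``bilinear_interpolate`` above: at the centre of a cell, every corner gets weight 0.25::

    import numpy as np

    X = np.arange(4, dtype=float).reshape(2, 2, 1)   # corner values 0, 1, 2, 3

    x0, y0, x1, y1 = 0, 0, 1, 1
    x, y = 0.5, 0.5                                  # query point at the cell centre

    wa = (x1 - x) * (y1 - y)                         # weight for X[y0, x0]
    wb = (x1 - x) * (y - y0)                         # weight for X[y1, x0]
    wc = (x - x0) * (y1 - y)                         # weight for X[y0, x1]
    wd = (x - x0) * (y - y0)                         # weight for X[y1, x1]

    val = wa * X[y0, x0] + wb * X[y1, x0] + wc * X[y0, x1] + wd * X[y1, x1]
    print(val)                                       # [1.5], the mean of the four corners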
The DCT naturally - yields a continuous extension at the signal boundaries due its use of - even basis functions (cosine). This in turn produces a smoother - extension in comparison to DFT or DCT approximations, resulting in a - higher compression. - - Parameters - ---------- - frame : :py:class:`ndarray ` of shape `(N,)` - A signal frame consisting of N samples - orthonormal : bool - Scale to ensure the coefficient vector is orthonormal. Default is True. - - Returns - ------- - dct : :py:class:`ndarray ` of shape `(N,)` - The discrete cosine transform of the samples in `frame`. - """ - N = len(frame) - out = np.zeros_like(frame) - for k in range(N): - for (n, xn) in enumerate(frame): - out[k] += xn * np.cos(np.pi * k * (2 * n + 1) / (2 * N)) - scale = np.sqrt(1 / (4 * N)) if k == 0 else np.sqrt(1 / (2 * N)) - out[k] *= 2 * scale if orthonormal else 2 - return out - - -def __DCT2(frame): - """Currently broken""" - N = len(frame) # window length - - k = np.arange(N, dtype=float) - F = k.reshape(1, -1) * k.reshape(-1, 1) - K = np.divide(F, k, out=np.zeros_like(F), where=F != 0) - - FC = np.cos(F * np.pi / N + K * np.pi / 2 * N) - return 2 * (FC @ frame) - - -def DFT(frame, positive_only=True): - """ - A naive :math:`O(N^2)` implementation of the 1D discrete Fourier transform (DFT). - - Notes - ----- - The Fourier transform decomposes a signal into a linear combination of - sinusoids (ie., basis elements in the space of continuous periodic - functions). For a sequence :math:`\mathbf{x} = [x_1, \ldots, x_N]` of N - evenly spaced samples, the `k` th DFT coefficient is given by: - - .. math:: - - c_k = \sum_{n=0}^{N-1} x_n \exp(-2 \pi i k n / N) - - where `i` is the imaginary unit, `k` is an index ranging from `0, ..., N-1`, - and :math:`X_k` is the complex coefficient representing the phase - (imaginary part) and amplitude (real part) of the `k` th sinusoid in the - DFT spectrum. The frequency of the `k` th sinusoid is :math:`(k 2 \pi / N)` - radians per sample. - - When applied to a real-valued input, the negative frequency terms are the - complex conjugates of the positive-frequency terms and the overall spectrum - is symmetric (excluding the first index, which contains the zero-frequency - / intercept term). - - Parameters - ---------- - frame : :py:class:`ndarray ` of shape `(N,)` - A signal frame consisting of N samples - positive_only : bool - Whether to only return the coefficients for the positive frequency - terms. Default is True. - - Returns - ------- - spectrum : :py:class:`ndarray ` of shape `(N,)` or `(N // 2 + 1,)` if `real_only` - The coefficients of the frequency spectrum for `frame`, including - imaginary components. - """ - N = len(frame) # window length - - # F[i,j] = coefficient for basis vector i, timestep j (i.e., k * n) - F = np.arange(N).reshape(1, -1) * np.arange(N).reshape(-1, 1) - F = np.exp(F * (-1j * 2 * np.pi / N)) - - # vdot only operates on vectors (rather than ndarrays), so we have to - # loop over each basis vector in F explicitly - spectrum = np.array([np.vdot(f, frame) for f in F]) - return spectrum[: (N // 2) + 1] if positive_only else spectrum - - -def dft_bins(N, fs=44000, positive_only=True): - """ - Calc the frequency bin centers for a DFT with `N` coefficients. - - Parameters - ---------- - N : int - The number of frequency bins in the DFT - fs : int - The sample rate/frequency of the signal (in Hz). Default is 44000. - positive_only : bool - Whether to only return the bins for the positive frequency - terms. Default is True. 
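A direct implementation of the DFT formula above (positive frequencies only), checked against ``np.fft.rfft`` on made-up data::

    import numpy as np

    x = np.random.rand(8)
    N = len(x)
    n = np.arange(N)

    # c_k = sum_n x_n * exp(-2*pi*i*k*n / N) for k = 0 .. N // 2
    spectrum = np.array(
        [np.sum(x * np.exp(-2j * np.pi * k * n / N)) for k in range(N // 2 + 1)]
    )

    assert np.allclose(spectrum, np.fft.rfft(x))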
- - Returns - ------- - bins : :py:class:`ndarray ` of shape `(N,)` or `(N // 2 + 1,)` if `positive_only` - The frequency bin centers associated with each coefficient in the - DFT spectrum - """ - if positive_only: - freq_bins = np.linspace(0, fs / 2, 1 + N // 2, endpoint=True) - else: - l, r = (1 + (N - 1) / 2, (1 - N) / 2) if N % 2 else (N / 2, -N / 2) - freq_bins = np.r_[np.arange(l), np.arange(r, 0)] * fs / N - return freq_bins - - -def magnitude_spectrum(frames): - """ - Compute the magnitude spectrum (i.e., absolute value of the DFT spectrum) - for each frame in `frames`. Assumes each frame is real-valued only. - - Parameters - ---------- - frames : :py:class:`ndarray ` of shape `(M, N)` - A sequence of `M` frames each consisting of `N` samples - - Returns - ------- - magnitude_spec : :py:class:`ndarray ` of shape `(M, N // 2 + 1)` - The magnitude spectrum for each frame in `frames`. Only includes the - coefficients for the positive spectrum frequencies. - """ - return np.vstack([np.abs(DFT(frame, positive_only=True)) for frame in frames]) - - -def power_spectrum(frames, scale=False): - """ - Compute the power spectrum for a signal represented as a collection of - frames. Assumes each frame is real-valued only. - - The power spectrum is simply the square of the magnitude spectrum, possibly - scaled by the number of FFT bins. It measures how the energy of the signal - is distributed over the frequency domain. - - Parameters - ---------- - frames : :py:class:`ndarray ` of shape `(M, N)` - A sequence of `M` frames each consisting of `N` samples - scale : bool - Whether the scale by the number of DFT bins. Default is False. - - Returns - ------- - power_spec : :py:class:`ndarray ` of shape `(M, N // 2 + 1)` - The power spectrum for each frame in `frames`. Only includes the - coefficients for the positive spectrum frequencies. - """ - scaler = frames.shape[1] // 2 + 1 if scale else 1 - return (1 / scaler) * magnitude_spectrum(frames) ** 2 - - -####################################################################### -# Preprocessing Utils # -####################################################################### - - -def to_frames(x, frame_width, stride, writeable=False): - """ - Convert a 1D signal x into overlapping windows of width `frame_width` using - a hop length of `stride`. - - Notes - ----- - If ``(len(x) - frame_width) % stride != 0`` then some number of the samples - in x will be dropped. Specifically:: - - n_dropped_frames = len(x) - frame_width - stride * (n_frames - 1) - - where:: - - n_frames = (len(x) - frame_width) // stride + 1 - - This method uses low-level stride manipulation to avoid creating an - additional copy of `x`. The downside is that if ``writeable`=True``, - modifying the `frame` output can result in unexpected behavior: - - >>> out = to_frames(np.arange(6), 5, 1) - >>> out - array([[0, 1, 2, 3, 4], - [1, 2, 3, 4, 5]]) - >>> out[0, 1] = 99 - >>> out - array([[ 0, 99, 2, 3, 4], - [99, 2, 3, 4, 5]]) - - Parameters - ---------- - x : :py:class:`ndarray ` of shape `(N,)` - A 1D signal consisting of N samples - frame_width : int - The width of a single frame window in samples - stride : int - The hop size / number of samples advanced between consecutive frames - writeable : bool - If set to False, the returned array will be readonly. Otherwise it will - be writable if `x` was. It is advisable to set this to False whenever - possible to avoid unexpected behavior (see NB 2 above). Default is False. 
- - Returns - ------- - frame: :py:class:`ndarray ` of shape `(n_frames, frame_width)` - The collection of overlapping frames stacked into a matrix - """ - assert x.ndim == 1 - assert stride >= 1 - assert len(x) >= frame_width - - # get the size for an element in x in bits - byte = x.itemsize - n_frames = (len(x) - frame_width) // stride + 1 - return as_strided( - x, - shape=(n_frames, frame_width), - strides=(byte * stride, byte), - writeable=writeable, - ) - - -def autocorrelate1D(x): - """ - Autocorrelate a 1D signal `x` with itself. - - Notes - ----- - The `k` th term in the 1 dimensional autocorrelation is - - .. math:: - - a_k = \sum_n x_{n + k} x_n - - NB. This is a naive :math:`O(N^2)` implementation. For a faster :math:`O(N - \log N)` approach using the FFT, see [1]. - - References - ---------- - .. [1] https://en.wikipedia.org/wiki/Autocorrelation#Efficient%computation - - Parameters - ---------- - x : :py:class:`ndarray ` of shape `(N,)` - A 1D signal consisting of N samples - - Returns - ------- - auto : :py:class:`ndarray ` of shape `(N,)` - The autocorrelation of `x` with itself - """ - N = len(x) - auto = np.zeros(N) - for k in range(N): - for n in range(N - k): - auto[k] += x[n + k] * x[n] - return auto - - -####################################################################### -# Filters # -####################################################################### - - -def preemphasis(x, alpha): - """ - Increase the amplitude of high frequency bands + decrease the amplitude of - lower bands. - - Notes - ----- - Preemphasis filtering is (was?) a common transform in speech processing, - where higher frequencies tend to be more useful during signal - disambiguation. - - .. math:: - - \\text{preemphasis}( x_t ) = x_t - \\alpha x_{t-1} - - Parameters - ---------- - x : :py:class:`ndarray ` of shape `(N,)` - A 1D signal consisting of `N` samples - alpha : float in [0, 1) - The preemphasis coefficient. A value of 0 corresponds to no - filtering - - Returns - ------- - out : :py:class:`ndarray ` of shape `(N,)` - The filtered signal - """ - return np.concatenate([x[:1], x[1:] - alpha * x[:-1]]) - - -def cepstral_lifter(mfccs, D): - """ - A simple sinusoidal filter applied in the Mel-frequency domain. - - Notes - ----- - Cepstral lifting helps to smooth the spectral envelope and dampen the - magnitude of the higher MFCC coefficients while keeping the other - coefficients unchanged. The filter function is: - - .. math:: - - \\text{lifter}( x_n ) = x_n \left(1 + \\frac{D \sin(\pi n / D)}{2}\\right) - - Parameters - ---------- - mfccs : :py:class:`ndarray ` of shape `(G, C)` - Matrix of Mel cepstral coefficients. Rows correspond to frames, columns - to cepstral coefficients - D : int in :math:`[0, +\infty]` - The filter coefficient. 0 corresponds to no filtering, larger values - correspond to greater amounts of smoothing - - Returns - ------- - out : :py:class:`ndarray ` of shape `(G, C)` - The lifter'd MFCC coefficients - """ - if D == 0: - return mfccs - n = np.arange(mfccs.shape[1]) - return mfccs * (1 + (D / 2) * np.sin(np.pi * n / D)) - - -def mel_spectrogram( - x, - window_duration=0.025, - stride_duration=0.01, - mean_normalize=True, - window="hamming", - n_filters=20, - center=True, - alpha=0.95, - fs=44000, -): - """ - Apply the Mel-filterbank to the power spectrum for a signal `x`. - - Notes - ----- - The Mel spectrogram is the projection of the power spectrum of the framed - and windowed signal onto the basis set provided by the Mel filterbank. 
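A quick check (standalone, random input) of the naive autocorrelation computed by ``autocorrelate1D`` above against NumPy's correlation routine::

    import numpy as np

    x = np.random.rand(16)
    N = len(x)

    # a_k = sum_n x[n + k] * x[n] for k = 0 .. N - 1
    auto = np.array([np.sum(x[k:] * x[: N - k]) for k in range(N)])

    # np.correlate in 'full' mode returns lags -(N-1) .. N-1; keep the non-negative ones
    assert np.allclose(auto, np.correlate(x, x, mode="full")[N - 1:])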
- - Parameters - ---------- - x : :py:class:`ndarray ` of shape `(N,)` - A 1D signal consisting of N samples - window_duration : float - The duration of each frame / window (in seconds). Default is 0.025. - stride_duration : float - The duration of the hop between consecutive windows (in seconds). - Default is 0.01. - mean_normalize : bool - Whether to subtract the coefficient means from the final filter values - to improve the signal-to-noise ratio. Default is True. - window : {'hamming', 'hann', 'blackman_harris'} - The windowing function to apply to the signal before FFT. Default is - 'hamming'. - n_filters : int - The number of mel filters to include in the filterbank. Default is 20. - center : bool - Whether to the `k` th frame of the signal should *begin* at index ``x[k * - stride_len]`` (center = False) or be *centered* at ``x[k * stride_len]`` - (center = True). Default is False. - alpha : float in [0, 1) - The coefficient for the preemphasis filter. A value of 0 corresponds to - no filtering. Default is 0.95. - fs : int - The sample rate/frequency for the signal. Default is 44000. - - Returns - ------- - filter_energies : :py:class:`ndarray ` of shape `(G, n_filters)` - The (possibly mean_normalized) power for each filter in the Mel - filterbank (i.e., the Mel spectrogram). Rows correspond to frames, - columns to filters - energy_per_frame : :py:class:`ndarray ` of shape `(G,)` - The total energy in each frame of the signal - """ - eps = np.finfo(float).eps - window_fn = WindowInitializer()(window) - - stride = round(stride_duration * fs) - frame_width = round(window_duration * fs) - N = frame_width - - # add a preemphasis filter to the raw signal - x = preemphasis(x, alpha) - - # convert signal to overlapping frames and apply a window function - x = np.pad(x, N // 2, "reflect") if center else x - frames = to_frames(x, frame_width, stride, fs) - - window = np.tile(window_fn(frame_width), (frames.shape[0], 1)) - frames = frames * window - - # compute the power spectrum - power_spec = power_spectrum(frames) - energy_per_frame = np.sum(power_spec, axis=1) - energy_per_frame[energy_per_frame == 0] = eps - - # compute the power at each filter in the Mel filterbank - fbank = mel_filterbank(N, n_filters=n_filters, fs=fs) - filter_energies = power_spec @ fbank.T - filter_energies -= np.mean(filter_energies, axis=0) if mean_normalize else 0 - filter_energies[filter_energies == 0] = eps - return filter_energies, energy_per_frame - - -####################################################################### -# Mel-Frequency Features # -####################################################################### - - -def mfcc( - x, - fs=44000, - n_mfccs=13, - alpha=0.95, - center=True, - n_filters=20, - window="hann", - normalize=True, - lifter_coef=22, - stride_duration=0.01, - window_duration=0.025, - replace_intercept=True, -): - """ - Compute the Mel-frequency cepstral coefficients (MFCC) for a signal. - - Notes - ----- - Computing MFCC features proceeds in the following stages: - - 1. Convert the signal into overlapping frames and apply a window fn - 2. Compute the power spectrum at each frame - 3. Apply the mel filterbank to the power spectra to get mel filterbank powers - 4. Take the logarithm of the mel filterbank powers at each frame - 5. 
Take the discrete cosine transform (DCT) of the log filterbank - energies and retain only the first k coefficients to further reduce - the dimensionality - - MFCCs were developed in the context of HMM-GMM automatic speech recognition - (ASR) systems and can be used to provide a somewhat speaker/pitch - invariant representation of phonemes. - - Parameters - ---------- - x : :py:class:`ndarray ` of shape `(N,)` - A 1D signal consisting of N samples - fs : int - The sample rate/frequency for the signal. Default is 44000. - n_mfccs : int - The number of cepstral coefficients to return (including the intercept - coefficient). Default is 13. - alpha : float in [0, 1) - The preemphasis coefficient. A value of 0 corresponds to no - filtering. Default is 0.95. - center : bool - Whether to the kth frame of the signal should *begin* at index ``x[k * - stride_len]`` (center = False) or be *centered* at ``x[k * stride_len]`` - (center = True). Default is True. - n_filters : int - The number of filters to include in the Mel filterbank. Default is 20. - normalize : bool - Whether to mean-normalize the MFCC values. Default is True. - lifter_coef : int in :math:[0, + \infty]` - The cepstral filter coefficient. 0 corresponds to no filtering, larger - values correspond to greater amounts of smoothing. Default is 22. - window : {'hamming', 'hann', 'blackman_harris'} - The windowing function to apply to the signal before taking the DFT. - Default is 'hann'. - stride_duration : float - The duration of the hop between consecutive windows (in seconds). - Default is 0.01. - window_duration : float - The duration of each frame / window (in seconds). Default is 0.025. - replace_intercept : bool - Replace the first MFCC coefficient (the intercept term) with the - log of the total frame energy instead. Default is True. - - Returns - ------- - mfccs : :py:class:`ndarray ` of shape `(G, C)` - Matrix of Mel-frequency cepstral coefficients. Rows correspond to - frames, columns to cepstral coefficients - """ - # map the power spectrum for the (framed + windowed representation of) `x` - # onto the mel scale - filter_energies, frame_energies = mel_spectrogram( - x=x, - fs=fs, - alpha=alpha, - center=center, - window=window, - n_filters=n_filters, - mean_normalize=False, - window_duration=window_duration, - stride_duration=stride_duration, - ) - - log_energies = 10 * np.log10(filter_energies) - - # perform a DCT on the log-mel coefficients to further reduce the data - # dimensionality -- the early DCT coefficients will capture the majority of - # the data, allowing us to discard coefficients > n_mfccs - mfccs = np.array([DCT(frame) for frame in log_energies])[:, :n_mfccs] - - mfccs = cepstral_lifter(mfccs, D=lifter_coef) - mfccs -= np.mean(mfccs, axis=0) if normalize else 0 - - if replace_intercept: - # the 0th MFCC coefficient doesn't tell us anything about the spectrum; - # replace it with the log of the frame energy for something more - # informative - mfccs[:, 0] = np.log(frame_energies) - return mfccs - - -def mel2hz(mel, formula="htk"): - """ - Convert the mel-scale representation of a signal into Hz - - Parameters - ---------- - mel : :py:class:`ndarray ` of shape `(N, \*)` - An array of mel frequencies to convert - formula : {"htk", "slaney"} - The Mel formula to use. "htk" uses the formula used by the Hidden - Markov Model Toolkit, and described in O'Shaughnessy (1987). "slaney" - uses the formula used in the MATLAB auditory toolbox (Slaney, 1998). 
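A round-trip sanity check (toy frequencies) of the HTK formulas used by ``hz2mel`` and ``mel2hz``::

    import numpy as np

    hz = np.array([0.0, 440.0, 1000.0, 8000.0])

    mel = 2595 * np.log10(1 + hz / 700)      # hz2mel, HTK formula
    back = 700 * (10 ** (mel / 2595) - 1)    # mel2hz, HTK formula

    assert np.allclose(back, hz)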
- Default is 'htk' - - Returns - ------- - hz : :py:class:`ndarray ` of shape `(N, \*)` - The frequencies of the items in `mel`, in Hz - """ - fstr = "formula must be either 'htk' or 'slaney' but got '{}'" - assert formula in ["htk", "slaney"], fstr.format(formula) - if formula == "htk": - return 700 * (10 ** (mel / 2595) - 1) - raise NotImplementedError("slaney") - - -def hz2mel(hz, formula="htk"): - """ - Convert the frequency representaiton of a signal in Hz into the mel scale. - - Parameters - ---------- - hz : :py:class:`ndarray ` of shape `(N, \*)` - The frequencies of the items in `mel`, in Hz - formula : {"htk", "slaney"} - The Mel formula to use. "htk" uses the formula used by the Hidden - Markov Model Toolkit, and described in O'Shaughnessy (1987). "slaney" - uses the formula used in the MATLAB auditory toolbox (Slaney, 1998). - Default is 'htk'. - - Returns - ------- - mel : :py:class:`ndarray ` of shape `(N, \*)` - An array of mel frequencies to convert. - """ - fstr = "formula must be either 'htk' or 'slaney' but got '{}'" - assert formula in ["htk", "slaney"], fstr.format(formula) - - if formula == "htk": - return 2595 * np.log10(1 + hz / 700) - raise NotImplementedError("slaney") - - -def mel_filterbank( - N, n_filters=20, fs=44000, min_freq=0, max_freq=None, normalize=True -): - """ - Compute the filters in a Mel filterbank and return the corresponding - transformation matrix - - Notes - ----- - The Mel scale is a perceptual scale designed to simulate the way the human - ear works. Pitches judged by listeners to be equal in perceptual / - psychological distance have equal distance on the Mel scale. Practically, - this corresponds to a scale with higher resolution at low frequencies and - lower resolution at higher (> 500 Hz) frequencies. - - Each filter in the Mel filterbank is triangular with a response of 1 at its - center and a linear decay on both sides until it reaches the center - frequency of the next adjacent filter. - - This implementation is based on code in the (superb) LibROSA package [1]. - - References - ---------- - .. [1] McFee et al. (2015). "librosa: Audio and music signal analysis in - Python", *Proceedings of the 14th Python in Science Conference* - https://librosa.github.io - - Parameters - ---------- - N : int - The number of DFT bins - n_filters : int - The number of mel filters to include in the filterbank. Default is 20. - min_freq : int - Minimum filter frequency (in Hz). Default is 0. - max_freq : int - Maximum filter frequency (in Hz). Default is 0. - fs : int - The sample rate/frequency for the signal. Default is 44000. - normalize : bool - If True, scale the Mel filter weights by their area in Mel space. - Default is True. - - Returns - ------- - fbank : :py:class:`ndarray ` of shape `(n_filters, N // 2 + 1)` - The mel-filterbank transformation matrix. Rows correspond to filters, - columns to DFT bins. - """ - max_freq = fs / 2 if max_freq is None else max_freq - min_mel, max_mel = hz2mel(min_freq), hz2mel(max_freq) - - fbank = np.zeros((n_filters, N // 2 + 1)) - - # uniformly spaced values on the mel scale, translated back into Hz - mel_bins = mel2hz(np.linspace(min_mel, max_mel, n_filters + 2)) - - # the centers of the frequency bins for the DFT - hz_bins = dft_bins(N, fs) - - mel_spacing = np.diff(mel_bins) - - # ramps[i] = mel_bins[i] - hz_bins - ramps = mel_bins.reshape(-1, 1) - hz_bins.reshape(1, -1) - for i in range(n_filters): - # calc the filter values on the left and right across the bins ... 
- left = -ramps[i] / mel_spacing[i] - right = ramps[i + 2] / mel_spacing[i + 1] - - # .. and set them zero when they cross the x-axis - fbank[i] = np.maximum(0, np.minimum(left, right)) - - if normalize: - energy_norm = 2.0 / (mel_bins[2 : n_filters + 2] - mel_bins[:n_filters]) - fbank *= energy_norm[:, np.newaxis] - - return fbank diff --git a/aitk/keras/preprocessing/general.py b/aitk/keras/preprocessing/general.py deleted file mode 100644 index a53ac2b..0000000 --- a/aitk/keras/preprocessing/general.py +++ /dev/null @@ -1,388 +0,0 @@ -import json -import hashlib -import warnings - -import numpy as np - -try: - from scipy.sparse import csr_matrix - - _SCIPY = True -except ImportError: - warnings.warn("Scipy not installed. FeatureHasher can only create dense matrices") - _SCIPY = False - - -def minibatch(X, batchsize=256, shuffle=True): - """ - Compute the minibatch indices for a training dataset. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(N, \*)` - The dataset to divide into minibatches. Assumes the first dimension - represents the number of training examples. - batchsize : int - The desired size of each minibatch. Note, however, that if ``X.shape[0] % - batchsize > 0`` then the final batch will contain fewer than batchsize - entries. Default is 256. - shuffle : bool - Whether to shuffle the entries in the dataset before dividing into - minibatches. Default is True. - - Returns - ------- - mb_generator : generator - A generator which yields the indices into `X` for each batch. - n_batches: int - The number of batches. - """ - N = X.shape[0] - ix = np.arange(N) - n_batches = int(np.ceil(N / batchsize)) - - if shuffle: - np.random.shuffle(ix) - - def mb_generator(): - for i in range(n_batches): - yield ix[i * batchsize : (i + 1) * batchsize] - - return mb_generator(), n_batches - - -class OneHotEncoder: - def __init__(self): - """ - Convert between category labels and their one-hot vector - representations. - - Parameters - ---------- - categories : list of length `C` - List of the unique category labels for the items to encode. - """ - self._is_fit = False - self.hyperparameters = {} - self.parameters = {"categories": None} - - def __call__(self, labels): - return self.transform(labels) - - def fit(self, categories): - """ - Create mappings between columns and category labels. - - Parameters - ---------- - categories : list of length `C` - List of the unique category labels for the items to encode. - """ - self.parameters["categories"] = categories - self.cat2idx = {c: i for i, c in enumerate(categories)} - self.idx2cat = {i: c for i, c in enumerate(categories)} - self._is_fit = True - - def transform(self, labels, categories=None): - """ - Convert a list of labels into a one-hot encoding. - - Parameters - ---------- - labels : list of length `N` - A list of category labels. - categories : list of length `C` - List of the unique category labels for the items to encode. Default - is None. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(N, C)` - The one-hot encoded labels. Each row corresponds to an example, - with a single 1 in the column corresponding to the respective - label. 
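The transform that follows builds the encoding with a single fancy-indexing assignment; a standalone sketch of that trick on made-up labels::

    import numpy as np

    labels = ["cat", "dog", "cat", "bird"]
    categories = sorted(set(labels))                  # ['bird', 'cat', 'dog']
    cat2idx = {c: i for i, c in enumerate(categories)}

    N, C = len(labels), len(categories)
    cols = np.array([cat2idx[c] for c in labels])

    Y = np.zeros((N, C))
    Y[np.arange(N), cols] = 1                         # one 1 per row, in the label's column
    print(Y)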
- """ - if not self._is_fit: - categories = set(labels) if categories is None else categories - self.fit(categories) - - unknown = list(set(labels) - set(self.cat2idx.keys())) - assert len(unknown) == 0, "Unrecognized label(s): {}".format(unknown) - - N, C = len(labels), len(self.cat2idx) - cols = np.array([self.cat2idx[c] for c in labels]) - - Y = np.zeros((N, C)) - Y[np.arange(N), cols] = 1 - return Y - - def inverse_transform(self, Y): - """ - Convert a one-hot encoding back into the corresponding labels - - Parameters - ---------- - Y : :py:class:`ndarray ` of shape `(N, C)` - One-hot encoded labels. Each row corresponds to an example, with a - single 1 in the column associated with the label for that example - - Returns - ------- - labels : list of length `N` - The list of category labels corresponding to the nonzero columns in - `Y` - """ - C = len(self.cat2idx) - assert Y.ndim == 2, "Y must be 2D, but has shape {}".format(Y.shape) - assert Y.shape[1] == C, "Y must have {} columns, got {}".format(C, Y.shape[1]) - return [self.idx2cat[ix] for ix in Y.nonzero()[1]] - - -class Standardizer: - def __init__(self, with_mean=True, with_std=True): - """ - Feature-wise standardization for vector inputs. - - Notes - ----- - Due to the sensitivity of empirical mean and standard deviation - calculations to extreme values, `Standardizer` cannot guarantee - balanced feature scales in the presence of outliers. In particular, - note that because outliers for each feature can have different - magnitudes, the spread of the transformed data on each feature can be - very different. - - Similar to sklearn, `Standardizer` uses a biased estimator for the - standard deviation: ``numpy.std(x, ddof=0)``. - - Parameters - ---------- - with_mean : bool - Whether to scale samples to have 0 mean during transformation. - Default is True. - with_std : bool - Whether to scale samples to have unit variance during - transformation. Default is True. - """ - self.with_mean = with_mean - self.with_std = with_std - self._is_fit = False - - @property - def hyperparameters(self): - H = {"with_mean": self.with_mean, "with_std": self.with_std} - return H - - @property - def parameters(self): - params = { - "mean": self._mean if hasattr(self, "mean") else None, - "std": self._std if hasattr(self, "std") else None, - } - return params - - def __call__(self, X): - return self.transform(X) - - def fit(self, X): - """ - Store the feature-wise mean and standard deviation across the samples - in `X` for future scaling. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(N, C)` - An array of N samples, each with dimensionality `C` - """ - if not isinstance(X, np.ndarray): - X = np.array(X) - - if X.shape[0] < 2: - raise ValueError("`X` must contain at least 2 samples") - - std = np.ones(X.shape[1]) - mean = np.zeros(X.shape[1]) - - if self.with_mean: - mean = np.mean(X, axis=0) - - if self.with_std: - std = np.std(X, axis=0, ddof=0) - - self._mean = mean - self._std = std - self._is_fit = True - - def transform(self, X): - """ - Standardize features by removing the mean and scaling to unit variance. - - For a sample `x`, the standardized score is calculated as: - - .. math:: - - z = (x - u) / s - - where `u` is the mean of the training samples or zero if `with_mean` is - False, and `s` is the standard deviation of the training samples or 1 - if `with_std` is False. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(N, C)` - An array of N samples, each with dimensionality `C`. 
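A standalone check (random data) that the standardization above and its inverse round-trip, using the same biased standard deviation as ``fit``::

    import numpy as np

    X = np.random.rand(10, 3) * 5 + 2
    mean, std = X.mean(axis=0), X.std(axis=0, ddof=0)   # biased estimator, as in fit()

    Z = (X - mean) / std                                 # z = (x - u) / s
    assert np.allclose(Z.mean(axis=0), 0)
    assert np.allclose(Z.std(axis=0), 1)
    assert np.allclose(Z * std + mean, X)                # inverse transform recovers X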
- - Returns - ------- - Z : :py:class:`ndarray ` of shape `(N, C)` - The feature-wise standardized version of `X`. - """ - if not self._is_fit: - raise Exception("Must call `fit` before using the `transform` method") - return (X - self._mean) / self._std - - def inverse_transform(self, Z): - """ - Convert a collection of standardized features back into the original - feature space. - - For a standardized sample `z`, the unstandardized score is calculated as: - - .. math:: - - x = z s + u - - where `u` is the mean of the training samples or zero if `with_mean` is - False, and `s` is the standard deviation of the training samples or 1 - if `with_std` is False. - - Parameters - ---------- - Z : :py:class:`ndarray ` of shape `(N, C)` - An array of `N` standardized samples, each with dimensionality `C`. - - Returns - ------- - X : :py:class:`ndarray ` of shape `(N, C)` - The unstandardixed samples from `Z`. - """ - assert self._is_fit, "Must fit `Standardizer` before calling inverse_transform" - P = self.parameters - mean, std = P["mean"], P["std"] - return Z * std + mean - - -class FeatureHasher: - def __init__(self, n_dim=256, sparse=True): - """ - Convert a collection of features to a fixed-dimensional matrix using - the hashing trick. - - Notes - ----- - Uses the md5 hash. - - Parameters - ---------- - n_dim : int - The dimensionality of each example in the output feature matrix. - Small numbers of features are likely to cause hash collisions, but - large numbers will cause larger overall parameter dimensions for - any (linear) learning agent. Default is 256. - sparse : bool - Whether the resulting feature matrix should be a sparse - :py:class:`csr_matrix ` or dense - :py:class:`ndarray `. Default is True. - """ - self.n_dim = n_dim - self.hash = hashlib.md5 - self.sparse = sparse and _SCIPY - - def encode(self, examples): - """ - Encode a collection of multi-featured examples into a - `n_dim`-dimensional feature matrix via feature hashing. - - Notes - ----- - Feature hashing works by applying a hash function to the features of an - example and using the hash values as column indices in the resulting - feature matrix. The entries at each hashed feature column correspond to - the values for that example and feature. For example, given the - following two input examples: - - >>> examples = [ - {"furry": 1, "quadruped": 1, "domesticated": 1}, - {"nocturnal": 1, "quadruped": 1}, - ] - - and a hypothetical hash function `H` mapping strings to [0, 127], we have: - - >>> feature_mat = zeros(2, 128) - >>> ex1_cols = [H("furry"), H("quadruped"), H("domesticated")] - >>> ex2_cols = [H("nocturnal"), H("quadruped")] - >>> feat_mat[0, ex1_cols] = 1 - >>> feat_mat[1, ex2_cols] = 1 - - To better handle hash collisions, it is common to multiply the feature - value by the sign of the digest for the corresponding feature name. - - Parameters - ---------- - examples : dict or list of dicts - A collection of `N` examples, each represented as a dict where keys - correspond to the feature name and values correspond to the feature - value. 
- - Returns - ------- - table : :py:class:`ndarray ` or :py:class:`csr_matrix ` of shape `(N, n_dim)` - The encoded feature matrix - """ - if isinstance(examples, dict): - examples = [examples] - - sparse = self.sparse - return self._encode_sparse(examples) if sparse else self._encode_dense(examples) - - def _encode_dense(self, examples): - N = len(examples) - table = np.zeros(N, self.n_dim) # dense - - for row, feat_dict in enumerate(examples): - for f_id, val in feat_dict.items(): - if isinstance(f_id, str): - f_id = f_id.encode("utf-8") - - # use json module to convert the feature id into a unique - # string compatible with the buffer API (required by hashlib) - if isinstance(f_id, (tuple, dict, list)): - f_id = json.dumps(f_id, sort_keys=True).encode("utf-8") - - h = int(self.hash(f_id).hexdigest(), base=16) - col = h % self.n_dim - table[row, col] += np.sign(h) * val - - return table - - def _encode_sparse(self, examples): - N = len(examples) - idxs, data = [], [] - - for row, feat_dict in enumerate(examples): - for f_id, val in feat_dict.items(): - if isinstance(f_id, str): - f_id = f_id.encode("utf-8") - - # use json module to convert the feature id into a unique - # string compatible with the buffer API (required by hashlib) - if isinstance(f_id, (tuple, dict, list)): - f_id = json.dumps(f_id, sort_keys=True).encode("utf-8") - - h = int(self.hash(f_id).hexdigest(), base=16) - col = h % self.n_dim - idxs.append((row, col)) - data.append(np.sign(h) * val) - - table = csr_matrix((data, zip(*idxs)), shape=(N, self.n_dim)) - return table diff --git a/aitk/keras/preprocessing/nlp.py b/aitk/keras/preprocessing/nlp.py deleted file mode 100644 index 68fc28e..0000000 --- a/aitk/keras/preprocessing/nlp.py +++ /dev/null @@ -1,1229 +0,0 @@ -"""Common preprocessing utilities for working with text data""" -import re -import heapq -import os.path as op -from collections import Counter - -import numpy as np - - -# This list of English stop words is taken from the "Glasgow Information -# Retrieval Group". 
The original list can be found at -# http://ir.dcs.gla.ac.uk/resources/linguistic_utils/stop_words -_STOP_WORDS = { - "a", - "about", - "above", - "across", - "after", - "afterwards", - "again", - "against", - "all", - "almost", - "alone", - "along", - "already", - "also", - "although", - "always", - "am", - "among", - "amongst", - "amoungst", - "amount", - "an", - "and", - "another", - "any", - "anyhow", - "anyone", - "anything", - "anyway", - "anywhere", - "are", - "around", - "as", - "at", - "back", - "be", - "became", - "because", - "become", - "becomes", - "becoming", - "been", - "before", - "beforehand", - "behind", - "being", - "below", - "beside", - "besides", - "between", - "beyond", - "bill", - "both", - "bottom", - "but", - "by", - "call", - "can", - "cannot", - "cant", - "co", - "con", - "could", - "couldnt", - "cry", - "de", - "describe", - "detail", - "do", - "done", - "down", - "due", - "during", - "each", - "eg", - "eight", - "either", - "eleven", - "else", - "elsewhere", - "empty", - "enough", - "etc", - "even", - "ever", - "every", - "everyone", - "everything", - "everywhere", - "except", - "few", - "fifteen", - "fifty", - "fill", - "find", - "fire", - "first", - "five", - "for", - "former", - "formerly", - "forty", - "found", - "four", - "from", - "front", - "full", - "further", - "get", - "give", - "go", - "had", - "has", - "hasnt", - "have", - "he", - "hence", - "her", - "here", - "hereafter", - "hereby", - "herein", - "hereupon", - "hers", - "herself", - "him", - "himself", - "his", - "how", - "however", - "hundred", - "i", - "ie", - "if", - "in", - "inc", - "indeed", - "interest", - "into", - "is", - "it", - "its", - "itself", - "keep", - "last", - "latter", - "latterly", - "least", - "less", - "ltd", - "made", - "many", - "may", - "me", - "meanwhile", - "might", - "mill", - "mine", - "more", - "moreover", - "most", - "mostly", - "move", - "much", - "must", - "my", - "myself", - "name", - "namely", - "neither", - "never", - "nevertheless", - "next", - "nine", - "no", - "nobody", - "none", - "noone", - "nor", - "not", - "nothing", - "now", - "nowhere", - "of", - "off", - "often", - "on", - "once", - "one", - "only", - "onto", - "or", - "other", - "others", - "otherwise", - "our", - "ours", - "ourselves", - "out", - "over", - "own", - "part", - "per", - "perhaps", - "please", - "put", - "rather", - "re", - "same", - "see", - "seem", - "seemed", - "seeming", - "seems", - "serious", - "several", - "she", - "should", - "show", - "side", - "since", - "sincere", - "six", - "sixty", - "so", - "some", - "somehow", - "someone", - "something", - "sometime", - "sometimes", - "somewhere", - "still", - "such", - "system", - "take", - "ten", - "than", - "that", - "the", - "their", - "them", - "themselves", - "then", - "thence", - "there", - "thereafter", - "thereby", - "therefore", - "therein", - "thereupon", - "these", - "they", - "thick", - "thin", - "third", - "this", - "those", - "though", - "three", - "through", - "throughout", - "thru", - "thus", - "to", - "together", - "too", - "top", - "toward", - "towards", - "twelve", - "twenty", - "two", - "un", - "under", - "until", - "up", - "upon", - "us", - "very", - "via", - "was", - "we", - "well", - "were", - "what", - "whatever", - "when", - "whence", - "whenever", - "where", - "whereafter", - "whereas", - "whereby", - "wherein", - "whereupon", - "wherever", - "whether", - "which", - "while", - "whither", - "who", - "whoever", - "whole", - "whom", - "whose", - "why", - "will", - "with", - "within", - "without", - "would", - 
"yet", - "you", - "your", - "yours", - "yourself", - "yourselves", -} - -_PUNCTUATION = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" - -_WORD_REGEX = re.compile(r"(?u)\b\w\w+\b") # sklearn default -_PUNC_TABLE = str.maketrans("", "", _PUNCTUATION) - - -def ngrams(sequence, N): - """Return all `N`-grams of the elements in `sequence`""" - assert N >= 1 - return list(zip(*[sequence[i:] for i in range(N)])) - - -def tokenize_words(line, lowercase=True, filter_stopwords=True): - """ - Split a string into individual lower-case words, optionally removing - punctuation and stop-words in the process - """ - words = _WORD_REGEX.findall(line.lower() if lowercase else line) - return remove_stop_words(words) if filter_stopwords else words - - -def tokenize_chars(line, lowercase=True, filter_punctuation=True): - """ - Split a string into individual lower-case words, optionally removing - punctuation and stop-words in the process - """ - line = line.lower() if lowercase else line - line = strip_punctuation(line) if filter_punctuation else line - chars = list(re.sub(" {2,}", " ", line).strip()) - return chars - - -def remove_stop_words(words): - """Remove stop words from a list of word strings""" - return [w for w in words if w not in _STOP_WORDS] - - -def strip_punctuation(line): - """Remove punctuation from a string""" - return line.translate(_PUNC_TABLE).strip() - - -####################################################################### -# Huffman Tree # -####################################################################### - - -class Node(object): - def __init__(self, key, val): - self.key = key - self.val = val - self.left = None - self.right = None - - def __gt__(self, other): - """Greater than""" - if not isinstance(other, Node): - return -1 - return self.val > other.val - - def __ge__(self, other): - """Greater than or equal to""" - if not isinstance(other, Node): - return -1 - return self.val >= other.val - - def __lt__(self, other): - """Less than""" - if not isinstance(other, Node): - return -1 - return self.val < other.val - - def __le__(self, other): - """Less than or equal to""" - if not isinstance(other, Node): - return -1 - return self.val <= other.val - - -class HuffmanEncoder(object): - def fit(self, text): - """ - Build a Huffman tree for the tokens in `text` and compute each token's - binary encoding. - - Notes - ----- - In a Huffman code, tokens that occur more frequently are (generally) - represented using fewer bits. Huffman codes produce the minimum expected - codeword length among all methods for encoding tokens individually. - - Huffman codes correspond to paths through a binary tree, with 1 - corresponding to "move right" and 0 corresponding to "move left". In - contrast to standard binary trees, the Huffman tree is constructed from the - bottom up. Construction begins by initializing a min-heap priority queue - consisting of each token in the corpus, with priority corresponding to the - token frequency. At each step, the two most infrequent tokens in the corpus - are removed and become the children of a parent pseudotoken whose - "frequency" is the sum of the frequencies of its children. This new parent - pseudotoken is added to the priority queue and the process is repeated - recursively until no tokens remain. - - Parameters - ---------- - text: list of strs or :class:`Vocabulary` instance - The tokenized text or a pretrained :class:`Vocabulary` object to use for - building the Huffman code. 
- """ - self._build_tree(text) - self._generate_codes() - - def transform(self, text): - """ - Transform the words in `text` into their Huffman-code representations. - - Parameters - ---------- - text: list of `N` strings - The list of words to encode - - Returns - ------- - codes : list of `N` binary strings - The encoded words in `text` - """ - if isinstance(text, str): - text = [text] - for token in set(text): - if token not in self._item2code: - raise Warning("Token '{}' not in Huffman tree. Skipping".format(token)) - return [self._item2code.get(t, None) for t in text] - - def inverse_transform(self, codes): - """ - Transform an encoded sequence of bit-strings back into words. - - Parameters - ---------- - codes : list of `N` binary strings - A list of encoded bit-strings, represented as strings. - - Returns - ------- - text: list of `N` strings - The decoded text. - """ - if isinstance(codes, str): - codes = [codes] - for code in set(codes): - if code not in self._code2item: - raise Warning("Code '{}' not in Huffman tree. Skipping".format(code)) - return [self._code2item.get(c, None) for c in codes] - - @property - def tokens(self): - """A list the unique tokens in `text`""" - return list(self._item2code.keys()) - - @property - def codes(self): - """A list with the Huffman code for each unique token in `text`""" - return list(self._code2item.keys()) - - def _counter(self, text): - counts = {} - for item in text: - counts[item] = counts.get(item, 0) + 1 - return counts - - def _build_tree(self, text): - """Construct Huffman Tree""" - PQ = [] - - if isinstance(text, Vocabulary): - counts = text.counts - else: - counts = self._counter(text) - - for (k, c) in counts.items(): - PQ.append(Node(k, c)) - - # create a priority queue with priority = item frequency - heapq.heapify(PQ) - - while len(PQ) > 1: - node1 = heapq.heappop(PQ) # item with smallest frequency - node2 = heapq.heappop(PQ) # item with second smallest frequency - - parent = Node(None, node1.val + node2.val) - parent.left = node1 - parent.right = node2 - - heapq.heappush(PQ, parent) - - self._root = heapq.heappop(PQ) - - def _generate_codes(self): - current_code = "" - self._item2code = {} - self._code2item = {} - self._build_code(self._root, current_code) - - def _build_code(self, root, current_code): - if root is None: - return - - if root.key is not None: - self._item2code[root.key] = current_code - self._code2item[current_code] = root.key - return - - # 0 = move left, 1 = move right - self._build_code(root.left, current_code + "0") - self._build_code(root.right, current_code + "1") - - -####################################################################### -# Containers # -####################################################################### - - -class Token: - def __init__(self, word): - self.count = 0 - self.word = word - - def __repr__(self): - """A string representation of the token""" - return "Token(word='{}', count={})".format(self.word, self.count) - - -class TFIDFEncoder: - def __init__( - self, - vocab=None, - lowercase=True, - min_count=0, - smooth_idf=True, - max_tokens=None, - input_type="filename", - filter_stopwords=True, - ): - r""" - An object for compiling and encoding the term-frequency - inverse-document-frequency (TF-IDF) representation of the tokens in a - text corpus. - - Notes - ----- - TF-IDF is intended to reflect how important a word is to a document in - a collection or corpus. For a word token `w` in a document `d`, and a - corpus, :math:`D = \{d_1, \ldots, d_N\}`, we have: - - .. 
math:: - \text{TF}(w, d) &= \text{num. occurences of }w \text{ in document }d \\ - \text{IDF}(w, D) &= \log \frac{|D|}{|\{ d \in D: t \in d \}|} - - Parameters - ---------- - vocab : :class:`Vocabulary` object or list-like - An existing vocabulary to filter the tokens in the corpus against. - Default is None. - lowercase : bool - Whether to convert each string to lowercase before tokenization. - Default is True. - min_count : int - Minimum number of times a token must occur in order to be included - in vocab. Default is 0. - smooth_idf : bool - Whether to add 1 to the denominator of the IDF calculation to avoid - divide-by-zero errors. Default is True. - max_tokens : int - Only add the `max_tokens` most frequent tokens that occur more - than `min_count` to the vocabulary. If None, add all tokens - greater that occur more than than `min_count`. Default is None. - input_type : {'filename', 'strings'} - If 'files', the sequence input to `fit` is expected to be a list - of filepaths. If 'strings', the input is expected to be a list of - lists, each sublist containing the raw strings for a single - document in the corpus. Default is 'filename'. - filter_stopwords : bool - Whether to remove stopwords before encoding the words in the - corpus. Default is True. - """ - # create a function to filter against words in the vocab - self._filter_vocab = lambda words: words - if isinstance(vocab, Vocabulary): - self._filter_vocab = vocab.filter - elif isinstance(vocab, (list, np.ndarray, set)): - vocab = set(vocab) - self._filter_vocab = lambda words: [ - w if w in vocab else "" for w in words - ] - - if input_type not in ["files", "strings"]: - fstr = "`input_type` must be either 'files' or 'strings', but got {}" - raise ValueError(fstr.format(input_type)) - - self._tokens = None - self._idx2doc = None - self.term_freq = None - self.idx2token = None - self.token2idx = None - self.inv_doc_freq = None - - self.hyperparameters = { - "id": "TFIDFEncoder", - "encoding": None, - "vocab": vocab - if not isinstance(vocab, Vocabulary) - else vocab.hyperparameters, - "lowercase": lowercase, - "min_count": min_count, - "input_type": input_type, - "max_tokens": max_tokens, - "smooth_idf": smooth_idf, - "filter_stopwords": filter_stopwords - if not isinstance(vocab, Vocabulary) - else vocab.hyperparameters["filter_stopwords"], - } - - def fit(self, corpus_seq, encoding="utf-8-sig"): - """ - Compute term-frequencies and inverse document frequencies on a - collection of documents. - - Parameters - ---------- - corpus_seq : str or list of strs - The filepath / list of filepaths / raw string contents of the - document(s) to be encoded, in accordance with the `input_type` - parameter passed to the :meth:`__init__` method. Each document is - expected to be a newline-separated strings of text, with adjacent - tokens separated by a whitespace character. - encoding : str - Specifies the text encoding for corpus if `input_type` is `files`. - Common entries are either 'utf-8' (no header byte), or 'utf-8-sig' - (header byte). Default is 'utf-8-sig'. 
- """ - H = self.hyperparameters - - if isinstance(corpus_seq, str): - corpus_seq = [corpus_seq] - - if H["input_type"] == "files": - for corpus_fp in corpus_seq: - assert op.isfile(corpus_fp), "{} does not exist".format(corpus_fp) - - tokens = [] - idx2token, token2idx = {}, {} - - # encode special tokens - for tt in ["", "", ""]: - token2idx[tt] = len(tokens) - idx2token[len(tokens)] = tt - tokens.append(Token(tt)) - - min_count = H["min_count"] - max_tokens = H["max_tokens"] - H["encoding"] = encoding - - bol_ix = token2idx[""] - eol_ix = token2idx[""] - idx2doc, term_freq = {}, {} - - # encode the text in `corpus_fps` without any filtering ... - for d_ix, doc in enumerate(corpus_seq): - doc_count = {} - idx2doc[d_ix] = doc if H["input_type"] == "files" else None - token2idx, idx2token, tokens, doc_count = self._encode_document( - doc, token2idx, idx2token, tokens, doc_count, bol_ix, eol_ix, - ) - term_freq[d_ix] = doc_count - - self._tokens = tokens - self._idx2doc = idx2doc - self.token2idx = token2idx - self.idx2token = idx2token - self.term_freq = term_freq - - # ... retain only the top `max_tokens` most frequent tokens, coding - # everything else as ... - if max_tokens is not None and len(tokens) > max_tokens: - self._keep_top_n_tokens() - - # ... replace all words occurring less than `min_count` by ... - if min(self._tokens, key=lambda t: t.count).count < min_count: - self._drop_low_freq_tokens() - - # ... sort tokens alphabetically and reindex ... - self._sort_tokens() - - # ... finally, calculate inverse document frequency - self._calc_idf() - - def _encode_document( - self, doc, word2idx, idx2word, tokens, doc_count, bol_ix, eol_ix, - ): - """Perform tokenization and compute token counts for a single document""" - H = self.hyperparameters - lowercase = H["lowercase"] - filter_stop = H["filter_stopwords"] - - if H["input_type"] == "files": - with open(doc, "r", encoding=H["encoding"]) as handle: - doc = handle.read() - - n_words = 0 - lines = doc.split("\n") - for line in lines: - words = tokenize_words(line, lowercase, filter_stop) - words = self._filter_vocab(words) - n_words += len(words) - - for ww in words: - if ww not in word2idx: - word2idx[ww] = len(tokens) - idx2word[len(tokens)] = ww - tokens.append(Token(ww)) - - t_idx = word2idx[ww] - tokens[t_idx].count += 1 - doc_count[t_idx] = doc_count.get(t_idx, 0) + 1 - - # wrap line in and tags - tokens[bol_ix].count += 1 - tokens[eol_ix].count += 1 - - doc_count[bol_ix] = doc_count.get(bol_ix, 0) + 1 - doc_count[eol_ix] = doc_count.get(eol_ix, 0) + 1 - return word2idx, idx2word, tokens, doc_count - - def _keep_top_n_tokens(self): - N = self.hyperparameters["max_tokens"] - doc_counts, word2idx, idx2word = {}, {}, {} - tokens = sorted(self._tokens, key=lambda x: x.count, reverse=True) - - # reindex the top-N tokens... - unk_ix = None - for idx, tt in enumerate(tokens[:N]): - word2idx[tt.word] = idx - idx2word[idx] = tt.word - - if tt.word == "": - unk_ix = idx - - # ... if isn't in the top-N, add it, replacing the Nth - # most-frequent word and adjust the count accordingly ... - if unk_ix is None: - unk_ix = self.token2idx[""] - old_count = tokens[N - 1].count - tokens[N - 1] = self._tokens[unk_ix] - tokens[N - 1].count += old_count - word2idx[""] = N - 1 - idx2word[N - 1] = "" - - # ... and recode all dropped tokens as "" - for tt in tokens[N:]: - tokens[unk_ix].count += tt.count - - # ... 
finally, reindex the word counts for each document - doc_counts = {} - for d_ix in self.term_freq.keys(): - doc_counts[d_ix] = {} - for old_ix, d_count in self.term_freq[d_ix].items(): - word = self.idx2token[old_ix] - new_ix = word2idx.get(word, unk_ix) - doc_counts[d_ix][new_ix] = doc_counts[d_ix].get(new_ix, 0) + d_count - - self._tokens = tokens[:N] - self.token2idx = word2idx - self.idx2token = idx2word - self.term_freq = doc_counts - - assert len(self._tokens) <= N - - def _drop_low_freq_tokens(self): - """ - Replace all tokens that occur less than `min_count` with the `` - token. - """ - H = self.hyperparameters - unk_token = self._tokens[self.token2idx[""]] - eol_token = self._tokens[self.token2idx[""]] - bol_token = self._tokens[self.token2idx[""]] - tokens = [unk_token, eol_token, bol_token] - - unk_idx = 0 - word2idx = {"": 0, "": 1, "": 2} - idx2word = {0: "", 1: "", 2: ""} - special = {"", "", ""} - - for tt in self._tokens: - if tt.word not in special: - if tt.count < H["min_count"]: - tokens[unk_idx].count += tt.count - else: - word2idx[tt.word] = len(tokens) - idx2word[len(tokens)] = tt.word - tokens.append(tt) - - # reindex document counts - doc_counts = {} - for d_idx in self.term_freq.keys(): - doc_counts[d_idx] = {} - for old_idx, d_count in self.term_freq[d_idx].items(): - word = self.idx2token[old_idx] - new_idx = word2idx.get(word, unk_idx) - doc_counts[d_idx][new_idx] = doc_counts[d_idx].get(new_idx, 0) + d_count - - self._tokens = tokens - self.token2idx = word2idx - self.idx2token = idx2word - self.term_freq = doc_counts - - def _sort_tokens(self): - # sort tokens alphabetically and recode - ix = 0 - token2idx, idx2token, = {}, {} - special = ["", "", ""] - words = sorted(self.token2idx.keys()) - term_freq = {d: {} for d in self.term_freq.keys()} - - for w in words: - if w not in special: - old_ix = self.token2idx[w] - token2idx[w], idx2token[ix] = ix, w - for d in self.term_freq.keys(): - if old_ix in self.term_freq[d]: - count = self.term_freq[d][old_ix] - term_freq[d][ix] = count - ix += 1 - - for w in special: - token2idx[w] = len(token2idx) - idx2token[len(idx2token)] = w - - self.token2idx = token2idx - self.idx2token = idx2token - self.term_freq = term_freq - self.vocab_counts = Counter({t.word: t.count for t in self._tokens}) - - def _calc_idf(self): - """ - Compute the (smoothed-) inverse-document frequency for each token in - the corpus. - - For a word token `w`, the IDF is simply - - IDF(w) = log ( |D| / |{ d in D: w in d }| ) + 1 - - where D is the set of all documents in the corpus, - - D = {d1, d2, ..., dD} - - If `smooth_idf` is True, we perform additive smoothing on the number of - documents containing a given word, equivalent to pretending that there - exists a final D+1st document that contains every word in the corpus: - - SmoothedIDF(w) = log ( |D| + 1 / [1 + |{ d in D: w in d }|] ) + 1 - """ - inv_doc_freq = {} - smooth_idf = self.hyperparameters["smooth_idf"] - tf, doc_idxs = self.term_freq, self._idx2doc.keys() - - D = len(self._idx2doc) + int(smooth_idf) - for word, w_ix in self.token2idx.items(): - d_count = int(smooth_idf) - d_count += np.sum([1 if w_ix in tf[d_ix] else 0 for d_ix in doc_idxs]) - inv_doc_freq[w_ix] = 1 if d_count == 0 else np.log(D / d_count) + 1 - self.inv_doc_freq = inv_doc_freq - - def transform(self, ignore_special_chars=True): - """ - Generate the term-frequency inverse-document-frequency encoding of a - text corpus. 
- - Parameters - ---------- - ignore_special_chars : bool - Whether to drop columns corresponding to "", "", and - "" tokens from the final tfidf encoding. Default is True. - - Returns - ------- - tfidf : numpy array of shape `(D, M [- 3])` - The encoded corpus, with each row corresponding to a single - document, and each column corresponding to a token id. The mapping - between column numbers and tokens is stored in the `idx2token` - attribute IFF `ignore_special_chars` is False. Otherwise, the - mappings are not accurate. - """ - D, N = len(self._idx2doc), len(self._tokens) - tf = np.zeros((D, N)) - idf = np.zeros((D, N)) - - for d_ix in self._idx2doc.keys(): - words, counts = zip(*self.term_freq[d_ix].items()) - docs = np.ones(len(words), dtype=int) * d_ix - tf[docs, words] = counts - - words = sorted(self.idx2token.keys()) - idf = np.tile(np.array([self.inv_doc_freq[w] for w in words]), (D, 1)) - tfidf = tf * idf - - if ignore_special_chars: - idxs = [ - self.token2idx[""], - self.token2idx[""], - self.token2idx[""], - ] - tfidf = np.delete(tfidf, idxs, 1) - - return tfidf - - -class Vocabulary: - def __init__( - self, lowercase=True, min_count=None, max_tokens=None, filter_stopwords=True, - ): - """ - An object for compiling and encoding the unique tokens in a text corpus. - - Parameters - ---------- - lowercase : bool - Whether to convert each string to lowercase before tokenization. - Default is True. - min_count : int - Minimum number of times a token must occur in order to be included - in vocab. If `None`, include all tokens from `corpus_fp` in vocab. - Default is None. - max_tokens : int - Only add the `max_tokens` most frequent tokens that occur more - than `min_count` to the vocabulary. If None, add all tokens - greater that occur more than than `min_count`. Default is None. - filter_stopwords : bool - Whether to remove stopwords before encoding the words in the - corpus. Default is True. - """ - self.hyperparameters = { - "id": "Vocabulary", - "encoding": None, - "corpus_fps": None, - "lowercase": lowercase, - "min_count": min_count, - "max_tokens": max_tokens, - "filter_stopwords": filter_stopwords, - } - - def __len__(self): - """Return the number of tokens in the vocabulary""" - return len(self._tokens) - - def __iter__(self): - """Return an iterator over the tokens in the vocabulary""" - return iter(self._tokens) - - def __contains__(self, word): - """Assert whether `word` is a token in the vocabulary""" - return word in self.token2idx - - def __getitem__(self, key): - """ - Return the token (if key is an integer) or the index (if key is a string) - for the key in the vocabulary, if it exists. 
- """ - if isinstance(key, str): - return self._tokens[self.token2idx[key]] - if isinstance(key, int): - return self._tokens[key] - - @property - def n_tokens(self): - """The number of unique word tokens in the vocabulary""" - return len(self.token2idx) - - @property - def n_words(self): - """The total number of words in the corpus""" - return sum(self.counts.values()) - - @property - def shape(self): - """The number of unique word tokens in the vocabulary""" - return self._tokens.shape - - def most_common(self, n=5): - """Return the top `n` most common tokens in the corpus""" - return self.counts.most_common()[:n] - - def words_with_count(self, k): - """Return all tokens that occur `k` times in the corpus""" - return [w for w, c in self.counts.items() if c == k] - - def filter(self, words, unk=True): # noqa: A003 - """ - Filter or replace any word in `words` that does not occur in - `Vocabulary` - - Parameters - ---------- - words : list of strs - A list of words to filter - unk : bool - Whether to replace any out of vocabulary words in `words` with the - token (unk = True) or skip them entirely (unk = False). - Default is True. - - Returns - ------- - filtered : list of strs - The list of words filtered against the vocabulary. - """ - if unk: - return [w if w in self else "" for w in words] - return [w for w in words if w in self] - - def words_to_indices(self, words): - """ - Convert the words in `words` to their token indices. If a word is not - in the vocabulary, return the index for the token - - Parameters - ---------- - words : list of strs - A list of words to filter - - Returns - ------- - indices : list of ints - The token indices for each word in `words` - """ - unk_ix = self.token2idx[""] - lowercase = self.hyperparameters["lowercase"] - words = [w.lower() for w in words] if lowercase else words - return [self.token2idx[w] if w in self else unk_ix for w in words] - - def indices_to_words(self, indices): - """ - Convert the indices in `indices` to their word values. If an index is - not in the vocabulary, return the the token. - - Parameters - ---------- - indices : list of ints - The token indices for each word in `words` - - Returns - ------- - words : list of strs - The word strings corresponding to each token index in `indices` - """ - unk = "" - return [self.idx2token[i] if i in self.idx2token else unk for i in indices] - - def fit(self, corpus_fps, encoding="utf-8-sig"): - """ - Compute the vocabulary across a collection of documents. - - Parameters - ---------- - corpus_fps : str or list of strs - The filepath / list of filepaths for the document(s) to be encoded. - Each document is expected to be encoded as newline-separated - string of text, with adjacent tokens separated by a whitespace - character. - encoding : str - Specifies the text encoding for corpus. Common entries are either - 'utf-8' (no header byte), or 'utf-8-sig' (header byte). Default is - 'utf-8-sig'. 
- """ - if isinstance(corpus_fps, str): - corpus_fps = [corpus_fps] - - for corpus_fp in corpus_fps: - assert op.isfile(corpus_fp), "{} does not exist".format(corpus_fp) - - tokens = [] - H = self.hyperparameters - idx2word, word2idx = {}, {} - - min_count = H["min_count"] - lowercase = H["lowercase"] - max_tokens = H["max_tokens"] - filter_stop = H["filter_stopwords"] - - H["encoding"] = encoding - H["corpus_fps"] = corpus_fps - - # encode special tokens - for tt in ["", "", ""]: - word2idx[tt] = len(tokens) - idx2word[len(tokens)] = tt - tokens.append(Token(tt)) - - bol_ix = word2idx[""] - eol_ix = word2idx[""] - - for d_ix, doc_fp in enumerate(corpus_fps): - with open(doc_fp, "r", encoding=H["encoding"]) as doc: - for line in doc: - words = tokenize_words(line, lowercase, filter_stop) - - for ww in words: - if ww not in word2idx: - word2idx[ww] = len(tokens) - idx2word[len(tokens)] = ww - tokens.append(Token(ww)) - - t_idx = word2idx[ww] - tokens[t_idx].count += 1 - - # wrap line in and tags - tokens[bol_ix].count += 1 - tokens[eol_ix].count += 1 - - self._tokens = tokens - self.token2idx = word2idx - self.idx2token = idx2word - - # replace all words occurring less than `min_count` by - if min_count is not None: - self._drop_low_freq_tokens() - - # retain only the top `max_tokens` most frequent tokens, coding - # everything else as - if max_tokens is not None and len(tokens) > max_tokens: - self._keep_top_n_tokens() - - counts = {w: self._tokens[ix].count for w, ix in self.token2idx.items()} - self.counts = Counter(counts) - self._tokens = np.array(self._tokens) - - def _keep_top_n_tokens(self): - word2idx, idx2word = {}, {} - N = self.hyperparameters["max_tokens"] - tokens = sorted(self._tokens, key=lambda x: x.count, reverse=True) - - # reindex the top-N tokens... - unk_ix = None - for idx, tt in enumerate(tokens[:N]): - word2idx[tt.word] = idx - idx2word[idx] = tt.word - - if tt.word == "": - unk_ix = idx - - # ... if isn't in the top-N, add it, replacing the Nth - # most-frequent word and adjusting the count accordingly ... - if unk_ix is None: - unk_ix = self.token2idx[""] - old_count = tokens[N - 1].count - tokens[N - 1] = self._tokens[unk_ix] - tokens[N - 1].count += old_count - word2idx[""] = N - 1 - idx2word[N - 1] = "" - - # ... and recode all dropped tokens as "" - for tt in tokens[N:]: - tokens[unk_ix].count += tt.count - - self._tokens = tokens[:N] - self.token2idx = word2idx - self.idx2token = idx2word - - assert len(self._tokens) <= N - - def _drop_low_freq_tokens(self): - """ - Replace all tokens that occur less than `min_count` with the `` - token. 
- """ - unk_idx = 0 - unk_token = self._tokens[self.token2idx[""]] - eol_token = self._tokens[self.token2idx[""]] - bol_token = self._tokens[self.token2idx[""]] - - H = self.hyperparameters - tokens = [unk_token, eol_token, bol_token] - word2idx = {"": 0, "": 1, "": 2} - idx2word = {0: "", 1: "", 2: ""} - special = {"", "", ""} - - for tt in self._tokens: - if tt.word not in special: - if tt.count < H["min_count"]: - tokens[unk_idx].count += tt.count - else: - word2idx[tt.word] = len(tokens) - idx2word[len(tokens)] = tt.word - tokens.append(tt) - - self._tokens = tokens - self.token2idx = word2idx - self.idx2token = idx2word diff --git a/aitk/keras/schedulers/README.md b/aitk/keras/schedulers/README.md deleted file mode 100644 index 8c69927..0000000 --- a/aitk/keras/schedulers/README.md +++ /dev/null @@ -1,13 +0,0 @@ -# Learning Rate Schedulers -The `schedulers` module implements several common strategies for learning rate -decay: - -- Constant -- Exponential decay -- Noam/Transformer decay ([Vaswani et al., 2017](https://arxiv.org/pdf/1706.03762.pdf)) -- Davis King/Dlib decay ([King, 2018](http://blog.dlib.net/2018/02/automatic-learning-rate-scheduling-that.html)) - -## Plots -

[image: img/plot.png, plots of the learning-rate schedules listed above]

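For reference, the staircase form of the exponential decay listed above multiplies the learning rate by ``decay`` once per completed stage; a minimal sketch mirroring the ``ExponentialScheduler`` removed below, with hypothetical values::

    initial_lr, decay, stage_length = 1.0, 0.5, 100  # hypothetical values

    def lr_at(step):
        # staircase variant: decay is applied once per completed stage
        return initial_lr * decay ** (step // stage_length)

    # lr_at(0) -> 1.0, lr_at(100) -> 0.5, lr_at(250) -> 0.25

With ``staircase=False`` the exponent is the fractional ``step / stage_length``, giving a smooth rather than step-like decay.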
diff --git a/aitk/keras/schedulers/__init__.py b/aitk/keras/schedulers/__init__.py deleted file mode 100644 index 99bcd9d..0000000 --- a/aitk/keras/schedulers/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from .schedulers import * diff --git a/aitk/keras/schedulers/img/plot.png b/aitk/keras/schedulers/img/plot.png deleted file mode 100644 index 43a54fa..0000000 Binary files a/aitk/keras/schedulers/img/plot.png and /dev/null differ diff --git a/aitk/keras/schedulers/schedulers.py b/aitk/keras/schedulers/schedulers.py deleted file mode 100644 index 756f343..0000000 --- a/aitk/keras/schedulers/schedulers.py +++ /dev/null @@ -1,362 +0,0 @@ -from copy import deepcopy -from abc import ABC, abstractmethod - -import numpy as np - -from math import erf - - -def gaussian_cdf(x, mean, var): - """ - Compute the probability that a random draw from a 1D Gaussian with mean - `mean` and variance `var` is less than or equal to `x`. - """ - eps = np.finfo(float).eps - x_scaled = (x - mean) / np.sqrt(var + eps) - return (1 + erf(x_scaled / np.sqrt(2))) / 2 - - -class SchedulerBase(ABC): - def __init__(self): - """Abstract base class for all Scheduler objects.""" - self.hyperparameters = {} - - def __call__(self, step=None, cur_loss=None): - return self.learning_rate(step=step, cur_loss=cur_loss) - - def copy(self): - """Return a copy of the current object.""" - return deepcopy(self) - - def set_params(self, hparam_dict): - """Set the scheduler hyperparameters from a dictionary.""" - if hparam_dict is not None: - for k, v in hparam_dict.items(): - if k in self.hyperparameters: - self.hyperparameters[k] = v - - @abstractmethod - def learning_rate(self, step=None): - raise NotImplementedError - - -class ConstantScheduler(SchedulerBase): - def __init__(self, lr=0.01, **kwargs): - """ - Returns a fixed learning rate, regardless of the current step. - - Parameters - ---------- - initial_lr : float - The learning rate. Default is 0.01 - """ - super().__init__() - self.lr = lr - self.hyperparameters = {"id": "ConstantScheduler", "lr": self.lr} - - def __str__(self): - return "ConstantScheduler(lr={})".format(self.lr) - - def learning_rate(self, **kwargs): - """ - Return the current learning rate. - - Returns - ------- - lr : float - The learning rate - """ - return self.lr - - -class ExponentialScheduler(SchedulerBase): - def __init__( - self, initial_lr=0.01, stage_length=500, staircase=False, decay=0.1, **kwargs - ): - """ - An exponential learning rate scheduler. - - Notes - ----- - The exponential scheduler decays the learning rate by `decay` every - `stage_length` steps, starting from `initial_lr`:: - - learning_rate = initial_lr * decay ** curr_stage - - where:: - - curr_stage = step / stage_length if staircase = False - curr_stage = floor(step / stage_length) if staircase = True - - Parameters - ---------- - initial_lr : float - The learning rate at the first step. Default is 0.01. - stage_length : int - The length of each stage, in steps. Default is 500. - staircase : bool - If True, only adjusts the learning rate at the stage transitions, - producing a step-like decay schedule. If False, adjusts the - learning rate after each step, creating a smooth decay schedule. - Default is False. - decay : float - The amount to decay the learning rate at each new stage. Default is - 0.1. 
- """ - super().__init__() - self.decay = decay - self.staircase = staircase - self.initial_lr = initial_lr - self.stage_length = stage_length - self.hyperparameters = { - "id": "StepScheduler", - "decay": self.decay, - "staircase": self.staircase, - "initial_lr": self.initial_lr, - "stage_length": self.stage_length, - } - - def __str__(self): - return "ExponentialScheduler(initial_lr={}, stage_length={}, staircase={}, decay={})".format( - self.initial_lr, self.stage_length, self.staircase, self.decay - ) - - def learning_rate(self, step, **kwargs): - """ - Return the current learning rate as a function of `step`. - - Parameters - ---------- - step : int - The current step number. - - Returns - ------- - lr : float - The learning rate for the current step. - """ - cur_stage = step / self.stage_length - if self.staircase: - cur_stage = np.floor(cur_stage) - return self.initial_lr * self.decay ** cur_stage - - -class NoamScheduler(SchedulerBase): - def __init__(self, model_dim=512, scale_factor=1, warmup_steps=4000, **kwargs): - """ - The Noam learning rate scheduler, originally used in conjunction with - the Adam optimizer in [1]. - - Notes - ----- - The Noam scheduler increases the learning rate linearly for the first - `warmup_steps` steps, and decreases it thereafter proportionally to the - inverse square root of the step number:: - - lr = scale_factor * ( (model_dim ** (-0.5)) * adj_step ) - adj_step = min(step_num ** (-0.5), step_num * warmup_steps ** (-1.5)) - - References - ---------- - .. [1] Vaswani et al. (2017) "Attention is all you need". *31st - Conference on Neural Information Processing Systems*, - https://arxiv.org/pdf/1706.03762.pdf - - Parameters - ---------- - model_dim : int - The number of units in the layer output. Default is 512. - scale_factor : float - A fixed coefficient for rescaling the final learning rate. Default - is 1. - warmup_steps : int - The number of steps in the warmup stage of training. Default is - 4000. - """ - super().__init__() - self.model_dim = model_dim - self.scale_factor = scale_factor - self.warmup_steps = warmup_steps - self.hyperparameters = { - "id": "NoamScheduler", - "model_dim": self.model_dim, - "scale_factor": self.scale_factor, - "warmup_steps": self.warmup_steps, - } - - def __str__(self): - return "NoamScheduler(model_dim={}, scale_factor={}, warmup_steps={})".format( - self.model_dim, self.scale_factor, self.warmup_steps - ) - - def learning_rate(self, step, **kwargs): - warmup, d_model = self.warmup_steps, self.model_dim - new_lr = d_model ** (-0.5) * min(step ** (-0.5), step * warmup ** (-1.5)) - return self.scale_factor * new_lr - - -class KingScheduler(SchedulerBase): - def __init__(self, initial_lr=0.01, patience=1000, decay=0.99, **kwargs): - """ - The Davis King / DLib learning rate scheduler. - - Notes - ----- - The KingScheduler computes the probability that the slope of the OLS - fit to the loss history is negative. If the probability that it is - negative is less than 51% over the last `patience` steps, the scheduler - exponentially decreases the current learning rate by `decay`. - - References - ---------- - .. [1] King, D. (2018). "Automatic learning rate scheduling that really - works". http://blog.dlib.net/2018/02/automatic-learning-rate-scheduling-that.html - - Parameters - ---------- - initial_lr : float - The learning rate to begin at. Default is 0.01. - patience : int - Amount of time to maintain the current learning rate without a - decrease in loss before adjustment. Default is 1000. 
- decay : float - The amount to decay the learning rate at each new stage. Default is - 0.99. - """ - super().__init__() - self.decay = decay - self.patience = patience - self.initial_lr = initial_lr - self.current_lr = initial_lr - self.max_history = np.ceil(1.1 * (patience + 1)).astype(int) - - self.loss_history = [] - self.hyperparameters = { - "id": "KingScheduler", - "decay": self.decay, - "patience": self.patience, - "initial_lr": self.initial_lr, - } - - def __str__(self): - return "KingScheduler(initial_lr={}, patience={}, decay={})".format( - self.initial_lr, self.patience, self.decay - ) - - def _steps_without_decrease(self, robust=False, check_all=False): - """ - Returns the maximum number of timesteps for which `P(loss is decreasing) - < 0.51`. - - Parameters - ---------- - robust : bool - If `robust=True`, first filter out the largest 10% of the loss - values to remove transient spikes in the loss due to, e.g., a few - bad minibatches. Default is False. - check_all : bool - If False, returns the maximum number of timesteps for which P(loss - is decreasing) < 0.51. If True, only checks whether the number of - timesteps for which P(loss is decreasing) < 0.51 is equal to - ``self.patience``. The former provides more information but is - significantly more computationally expensive. Default is False. - - Returns - ------- - steps_without_decrease: int - The maximum number of steps back in loss_history for which P(loss - is decreasing) < 0.51. - """ - lh = np.array(self.loss_history) - - # drop top 10% of loss values to filter out large loss spikes - if robust: - thresh = np.quantile(lh, 0.9) - lh = np.array([i for i in lh if i <= thresh]) - - N = len(lh) - steps_without_decrease = 0 - if check_all: - for i in reversed(range(N - 2)): - if self._p_decreasing(lh, i) < 0.51: - steps_without_decrease = N - i - else: - i = max(0, N - self.patience - 1) - if self._p_decreasing(lh, i) < 0.51: - steps_without_decrease = N - i - return steps_without_decrease - - def _p_decreasing(self, loss_history, i): - """ - Compute the probability that the slope of the OLS fit to the loss - history is negative. - - Parameters - ---------- - loss_history : numpy array of shape (N,) - The sequence of loss values for the previous `N` minibatches. - i : int - Compute P(Slope < 0) beginning at index i in `history`. - - Returns - ------ - p_decreasing : float - The probability that the slope of the OLS fit to loss_history is - less than or equal to 0. - """ - loss = loss_history[i:] - N = len(loss) - - # perform OLS on the loss entries to calc the slope mean - X = np.c_[np.ones(N), np.arange(i, len(loss_history))] - intercept, s_mean = np.linalg.inv(X.T @ X) @ X.T @ loss - loss_pred = s_mean * X[:, 1] + intercept - - # compute the variance of our loss predictions and use this to compute - # the (unbiased) estimate of the slope variance - loss_var = 1 / (N - 2) * np.sum((loss - loss_pred) ** 2) - s_var = (12 * loss_var) / (N ** 3 - N) - - # compute the probability that a random sample from a Gaussian - # parameterized by s_mean and s_var is less than or equal to 0 - p_decreasing = gaussian_cdf(0, s_mean, s_var) - return p_decreasing - - def learning_rate(self, step, cur_loss): - """ - Compute the updated learning rate for the current step and loss. - - Parameters - ---------- - step : int - The current step number. Unused. - cur_loss : float - The loss at the current step. - - Returns - ------- - lr : float - The learning rate for the current step. 
- """ - if cur_loss is None: - raise ValueError("cur_loss must be a float, but got {}".format(cur_loss)) - - # this happens if we initialize the scheduler from a string / dict - if not hasattr(self, "max_history"): - self.max_history = np.ceil(1.1 * (self.patience + 1)).astype(int) - patience, max_history = self.patience, self.max_history - - self.loss_history.append(cur_loss) - if len(self.loss_history) < patience: - return self.current_lr - self.loss_history = self.loss_history[-max_history:] - - # if the loss has not decreased for `patience` timesteps, drop the - # learning rate - if ( - self._steps_without_decrease() > patience - and self._steps_without_decrease(robust=True) > patience - ): - self.current_lr *= self.decay - - return self.current_lr diff --git a/aitk/keras/utils/README.md b/aitk/keras/utils/README.md deleted file mode 100644 index e4231b3..0000000 --- a/aitk/keras/utils/README.md +++ /dev/null @@ -1,14 +0,0 @@ -# Utilities - -The `utils.py` module implements common, neural network-specific helper -functions, primarily for dealing with CNNs. It includes: - -- `im2col` -- `col2im` -- `conv1D` -- `conv2D` -- `dilate` -- `deconv2D` -- `minibatch` -- Various weight initialization utilities -- Various padding and convolution arithmetic utilities diff --git a/aitk/keras/utils/__init__.py b/aitk/keras/utils/__init__.py deleted file mode 100644 index 1a100c6..0000000 --- a/aitk/keras/utils/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -""" -Common neural network-specific helper functions. - -The ``neural_nets.utils` module contains neural network-specific helper -functions, primarily for dealing with CNNs. -""" - -from .utils import * diff --git a/aitk/keras/utils/utils.py b/aitk/keras/utils/utils.py deleted file mode 100644 index f435cfc..0000000 --- a/aitk/keras/utils/utils.py +++ /dev/null @@ -1,1052 +0,0 @@ -import numpy as np - -def topological_sort(layers): - """ - Given a list of layers, produce a topological - sorted list, from input(s) to output(s). - """ - stack = [] - visited = set() - for layer in reversed(layers): - if layer not in visited: - visit_node(layer, stack, visited) - return reversed(stack) - -def visit_node(layer, stack, visited): - """ - Utility function for topological_sort. - """ - visited.add(layer) - for out_layer in layer.output_layers: - if out_layer not in visited: - visit_node(out_layer, stack, visited) - stack.append(layer) - -####################################################################### -# Training Utils # -####################################################################### - - -def minibatch(X, batchsize=256, shuffle=True): - """ - Compute the minibatch indices for a training dataset. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(N, \*)` - The dataset to divide into minibatches. Assumes the first dimension - represents the number of training examples. - batchsize : int - The desired size of each minibatch. Note, however, that if ``X.shape[0] % - batchsize > 0`` then the final batch will contain fewer than batchsize - entries. Default is 256. - shuffle : bool - Whether to shuffle the entries in the dataset before dividing into - minibatches. Default is True. 
- - Returns - ------- - mb_generator : generator - A generator which yields the indices into X for each batch - n_batches: int - The number of batches - """ - N = X.shape[0] - ix = np.arange(N) - n_batches = int(np.ceil(N / batchsize)) - - if shuffle: - np.random.shuffle(ix) - - def mb_generator(): - for i in range(n_batches): - yield ix[i * batchsize : (i + 1) * batchsize] - - return mb_generator(), n_batches - - -####################################################################### -# Padding Utils # -####################################################################### - - -def calc_pad_dims_2D(X_shape, out_dim, kernel_shape, stride, dilation=0): - """ - Compute the padding necessary to ensure that convolving `X` with a 2D kernel - of shape `kernel_shape` and stride `stride` produces outputs with dimension - `out_dim`. - - Parameters - ---------- - X_shape : tuple of `(n_ex, in_rows, in_cols, in_ch)` - Dimensions of the input volume. Padding is applied to `in_rows` and - `in_cols`. - out_dim : tuple of `(out_rows, out_cols)` - The desired dimension of an output example after applying the - convolution. - kernel_shape : 2-tuple - The dimension of the 2D convolution kernel. - stride : int - The stride for the convolution kernel. - dilation : int - Number of pixels inserted between kernel elements. Default is 0. - - Returns - ------- - padding_dims : 4-tuple - Padding dims for `X`. Organized as (left, right, up, down) - """ - if not isinstance(X_shape, tuple): - raise ValueError("`X_shape` must be of type tuple") - - if not isinstance(out_dim, tuple): - raise ValueError("`out_dim` must be of type tuple") - - if not isinstance(kernel_shape, tuple): - raise ValueError("`kernel_shape` must be of type tuple") - - if not isinstance(stride, int): - raise ValueError("`stride` must be of type int") - - d = dilation - fr, fc = kernel_shape - out_rows, out_cols = out_dim - n_ex, in_rows, in_cols, in_ch = X_shape - - # update effective filter shape based on dilation factor - _fr, _fc = fr * (d + 1) - d, fc * (d + 1) - d - - pr = int((stride * (out_rows - 1) + _fr - in_rows) / 2) - pc = int((stride * (out_cols - 1) + _fc - in_cols) / 2) - - out_rows1 = int(1 + (in_rows + 2 * pr - _fr) / stride) - out_cols1 = int(1 + (in_cols + 2 * pc - _fc) / stride) - - # add asymmetric padding pixels to right / bottom - pr1, pr2 = pr, pr - if out_rows1 == out_rows - 1: - pr1, pr2 = pr, pr + 1 - elif out_rows1 != out_rows: - raise AssertionError - - pc1, pc2 = pc, pc - if out_cols1 == out_cols - 1: - pc1, pc2 = pc, pc + 1 - elif out_cols1 != out_cols: - raise AssertionError - - if any(np.array([pr1, pr2, pc1, pc2]) < 0): - raise ValueError( - "Padding cannot be less than 0. Got: {}".format((pr1, pr2, pc1, pc2)) - ) - return (pr1, pr2, pc1, pc2) - - -def calc_pad_dims_1D(X_shape, l_out, kernel_width, stride, dilation=0, causal=False): - """ - Compute the padding necessary to ensure that convolving `X` with a 1D kernel - of shape `kernel_shape` and stride `stride` produces outputs with length - `l_out`. - - Parameters - ---------- - X_shape : tuple of `(n_ex, l_in, in_ch)` - Dimensions of the input volume. Padding is applied on either side of - `l_in`. - l_out : int - The desired length an output example after applying the convolution. - kernel_width : int - The width of the 1D convolution kernel. - stride : int - The stride for the convolution kernel. - dilation : int - Number of pixels inserted between kernel elements. Default is 0. 
- causal : bool - Whether to compute the padding dims for a regular or causal - convolution. If causal, padding is added only to the left side of the - sequence. Default is False. - - Returns - ------- - padding_dims : 2-tuple - Padding dims for X. Organized as (left, right) - """ - if not isinstance(X_shape, tuple): - raise ValueError("`X_shape` must be of type tuple") - - if not isinstance(l_out, int): - raise ValueError("`l_out` must be of type int") - - if not isinstance(kernel_width, int): - raise ValueError("`kernel_width` must be of type int") - - if not isinstance(stride, int): - raise ValueError("`stride` must be of type int") - - d = dilation - fw = kernel_width - n_ex, l_in, in_ch = X_shape - - # update effective filter shape based on dilation factor - _fw = fw * (d + 1) - d - total_pad = int((stride * (l_out - 1) + _fw - l_in)) - - if not causal: - pw = total_pad // 2 - l_out1 = int(1 + (l_in + 2 * pw - _fw) / stride) - - # add asymmetric padding pixels to right / bottom - pw1, pw2 = pw, pw - if l_out1 == l_out - 1: - pw1, pw2 = pw, pw + 1 - elif l_out1 != l_out: - raise AssertionError - - if causal: - # if this is a causal convolution, only pad the left side of the - # sequence - pw1, pw2 = total_pad, 0 - l_out1 = int(1 + (l_in + total_pad - _fw) / stride) - assert l_out1 == l_out - - if any(np.array([pw1, pw2]) < 0): - raise ValueError("Padding cannot be less than 0. Got: {}".format((pw1, pw2))) - return (pw1, pw2) - - -def pad1D(X, pad, kernel_width=None, stride=None, dilation=0): - """ - Zero-pad a 3D input volume `X` along the second dimension. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, l_in, in_ch)` - Input volume. Padding is applied to `l_in`. - pad : tuple, int, or {'same', 'causal'} - The padding amount. If 'same', add padding to ensure that the output - length of a 1D convolution with a kernel of `kernel_shape` and stride - `stride` is the same as the input length. If 'causal' compute padding - such that the output both has the same length as the input AND - ``output[t]`` does not depend on ``input[t + 1:]``. If 2-tuple, - specifies the number of padding columns to add on each side of the - sequence. - kernel_width : int - The dimension of the 2D convolution kernel. Only relevant if p='same' - or 'causal'. Default is None. - stride : int - The stride for the convolution kernel. Only relevant if p='same' or - 'causal'. Default is None. - dilation : int - The dilation of the convolution kernel. Only relevant if p='same' or - 'causal'. Default is None. - - Returns - ------- - X_pad : :py:class:`ndarray ` of shape `(n_ex, padded_seq, in_channels)` - The padded output volume - p : 2-tuple - The number of 0-padded columns added to the (left, right) of the sequences - in `X`. - """ - p = pad - if isinstance(p, int): - p = (p, p) - - if isinstance(p, tuple): - X_pad = np.pad( - X, - pad_width=((0, 0), (p[0], p[1]), (0, 0)), - mode="constant", - constant_values=0, - ) - - # compute the correct padding dims for a 'same' or 'causal' convolution - if p in ["same", "causal"] and kernel_width and stride: - causal = p == "causal" - p = calc_pad_dims_1D( - X.shape, X.shape[1], kernel_width, stride, causal=causal, dilation=dilation - ) - X_pad, p = pad1D(X, p) - - return X_pad, p - - -def pad2D(X, pad, kernel_shape=None, stride=None, dilation=0): - """ - Zero-pad a 4D input volume `X` along the second and third dimensions. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - Input volume. 
Padding is applied to `in_rows` and `in_cols`. - pad : tuple, int, or 'same' - The padding amount. If 'same', add padding to ensure that the output of - a 2D convolution with a kernel of `kernel_shape` and stride `stride` - has the same dimensions as the input. If 2-tuple, specifies the number - of padding rows and colums to add *on both sides* of the rows/columns - in `X`. If 4-tuple, specifies the number of rows/columns to add to the - top, bottom, left, and right of the input volume. - kernel_shape : 2-tuple - The dimension of the 2D convolution kernel. Only relevant if p='same'. - Default is None. - stride : int - The stride for the convolution kernel. Only relevant if p='same'. - Default is None. - dilation : int - The dilation of the convolution kernel. Only relevant if p='same'. - Default is 0. - - Returns - ------- - X_pad : :py:class:`ndarray ` of shape `(n_ex, padded_in_rows, padded_in_cols, in_channels)` - The padded output volume. - p : 4-tuple - The number of 0-padded rows added to the (top, bottom, left, right) of - `X`. - """ - p = pad - if isinstance(p, int): - p = (p, p, p, p) - - if isinstance(p, tuple): - if len(p) == 2: - p = (p[0], p[0], p[1], p[1]) - - X_pad = np.pad( - X, - pad_width=((0, 0), (p[0], p[1]), (p[2], p[3]), (0, 0)), - mode="constant", - constant_values=0, - ) - - # compute the correct padding dims for a 'same' convolution - if p == "same" and kernel_shape and stride is not None: - p = calc_pad_dims_2D( - X.shape, X.shape[1:3], kernel_shape, stride, dilation=dilation - ) - X_pad, p = pad2D(X, p) - return X_pad, p - - -def dilate(X, d): - """ - Dilate the 4D volume `X` by `d`. - - Notes - ----- - For a visual depiction of a dilated convolution, see [1]. - - References - ---------- - .. [1] Dumoulin & Visin (2016). "A guide to convolution arithmetic for deep - learning." https://arxiv.org/pdf/1603.07285v1.pdf - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - Input volume. - d : int - The number of 0-rows to insert between each adjacent row + column in `X`. - - Returns - ------- - Xd : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - The dilated array where - - .. math:: - - \\text{out_rows} &= \\text{in_rows} + d(\\text{in_rows} - 1) \\\\ - \\text{out_cols} &= \\text{in_cols} + d (\\text{in_cols} - 1) - """ - n_ex, in_rows, in_cols, n_in = X.shape - r_ix = np.repeat(np.arange(1, in_rows), d) - c_ix = np.repeat(np.arange(1, in_cols), d) - Xd = np.insert(X, r_ix, 0, axis=1) - Xd = np.insert(Xd, c_ix, 0, axis=2) - return Xd - - -####################################################################### -# Convolution Arithmetic # -####################################################################### - - -def calc_fan(weight_shape): - """ - Compute the fan-in and fan-out for a weight matrix/volume. - - Parameters - ---------- - weight_shape : tuple - The dimensions of the weight matrix/volume. The final 2 entries must be - `in_ch`, `out_ch`. 
- - Returns - ------- - fan_in : int - The number of input units in the weight tensor - fan_out : int - The number of output units in the weight tensor - """ - if len(weight_shape) == 2: - fan_in, fan_out = weight_shape - elif len(weight_shape) in [3, 4]: - in_ch, out_ch = weight_shape[-2:] - kernel_size = np.prod(weight_shape[:-2]) - fan_in, fan_out = in_ch * kernel_size, out_ch * kernel_size - else: - raise ValueError("Unrecognized weight dimension: {}".format(weight_shape)) - return fan_in, fan_out - - -def calc_conv_out_dims(X_shape, W_shape, stride=1, pad=0, dilation=0): - """ - Compute the dimension of the output volume for the specified convolution. - - Parameters - ---------- - X_shape : 3-tuple or 4-tuple - The dimensions of the input volume to the convolution. If 3-tuple, - entries are expected to be (`n_ex`, `in_length`, `in_ch`). If 4-tuple, - entries are expected to be (`n_ex`, `in_rows`, `in_cols`, `in_ch`). - weight_shape : 3-tuple or 4-tuple - The dimensions of the weight volume for the convolution. If 3-tuple, - entries are expected to be (`f_len`, `in_ch`, `out_ch`). If 4-tuple, - entries are expected to be (`fr`, `fc`, `in_ch`, `out_ch`). - pad : tuple, int, or {'same', 'causal'} - The padding amount. If 'same', add padding to ensure that the output - length of a 1D convolution with a kernel of `kernel_shape` and stride - `stride` is the same as the input length. If 'causal' compute padding - such that the output both has the same length as the input AND - ``output[t]`` does not depend on ``input[t + 1:]``. If 2-tuple, specifies the - number of padding columns to add on each side of the sequence. Default - is 0. - stride : int - The stride for the convolution kernel. Default is 1. - dilation : int - The dilation of the convolution kernel. Default is 0. - - Returns - ------- - out_dims : 3-tuple or 4-tuple - The dimensions of the output volume. If 3-tuple, entries are (`n_ex`, - `out_length`, `out_ch`). If 4-tuple, entries are (`n_ex`, `out_rows`, - `out_cols`, `out_ch`). - """ - dummy = np.zeros(X_shape) - s, p, d = stride, pad, dilation - if len(X_shape) == 3: - _, p = pad1D(dummy, p) - pw1, pw2 = p - fw, in_ch, out_ch = W_shape - n_ex, in_length, in_ch = X_shape - - _fw = fw * (d + 1) - d - out_length = (in_length + pw1 + pw2 - _fw) // s + 1 - out_dims = (n_ex, out_length, out_ch) - - elif len(X_shape) == 4: - _, p = pad2D(dummy, p) - pr1, pr2, pc1, pc2 = p - fr, fc, in_ch, out_ch = W_shape - n_ex, in_rows, in_cols, in_ch = X_shape - - # adjust effective filter size to account for dilation - _fr, _fc = fr * (d + 1) - d, fc * (d + 1) - d - out_rows = (in_rows + pr1 + pr2 - _fr) // s + 1 - out_cols = (in_cols + pc1 + pc2 - _fc) // s + 1 - out_dims = (n_ex, out_rows, out_cols, out_ch) - else: - raise ValueError("Unrecognized number of input dims: {}".format(len(X_shape))) - return out_dims - - -####################################################################### -# Convolution Vectorization Utils # -####################################################################### - - -def _im2col_indices(X_shape, fr, fc, p, s, d=0): - """ - Helper function that computes indices into X in prep for columnization in - :func:`im2col`. 
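# Shape-check sketch for calc_conv_out_dims (toy shapes assumed): a 3x3 kernel
# at stride 1 with one pixel of padding preserves the spatial dims and replaces
# the channel dim with out_ch.
out_dims = calc_conv_out_dims((2, 8, 8, 3), (3, 3, 3, 16), stride=1, pad=1)
assert out_dims == (2, 8, 8, 16)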
- - Code extended from Andrej Karpathy's `im2col.py` - """ - pr1, pr2, pc1, pc2 = p - n_ex, n_in, in_rows, in_cols = X_shape - - # adjust effective filter size to account for dilation - _fr, _fc = fr * (d + 1) - d, fc * (d + 1) - d - - out_rows = (in_rows + pr1 + pr2 - _fr) // s + 1 - out_cols = (in_cols + pc1 + pc2 - _fc) // s + 1 - - if any([out_rows <= 0, out_cols <= 0]): - raise ValueError( - "Dimension mismatch during convolution: " - "out_rows = {}, out_cols = {}".format(out_rows, out_cols) - ) - - # i1/j1 : row/col templates - # i0/j0 : n. copies (len) and offsets (values) for row/col templates - i0 = np.repeat(np.arange(fr), fc) - i0 = np.tile(i0, n_in) * (d + 1) - i1 = s * np.repeat(np.arange(out_rows), out_cols) - j0 = np.tile(np.arange(fc), fr * n_in) * (d + 1) - j1 = s * np.tile(np.arange(out_cols), out_rows) - - # i.shape = (fr * fc * n_in, out_height * out_width) - # j.shape = (fr * fc * n_in, out_height * out_width) - # k.shape = (fr * fc * n_in, 1) - i = i0.reshape(-1, 1) + i1.reshape(1, -1) - j = j0.reshape(-1, 1) + j1.reshape(1, -1) - k = np.repeat(np.arange(n_in), fr * fc).reshape(-1, 1) - return k, i, j - - -def im2col(X, W_shape, pad, stride, dilation=0): - """ - Pads and rearrange overlapping windows of the input volume into column - vectors, returning the concatenated padded vectors in a matrix `X_col`. - - Notes - ----- - A NumPy reimagining of MATLAB's ``im2col`` 'sliding' function. - - Code extended from Andrej Karpathy's ``im2col.py``. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - Input volume (not padded). - W_shape: 4-tuple containing `(kernel_rows, kernel_cols, in_ch, out_ch)` - The dimensions of the weights/kernels in the present convolutional - layer. - pad : tuple, int, or 'same' - The padding amount. If 'same', add padding to ensure that the output of - a 2D convolution with a kernel of `kernel_shape` and stride `stride` - produces an output volume of the same dimensions as the input. If - 2-tuple, specifies the number of padding rows and colums to add *on both - sides* of the rows/columns in X. If 4-tuple, specifies the number of - rows/columns to add to the top, bottom, left, and right of the input - volume. - stride : int - The stride of each convolution kernel - dilation : int - Number of pixels inserted between kernel elements. Default is 0. - - Returns - ------- - X_col : :py:class:`ndarray ` of shape (Q, Z) - The reshaped input volume where where: - - .. math:: - - Q &= \\text{kernel_rows} \\times \\text{kernel_cols} \\times \\text{n_in} \\\\ - Z &= \\text{n_ex} \\times \\text{out_rows} \\times \\text{out_cols} - """ - fr, fc, n_in, n_out = W_shape - s, p, d = stride, pad, dilation - n_ex, in_rows, in_cols, n_in = X.shape - - # zero-pad the input - X_pad, p = pad2D(X, p, W_shape[:2], stride=s, dilation=d) - pr1, pr2, pc1, pc2 = p - - # shuffle to have channels as the first dim - X_pad = X_pad.transpose(0, 3, 1, 2) - - # get the indices for im2col - k, i, j = _im2col_indices((n_ex, n_in, in_rows, in_cols), fr, fc, p, s, d) - - X_col = X_pad[:, k, i, j] - X_col = X_col.transpose(1, 2, 0).reshape(fr * fc * n_in, -1) - return X_col, p - - -def col2im(X_col, X_shape, W_shape, pad, stride, dilation=0): - """ - Take columns of a 2D matrix and rearrange them into the blocks/windows of - a 4D image volume. - - Notes - ----- - A NumPy reimagining of MATLAB's ``col2im`` 'sliding' function. - - Code extended from Andrej Karpathy's ``im2col.py``. 
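# Columnization sketch for im2col (toy shapes assumed): each column holds one
# 3x3 receptive field, so X_col has fr * fc * in_ch = 9 rows and
# n_ex * out_rows * out_cols = 25 columns.
X = np.random.randn(1, 5, 5, 1)
X_col, p = im2col(X, (3, 3, 1, 4), pad=1, stride=1)
assert X_col.shape == (9, 25) and p == (1, 1, 1, 1)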
- - Parameters - ---------- - X_col : :py:class:`ndarray ` of shape `(Q, Z)` - The columnized version of `X` (assumed to include padding) - X_shape : 4-tuple containing `(n_ex, in_rows, in_cols, in_ch)` - The original dimensions of `X` (not including padding) - W_shape: 4-tuple containing `(kernel_rows, kernel_cols, in_ch, out_ch)` - The dimensions of the weights in the present convolutional layer - pad : 4-tuple of `(left, right, up, down)` - Number of zero-padding rows/cols to add to `X` - stride : int - The stride of each convolution kernel - dilation : int - Number of pixels inserted between kernel elements. Default is 0. - - Returns - ------- - img : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - The reshaped `X_col` input matrix - """ - if not (isinstance(pad, tuple) and len(pad) == 4): - raise TypeError("pad must be a 4-tuple, but got: {}".format(pad)) - - s, d = stride, dilation - pr1, pr2, pc1, pc2 = pad - fr, fc, n_in, n_out = W_shape - n_ex, in_rows, in_cols, n_in = X_shape - - X_pad = np.zeros((n_ex, n_in, in_rows + pr1 + pr2, in_cols + pc1 + pc2)) - k, i, j = _im2col_indices((n_ex, n_in, in_rows, in_cols), fr, fc, pad, s, d) - - X_col_reshaped = X_col.reshape(n_in * fr * fc, -1, n_ex) - X_col_reshaped = X_col_reshaped.transpose(2, 0, 1) - - np.add.at(X_pad, (slice(None), k, i, j), X_col_reshaped) - - pr2 = None if pr2 == 0 else -pr2 - pc2 = None if pc2 == 0 else -pc2 - return X_pad[:, :, pr1:pr2, pc1:pc2] - - -####################################################################### -# Convolution # -####################################################################### - - -def conv2D(X, W, stride, pad, dilation=0): - """ - A faster (but more memory intensive) implementation of the 2D "convolution" - (technically, cross-correlation) of input `X` with a collection of kernels in - `W`. - - Notes - ----- - Relies on the :func:`im2col` function to perform the convolution as a single - matrix multiplication. - - For a helpful diagram, see Pete Warden's 2015 blogpost [1]. - - References - ---------- - .. [1] Warden (2015). "Why GEMM is at the heart of deep learning," - https://petewarden.com/2015/04/20/why-gemm-is-at-the-heart-of-deep-learning/ - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - Input volume (unpadded). - W: :py:class:`ndarray ` of shape `(kernel_rows, kernel_cols, in_ch, out_ch)` - A volume of convolution weights/kernels for a given layer. - stride : int - The stride of each convolution kernel. - pad : tuple, int, or 'same' - The padding amount. If 'same', add padding to ensure that the output of - a 2D convolution with a kernel of `kernel_shape` and stride `stride` - produces an output volume of the same dimensions as the input. If - 2-tuple, specifies the number of padding rows and colums to add *on both - sides* of the rows/columns in `X`. If 4-tuple, specifies the number of - rows/columns to add to the top, bottom, left, and right of the input - volume. - dilation : int - Number of pixels inserted between kernel elements. Default is 0. - - Returns - ------- - Z : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - The covolution of `X` with `W`. 
- """ - s, d = stride, dilation - _, p = pad2D(X, pad, W.shape[:2], s, dilation=dilation) - - pr1, pr2, pc1, pc2 = p - fr, fc, in_ch, out_ch = W.shape - n_ex, in_rows, in_cols, in_ch = X.shape - - # update effective filter shape based on dilation factor - _fr, _fc = fr * (d + 1) - d, fc * (d + 1) - d - - # compute the dimensions of the convolution output - out_rows = int((in_rows + pr1 + pr2 - _fr) / s + 1) - out_cols = int((in_cols + pc1 + pc2 - _fc) / s + 1) - - # convert X and W into the appropriate 2D matrices and take their product - X_col, _ = im2col(X, W.shape, p, s, d) - W_col = W.transpose(3, 2, 0, 1).reshape(out_ch, -1) - - Z = (W_col @ X_col).reshape(out_ch, out_rows, out_cols, n_ex).transpose(3, 1, 2, 0) - - return Z - - -def conv1D(X, W, stride, pad, dilation=0): - """ - A faster (but more memory intensive) implementation of a 1D "convolution" - (technically, cross-correlation) of input `X` with a collection of kernels in - `W`. - - Notes - ----- - Relies on the :func:`im2col` function to perform the convolution as a single - matrix multiplication. - - For a helpful diagram, see Pete Warden's 2015 blogpost [1]. - - References - ---------- - .. [1] Warden (2015). "Why GEMM is at the heart of deep learning," - https://petewarden.com/2015/04/20/why-gemm-is-at-the-heart-of-deep-learning/ - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, l_in, in_ch)` - Input volume (unpadded) - W: :py:class:`ndarray ` of shape `(kernel_width, in_ch, out_ch)` - A volume of convolution weights/kernels for a given layer - stride : int - The stride of each convolution kernel - pad : tuple, int, or 'same' - The padding amount. If 'same', add padding to ensure that the output of - a 1D convolution with a kernel of `kernel_shape` and stride `stride` - produces an output volume of the same dimensions as the input. If - 2-tuple, specifies the number of padding colums to add *on both sides* - of the columns in X. - dilation : int - Number of pixels inserted between kernel elements. Default is 0. - - Returns - ------- - Z : :py:class:`ndarray ` of shape `(n_ex, l_out, out_ch)` - The convolution of X with W. - """ - _, p = pad1D(X, pad, W.shape[0], stride, dilation=dilation) - - # add a row dimension to X to permit us to use im2col/col2im - X2D = np.expand_dims(X, axis=1) - W2D = np.expand_dims(W, axis=0) - p2D = (0, 0, p[0], p[1]) - Z2D = conv2D(X2D, W2D, stride, p2D, dilation) - - # drop the row dimension - return np.squeeze(Z2D, axis=1) - - -def deconv2D_naive(X, W, stride, pad, dilation=0): - """ - Perform a "deconvolution" (more accurately, a transposed convolution) of an - input volume `X` with a weight kernel `W`, incorporating stride, pad, and - dilation. - - Notes - ----- - Rather than using the transpose of the convolution matrix, this approach - uses a direct convolution with zero padding, which, while conceptually - straightforward, is computationally inefficient. - - For further explanation, see [1]. - - References - ---------- - .. [1] Dumoulin & Visin (2016). "A guide to convolution arithmetic for deep - learning." https://arxiv.org/pdf/1603.07285v1.pdf - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - Input volume (not padded) - W: :py:class:`ndarray ` of shape `(kernel_rows, kernel_cols, in_ch, out_ch)` - A volume of convolution weights/kernels for a given layer - stride : int - The stride of each convolution kernel - pad : tuple, int, or 'same' - The padding amount. 
If 'same', add padding to ensure that the output of - a 2D convolution with a kernel of `kernel_shape` and stride `stride` - produces an output volume of the same dimensions as the input. If - 2-tuple, specifies the number of padding rows and colums to add *on both - sides* of the rows/columns in `X`. If 4-tuple, specifies the number of - rows/columns to add to the top, bottom, left, and right of the input - volume. - dilation : int - Number of pixels inserted between kernel elements. Default is 0. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, n_out)` - The decovolution of (padded) input volume `X` with `W` using stride `s` and - dilation `d`. - """ - if stride > 1: - X = dilate(X, stride - 1) - stride = 1 - - # pad the input - X_pad, p = pad2D(X, pad, W.shape[:2], stride=stride, dilation=dilation) - - n_ex, in_rows, in_cols, n_in = X_pad.shape - fr, fc, n_in, n_out = W.shape - s, d = stride, dilation - pr1, pr2, pc1, pc2 = p - - # update effective filter shape based on dilation factor - _fr, _fc = fr * (d + 1) - d, fc * (d + 1) - d - - # compute deconvolution output dims - out_rows = s * (in_rows - 1) - pr1 - pr2 + _fr - out_cols = s * (in_cols - 1) - pc1 - pc2 + _fc - out_dim = (out_rows, out_cols) - - # add additional padding to achieve the target output dim - _p = calc_pad_dims_2D(X_pad.shape, out_dim, W.shape[:2], s, d) - X_pad, pad = pad2D(X_pad, _p, W.shape[:2], stride=s, dilation=dilation) - - # perform the forward convolution using the flipped weight matrix (note - # we set pad to 0, since we've already added padding) - Z = conv2D(X_pad, np.rot90(W, 2), s, 0, d) - - pr2 = None if pr2 == 0 else -pr2 - pc2 = None if pc2 == 0 else -pc2 - return Z[:, pr1:pr2, pc1:pc2, :] - - -def conv2D_naive(X, W, stride, pad, dilation=0): - """ - A slow but more straightforward implementation of a 2D "convolution" - (technically, cross-correlation) of input `X` with a collection of kernels `W`. - - Notes - ----- - This implementation uses ``for`` loops and direct indexing to perform the - convolution. As a result, it is slower than the vectorized :func:`conv2D` - function that relies on the :func:`col2im` and :func:`im2col` - transformations. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, in_rows, in_cols, in_ch)` - Input volume. - W: :py:class:`ndarray ` of shape `(kernel_rows, kernel_cols, in_ch, out_ch)` - The volume of convolution weights/kernels. - stride : int - The stride of each convolution kernel. - pad : tuple, int, or 'same' - The padding amount. If 'same', add padding to ensure that the output of - a 2D convolution with a kernel of `kernel_shape` and stride `stride` - produces an output volume of the same dimensions as the input. If - 2-tuple, specifies the number of padding rows and colums to add *on both - sides* of the rows/columns in `X`. If 4-tuple, specifies the number of - rows/columns to add to the top, bottom, left, and right of the input - volume. - dilation : int - Number of pixels inserted between kernel elements. Default is 0. - - Returns - ------- - Z : :py:class:`ndarray ` of shape `(n_ex, out_rows, out_cols, out_ch)` - The covolution of `X` with `W`. 
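# Shape sketch for deconv2D_naive (toy shapes assumed): a stride-2 transposed
# convolution with a 2x2 kernel and no padding maps 4x4 inputs to 8x8 outputs,
# matching out = stride * (in - 1) + kernel.
X = np.random.randn(1, 4, 4, 3)
W = np.random.randn(2, 2, 3, 5)
assert deconv2D_naive(X, W, stride=2, pad=0).shape == (1, 8, 8, 5)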
- """ - s, d = stride, dilation - X_pad, p = pad2D(X, pad, W.shape[:2], stride=s, dilation=d) - - pr1, pr2, pc1, pc2 = p - fr, fc, in_ch, out_ch = W.shape - n_ex, in_rows, in_cols, in_ch = X.shape - - # update effective filter shape based on dilation factor - fr, fc = fr * (d + 1) - d, fc * (d + 1) - d - - out_rows = int((in_rows + pr1 + pr2 - fr) / s + 1) - out_cols = int((in_cols + pc1 + pc2 - fc) / s + 1) - - Z = np.zeros((n_ex, out_rows, out_cols, out_ch)) - for m in range(n_ex): - for c in range(out_ch): - for i in range(out_rows): - for j in range(out_cols): - i0, i1 = i * s, (i * s) + fr - j0, j1 = j * s, (j * s) + fc - - window = X_pad[m, i0 : i1 : (d + 1), j0 : j1 : (d + 1), :] - Z[m, i, j, c] = np.sum(window * W[:, :, :, c]) - return Z - - -####################################################################### -# Weight Initialization # -####################################################################### - - -def he_uniform(weight_shape): - """ - Initializes network weights `W` with using the He uniform initialization - strategy. - - Notes - ----- - The He uniform initializations trategy initializes thew eights in `W` using - draws from Uniform(-b, b) where - - .. math:: - - b = \sqrt{\\frac{6}{\\text{fan_in}}} - - Developed for deep networks with ReLU nonlinearities. - - Parameters - ---------- - weight_shape : tuple - The dimensions of the weight matrix/volume. - - Returns - ------- - W : :py:class:`ndarray ` of shape `weight_shape` - The initialized weights. - """ - fan_in, fan_out = calc_fan(weight_shape) - b = np.sqrt(6 / fan_in) - return np.random.uniform(-b, b, size=weight_shape) - - -def he_normal(weight_shape): - """ - Initialize network weights `W` using the He normal initialization strategy. - - Notes - ----- - The He normal initialization strategy initializes the weights in `W` using - draws from TruncatedNormal(0, b) where the variance `b` is - - .. math:: - - b = \\frac{2}{\\text{fan_in}} - - He normal initialization was originally developed for deep networks with - :class:`~numpy_ml.neural_nets.activations.ReLU` nonlinearities. - - Parameters - ---------- - weight_shape : tuple - The dimensions of the weight matrix/volume. - - Returns - ------- - W : :py:class:`ndarray ` of shape `weight_shape` - The initialized weights. - """ - fan_in, fan_out = calc_fan(weight_shape) - std = np.sqrt(2 / fan_in) - return truncated_normal(0, std, weight_shape) - - -def glorot_uniform(weight_shape, gain=1.0): - """ - Initialize network weights `W` using the Glorot uniform initialization - strategy. - - Notes - ----- - The Glorot uniform initialization strategy initializes weights using draws - from ``Uniform(-b, b)`` where: - - .. math:: - - b = \\text{gain} \sqrt{\\frac{6}{\\text{fan_in} + \\text{fan_out}}} - - The motivation for Glorot uniform initialization is to choose weights to - ensure that the variance of the layer outputs are approximately equal to - the variance of its inputs. - - This initialization strategy was primarily developed for deep networks with - tanh and logistic sigmoid nonlinearities. - - Parameters - ---------- - weight_shape : tuple - The dimensions of the weight matrix/volume. - - Returns - ------- - W : :py:class:`ndarray ` of shape `weight_shape` - The initialized weights. 
- """ - fan_in, fan_out = calc_fan(weight_shape) - b = gain * np.sqrt(6 / (fan_in + fan_out)) - return np.random.uniform(-b, b, size=weight_shape) - - -def glorot_normal(weight_shape, gain=1.0): - """ - Initialize network weights `W` using the Glorot normal initialization strategy. - - Notes - ----- - The Glorot normal initializaiton initializes weights with draws from - TruncatedNormal(0, b) where the variance `b` is - - .. math:: - - b = \\frac{2 \\text{gain}^2}{\\text{fan_in} + \\text{fan_out}} - - The motivation for Glorot normal initialization is to choose weights to - ensure that the variance of the layer outputs are approximately equal to - the variance of its inputs. - - This initialization strategy was primarily developed for deep networks with - :class:`~numpy_ml.neural_nets.activations.Tanh` and - :class:`~numpy_ml.neural_nets.activations.Sigmoid` nonlinearities. - - Parameters - ---------- - weight_shape : tuple - The dimensions of the weight matrix/volume. - - Returns - ------- - W : :py:class:`ndarray ` of shape `weight_shape` - The initialized weights. - """ - fan_in, fan_out = calc_fan(weight_shape) - std = gain * np.sqrt(2 / (fan_in + fan_out)) - return truncated_normal(0, std, weight_shape) - - -def truncated_normal(mean, std, out_shape): - """ - Generate draws from a truncated normal distribution via rejection sampling. - - Notes - ----- - The rejection sampling regimen draws samples from a normal distribution - with mean `mean` and standard deviation `std`, and resamples any values - more than two standard deviations from `mean`. - - Parameters - ---------- - mean : float or array_like of floats - The mean/center of the distribution - std : float or array_like of floats - Standard deviation (spread or "width") of the distribution. - out_shape : int or tuple of ints - Output shape. If the given shape is, e.g., ``(m, n, k)``, then - ``m * n * k`` samples are drawn. - - Returns - ------- - samples : :py:class:`ndarray ` of shape `out_shape` - Samples from the truncated normal distribution parameterized by `mean` - and `std`. - """ - samples = np.random.normal(loc=mean, scale=std, size=out_shape) - reject = np.logical_or(samples >= mean + 2 * std, samples <= mean - 2 * std) - while any(reject.flatten()): - resamples = np.random.normal(loc=mean, scale=std, size=reject.sum()) - samples[reject] = resamples - reject = np.logical_or(samples >= mean + 2 * std, samples <= mean - 2 * std) - return samples diff --git a/aitk/keras/wrappers/README.md b/aitk/keras/wrappers/README.md deleted file mode 100644 index 36794a1..0000000 --- a/aitk/keras/wrappers/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Wrappers - -The `wrappers.py` module implements wrappers for the layers in `layers.py`. It -includes -- Dropout ([Srivastava, et al., 2014](http://www.jmlr.org/papers/volume15/srivastava14a/srivastava14a.pdf)) diff --git a/aitk/keras/wrappers/__init__.py b/aitk/keras/wrappers/__init__.py deleted file mode 100644 index 4d07b0a..0000000 --- a/aitk/keras/wrappers/__init__.py +++ /dev/null @@ -1,258 +0,0 @@ -""" -A collection of objects thats can wrap / otherwise modify arbitrary neural -network layers. 
-""" - -from abc import ABC, abstractmethod - -import numpy as np - - -class WrapperBase(ABC): - def __init__(self, wrapped_layer): - """An abstract base class for all Wrapper instances""" - self._base_layer = wrapped_layer - if hasattr(wrapped_layer, "_base_layer"): - self._base_layer = wrapped_layer._base_layer - super().__init__() - - @abstractmethod - def _init_wrapper_params(self): - raise NotImplementedError - - @abstractmethod - def forward(self, z, **kwargs): - """Overwritten by inherited class""" - raise NotImplementedError - - @abstractmethod - def backward(self, out, **kwargs): - """Overwritten by inherited class""" - raise NotImplementedError - - @property - def trainable(self): - """Whether the base layer is frozen""" - return self._base_layer.trainable - - @property - def parameters(self): - """A dictionary of the base layer parameters""" - return self._base_layer.parameters - - @property - def hyperparameters(self): - """A dictionary of the base layer's hyperparameters""" - hp = self._base_layer.hyperparameters - hpw = self._wrapper_hyperparameters - if "wrappers" in hp: - hp["wrappers"].append(hpw) - else: - hp["wrappers"] = [hpw] - return hp - - @property - def derived_variables(self): - """ - A dictionary of the intermediate values computed during layer - training. - """ - dv = self._base_layer.derived_variables.copy() - if "wrappers" in dv: - dv["wrappers"].append(self._wrapper_derived_variables) - else: - dv["wrappers"] = [self._wrapper_derived_variables] - return dv - - @property - def gradients(self): - """A dictionary of the current layer parameter gradients.""" - return self._base_layer.gradients - - @property - def act_fn(self): - """The activation function for the base layer.""" - return self._base_layer.act_fn - - @property - def X(self): - """The collection of layer inputs.""" - return self._base_layer.X - - def _init_params(self): - hp = self._wrapper_hyperparameters - if "wrappers" in self._base_layer.hyperparameters: - self._base_layer.hyperparameters["wrappers"].append(hp) - else: - self._base_layer.hyperparameters["wrappers"] = [hp] - - def freeze(self): - """ - Freeze the base layer's parameters at their current values so they can - no longer be updated. - """ - self._base_layer.freeze() - - def unfreeze(self): - """Unfreeze the base layer's parameters so they can be updated.""" - self._base_layer.freeze() - - def flush_gradients(self): - """Erase all the wrapper and base layer's derived variables and gradients.""" - assert self.trainable, "Layer is frozen" - self._base_layer.flush_gradients() - - for k, v in self._wrapper_derived_variables.items(): - self._wrapper_derived_variables[k] = [] - - def update(self, lr): - """ - Update the base layer's parameters using the accrued gradients and - layer optimizer. Flush all gradients once the update is complete. - """ - assert self.trainable, "Layer is frozen" - self._base_layer.update(lr) - self.flush_gradients() - - def _set_wrapper_params(self, pdict): - for k, v in pdict.items(): - if k in self._wrapper_hyperparameters: - self._wrapper_hyperparameters[k] = v - return self - - def set_params(self, summary_dict): - """ - Set the base layer parameters from a dictionary of values. - - Parameters - ---------- - summary_dict : dict - A dictionary of layer parameters and hyperparameters. If a required - parameter or hyperparameter is not included within `summary_dict`, - this method will use the value in the current layer's - :meth:`summary` method. 
- - Returns - ------- - layer : :doc:`Layer ` object - The newly-initialized layer. - """ - return self._base_layer.set_params(summary_dict) - - def summary(self): - """Return a dict of the layer parameters, hyperparameters, and ID.""" - return { - "layer": self.hyperparameters["layer"], - "layer_wrappers": [i["wrapper"] for i in self.hyperparameters["wrappers"]], - "parameters": self.parameters, - "hyperparameters": self.hyperparameters, - } - - -class Dropout(WrapperBase): - def __init__(self, wrapped_layer, p): - """ - A dropout regularization wrapper. - - Notes - ----- - During training, a dropout layer zeroes each element of the layer input - with probability `p` and scales the activation by `1 / (1 - p)` (to reflect - the fact that on average only `(1 - p) * N` units are active on any - training pass). At test time, does not adjust elements of the input at - all (ie., simply computes the identity function). - - Parameters - ---------- - wrapped_layer : :doc:`Layer ` instance - The layer to apply dropout to. - p : float in [0, 1) - The dropout propbability during training - """ - super().__init__(wrapped_layer) - self.p = p - self._init_wrapper_params() - self._init_params() - - def _init_wrapper_params(self): - self._wrapper_derived_variables = {"dropout_mask": []} - self._wrapper_hyperparameters = {"wrapper": "Dropout", "p": self.p} - - def forward(self, X, retain_derived=True): - """ - Compute the layer output with dropout for a single minibatch. - - Parameters - ---------- - X : :py:class:`ndarray ` of shape `(n_ex, n_in)` - Layer input, representing the `n_in`-dimensional features for a - minibatch of `n_ex` examples. - retain_derived : bool - Whether to retain the variables calculated during the forward pass - for use later during backprop. If False, this suggests the layer - will not be expected to backprop through wrt. this input. Default - is True. - - Returns - ------- - Y : :py:class:`ndarray ` of shape `(n_ex, n_out)` - Layer output for each of the `n_ex` examples. - """ - scaler, mask = 1.0, np.ones(X.shape).astype(bool) - if self.trainable: - scaler = 1.0 / (1.0 - self.p) - mask = np.random.rand(*X.shape) >= self.p - X = mask * X - - if retain_derived: - self._wrapper_derived_variables["dropout_mask"].append(mask) - - return scaler * self._base_layer.forward(X, retain_derived) - - def backward(self, dLdy, retain_grads=True): - """ - Backprop from the base layer's outputs to inputs. - - Parameters - ---------- - dLdy : :py:class:`ndarray ` of shape `(n_ex, n_out)` or list of arrays - The gradient(s) of the loss wrt. the layer output(s). - retain_grads : bool - Whether to include the intermediate parameter gradients computed - during the backward pass in the final parameter update. Default is - True. - - Returns - ------- - dLdX : :py:class:`ndarray ` of shape `(n_ex, n_in)` or list of arrays - The gradient of the loss wrt. the layer input(s) `X`. - """ # noqa: E501 - assert self.trainable, "Layer is frozen" - dLdy *= 1.0 / (1.0 - self.p) - return self._base_layer.backward(dLdy, retain_grads) - - -def init_wrappers(layer, wrappers_list): - """ - Initialize the layer wrappers in `wrapper_list` and return a wrapped - `layer` object. - - Parameters - ---------- - layer : :doc:`Layer ` instance - The base layer object to apply the wrappers to. - wrappers : list of dicts - A list of parameter dictionaries for a the wrapper objects. The - wrappers are initialized and applied to the the layer sequentially. 
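# Usage sketch for the Dropout wrapper. `FullyConnected` is assumed to be the
# dense layer from layers.py (it is not defined in this file). During training,
# roughly p of the inputs are zeroed and the survivors scaled by 1 / (1 - p).
dense = FullyConnected(n_out=10)
dropped = Dropout(dense, p=0.5)
Y = dropped.forward(np.random.randn(32, 64))       # minibatch of 32 examples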
- - Returns - ------- - wrapped_layer : :class:`WrapperBase` instance - The wrapped layer object - """ - for wr in wrappers_list: - if wr["wrapper"] == "Dropout": - layer = Dropout(layer, 1)._set_wrapper_params(wr) - else: - raise NotImplementedError("{}".format(wr["wrapper"])) - return layer diff --git a/aitk/networks/__init__.py b/aitk/networks/__init__.py index eb89512..a8d22f3 100644 --- a/aitk/networks/__init__.py +++ b/aitk/networks/__init__.py @@ -8,4 +8,6 @@ # # ****************************************************** -from .network import Network, SimpleNetwork # noqa: F401 +from tensorflow.keras.layers import * + +from .network import Network, SequentialNetwork, SimpleNetwork # noqa: F401 diff --git a/aitk/networks/network.py b/aitk/networks/network.py index 9bba143..295a242 100644 --- a/aitk/networks/network.py +++ b/aitk/networks/network.py @@ -2,7 +2,7 @@ # ****************************************************** # aitk.networks: Keras model wrapper with visualizations # -# Copyright (c) 2021 Douglas S. Blank +# Copyright (c) 2021-2024 Douglas S. Blank # # https://github.com/ArtificialIntelligenceToolkit/aitk.networks # @@ -16,56 +16,57 @@ import numbers import operator import random -import sys +from types import FunctionType import matplotlib.pyplot as plt import numpy as np +import tensorflow as tf +import tensorflow.keras.backend as K from matplotlib import cm -from PIL import Image, ImageDraw +from PIL import Image +from tensorflow.keras.layers import Concatenate, Dense, InputLayer, Layer +from tensorflow.keras.models import Model + +from aitk.utils import array_to_image from .utils import ( - find_path, get_argument_bindings, + get_array_shape, + get_connections, get_error_colormap, - get_layer_name, + get_layer_input_tensor, get_templates, image_to_uri, is_keras_tensor, make_input_from_shape, render_curve, - scale_output_for_image, svg_to_image, - topological_sort, ) -from aitk.utils import array_to_image - try: from IPython.display import HTML, clear_output, display except ImportError: HTML = None + class Network: """ Wrapper around a keras.Model. 
""" - def __init__(self, model=None, layers=None, **config): - self._initialized = False + + def __init__(self, model=None, layers=None, name="Network", **config): + if model is not None and layers is not None: + raise Exception("Network() takes model or layers, not both") + + self._state = { + "tolerance_accuracy_used": False, + "pca": {}, + } self._watchers = [] self._fit_inputs = None self._fit_targets = None - self._init_state() - self._model = model - # {name: (layer, [incoming], [outgoing])...} - if layers is not None: - self._pre_layers = {get_layer_name(layer): layer - for layer in layers} - self._name = config.get("name", "Network") - self._show_connection_help() - else: - self._pre_layers = {} - self._name = None self._connections = [] + self._model = None # Place to put models between layers: self._predict_models = {} # Place to map layer to its input layers: @@ -75,9 +76,14 @@ def __init__(self, model=None, layers=None, **config): self._history = {"weights": [], "metrics": []} self._epoch = 0 self._tolerance = 0.1 - name = self._model.name if self._model is not None else "Network" + self._name = name + self._layers = [] + self._layers_map = {} + self.input_bank_order = [] + self.output_bank_order = [] + self._level_ordering = [] self.config = { - "name": name, # for svg title + "name": self._name, # for svg title "class_id": "keras-network", # for svg network classid "id": "keras-network", # for svg id "font_size": 12, # for svg @@ -110,152 +116,91 @@ def __init__(self, model=None, layers=None, **config): # layer_name: {vshape, feature, keep_aspect_ratio, visible # colormap, border_color, border_width} } - # Get all of the layers, even implicit ones, in order: - if self._model is not None: - self.initialize_model() - else: - self._layers = [] - self._layers_map = {} - self.input_bank_order = [] - self.output_bank_order = [] - self._level_ordering = [] - # Override settings: self.set_config(**config) + if model: + self._model = model + self._name = self._model.name + for layer in model.layers: + self.add(layer) + self._connections = get_connections(model) + self.compile() + elif layers: + for layer in layers: + self.add(layer) + # When we are done here, we are in 1 of 2 states: + # 1. A model, ready to go + # 2. Network, ready for more add(), connect(), compile() def __getattr__(self, attr): + if self._model is None: + raise Exception("Model has not yet been compiled") return getattr(self._model, attr) def __getitem__(self, layer_name): return self._layers_map.get(layer_name, None) - def _init_state(self): - if "_state" not in dir(self): - self._state = { - "tolerance_accuracy_used": False, - "pca": {}, - } + def add(self, layer): + """ + Add a layer to the network. 
+ """ + if isinstance(layer, FunctionType): + raise Exception("Don't use Input; use InputLayer") + + if not isinstance(layer, Layer): + raise Exception("Network.add() requires a Layer") + + # Let's find a good name for the layer: + name = layer.name + if name.startswith("keras_tensor"): + name = "input" + name[12:] + + if name in self._layers_map: + raise Exception("The name %r is already used" % name) + + # Add the layer: + layer.name = name + self._layers.append(layer) + self._layers_map[layer.name] = layer @property def model(self): return self._model - def _show_connection_help(self): - print("Connect layers with Network.connect(NAME, NAME) where NAMEs are in:") - print(" ", list(self._pre_layers.keys())) - def initialize_model(self): - self._layers = topological_sort(self._model.layers) - # Make a mapping of names to layers: - self._layers_map = {layer.name: layer for layer in self._layers} - # Get the input bank names, in order: - self.input_bank_order = self._get_input_layers() - # Get the output bank names, in order: - self.output_bank_order = self._get_output_layers() - # Get the best (shortest path) between layers: - self._level_ordering = self._get_level_ordering() # Build intermediary models: self._build_predict_models() - # Setup layer config dicts: + # Config for various layer settings (like 'vshape'): self.config["layers"] = {layer.name: {} for layer in self._layers} - # Set the colormap, etc for each layer: - self.initialize() - - def initialize(self, inputs=None, reset=True): - """ - Set colormap for each layer based on inputs or - activation functions per layer. - - Args: - inputs: inputs in single pattern format (not a dataset) - reset: if True, reset the colormap ranges - - If inputs is None, just make best guess for all layers. - - If inputs is not None, use these for input layer - colormap, and all other layers get best guess. - - If reset is True, don't use previous colormap - for input layers, but sample from inputs again. - If reset is False, consider previous input - layer colormap's with new input values. 
- """ - if inputs is None: - # We don't have direct values, so we base colormap - # on activation output ranges - for layer in self._layers: - if layer.name not in self.config["layers"]: - self.config["layers"][layer.name] = {} - if self._get_layer_type(layer.name) == "input": - self.config["layers"][layer.name]["colormap"] = ("gray", -2, 2) - else: - minmax = self._get_act_minmax(layer.name) - self.config["layers"][layer.name]["colormap"] = ("gray", minmax[0], minmax[1]) - else: - self._initialized = True - input_dataset = self.input_to_dataset(inputs) - # If reset is true, we set to extremes so any value will adjust - # Only do this on input layers: - if reset: - for layer in self._layers: - if self._get_layer_type(layer.name) == "input": - if layer.name not in self.config["layers"]: - self.config["layers"][layer.name] = {} - # FIXME: set color at some point if image - self.config["layers"][layer.name]["colormap"] = ( - "gray", - float("+inf"), # extreme too big - float("-inf"), # extreme too small - ) - # Now we set the minmax for input layer, based on past values - # or extremes: - for layer in self._layers: - outputs = self.predict_to(input_dataset, layer.name) - color_orig, min_orig, max_orig = self.config["layers"][layer.name]["colormap"] - min_new, max_new = math.floor(outputs.min()), math.ceil(outputs.max()) - if min_new != max_new: - self.config["layers"][layer.name]["colormap"] = (color_orig, min_new, max_new) - else: - # Don't let them be equal: - self.config["layers"][layer.name]["colormap"] = ( - color_orig, - min_new - 1, - max_new + 1, - ) def connect(self, from_layer_name=None, to_layer_name=None): - """ - """ - if len(self._pre_layers) == 0: + """ """ + if len(self._layers) == 0: raise Exception("no layers have been added") if from_layer_name is not None and not isinstance(from_layer_name, str): raise Exception("from_layer_name should be a string or None") if to_layer_name is not None and not isinstance(to_layer_name, str): raise Exception("to_layer_name should be a string or None") if from_layer_name is None and to_layer_name is None: - #if (any([layer.outgoing_connections for name, layer in self.layers]) or - # any([layer.incoming_connections for layer in self.layers])): - # raise Exception("layers already have connections") - for i in range(len(self._pre_layers) - 1): - names = list(self._pre_layers) - from_layer = self._pre_layers[names[i]] - to_layer = self._pre_layers[names[i + 1]] + for i in range(len(self._layers) - 1): + from_layer = self._layers[i] + to_layer = self._layers[i + 1] self.connect(from_layer.name, to_layer.name) else: if from_layer_name == to_layer_name: raise Exception("self connections are not allowed") if not isinstance(from_layer_name, str): raise Exception("from_layer_name should be a string") - if from_layer_name not in self._pre_layers: - raise Exception('unknown layer: %s' % from_layer_name) + if from_layer_name not in self._layers_map: + raise Exception("unknown layer: %s" % from_layer_name) if not isinstance(to_layer_name, str): raise Exception("to_layer_name should be a string") - if to_layer_name not in self._pre_layers: - raise Exception('unknown layer: %s' % to_layer_name) - from_layer = self._pre_layers[from_layer_name] - to_layer = self._pre_layers[to_layer_name] + if to_layer_name not in self._layers_map: + raise Exception("unknown layer: %s" % to_layer_name) + from_layer = self[from_layer_name] + to_layer = self[to_layer_name] # Check for input going to a Dense to warn: - #if len(from_layer.shape) > 2 and 
to_layer.__class__.__name__ == "Dense": + # if len(from_layer.shape) > 2 and to_layer.__class__.__name__ == "Dense": # print("WARNING: connected multi-dimensional input layer '%s' to layer '%s'; consider adding a FlattenLayer between them" % ( # from_layer.name, to_layer.name), file=sys.stderr) self._connections.append((from_layer_name, to_layer_name)) @@ -276,7 +221,7 @@ def fit(self, *args, **kwargs): * monitor: (str) metric to monitor to determine whether to stop * callbacks: (list) list of callbacks """ - from .callbacks import UpdateCallback, make_early_stop, make_stop, make_save + from .callbacks import UpdateCallback, make_early_stop, make_save, make_stop # plot = True # if plot: @@ -284,7 +229,6 @@ def fit(self, *args, **kwargs): # mpl_backend = matplotlib.get_backend() # else: # mpl_backend = None - # Get any kwargs that are not standard: report_rate = kwargs.pop("report_rate", 1) # Early stopping and Stop on Accuracy, Val_accuracy @@ -328,8 +272,14 @@ def fit(self, *args, **kwargs): kwargs["verbose"] = 0 kwargs["initial_epoch"] = self._epoch - self._fit_inputs = kwargs.get("x", None) # inputs - self._fit_targets = kwargs.get("y", None) # targets + shape = get_array_shape(kwargs.get("x")) + # TODO: check all types of networks + if shape: + kwargs["x"] = np.array(kwargs["x"]) + kwargs["y"] = np.array(kwargs["y"]) + + self._fit_inputs = kwargs.get("x") # inputs + self._fit_targets = kwargs.get("y") # targets # call underlying model fit: try: @@ -342,13 +292,23 @@ def fit(self, *args, **kwargs): # FIXME: don't save if didn't go through loop? self._history["weights"].append((self._epoch, self.get_weights())) - - metrics = {key: history.history[key][-1] for key in history.history - if len(history.history[key]) > 0} + metrics = { + key: history.history[key][-1] + for key in history.history + if len(history.history[key]) > 0 + } ## FIXME: getting epochs by keyword: - print("Epoch %d/%d %s" % (self._epoch, kwargs["epochs"], " - ".join( - ["%s: %s" % (key, value) for (key, value) in metrics.items()]))) + print( + "Epoch %d/%d %s" + % ( + self._epoch, + kwargs["epochs"], + " - ".join( + ["%s: %s" % (key, value) for (key, value) in metrics.items()] + ), + ) + ) return history def in_console(self, mpl_backend: str) -> bool: @@ -401,14 +361,13 @@ def on_epoch_end(self, callback, logs, report_rate=None, clear=True): index = random.randint(0, self.get_input_length(self._fit_inputs) - 1) inputs = self.get_input_from_dataset(index, self._fit_inputs) targets = self.get_target_from_dataset(index, self._fit_targets) - self.propagate(inputs, targets) # update watchers + self.propagate(inputs, targets) # update watchers metrics = [list(history[1].keys()) for history in self._history["metrics"]] metrics = set([item for sublist in metrics for item in sublist]) def match_acc(name): - return (name.endswith("acc") or - name.endswith("accuracy")) + return name.endswith("acc") or name.endswith("accuracy") def match_val(name): return name.startswith("val_") @@ -445,7 +404,7 @@ def get_xy(name): loss_ax.plot(x_values, y_values, label=metric, color="orange") elif match_acc(metric) and not match_val(metric) and acc_ax is not None: acc_ax.plot(x_values, y_values, label=metric, color="b") # blue - elif match_acc(metric) and match_val(metric) and acc_ax is not None: + elif match_acc(metric) and match_val(metric) and acc_ax is not None: acc_ax.plot(x_values, y_values, label=metric, color="c") # cyan # FIXME: add a chart for each metric # else: @@ -463,7 +422,6 @@ def get_xy(name): acc_ax.set_ylabel("Accuracy") 
acc_ax.legend(loc="best") - if True or format == "svg": # FIXME: work in console # if (callback is not None and not callback.in_console) or format == "svg": @@ -480,7 +438,7 @@ def get_xy(name): plt.pause(0.01) # plt.show(block=False) - def _extract_inputs(self, inputs, input_names): + def _prepare_input(self, inputs, input_names): """ Get the input_names from the inputs """ @@ -489,15 +447,16 @@ def _extract_inputs(self, inputs, input_names): if isinstance(inputs, dict): return [np.array(inputs[name]) for name in input_names] elif len(self.input_bank_order) == 1: - return inputs + return np.array([inputs]) else: return [ - np.array(inputs[index]) + np.array([inputs[index]]) for index in [self.input_bank_order.index(name) for name in input_names] ] def build_model(self): - from tensorflow.keras.models import Model + if len(self._connections) == 0: + raise Exception("Need to connect layers before building model") # Assumes layers either added or passed in via layers # and connected via Network.connect() @@ -505,45 +464,89 @@ def build_model(self): tos = [connect[1] for connect in self._connections] input_layers = [] output_layers = [] - for layer_name in self._pre_layers: - if layer_name not in tos: - input_layers.append(layer_name) - if layer_name not in froms: - output_layers.append(layer_name) - outputs = [self._get_tensor_to(output_layer) - for output_layer in output_layers] - inputs = [self._pre_layers[layer_name] - for layer_name in input_layers] + for layer in self._layers: + if layer.name not in tos: + input_layers.append(layer.name) + if layer.name not in froms: + output_layers.append(layer.name) + # Now we build the model: + outputs = [self._build_graph_to(output_layer) for output_layer in output_layers] + inputs = [self[layer_name]._input_tensor for layer_name in input_layers] self._model = Model(inputs=inputs, outputs=outputs, name=self._name) - self.initialize_model() def _get_layers_to(self, layer_name): - return [connection[0] for connection in self._connections - if connection[1] == layer_name] + return [ + self[connection[0]] + for connection in self._connections + if connection[1] == layer_name + ] - def _get_tensor_to(self, layer_name): - from tensorflow.keras.layers import Concatenate + def _get_layers_from(self, layer_name): + return [ + self[connection[1]] + for connection in self._connections + if connection[0] == layer_name + ] + + def topological_sort(self, layers, input_layers): + for layer in layers: + layer.visited = False + # Next gather them: + sorted_layers = [] + queue = input_layers + while queue: + current = queue.pop(0) + if not current.visited: + sorted_layers.append(current) + current.visited = True + queue.extend(self._get_layers_from(current.name)) + for layer in layers: + if layer.visited is False: + raise Exception( + "Layer %r is not part of the network graph" % layer.name + ) + return sorted_layers + def _build_graph_to(self, layer_name): + """ + Given the name of a layer, build all of the models + to that layer by calling the Keras layer as a function. 
+ """ # recursive layers = self._get_layers_to(layer_name) if len(layers) == 0: # An input layer: - return self._pre_layers[layer_name] + return self[layer_name] - incoming_layers = [self._get_tensor_to(incoming_layer_name) - for incoming_layer_name in layers] + incoming_layers = [ + self._build_graph_to(incoming_layer.name) for incoming_layer in layers + ] if len(incoming_layers) == 1: incoming_layer = incoming_layers[0] - else: # more than one - incoming_layer = Concatenate()(incoming_layers) + else: # more than one + incoming_layer = Concatenate()( + [get_layer_input_tensor(layer) for layer in incoming_layers] + ) + + if isinstance(incoming_layer, InputLayer): + incoming_layer = incoming_layer._input_tensor - layer = self._pre_layers[layer_name] - return layer(incoming_layer) + layer = self[layer_name] + return layer(inputs=incoming_layer) def compile(self, *args, **kwargs): """ + The last step before you run a network. """ + # _layers, _connections already set + self._layers = self.topological_sort(self._layers, self._get_input_layers()) + # Get the input bank names, in order: + self.input_bank_order = [layer.name for layer in self._get_input_layers()] + # Get the output bank names, in order: + self.output_bank_order = [layer.name for layer in self._get_output_layers()] + # Get the best (shortest path) between layers: + self._level_ordering = self._get_level_ordering() # First, build model if necessary: if self._model is None: self.build_model() @@ -553,19 +556,60 @@ def compile(self, *args, **kwargs): metrics = [self.get_metric(metric) for metric in metrics] kwargs["metrics"] = metrics # Let the standard keras model do the rest: - return self._model.compile(*args, **kwargs) + results = self._model.compile(*args, **kwargs) + self.initialize_model() + return results + + def _post_process_outputs(self, outputs, return_type): + def numpy(item): + if hasattr(item, "numpy"): + return item.numpy() + else: + return item + if len(self.output_bank_order) == 1: + if return_type == "list": + return numpy(outputs)[0].tolist() + elif return_type == "numpy": + return numpy(outputs)[0] + else: + if return_type == "list": + return [numpy(item)[0].tolist() for item in outputs] + elif return_type == "numpy": + return [numpy(item)[0] for item in outputs] + + def _post_process_dataset_outputs(self, outputs, return_type): + def numpy(item): + if hasattr(item, "numpy"): + return item.numpy() + else: + return item - def predict(self, inputs): + if len(self.output_bank_order) == 1: + if return_type == "list": + return numpy(outputs).tolist() + elif return_type == "numpy": + return numpy(outputs) + else: + if return_type == "list": + return [numpy(item).tolist() for item in outputs] + elif return_type == "numpy": + return [numpy(item) for item in outputs] + + def propagate(self, inputs, return_type="list"): """ Propagate input patterns to a bank in the network. 
""" - input_vectors = self._extract_inputs(inputs, self.input_bank_order) + if self._model is None: + raise Exception("Model has not yet been compiled") + + input_vectors = self._prepare_input(inputs, self.input_bank_order) try: - outputs = self._model(input_vectors, training=False).numpy() - except Exception as exc: + outputs = self._model(input_vectors, training=False) + except Exception: input_layers_shapes = [ - self._get_raw_output_shape(layer_name) for layer_name in self.input_bank_order + self._get_raw_output_shape(layer_name) + for layer_name in self.input_bank_order ] hints = ", ".join( [ @@ -578,7 +622,7 @@ def predict(self, inputs): % hints ) from None - return outputs + return self._post_process_outputs(outputs, return_type) def set_pca_spaces(self, inputs): """ @@ -588,7 +632,7 @@ def set_pca_spaces(self, inputs): for layer in self.layers: pca = PCA(2) - hidden_raw = self.predict_to(inputs, layer.name) + hidden_raw = self.predict_to(inputs, layer.name, return_type="numpy") try: pca_space = pca.fit(hidden_raw) except ValueError: @@ -603,10 +647,16 @@ def get_input_length(self, inputs): return len(inputs[0]) def predict_histogram_to(self, inputs, layer_name): - hidden_raw = self.predict_to(inputs, layer_name) + """ + Entire dataset + """ + if self._model is None: + raise Exception("Model has not yet been compiled") + + hidden_raw = self.predict_to(inputs, layer_name, return_type="numpy") plt.hist(hidden_raw) - plt.axis('off') + plt.axis("off") fp = io.BytesIO() plt.savefig(fp, format="png") plt.close() @@ -614,22 +664,25 @@ def predict_histogram_to(self, inputs, layer_name): return image def predict_pca_to(self, inputs, layer_name, colors, sizes): + if self._model is None: + raise Exception("Model has not yet been compiled") + if layer_name not in self._state["pca"]: raise Exception("Need to set_pca_spaces first") - hidden_raw = self.predict_to(inputs, layer_name) + hidden_raw = self.predict_to(inputs, layer_name, return_type="numpy") pca_space = self._state["pca"][layer_name] if pca_space is not None: hidden_pca = pca_space.transform(hidden_raw) - x = hidden_pca[:,0] - y = hidden_pca[:,1] + x = hidden_pca[:, 0] + y = hidden_pca[:, 1] else: # Only one hidden layer unit; we'll use zeros for Y axis x = hidden_raw y = np.zeros(len(hidden_raw)) plt.scatter(x, y, c=colors, s=sizes) - plt.axis('off') + plt.axis("off") fp = io.BytesIO() plt.savefig(fp, format="png") plt.close() @@ -651,8 +704,10 @@ def predict_pca( sizes=None, **config, ): - """ - """ + """ """ + if self._model is None: + raise Exception("Model has not yet been compiled") + # This are not sticky; need to set each time: config["rotate"] = rotate config["scale"] = scale @@ -663,8 +718,9 @@ def predict_pca( self.set_pca_spaces(inputs) try: - svg = self.to_svg(inputs=inputs, targets=targets, mode="pca", - colors=colors, sizes=sizes) + svg = self.to_svg( + inputs=inputs, targets=targets, mode="pca", colors=colors, sizes=sizes + ) except KeyboardInterrupt: raise KeyboardInterrupt() from None @@ -691,7 +747,6 @@ def predict_pca( else: raise ValueError("unable to convert to return_type %r" % return_type) - def predict_histogram( self, inputs=None, @@ -704,8 +759,10 @@ def predict_histogram( clear=True, **config, ): - """ - """ + """ """ + if self._model is None: + raise Exception("Model has not yet been compiled") + # This are not sticky; need to set each time: config["rotate"] = rotate config["scale"] = scale @@ -740,8 +797,7 @@ def predict_histogram( else: raise ValueError("unable to convert to return_type %r" % return_type) 
- - def predict_to(self, inputs, layer_name): + def predict_to(self, inputs, layer_name, return_type="list"): """ Propagate input patterns to a bank in the network. @@ -752,12 +808,15 @@ def predict_to(self, inputs, layer_name): Returns: a numpy array """ + if self._model is None: + raise Exception("Model has not yet been compiled") + input_names = self._input_layer_names[layer_name] model = self._predict_models[input_names, layer_name] - input_vectors = self._extract_inputs(inputs, input_names) + inputs = self._prepare_dataset_inputs(inputs) try: - outputs = model(input_vectors, training=False).numpy() - except Exception as exc: + outputs = model(inputs, training=False) + except Exception: input_layers_shapes = [ self._get_raw_output_shape(layer_name) for layer_name in input_names ] @@ -772,20 +831,26 @@ def predict_to(self, inputs, layer_name): % hints ) from None - return outputs + return self._post_process_dataset_outputs(outputs, return_type) def predict_from(self, inputs, from_layer_name, to_layer_name): """ Propagate patterns from one bank to another bank in the network. """ - from tensorflow.keras.models import Model + if self._model is None: + raise Exception("Model has not yet been compiled") key = (tuple([from_layer_name]), to_layer_name) if key not in self._predict_models: from_layer = self[from_layer_name] - path = find_path(from_layer, to_layer_name) + path = self.find_path(from_layer, to_layer_name) + if path is None: + raise Exception( + "no path between %r to %r" % (from_layer_name, to_layer_name) + ) # Input should be what next layer expects: - current = input_layer = make_input_from_shape(self[path[0]].input_shape) + input_shape = self[path[0]]._build_shapes_dict["input_shape"] + current = input_layer = make_input_from_shape(input_shape) for layer_name in path: current = self[layer_name](current) self._predict_models[key] = Model(inputs=input_layer, outputs=current) @@ -852,12 +917,14 @@ def get_image( # Everything else is sticky: self.config.update(config) - if not self._initialized and inputs is not None: - self.initialize(inputs) - try: - svg = self.to_svg(inputs=inputs, targets=targets, mode="activation", - colors=None, sizes=None) + svg = self.to_svg( + inputs=inputs, + targets=targets, + mode="activation", + colors=None, + sizes=None, + ) except KeyboardInterrupt: raise KeyboardInterrupt() from None @@ -880,6 +947,9 @@ def display( clear=True, **config, ): + if self._model is None: + raise Exception("Model has not yet been compiled") + if return_type is None: try: get_ipython() # noqa: F821 @@ -887,9 +957,19 @@ def display( except Exception: return_type = "image" + # input_vectors = self._prepare_input(inputs, self.input_bank_order) + if return_type == "html": - svg = self.get_image(inputs, targets, show_error, show_targets, "svg", - rotate, scale, **config) + svg = self.get_image( + inputs, + targets, + show_error, + show_targets, + "svg", + rotate, + scale, + **config, + ) if HTML is not None: if clear: clear_output(wait=True) @@ -899,13 +979,20 @@ def display( "need to install `IPython` or use Network.display(return_type='image')" ) else: - image = self.get_image(inputs, targets, show_error, show_targets, return_type, - rotate, scale, **config) + image = self.get_image( + inputs, + targets, + show_error, + show_targets, + return_type, + rotate, + scale, + **config, + ) return image def watch_weights(self, to_name): - """ - """ + """ """ from .watchers import WeightWatcher name = "WeightWatcher: to %s" % (to_name,) @@ -919,8 +1006,7 @@ def watch_weights(self, 
to_name): display(watcher._widget) def watch_layer(self, layer_name): - """ - """ + """ """ from .watchers import LayerWatcher name = "LayerWatcher: %s" % (layer_name,) @@ -933,14 +1019,14 @@ def watch_layer(self, layer_name): display(watcher._widget) - def watch(self, + def watch( + self, show_error=None, show_targets=None, rotate=None, scale=None, ): - """ - """ + """ """ from .watchers import NetworkWatcher name = "NetworkWatcher" @@ -954,10 +1040,11 @@ def watch(self, widget = watcher.get_widget(show_error, show_targets, rotate, scale) display(widget) - def propagate(self, - inputs, - targets=None, - show=True, + def predict( + self, + inputs, + targets=None, + show=True, ): """ Update all of the watchers whatever they may be watching, @@ -966,32 +1053,31 @@ def propagate(self, if show: for watcher in self._watchers: watcher.update(inputs, targets) - dataset = self.input_to_dataset(inputs) - # FIXME: rather than just the first, format in case - # of multiple output layers - return self._model(dataset, training=False)[0].numpy() - - def propagate_to(self, - inputs, - layer_name, - return_type=None, - channel=None, + inputs = self._prepare_dataset_inputs(inputs) + outputs = self._model(inputs, training=False) + return outputs + + def propagate_to( + self, + inputs, + layer_name, + return_type="numpy", + channel=None, ): - dataset = self.input_to_dataset(inputs) - # FIXME: rather than just the first, format in case - # of multiple output layers - array = self.predict_to(dataset, layer_name) - # FIXME: get output banks - # Strip out just the single return row from one bank - array = array[0] + input_names = self._input_layer_names[layer_name] + model = self._predict_models[input_names, layer_name] + input_vectors = self._prepare_input(inputs, input_names) + array = model(input_vectors, training=False) + if return_type == "image": return self._layer_array_to_image(layer_name, array, channel=channel) else: - return array + return self._post_process_outputs(array, return_type) - def propagate_each(self, - inputs=None, - targets=None, + def propagate_each( + self, + inputs=None, + targets=None, ): """ Update all of the watchers whatever they may be watching. @@ -1016,8 +1102,6 @@ def propagate_each(self, count += 1 def _build_predict_models(self): - from tensorflow.keras.models import Model - # for all layers, inputs to here: for layer in self._layers: if self._get_layer_type(layer.name) != "input": @@ -1032,7 +1116,8 @@ def _build_predict_models(self): else: self._input_layer_names[layer.name] = tuple([layer.name]) self._predict_models[tuple([layer.name]), layer.name] = Model( - inputs=[layer.input], outputs=[layer.output], + inputs=[layer._input_tensor], + outputs=[layer.output], ) def _get_input_tensors(self, layer_name, input_list): @@ -1040,15 +1125,15 @@ def _get_input_tensors(self, layer_name, input_list): Given a layer_name, return {input_layer_name: tensor} """ # Recursive; results in input_list of [(name, tensor), ...] 
- for layer in self.incoming_layers(layer_name): + for layer in self._get_layers_to(layer_name): if layer.name in self._input_layer_names: for layer_name in self._input_layer_names[layer.name]: if layer_name not in [name for (name, tensor) in input_list]: - input_list.append((layer_name, self[layer_name].input)) + input_list.append((layer_name, self[layer_name]._input_tensor)) else: if self._get_layer_type(layer.name) == "input": if layer.name not in [name for (name, tensor) in input_list]: - input_list.append((layer.name, layer.input)) + input_list.append((layer.name, layer._input_tensor)) else: self._get_input_tensors(layer.name, input_list) return input_list @@ -1058,8 +1143,6 @@ def make_image(self, layer_name, vector, colormap=None): Given an activation name (or function), and an output vector, display make and return an image widget. """ - import tensorflow.keras.backend as K - image = self._layer_array_to_image(layer_name, vector) # If rotated, and has features, rotate it: if self.config.get("rotate", False): @@ -1072,7 +1155,6 @@ def make_image(self, layer_name, vector, colormap=None): return image def _layer_has_channels(self, layer_name): - layer = self[layer_name] class_name = self[layer_name].__class__.__name__ return class_name in ["Conv2D", "MaxPooling2D"] @@ -1080,10 +1162,13 @@ def _layer_array_to_image(self, layer_name, vector, channel=None): if self._layer_has_channels(layer_name): if channel is None: channel = self._get_feature(layer_name) - select = tuple([slice(None) for i in range(len(vector.shape) - 1)] + [slice(channel, channel+1)]) + select = tuple( + [slice(None) for i in range(len(vector.shape) - 1)] + + [slice(channel, channel + 1)] + ) vector = vector[select] else: - pass # let's try it as is + pass # let's try it as is # If vshape is given, then resize the vector: vshape = self.vshape(layer_name) @@ -1091,13 +1176,42 @@ def _layer_array_to_image(self, layer_name, vector, channel=None): vector = vector.reshape(vshape) try: - image = array_to_image(vector, minmax=self._layer_minmax(layer_name)) + minmax = self._get_dynamic_minmax(layer_name, vector) + image = array_to_image(vector, minmax=minmax) except Exception: # Error: make a red image image = array_to_image([[[255, 0, 0]], [[255, 0, 0]]]) return image + def _get_dynamic_minmax(self, layer_name, vector): + if self[layer_name].__class__.__name__ == "Dense": + # Get minmax based on activation function + minmax = self._get_act_minmax(layer_name) + elif self[layer_name].__class__.__name__ == "Flatten": + # Get minmax from previous layer + inputs_to_layer_name = self._get_layers_to(layer_name) + minmax = self._get_dynamic_minmax(inputs_to_layer_name[0].name, vector) + elif self[layer_name].__class__.__name__ == "InputLayer": + # Hardcoded to typical ranges + minimum = vector.min() + maximum = vector.max() + if minimum < 0: + minmax = [-1, 1] + elif maximum > 100 and maximum <= 255: + # Assuming image + minmax = [0, 255] + else: + minmax = [0, 1] + else: + # Compute minmax based on mean +/- std + avg = vector.mean() + std = vector.std() + minimum = vector.min() + maximum = vector.max() + minmax = [max(avg - std, minimum), min(avg + std, maximum)] + return minmax + def _make_color(self, item): if isinstance(item, numbers.Number): return (item, item, item) @@ -1105,14 +1219,24 @@ def _make_color(self, item): return tuple(item) def _get_input_layers(self): - return tuple( - [x.name for x in self._layers if self._get_layer_type(x.name) == "input"] - ) + layers = [] + for layer_from, layer_to in self._connections: + if 
layer_from not in layers: + layers.append(layer_from) + for layer_from, layer_to in self._connections: + if layer_to in layers: + layers.remove(layer_to) + return [self._layers_map[name] for name in layers] def _get_output_layers(self): - return tuple( - [x.name for x in self._layers if self._get_layer_type(x.name) == "output"] - ) + layers = [] + for layer_from, layer_to in self._connections: + if layer_to not in layers: + layers.append(layer_to) + for layer_from, layer_to in self._connections: + if layer_from in layers: + layers.remove(layer_from) + return [self._layers_map[name] for name in layers] def vshape(self, layer_name): """ @@ -1128,21 +1252,38 @@ def vshape(self, layer_name): def _get_output_shape(self, layer_name): layer = self[layer_name] - if isinstance(layer.output_shape, list): - return layer.output_shape[0][1:] + if (layer._build_shapes_dict is not None) and ( + "input_shape" in layer._build_shapes_dict + ): + output_shape = layer.compute_output_shape( + layer._build_shapes_dict["input_shape"] + ) else: - return layer.output_shape[1:] + output_shape = layer.batch_shape + if isinstance(output_shape, list): + return output_shape[0][1:] + else: + return output_shape[1:] def _get_input_shape(self, layer_name): layer = self[layer_name] - if isinstance(layer.input_shape, list): - return layer.input_shape[0][1:] + input_shape = layer._build_shapes_dict["input_shape"] + if isinstance(input_shape, list): + return input_shape[0][1:] else: - return layer.input_shape[1:] + return input_shape[1:] def _get_raw_output_shape(self, layer_name): layer = self[layer_name] - return layer.output_shape + if (layer._build_shapes_dict is not None) and ( + "input_shape" in layer._build_shapes_dict + ): + output_shape = layer.compute_output_shape( + layer._build_shapes_dict["input_shape"] + ) + else: + output_shape = layer.batch_shape + return output_shape def _get_feature(self, layer_name): """ @@ -1191,10 +1332,14 @@ def format_range(minmax): if activation: retval += "\nAct function: %s" % activation retval += "\nAct output range: %s" % ( - format_range(self._get_act_minmax(layer_name),) + format_range( + self._get_act_minmax(layer_name), + ) ) retval += "\nActual minmax: %s" % ( - format_range(self._layer_minmax(layer_name),) + format_range( + self._layer_minmax(layer_name), + ) ) retval += "\nShape = %s" % (self._get_raw_output_shape(layer_name),) return retval @@ -1235,24 +1380,17 @@ def _get_act_minmax(self, layer_name): Note: +/- 2 represents infinity """ layer = self[layer_name] - if layer.__class__.__name__ == "Flatten": - in_layer = self.incoming_layers(layer_name)[0] - return self._get_act_minmax(in_layer.name) - elif self._get_layer_type(layer_name) == "input": - color, mini, maxi = self._get_colormap(layer) - return (mini, maxi) - else: # try to get from activation function - activation = self._get_activation_name(layer) - if activation in ["tanh", "softsign"]: - return (-1, +1) - elif activation in ["sigmoid", "softmax", "hard_sigmoid"]: - return (0, +1) - elif activation in ["relu", "elu", "softplus"]: - return (0, +2) - elif activation in ["selu", "linear"]: - return (-2, +2) - else: # default, or unknown activation function - return (-2, +2) + activation = self._get_activation_name(layer) + if activation in ["tanh", "softsign"]: + return (-1, +1) + elif activation in ["sigmoid", "softmax", "hard_sigmoid"]: + return (0, +1) + elif activation in ["relu", "elu", "softplus"]: + return (0, +2) + elif activation in ["selu", "linear"]: + return (-2, +2) + else: # default, or unknown 
activation function + return (0, +2) def _get_border_color(self, layer_name): if ( @@ -1324,7 +1462,7 @@ def get_target_from_dataset(self, index, dataset): return data def enumerate_dataset(self, dataset1, dataset2=None): - """" + """ " Takes a dataset and turns it into individual sets of one pattern each. """ @@ -1359,15 +1497,14 @@ def enumerate_dataset(self, dataset1, dataset2=None): count += 1 - def input_to_dataset(self, input): + def _prepare_dataset_inputs(self, inputs): """ - Take input tensor(s) and turn into an appropriate - dataset. + Take input dataset and make sure it is correct format. """ if len(self.input_bank_order) == 1: - inputs = [np.array([input])] + inputs = np.array(inputs) else: - inputs = [np.array([bank]) for bank in input] + inputs = [np.array(bank) for bank in inputs] return inputs def target_to_dataset(self, target): @@ -1378,16 +1515,20 @@ def target_to_dataset(self, target): targets = [np.array([bank]) for bank in target] return targets - def to_svg(self, inputs=None, targets=None, mode="activation", colors=None, sizes=None): - """ - """ + def to_svg( + self, inputs=None, targets=None, mode="activation", colors=None, sizes=None + ): + """ """ + # FIXME: # First, turn single patterns into a dataset: - if inputs is not None: - if mode == "activation": - inputs = self.input_to_dataset(inputs) - if targets is not None: - if mode == "activation": - targets = self.target_to_dataset(targets) + # if inputs is not None: + # if mode == "activation": + # inputs = self._extract_inputs(inputs, self.input_bank_order) + # + # if targets is not None: + # if mode == "activation": + # # FIXME: + # targets = self.target_to_dataset(targets) # Next, build the structures: struct = self.build_struct(inputs, targets, mode, colors, sizes) templates = get_templates(self.config) @@ -1408,9 +1549,12 @@ def to_svg(self, inputs=None, targets=None, mode="activation", colors=None, size if template_name == "label_svg" and rotate: dict["x"] += 8 dict["text_anchor"] = "middle" - dict["transform"] = ( - """ transform="rotate(-90 %s %s) translate(%s)" """ - % (dict["x"], dict["y"], 2) + dict[ + "transform" + ] = """ transform="rotate(-90 %s %s) translate(%s)" """ % ( + dict["x"], + dict["y"], + 2, ) else: dict["transform"] = "" @@ -1580,7 +1724,7 @@ def build_struct(self, inputs, targets, mode, colors, sizes): continue elif anchor: continue - for out in self.outgoing_layers(layer_name): + for out in self._get_layers_from(layer_name): if ( out.name not in positioning ): # is it drawn yet? if not, continue, @@ -1761,7 +1905,7 @@ def build_struct(self, inputs, targets, mode, colors, sizes): x1 = cwidth + width / 2 y1 = cheight - 1 # Arrows going up - for out in self.outgoing_layers(layer_name): + for out in self._get_layers_from(layer_name): if out.name not in positioning: continue # draw an arrow between layers: @@ -1842,7 +1986,7 @@ def build_struct(self, inputs, targets, mode, colors, sizes): ] ) output_shape = self._get_output_shape(layer_name) - if (self._layer_has_channels(layer_name)): + if self._layer_has_channels(layer_name): features = str(output_shape[-1]) # FIXME: feature = str(self._get_feature(layer_name)) @@ -1924,7 +2068,7 @@ def build_struct(self, inputs, targets, mode, colors, sizes): # DONE! 
# Draw the title: if mode == "activation": - title = "Activations for %s" % self.config["name"] + title = "Activations for %s" % self.config["name"] elif mode == "pca": title = "PCAs for %s" % self.config["name"] elif mode == "histogram": @@ -2009,35 +2153,14 @@ def build_struct(self, inputs, targets, mode, colors, sizes): ) return struct - def incoming_layers(self, layer_name): - layer = self[layer_name] - layers = [] - for node in layer.inbound_nodes: - if hasattr(node.inbound_layers, "__iter__"): - for layer in node.inbound_layers: - if layer not in layers: - layers.append(layer) - else: - if node.inbound_layers not in layers: - layers.append(node.inbound_layers) - return layers - - def outgoing_layers(self, layer_name): - layer = self[layer_name] - layers = [] - for node in layer.outbound_nodes: - if node.outbound_layer not in layers: - layers.append(node.outbound_layer) - return layers - def _get_layer_type(self, layer_name): """ Determines whether a layer is a "input", "hidden", or "output" layer based on its connections. If no connections, then it is "unconnected". """ - incoming_connections = self.incoming_layers(layer_name) - outgoing_connections = self.outgoing_layers(layer_name) + incoming_connections = self._get_layers_to(layer_name) + outgoing_connections = self._get_layers_from(layer_name) if len(incoming_connections) == 0 and len(outgoing_connections) == 0: return "unconnected" elif len(incoming_connections) > 0 and len(outgoing_connections) > 0: @@ -2048,8 +2171,7 @@ def _get_layer_type(self, layer_name): return "input" def _get_layer_class(self, layer_name): - """ - """ + """ """ layer = self[layer_name] return layer.__class__.__name__ @@ -2066,7 +2188,7 @@ def _get_level_ordering(self): levels = {} for layer in self._layers: level = max( - [levels[lay.name] for lay in self.incoming_layers(layer.name)] + [-1] + [levels[lay.name] for lay in self._get_layers_to(layer.name)] + [-1] ) levels[layer.name] = level + 1 max_level = max(levels.values()) @@ -2077,7 +2199,7 @@ def _get_level_ordering(self): ] ordering.append( [ - (name, False, [x.name for x in self.incoming_layers(name)]) + (name, False, [x.name for x in self._get_layers_to(name)]) for name in layer_names ] ) # (going_to/layer_name, anchor, coming_from) @@ -2115,7 +2237,7 @@ def _get_level_ordering(self): else: # if next level doesn't contain an outgoing # connection, add it to next level as anchor point - for layer in self.outgoing_layers(name): + for layer in self._get_layers_from(name): next_level = [ (n, anchor) for (n, anchor, fname) in ordering[level + 1] ] @@ -2127,8 +2249,8 @@ def _get_level_ordering(self): return ordering def _optimize_ordering(self, ordering): - def perms(l): - return list(itertools.permutations(l)) + def perms(items): + return list(itertools.permutations(items)) def distance(xy1, xy2): return math.sqrt((xy1[0] - xy2[0]) ** 2 + (xy1[1] - xy2[1]) ** 2) @@ -2230,7 +2352,7 @@ def _pre_process_struct(self, inputs, ordering, targets, mode, colors, sizes): if inputs is None: inputs = self.make_dummy_dataset() if targets is not None: - outputs = self._model(inputs, training=False).numpy() + outputs = self.propagate(inputs) if len(self.output_bank_order) == 1: targets = [targets] errors = (np.array(outputs) - np.array(targets)).tolist() @@ -2269,37 +2391,41 @@ def _pre_process_struct(self, inputs, ordering, targets, mode, colors, sizes): continue hiding[column] = False # The rest of this for loop is handling image of bank - keep_aspect_ratio = None if mode == "pca": image = 
self.predict_pca_to(inputs, layer_name, colors, sizes) - keep_aspect_ratio = True elif mode == "histogram": image = self.predict_histogram_to(inputs, layer_name) - keep_aspect_ratio = True - else: # activations of a dataset + else: # activations of a dataset try: - image = self.make_image( - layer_name, self.predict_to(inputs, layer_name)[0] - ) + outputs = self.propagate_to(inputs, layer_name) + image = self.make_image(layer_name, outputs) except Exception: # Error: make a red image - image = array_to_image([[ - [255, 0, 0], - [255, 0, 0], - ]]) + image = array_to_image( + [ + [ + [255, 0, 0], + [255, 0, 0], + ] + ] + ) (width, height) = image.size images[layer_name] = image # little image if self._get_layer_type(layer_name) == "output": if targets is not None: # Target image, targets set above: - target_colormap = ("grey", -2, 2) # FIXME: self[layer_name].colormap + target_colormap = ( + "grey", + -2, + 2, + ) # FIXME: self[layer_name].colormap target_bank = targets[self.output_bank_order.index(layer_name)] target_array = np.array(target_bank) target_image = self.make_image( layer_name, target_array, target_colormap ) # Error image, error set above: - error_colormap = (get_error_colormap(), -2, 2) # FIXME + error_colormap = (get_error_colormap(), -2, 2) # FIXME error_bank = errors[self.output_bank_order.index(layer_name)] error_array = np.array(error_bank) error_image = self.make_image( @@ -2335,7 +2461,6 @@ def _pre_process_struct(self, inputs, ordering, targets, mode, colors, sizes): max_width = max(max_width, row_width) # of all rows return max_width, max_height, row_heights, images, image_dims - def make_dummy_dataset(self): """ Make a stand-in dataset for this network: @@ -2343,8 +2468,7 @@ def make_dummy_dataset(self): inputs = [] for layer_name in self.input_bank_order: shape = self._get_input_shape(layer_name) - if (shape is None) or (isinstance(shape, (list, tuple)) - and None in shape): + if (shape is None) or (isinstance(shape, (list, tuple)) and None in shape): v = np.random.rand(100) else: v = np.random.rand(*shape) @@ -2415,6 +2539,7 @@ def set_config_layer(self, layer_name, **items): "border_color": "string", "border_width": "integer", } + def validate_type(value, format): if format == "integer": return isinstance(value, int) @@ -2425,7 +2550,7 @@ def validate_type(value, format): elif format == "boolean": return isinstance(value, bool) else: - return all([validate_type(v,f) for v,f in zip(value, format)]) + return all([validate_type(v, f) for v, f in zip(value, format)]) if layer_name in self.config["layers"]: for item in items: @@ -2433,7 +2558,10 @@ def validate_type(value, format): if validate_type(items[item], proper_items[item]): self.config["layers"][layer_name][item] = items[item] else: - raise AttributeError("invalid form for: %r; should be: %s" % (item, proper_items[item])) + raise AttributeError( + "invalid form for: %r; should be: %s" + % (item, proper_items[item]) + ) else: raise AttributeError("no such config layer item: %r" % item) else: @@ -2477,7 +2605,7 @@ def set_weights(self, weights): new_weights = [] for item in orig: total = functools.reduce(operator.mul, item.shape, 1) - w = np.array(weights[current:current + total]) + w = np.array(weights[current : current + total]) new_weights.append(w.reshape(item.shape)) current += total layer.set_weights(new_weights) @@ -2503,23 +2631,30 @@ def get_learning_rate(self): print("WARNING: you need to use an optimizer with lr") def get_metric(self, name): - import tensorflow.keras.backend as K - if name == 
"tolerance_accuracy": self._state["tolerance_accuracy_used"] = True + def tolerance_accuracy(targets, outputs): return K.mean( K.all( - K.less_equal(K.abs(targets - outputs), - self._tolerance), axis=-1), - axis=-1) + K.less_equal( + K.abs( + tf.cast(targets, tf.float32) + - tf.cast(outputs, tf.float32) + ), + self._tolerance, + ), + axis=-1, + ), + axis=-1, + ) + return tolerance_accuracy else: return name def get_momentum(self): - """ - """ + """ """ if hasattr(self._model, "optimizer") and hasattr( self._model.optimizer, "momentum" ): @@ -2528,8 +2663,7 @@ def get_momentum(self): print("WARNING: you need to use an optimizer with momentum") def set_momentum(self, momentum): - """ - """ + """ """ if hasattr(self._model, "optimizer") and hasattr( self._model.optimizer, "momentum" ): @@ -2538,25 +2672,49 @@ def set_momentum(self, momentum): print("WARNING: you need to use an optimizer with momentum") def get_tolerance(self): - """ - """ + """ """ if not self._state["tolerance_accuracy_used"]: - print("WARNING: you need Network.compile(metrics=['tolerance_accuracy']) to use tolerance") + print( + "WARNING: you need Network.compile(metrics=['tolerance_accuracy']) to use tolerance" + ) return self._tolerance def set_tolerance(self, tolerance): - """ - """ + """ """ if not self._state["tolerance_accuracy_used"]: - print("WARNING: you need Network.compile(metrics=['tolerance_accuracy']) to use tolerance") + print( + "WARNING: you need Network.compile(metrics=['tolerance_accuracy']) to use tolerance" + ) self._tolerance = tolerance + def find_path(self, from_layer, to_layer_name): + """ + Breadth-first search to find shortest path + from from_layer to to_layer_name. + + Returns None if there is no path. + """ + # No need to put from_layer.name in path: + path = {} + path[from_layer.name] = [] + queue = [from_layer] + while queue: + current = queue.pop() + if current.name == to_layer_name: + return path[current.name] + else: + # expand: + for layer in self._get_layers_from(current.name): + path[layer.name] = path[current.name] + [layer.name] + queue.append(layer) + return None + class SimpleNetwork(Network): def __init__( self, *layers, - name="SimpleNetwork", + name="SequentialNetwork", activation="sigmoid", loss="mse", optimizer="sgd", @@ -2584,9 +2742,6 @@ def __init__( * (int, int, ...): (input layers only) the shape of the input patterns * keras layer instance: an instance of a keras layer, like Flatten() """ - from tensorflow.keras.models import Model - from tensorflow.keras.layers import Dense, Input, Layer - import tensorflow.keras.layers def make_name(index, total): if index == 0: @@ -2601,15 +2756,18 @@ def make_name(index, total): def make_layer(index, layers, activation): if isinstance(layers[index], Layer) or is_keras_tensor(layers[index]): return layers[index] - elif (isinstance(layers[index], str) and - hasattr(tensorflow.keras.layers, layers[index])): - layer_class = getattr(tensorflow.keras.layers, layers[index]) + elif isinstance(layers[index], str) and hasattr( + tf.keras.layers, layers[index] + ): + layer_class = getattr(tf.keras.layers, layers[index]) return layer_class() else: name = make_name(index, len(layers)) if index == 0: size = layers[index] - return Input(size, name=name) + if not isinstance(size, (list, tuple)): + size = tuple([size]) + return InputLayer(size, name=name) else: size = layers[index] if isinstance(size, int): @@ -2617,28 +2775,23 @@ def make_layer(index, layers, activation): elif len(size) == 2 and isinstance(size[1], str): size, activation_function = 
size else: - raise Exception("Invalid SimpleNetwork layer representation: %r" % size) + raise Exception( + "Invalid SquentialNetwork layer representation: %r" % size + ) return Dense(size, activation=activation_function, name=name) - layers = [ - make_layer(index, layers, activation) - for index in range(len(layers)) - ] - current_layer = layers[0] - for layer in layers[1:]: - current_layer = layer(current_layer) - model = Model(inputs=layers[0], outputs=current_layer, name=name) + layers = [make_layer(index, layers, activation) for index in range(len(layers))] + super().__init__(layers=layers, name=name) + for i in range(len(layers) - 1): + self.connect(layers[i].name, layers[i + 1].name) if metrics is None: metrics = ["tolerance_accuracy"] - # Replaced special named metrics with ours: - super()._init_state() metrics = [self.get_metric(name) for name in metrics] - model.compile(optimizer=self._make_optimizer(optimizer), loss=loss, metrics=metrics) - super().__init__(model) + self.compile( + optimizer=self._make_optimizer(optimizer), loss=loss, metrics=metrics + ) def _make_optimizer(self, optimizer): - import tensorflow as tf - # Get optimizer with some defaults if optimizer == "sgd": return tf.keras.optimizers.SGD( @@ -2653,3 +2806,6 @@ def clear_watchers(self): weights, etc. """ self._watchers[:] = [] + + +SequentialNetwork = SimpleNetwork diff --git a/aitk/networks/utils.py b/aitk/networks/utils.py index 393395e..273b62c 100644 --- a/aitk/networks/utils.py +++ b/aitk/networks/utils.py @@ -37,15 +37,18 @@ def __init__(self, pointA, pointB): self.angle = math.atan2(lengthY, lengthX) -def get_layer_name(layer): - from tensorflow.python.framework.ops import Tensor - from tensorflow.keras.models import Model - - if isinstance(layer, Tensor): - m = Model(inputs=layer, outputs=layer) - return m.layers[0].name +def get_array_shape(array): + if isinstance(array, list): + return [len(array)] + get_array_shape(array[0]) else: + return [] + + +def get_layer_name(layer): + if hasattr(layer, "name"): return layer.name + else: + return "layer" def get_error_colormap(): @@ -110,101 +113,18 @@ def make_input_from_shape(shape): return Input(input_shape, name="input") -def find_path(from_layer, to_layer_name): - """ - Breadth-first search to find shortest path - from from_layer to to_layer_name. - - Returns None if there is no path. - """ - # No need to put from_layer.name in path: - from_layer.path = [] - queue = [from_layer] - while len(queue) > 0: - current = queue.pop() - if current.name == to_layer_name: - return current.path - else: - # expand: - for node in current.outbound_nodes: - layer = node.outbound_layer - layer.path = current.path + [layer.name] - queue.append(layer) - return None - - -def gather_nodes(layers): - nodes = [] - for layer in layers: - for node in layer.inbound_nodes: - if node not in nodes: - nodes.append(node) - - for node in layer.outbound_nodes: - if node not in nodes: - nodes.append(node) - return nodes - -#def topological_sort_connections(input_layers, connections): -# layer_list = input_layers[:] -# while not done: -# for connection in connections: - -def topological_sort(layers): - """ - Given a keras model and list of layers, produce a topological - sorted list, from input(s) to output(s). 
- """ - nodes = topological_sort_nodes(layers) - layer_list = [] - for node in nodes: - if hasattr(node.inbound_layers, "__iter__"): - for layer in node.inbound_layers: - if layer not in layer_list: - layer_list.append(layer) - else: - if node.inbound_layers not in layer_list: - layer_list.append(node.inbound_layers) - - if node.outbound_layer not in layer_list: - layer_list.append(node.outbound_layer) - return layer_list - - -def topological_sort_nodes(layers): - """ - Given a keras model and list of layers, produce a topological - sorted list, from input(s) to output(s). - """ - # Initilize all: - nodes = gather_nodes(layers) - for node in nodes: - node.visited = False - stack = [] - for node in reversed(nodes): - if not node.visited: - visit_node(node, stack) - return reversed(stack) - - -def visit_node(node, stack): - """ - Utility function for topological_sort. - """ - node.visited = True - if node.outbound_layer: - for subnode in node.outbound_layer.outbound_nodes: - if not subnode.visited: - visit_node(subnode, stack) - stack.append(node) - - def scale_output_for_image(vector, minmax, truncate=False): """ Given an activation name (or something else) and an output vector, scale the vector. """ - return rescale_numpy_array(vector, minmax, (0, 255), "uint8", truncate=truncate,) + return rescale_numpy_array( + vector, + minmax, + (0, 255), + "uint8", + truncate=truncate, + ) def rescale_numpy_array(a, old_range, new_range, new_dtype, truncate=False): @@ -246,7 +166,8 @@ def svg_to_image(svg, config): else: raise Exception("svg_to_image takes a str, rather than %s" % type(svg)) - image_bytes = cairosvg.svg2png(bytestring=svg) + # FIXME: if not in notebook, need output_height? + image_bytes = cairosvg.svg2png(bytestring=svg) # , output_height=INT) image = Image.open(io.BytesIO(image_bytes)) if "background_color" in config: # create a blank image, with background: @@ -458,3 +379,22 @@ def is_keras_tensor(item): return K.is_keras_tensor(item) except Exception: return False + + +def get_connections(model): + connections = [] + for layer in model.layers: + for node in layer._inbound_nodes: + for parent_node in node.parent_nodes: + connections.append((parent_node.operation.name, layer.name)) + return connections + + +def get_layer_input_tensor(layer): + """ + Get the layer, or layer._input_tensor + """ + if hasattr(layer, "_input_tensor"): + return layer._input_tensor + else: + return layer diff --git a/aitk/utils/utils.py b/aitk/utils/utils.py index 75dd76a..177aa59 100644 --- a/aitk/utils/utils.py +++ b/aitk/utils/utils.py @@ -8,10 +8,10 @@ # # *********************************************************** -import math import base64 import html import io +import math import os import sys @@ -65,9 +65,13 @@ def array_to_image(array, colormap=None, channels="last", minmax=None): ) from exc ## Need to be in range (0,1) for colormapping: + if minmax[0] != minmax[1]: + array.clip(*minmax) + else: + minmax = [minmax[0] - 1, minmax[1] + 1] array = rescale_array(array, minmax, (0, 1), "float") try: - cm_hot = cm.get_cmap(image_colormap) + cm_hot = cm.get_cmap(colormap) array = cm_hot(array) except Exception: print("WARNING: invalid colormap; ignored") @@ -81,6 +85,7 @@ def array_to_image(array, colormap=None, channels="last", minmax=None): image = PIL.Image.fromarray(array, mode) return image + def rescale_array(array, old_range, new_range, dtype): """ Given a numpy array in an old_range, rescale it @@ -105,6 +110,7 @@ def rescale_array(array, old_range, new_range, dtype): else: return (new_min + 
(array - old_min) * new_delta / old_delta).astype(dtype) + def image_to_data(img_src, format="PNG"): # Convert to binary data: b = io.BytesIO() @@ -115,8 +121,16 @@ def image_to_data(img_src, format="PNG"): data = data.decode("latin1") return "data:image/%s;base64,%s" % (format, html.escape(data)) -def gallery(images, labels="{index}", border_width=1, background_color=(255, 255, 255), - return_type="display", clear=True, gallery_shape=None): + +def gallery( + images, + labels="{index}", + border_width=1, + background_color=(255, 255, 255), + return_type="display", + clear=True, + gallery_shape=None, +): """ Construct a gallery (grid) of images. Can return an HTML table of images or a single Image. @@ -155,10 +169,11 @@ def gallery(images, labels="{index}", border_width=1, background_color=(255, 255 if len(images) == 0: return None - if ((gallery_shape is None) or - (len(gallery_shape) == 2 and - (gallery_shape[0] is None) and - (gallery_shape[1] is None))): + if (gallery_shape is None) or ( + len(gallery_shape) == 2 + and (gallery_shape[0] is None) + and (gallery_shape[1] is None) + ): gallery_cols = math.ceil(math.sqrt(len(images))) gallery_rows = math.ceil(len(images) / gallery_cols) else: @@ -201,16 +216,24 @@ def gallery(images, labels="{index}", border_width=1, background_color=(255, 255 label_pattern = labels labels = [label_pattern for i in range(len(images))] - table = '' + table = "
" index = 0 for row in range(gallery_rows): table += '' % border_width for col in range(gallery_cols): if index < len(labels): - label = str(labels[index]).format(**{ - "count": index + 1, "index": index, "row": row, "col": col}) - table += '" else: table += "" @@ -228,6 +251,7 @@ def gallery(images, labels="{index}", border_width=1, background_color=(255, 255 else: return output + def progress_bar(range, show_progress=True, progress_type="tqdm"): """ Wrap a range/iter in a progress bar (or not). @@ -242,14 +266,15 @@ def progress_bar(range, show_progress=True, progress_type="tqdm"): return range elif progress_type == "tqdm": return tqdm.tqdm(range) - elif ((progress_type == "notebook") and - (sys.platform != "emscripten")): + elif (progress_type == "notebook") and (sys.platform != "emscripten"): return tqdm.notebook.tqdm(range) else: return range -def images_to_movie(*frames, movie_name="aitk_movie", start=0, stop=None, - loop=0, duration=100, mp4=True): + +def images_to_movie( + *frames, movie_name="aitk_movie", start=0, stop=None, loop=0, duration=100, mp4=True +): """ Save as animated gif and optionally mp4; show with controls. loop - 0 means continually @@ -270,7 +295,9 @@ def images_to_movie(*frames, movie_name="aitk_movie", start=0, stop=None, ) if mp4: retval = os.system( - """ffmpeg -y -v quiet -nostats -hide_banner -loglevel error -i {0}.gif -movflags faststart -pix_fmt yuv420p -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" {0}.mp4""".format(movie_name) + """ffmpeg -y -v quiet -nostats -hide_banner -loglevel error -i {0}.gif -movflags faststart -pix_fmt yuv420p -vf "scale=trunc(iw/2)*2:trunc(ih/2)*2" {0}.mp4""".format( + movie_name + ) ) if retval != 0: print( diff --git a/notebooks/Advanced/DogsVsCats.ipynb b/notebooks/Advanced/DogsVsCats.ipynb index 685d691..021cf19 100644 --- a/notebooks/Advanced/DogsVsCats.ipynb +++ b/notebooks/Advanced/DogsVsCats.ipynb @@ -20,19 +20,20 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "from aitk.utils.datasets import get_dataset\n", - "import aitk.networks as nets\n", + "from aitk.utils import array_to_image\n", + "from aitk.networks import SequentialNetwork, Conv2D\n", "from PIL import Image\n", "import numpy as np" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -41,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -50,7 +51,7 @@ "(24478, 128, 128, 3)" ] }, - "execution_count": 3, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -61,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -70,7 +71,7 @@ "(24478, 2)" ] }, - "execution_count": 4, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -81,121 +82,212 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAIAAAACACAIAAABMXPacAABiY0lEQVR4nO392Y+d2bIfiEXEGr5xzzknkyySNdepOlV1Bt1JV0a3riQD6oaMdttGCzDgF8OADb8afvU/0Ab8aKMFwxD8YrQNud2ypLZu3/neM1TVqSKrimRxSOY87Pkb1xR++JK8R63/oMFAIrGZ3Htn7rVixfCLX8QCeCNv5I28kTfyRt7IG3kjb+SNvJE38kbeyBt5I2/kjbyRN/JG3sh/3wX/9/+7/0NZlv1BfuvgIM1i6711xhgzm84FohLatq4pm0Qned4HH0LM3Su95xBCCAERiUQcx0QkhEAQIQTvvbXWe9+PUyKaTqdCySiKjDG9Xi9N09q0dV1nWcbMIQRmNtYOBoM2tOy91hoAynXhnENEY4wQoqqqra2tNM/Oz88RcTAaNU3zO//h39NaK4HBeQgskRACOC+QIpIEIVhHDJKED65pGr/0ZVkWRVHXdVvVxWpdrApTN23ThNb71kBgBSRQEgD7MLDOAQRJrFQgwY5964R1MgTi4BWXWDUZ9u9syq3+yjcy3vDea637/aFWsbXOWksknXPM3jnnnLWudc5470MI8vVWMDMzAwARdUtJrx6/FgRkcIjIzAQIgACIgMiAiIh4s6uI3XdEZEkoJUVKSElKYvBM6IGbpnHOIZEUottI5z0RudqGEAgFh2CMFUhZmg2z/mw2Q8ehdUF7CYKQZEDFJG2IJCYqFhGzDxTYGeuNtXVVBa5XxfT6cjGdFet1U1Z1XaeYWWu73y4AlZTd78qEQqVQxoQoAJEJAzNzXNcmBIvsEQUgCxKaJAnhPQaPMkiUSqCUUgghAgKAUiqOY621IOF9eL2eIbAQIgQvhGAWzIyIUimllLp5vRBAJFEIIVZijcxEJKWUUt5oNzKDR0BADASeGRH5ZqUBGQi6hyCImAiYmRAEoRQgCAR1n8t5r7RGoiiKAMBa2/01iBhHEQBESltrRQD2jpUDCf00kwFiklHAjBQASBuc8blFdG01XS9n8+nV9XJ6vZrN61VRV1UshCYBPgjgOIoGOhrrXlOYnHTQ0nKLDJGKFEnnHDIAAYHoFJEASCoiUojahya4hjkgEQiBKEQg58A7QEtIQhARAYD3nkKIoihN0yiKOCCRl1IiCgBAZABglgyeWQBACEES0X9HeUkQIgoh2HtEvNF9xJvD0ak8IiIw3ZgjBBSASNQ9v/sM3WPLgRAccAgeg/fAjBCASQoKobM/3jlglkLEUVSvC4EkpYiVoixA4FhpDsG64CyGdWvrgFXrnCuxatv2v/pn/3clJCJ6YykEKYQCigLt9zYFIAVG7wlAAElL6HAU9XpZLqU0jWXvCdBaW5laSsnMzoXWGuMdAAiNiijWkWWG4LxzgRFRCA/ggyLhORBStxrdanrvTdOkaaq1llI6G14tr2BmAGLmTpW7DUBE2dnfTrz3AQCQurX23r+2J927Ewm82S8kQoE3tgsBu615vZ0hBCEEAHgEJgQA5714taOI2FS1d46tk0IIoZjZsVNA47TnWiOMD8Fy1drWtD6YqmbnTd0QgxKSvffWQWDn3DjOFDEieAcYUEsRCYEsEyuBvWSKVRxpLUkwewhhulwFFiKK08AEKhIyoDZBAiADOOkbocu2sd6BY2CHSighWVIg4RiAKQQXnEcpBBGj0CgdASISIhFVbcvM3WfvHrzSxhvVJCLqvr02QVrrKIq01kopzywkAUB3Al6bcu+9914AgrzRdGCkAJ4ZmBmQ6dXWdG9NhMzILAAVCSmEB5BIDCAAyYVEqBAwEUoJyezbtm1W5dqGFHSzXltjbNOW6yI4j8xtUfWzXLkgEGMJkYgQNRKIREStjeNYK00KiUBKKRDZB+es9yAAhefQ2sY3zjlm3tjYCM671lBgQGIBkVK9vF+WFRGBihLtE6mLtq6bxhkDqAUJTcITECCRdB5aRGQQJEiRAqtEEEQkZaT0uuUbYw7QLSMAOBdeGwYiEnBzAphZCiGklK/dAIfQLaNSKjhHSN0WBA4hBCYOCEDdZmIABkZgYAZGAEKgm/OIgoADAgnPEVBMkgMqJu881MZbr5Wqi7r22DAE50zTruaLNorOrlfWGKWUEpJC6Ok4z1KI+hRYaRIMsVSJisAHYM7ihNelUoqQrHfeerbGheCCRyGISCglpPTsHASKkziOG+LFalUuVyKAFjIh2Ut6w7yntep0SQGhkgiRZLRkW9MqIQGJACUJpbQKRMbZqlRKSCQBBBA6dVZKaQ2dNnvPRCylCiE4F4QQfGOwGXwAkF0sI40x1lrnnHMOia33JLDbOiJ6vQGvg6UgEAiYABhD9xX4VSgEDMBdZORvbJdonI458+R9UAxt67ENrmlXTbuYzuZI7IMkkiRM06os6weBKu1luVKqbVsBGAcBAWKpIqHAeckYefSN5xAU+khoBHI+eBeYQUWaIsWCamuCAC9w2dSX8+liNScpe72eFeHq8rItqkHSG2W9XpQI6fNMRzr2rTF1G6xHxDSKEx154JOLUwgBGTkEIaQWEiIhrJstFhKBNTJwcL4LghExjqMoipRSAD4IEEIgIrN5fQKYuxPgb5ywlLJT+SiK4kS31pJAIkKgcr1GoiRKSyidc51jEJEEAB+C884ZE0KQJIjIOZckCRHZ1iBiHMeooGmakVJmuvJlWa2Lpq5DayGwBITAPRCKhJJCkZAksJcJxCxSAIAOBXMuEyKCwAEgYooCCqnBerQ+lRoBoHUOQpZl7J0SlKV62VRFsUrHgxLC+ez6h+PDH46eX82nlkPW7+X9Xmnr+XR2/85bVM3zdfzenbfnq8s6ET2TbPSHJJEbE5Eqi8LUTb/f355s1sYs66pxFoVFpkwnaa/fU3q+mC5Wy0Y2apKkaRqIbNWy0L1erwsuENFa21mXbrlvwhwiIuqCb+n/PWFAZrbWhhAChM6TdBsIAI1plVJaKa1FUNq2xjnHzrvArLTWUkiFDMpx27amLM9fXLIP3nv2IWIWgQhQAhKiQurSJYkkUXR+bCB1QGDmAAyBlSCZaqVUtS6koogkKikCSBIU2DmHAkJEtvbLppQipjxmDxft6vvDZ04jb2S3Nj7ckejQz9er2Wwm9Wg06fGgf3x0kjvXg7JZlsflfHc42TdbOqAOsJEPojRhZmOMAMzTDJQwi2XVtC0Ilk74kEU6kiqJ48a1TWtca9BJ7z0IeB0EIgIA/zt577+XKt2E/53cBPsEXfivtUYmItJae3PjkJMoxsCuNd6G4FyXfEqSsVQREzTW18a0rTe2qeqiKLLLsnu3SCollFZCIAkGYpBIEkkJKZEEkUACAOW8kFJGigktB8/BOROC01lESIZDcE4ixYqUkMRiXVcSbLw12FDjypnCm/m6ejm7UFv9nYOdW+/cS0b9i/n0ycsXan419rdePn+xvbm5OZ7Em6N+lqdpfrR6eDKfUT+tV+dh2SQeP7p97/bWbmA/vbjqR0kcJyypaNrWefYhsGPnLLMQQmsNDl6bh26JOvV/vcqd4v720r+K4/FmA/
6dXPfV/0dRFEWRaaz3vntr51wIQQXwPoBxbDyGIBm1QEWEtbVF2xbVerFuqxp88N5760bxQCBJKbtYSAASAwEIou4EdD+XXQzFABgIAIGRSAmJBMzBI7OmQMTM3guPxBJr8MH5ZG84Xcw1NlJGs6aog9Ob/Vtb+dbt/SfHz798/GBzfzcd9TcOtid3toej0eYX34zHY8/BEQyH4ySK99nmOs5AD0XsZ2tztZq31Q6E3mhIRM3VwjQtSsrTTEjtPROQBmzLQkoSgBBYAGqtZRSJikDeeNf/ju98HU++3oDOqMgOkejCfO/Je48MzNxZKmste0BGZu7COLuuFIlYKopjdMG2rV1WdWurVQHOu6qxjSXPkdKR1krHA53c/G4G8iy7AA4wElIAdksvUXT5BSDEeWq9M8GFECDSMtagCJBn6yUzeuDAgZAoULDOGCNhtSzXcUhy6lGuRuONoHA9vfqv/n//34btYDLubY6gEvPldLFcEpFe+2xDrapyuVg4Drt7t4Z723mctMsqH27mOzuzpy+vTqbq+PDt7Vu94UAUbdE01kGa53GSVVXjjIuFZKm6zBYRlZKR0igEM2utO339ba1/fThuAnUkfpUKyH9P/W82iJCUUt1bRFFklDHGEVFPKeecL5u2tqaq26IyReVqIzwrRg2YgYqkTKSOhBZC3CTLzDcLLYQWUhAhgwQkIklCINFrfSEvlZIqdhJr8Nf18vp6tWwrkUSrtp6vl+uylFJ2iJ6U8ur5RRRFOfcmubi7f2+yufHdk0d/9td/cXR6cuutWxzct7/56vjkZFUWWS8dDocbJr492kwIkkAScDwZRlleVZX3fmFrBB0Ne1w0zvGsXJVFcSvLrQ8uWMIbzCFYV7UmjiLnDHlUSrEUzOydY+YkSaSUXQ772/Daa6wFEX/7MMjXKNCNhIAEiChIKKWEEATUpelVVUVS6Rqrqirmy3Jd+daQCzKAcNCLU40UkUxEpFFIJOkJA9iEBGDnXZSQWkgphEDq0jrxagMQkQABwEKrdEyxqr25WEyfnZ8czS7ndTHc3Spcu66rxhodRXUMqfXkqURnyxrKxYHisdkpr86eH78s66IoV4+/+z47ORoOh6kQgUR5Pa+u53//7/7H/SAXbT1Je6xiCDwejzc2N48Y1uezGHFnczIUMc1L9MJZj4hZloFtS2MgcKS0V7acL6J+3sU5kVIO2RgDTnQwnJQdvgCvHcBvwwqvN+BvTdBvCTrnOicMfONMQmBLtizL2Wxmm/bs4jJ4zz4okInSiY4jECpghEJ4Vh4kgiZUSBqlEKJ6hSx10SoihhAYWUnZbYDoorJXJ0CoCARVTX02u3p2dnS8uC7AURY/PXkZD3qDrcn+eJTmGTMvl8vr+fzy5KSqqvF4/PHWp/2N8ePHj58fPouS+PPPP3/x7CkR7W/tEOKL6llvOHrnnXeoMc+/fdRKGN/eXYP7+suvvnv6NM0yJaVbl+PxrtR6tj5bH5/uZOO9jS2zbtMs8wJXi4UPPOiPMEA5X7jWWG+ZOtTBWWsVJEkcd1r775n+f+efzH9rnSQQR4lWSdw6i1IIoSIZBesmSX+5CGY9uz45W02XUJmN1oKb6jhXHbgshDQoiVSn70KQJEVCSolCeClaIkRM5Y1BpBAUohQoSUgkgdR5f/CBrQMAKaWWcmF90u+9OH3xf/ov/29NT3/0ez87vjwfDJL+cPv2zt7extbe3s755YWMo/T+3X/xX/9/dkejC2MGWXrnrYPpanaxvG41/fQPfr/f79MgW87mIs/G/cHzFy/zpO/a8Dfu/Hp+2cvy+80gOF8enl1Ov2m9+9Gnn+g4Pgkn6pbg3TREm7968fKT0XB0TU64Yb/Pni4uLwHLrX4/2dq4vr50tkk3B0VwrWAx7FdpvA6wBQpql4vUeEMohv1ebdpVuZKRBqIgwLG3IZjgHfkALAVR0TRNUcrhKBISAMgFX5u//OWfmlVdzZdhXZMLEQslJZKQ+Lc+Q0AXPgqBJLoI5/V/IcErcBT5pq7QZbxdgn0TCTB0r0bEzh1t7u/9yS/+6v/5x//65Ori7p1PVsU6SmKSYmNjUyl1+86d66uLOwe33//RR/+P/9d/2VZ1uSrrut7d3c2ybHp60rbtO++8c+vWrX/+z//5Tz//yf/2f/2/+b/+F/+sKIqf/vSnV2fn0+n0++Nj9p43t46Pj+fT2en5mQeOsrQLQ6bTKbkwTPJESimlc84F4evaSySCPM+dc1VVJUmysbHx8ux4tVz62EslitW68WUy6es0QSUZKRA654I1DBAniXGWkZGZGASgBARAZpDI4K1TQm4Ox9662eX04vh0dnk1O7/ENmDrFGNMShAIBgggtFBCKikVCYGkkCQJhdRtzG97kxsnQ0LgjQ8QSPKVuRdExNBtlbrJ15l9+OL7h7958v3p/DpICgIvZ9P3Pv4oiaP1bOGgfvLkycMHX3/++ecqiZ88ejwcDC7mKyDa3d1NkqSzv3meP378mJlfvnz5/Pnzqqomg+H04urBt99CCKAUIlZVdVxU5+fnzPz+xx+9/6OPluVaKhWcc85JIZqirqqKQyBSjkNd1zpN+v3+fDYrV+tkOBjkvclwNG1WrqlFHEFrjXdZr+e1aIkFMCsRAJrgiEEoGUwLEAA7EA4ooGBiZtlF6BpFhOLl0fHzR0+On71YT+d7423hiUhpFrrzkwBIfKPCRJKEJJJAikRnygWSINF9ERIRMYJ6tTGqeyHcGD8tZAeeKCGklMjgnPMh/Ms//beFNx989ok9/sEGL1SUJMlqtfzm6683+sNnT34YD/vM/Bd/9mchBO+sEGJra6vX611fX3vv26apqurZs2fIcHh4+H/8z//zjz748P79+6eHR3mer5dLY9tytTZlFWldNxUzW2uEJGvN9fXVdDrdHk02e4MQvAAQglSsJEDtHDuf5HGjo0U5LdciSaK9nd3MDcrT54FBkrYhYG0cgfFWESgtSQhbG+sc+g6HCIQdeg8QUMGrDUBE0zSLq/l3X359fXwKtdnNxrJxMoAEFMBKdeg3dc5b0Ct9R1J4k0Z1Nv23M+puqRUQAQm42SEAIAZkkCS6YtpNFMTsA4MPXtKLw6O3hu+rSA+HQ5En3z389tnzp5KxXq6X09knH35wcXV5eHS0vbvz5MmTxWp1cOeOc+7hw4cqiVer1db29tbWVpZlv/vzv3N8dPTxRz8yVb23t1euiwcPHvyD//g/+vbbb6vlWik16PXrtmmaZj6fH+zfAoD5dHZ5eVkcFMMo1Vqfnp4CvhPFkalKY0wO+aCf27qqywrY94a9ZJgtXTUNVRUgcpB7SrLUNK33jN4DICPY4MGHACyYgUEA3JRyAzIDGWcDAqFEgPV84comQT2OszhgFFA7VAHQBQiMIASpDj+QJDQJSUIKoUgoIbVSWsjXX11+q6SUggShBBDMxCwBBKEkFAiSKBJCCRIICCwIlRTv3Lt/sTj77psHwvhhnLl19eCLr84Oj6qynM/no/H45enJX/3N35xfXlRVZZyt65qkuJpeX15egg+Dw
SDLsmCdADTGfPTRR/ffulsUxd3793b39/7JP/kn3nvbmrZtiajX68VxvJzNH3373ePHj+M4/uyzzzY2Nlar1Xq9ds69ePGi8cZxEEJwcNaYPEkng5GWkp2fX8+D8wfbuzvDiXYct6EfdCp1FsWahLPW1i2/qgnDa6gYABm69AgBZBvYAwYEpRQGTnUi2tCsip5OBCEyAHUFR8GEnkALlJKUQClQEWkkKUgS6VeYxmtso0MWIiFex78SEAEFEBJSYCG60j8CABNFSoOCt2/d+ezgw5PZVeIxzAtfF4kDR+ri8Hhra+ujjz6qmzJN0+liPl0tGmfjNHHONU0TRVEIoZ/34ig6Pj7+/PPPz8/ORv3B8+fPv/rqqzu3DqIo+vnPf/7P/vk/V0rdvn37YG9fKVVV1boq1031L/7Fv7j/ztu//7u/l0RxVVV+VcnAxNBaAw1GKoqVtlXDMurnuWvaslyvlsukn6lB0lfxdj4USLI0q6PzNE0zqcmjsU6gECS6YiYjMrDveAwMzAABCGNlgl+U68VyWdRNBzARYLCefUBEQklKkpKkpVBSkVBAim4w5C6T0iQiIRUJ/Vvf1esfopCMkhEZBJIkUkJ0vIrXSYBA6rKYD3YO/mf/8D/69Nb9fgt8udxR2bsbe7CsNnqDPEkvLi4CwtbBvkfASEV5qpQCwjRNkyQ5PDycTqdJksRR9OTxY611kiS//OUvv/zyyzzP/8M/+vu3bt3q9bK8l5bF6je/+fKLL361Wi22dzbfeef+hx+9X6yXJ6dHgCFLIh9s8JYEePDGGCEoiiJrmrauBVIWJyFAV8RtihJMuD3ZuTPc9NNy+u2zcLHoWxyyyoJIAskA7DwAMIJHcAQWuUW2yIaY1nVFiRZxwkolvdxyqNtG6UhoRVIwUAAOzF1mrNNECRlHUSQVBAbnCVAhCUAtlZZKk4yljoQSSMF507ToHQQnkCMlIiUEcnDWW4McjGls2yAHIgAIzF4IfGuw+dN77/+v/pP/7OPtO+bk2p3NyqOL+1v7O70R+rBcLp8evvj64YPtg/3pcgFSlHUVQnAcVqtVh5qU66KqKmYu18Xjx4+/+eYbKeXv/P7v9Xq9JEs/+/TTNIqttfP5/OriwhjzOz//O6PhMFI6jmPbmuFwWBRFURTMHMcxSaET7Zxr6koA+rYxVR3H8XA4ZOb1chkprYGKy2nK8t3tW/Jy/ezPf/3sr76kabEpE1Hb6moeoVAkhBAsyQloMRhiqwkSLT2SRwrAxjsXPArk7nnWM0olCASBFEiSkVyASJAilISIJIFiLROpBJIgEABSkpQEAMEHQu7K9kSklNJSvcakOhvlnINXjADvfYe2xt6qyt7KRv/4d//e3njju8Nnh8s6tOhjMdrbbAn7/YEHTrIUlJgtl6Tk9Xy2d3BrsVxqrYvlynu/s7VdVdV8Pr86v8iybLVa/fEf//Hm5ub+/v7du3dPXh4t54vs9u3xcPjBRx8ppcqyTNO0MS0iKhKTyaQKFBpjfaiqAhmEhi5+h8AhBA6hC3mNc6Y2SBChkh6t5Z/s3D06P714dvbN4enw1u7OO/cPNrYbwVflKkgGiSglSLC2NaZxzskOrzDBNaatrYkZQAkWZE0gBBAkpCYliQQidaQEJaQSggIoITIdR0oLIgxMgFoqpZT33hkLgFIp1a3+K4wQX7EiuugrhHCzG1J0iAX1B+MAIOjj9z66e3D70enLu0/uPzh7/nx1ffudd0/LhR5lpW3zQX9cja+vrzXi+fn522+/Xdf1eDy21hbLFTu/MRobY548ebK1tWWM+eM//uMPPvhgOp0uFrPxePi7v/PzUX+wWCwIwqNH302nV1rLwaDH7Kuq2B1txGOuF6uIpGdm9hiYvXMmoFCgPIBKsjSK49I0VVVpLbUSyiF5iAr33mD7zmB7ZquFtdXFVFizBBsPshqscc46x8RCya72J9EH8hysraumaVqNCpACEkkBSPgqwCchFEkSnenvKtSopYqUjqNIAHb0BUmkpbTMyIwAWqpIyY5vEULodLwjwtwARPJGA7otIaVguQCtuG2DN3ma/+QnP9u4vb/+4395tJrmabaZKsp0jyBP0lvbu0+/f0xCrFar07Oz1Wq1vb29tbWFiHmer1YrREySpN/vb21tee8PX778/tGjYNrhcIiBF+ns/PQsSmKQQqVxVRR5v++MOT8/T0kNdNLv98e9Qb/IExlLRluH4JwIyM57FYhIZ0kdbG1aYC9DhBIiVL5oUck4S7NhFhbXy1U13NpI+8OZrZQgj8I6a9rGhpvSr5SOOQC7YKraNi3HEkiEEHSH6aCURApIo9SklIwEO+oK8AgSqYOXCZABnbHsAwFSV6YQFGutJGothSBrPUBABObgnAPiKIqU1s45awwzMwYt4Lpcbm7v4LBPi9lqtcwjoZWSSLPZ7PDwMN4e16YcbUwWV9PhYBCTtKFVSh0dHRVFAQBbW1txFL19//6XX35ZlWW3E2VZdia73+//7LNP67K6vLiIlR4NhoPR8PD4aLFeFUWxd+uWt3Z5PVssFirnTOooijLOYqm0I+/IhhYCGGO892kkozjV3pq2scYb32qt0jiNkqSsymU1UxuDno5B4no6/8Vf/TkOs2xrONrZGPdynyTGtyF4AJBRIGLhvAPj2HmCVzCpkgKkJqlQRCQj1AlFWmjgQAgCQCB2kbsUJABJiMIY9o6AlSAiFESRUoThdYGiYzm64D2HsixvtJ6IlAQAkpIJh3f3l8u1DqY21bJa21It67VO4t3d3aurK8XmZHo+3txIdPTJxx/vjjcePf2h1+sZYxDx/Pz85eFhU9d7e3tE1KGk09msLEuttRDi4ODAOae1vn37dl2UVVFWVbVYLC6n171eb2NjI1JqkWQRE//WeYWO7plJELFt2qJpirqGRIssjjhxzoEPzjkHhJpDU2e9nuLASeLQseAfXvzwX/yf/y8h1dnWcGN/uz8exImWkpSSkdYyAiFJtmBFAGKQpCSJrpLcFWwToWIRxUJFKEVA0AoRMbBCklJqIaWUAlAJWZdl9+cKIZQQSqkoitg2nZsNIQglpVbgEAC8962zaNrX21PXddu2cyd//de/iEG4phaI999557otWms2t7efP3mIkuuqOnl51E+zq9HG1nB8OhykaVoXZZZlQoirq6vOtUynU0Tc3d3d2Nzc2dlBxIvrq5Pzs+nFeaT0zvb20bMXTdPs7u7evXv3zr2759MrKWWWZYM449YWl9MQghbSmFYwCOCESUspk8QyNyHUbZP3U6kUSiGFkNZ565zxpqrzNAnsbdMUtsJBvr+7d//2W08vT5ZX0+vrqwAeMBCBkkIIITUJT8wkOtPRRUvogZmRUJKIpNIdv4mRAqCUAhCQRVfNkVJKKQJESnWfvAt7ujpPpJTxBjpmIwdi7p7jnIvSpCt5dlpmjJkvF7PZ7Df11fcPHt7d2a+XS7ZO5enKtVVTn12cn5ydbmdv3bp1a7VapSpaTmejNH/33Xfn8/lqvojj+M6dO+vVajweZ1m2u7vb/Rl100wmk+3t
7YP6ztnZmS2K68srQVQUxe3bt9999902uG8ffd8ZKwhhd7xJKBbOEZBSKgC3bcu+DUGiSrWM0jS1Uizb6m8TTCFE4GCtt7bX74P3jWni/kijrrxPk+Rgf/+qWlWhbbwJSEKgQHbWmLqV58JIBHJkFkWKkqqmFwkNlAlKFEWxEIoQPXNwSApIm5toUkqhUZAAxmAhBO8bblGw4VqwSHKVJImH1kusTaOlyvq9pmmY+MsHX02Xi+vl/L1PfxT1sl89+M2zs5fbB/s6S55fvDip1tFBfph72hgcn51+8fhP00Hv2fPD4/OzjbsHzgf2cjLZHY0m89XywcnZIII0Td+9d7eoq8vp5d3793bv7EdRBBIl4CBJyvNLvyhHg82Bk8No+ODlcouG4bw8yLdk6f783/zbVvjd+29Nm+XB3Xvz6+n1w4vP777349vvhuvVrku3417TNNbbVpIBD6aUUka9RPu2uZ4ppXaSvnOuDM4iGmfOemKc99fni/xyfms4tAbOV+tPRrt/cfmnyb3ty7I05KUgMGZjlPu1lR00IaBjw/NN+RBFHMdKqQ7NB2ZA7DAf8QqVuzkreBMaKyFkRz595aJDCNbaLMkjkiGEi9OzdVVONsfb+3v7b9/9q1//8q9+/ctZsarYWRGeHh+u6+r04lyPh+8f3BoMBqcXp/PljAkd8nw1V0olSSKVTrM4MBrTtG0rhIj6uXPOBR9FUQfrV1U1GAw++uijJ999//z585NnL2xRL6ezREdZnEgpBXLS73lnX7x4MV3P996+vbW1per04uQ0j5K9/VuL+VxwvJcN2qISatjF0N3B9famPaJDPl4RIG6OQgjBW2ubNooicGCMEWmstd7d3c3StKgqicQSnbOKwVrb1alCFwi+bneRUmqpkjiOlJZIr6uaHVTQxY6RVEopKQQBsvPBuo46GZEUiARwA/B7X17NoLE6SgRSnuc2eEt8vpyND3aOZpd//fCrH86PC7Y1hpCo4d727Xt3esPexezy+eGL+WpZNXVZVcwsFalIR0kURRFCsNYGaxC5aRohRJqm/X4/hPDkyZPvvvuubdvlfD6ZTMbjsXEuH/Q///zzg9u3Hz/9Ie/3fv8P/+4/+kf/aHN7Cwjv3Lmzs7Nz9OKwXK1v7+4Psvz85fHqaqGBItKpjoQQURQlSdI17XSdP9baLnwIr/i2nZNnZmds2zRJFLP3dVl1ld6dza3NyYYp61jpVEXkmYicsUIIGbznbq2sC94jgBIyIiXwppguAQmpW3cpJQff6X6HgwoGH0IwViaJRgEIEBgQpBJSSmttppPgnFuuZtNpNum3PlTBPb04vigWV6YsJRdmVa1oGG9kg16C2WhnUpjqxcnhbD1nwUVTYqS2dravZzNmrusaBDVNS1IU5SplV0GUJEnn54UQdVkdHR0dPn+xs7V1cXY+v7iaTCb3bt3uDQfFat2VvQaDQWmak9NTFCIf9C8uLr58+M1wYxL/RGUqgdpoqRNQqVDjpC/tDauQmSOlO5ZfVVVRFHnvu18qpYyIAnNgJgDbtCLKauesN6pNqqaUPXVn/9aT2UlEErSoqRQI3to40jKEAAIgsPe+C0O1VJHU4AOKG5RfAkm60Wl8Vd7qKmLEwJ7BBQqsSTBztwFdFd6xBQDwYblez9fzSoU124Vvvj958fDls4Wp490RJZFHWLqmKtl6V7sqiiIDjmIdKtcYO1A0nEzKtjGuXS7Wuiq992mamqYRgiCNGtMWVZmafP/g1t27dy8uLh4/fgwhXJyfnx+doPW2bsrV2lT1qizKpvzyq68ePXr06Pvvtne3Tk5Ojs9PI5Kz88v/9l/+67/7s9/76Yc/ro6n7bLIx/uJUOiQmW8KGHHcVdOapum25HW40TEhvPcevTe26/npDFewDizd2bvV/+FB5Rk8S0YKwVvHWknozAUiMENgRSLWOlExee5gS4EkO8TYB49eK0l0UxWTRJI6FEJ467oODYYQAADABV81dTAVSnG9nl+tl5dXx+fVcg328cXxyrfrUCfpoLcxtuzLpjSuFkKcXp8nSXK9mM1msxDCeLQx3txwzus4stau1os4pHEcp1kcJ3owGKwX86yXk5JVVV1dXXWs9Pl8fn5+vjGZbAxGwdhIyNFookkUdXV5fcXMR6fHs9UyG+bQctM0/VFfIU0GE+24upr3ZTQAzbVLsggCcwivlT2OYxe8cdZ6h4ghsHFWOKm1JimkVtJzIDIdriwVAMRxbBH2t3f2N7cfXx15oAiFcbYj4MpuD0k4A0hEWqlMx5mKkVmSkCQ6dgv44MGL36r6ShRKyEhQUIgu+NYgglbKgWcOnkNdm/lyGaFwEL589ujZxcnj86M1uWhjGDJdV1UDKAXUwZZ1URRFV9kHtNfn84uLC2u8EKI/njCJZbHMB/2iKJiwU73ONPf7+cnLw42Njazfu5pef//991GS9Hq9NI4PDw+/+eKrLEn/4Hd+N9XRNw8fxDqKo2h9NWMEncaDyWixXgmJzHxxfL493vj4/rtuWf/w5be//6PPb23uUuPIcNem2cGFiEhSJEnSnYPOJTRN44ETYEQEQZoFaGqrJotTksIYk6RxQ3Z/c/vu/sGT4+eMmKWxM42UipllByd0pFyFpIXUSkVKKRQCbzKm4LvzxiEEFASd/iMRkRKSGUGpqmikVkAkEByA5VDZdl6uHx8+i7L0h+kZDFMNo1RwI6E0TY1htLO5ubXlvFkVy9Daum5Wq5Xsq/l8TiS2djYXi9XZxXmS5t7z7YMtQSqOrgK7uigX02siEsAhhFVZMGFRFIvFYkPKfp5HUTQejo6tPzs5/eI3X20MRuz9YDAw1t67d+/hd99Fsb5z787L46PFbJolCcVQL9df//UX29no/mQvF3FPRnGalvOlkvFNr5X3AKAEdf1f3nvHwTa+ddbVoQtSGEEgKS2X67LbtrKp0zxGhnFvsLexJQMEz5mOSxJCkfdWSimbqs6Bzs7OVovl7o+32TjrWqm0SpI4jpm5Kau2btqmsUQoME+zVEdaavahcU0kZJxmpqmttS17nUUyjkpnanbJeNBTBw8efXvJtTNmAfZqvaJelG2O2rWorfnh+Q9XFxdsnESq16skSUywKtLseVVWQkUhwNXVdDQauxDa1mxsbDjn1st5Xdd5nh8dHXngy8vL8/Nzz0EIsVqtLi4u3nrrrfFoPOoPBr1+2zS9QT9Ser1aKaUGo5EP7ux8ZoIdjEZFXZyenu6ONwXT+mr64737v/vZTzaifoSq3xtw1TTOvQZxO4Js18+ik1iGIITo1Lxq6hg4juNgXF2XiFhUVZIkea9XW2O9qRbmJx//+L/58z+e2sI7J0kEDkwoq6rSAZmiJIreOrjdyzIqjUC0TRuRlEkaxVGqokpXVVm61gRg611rTERSC6lQIAM4F6eJqUoQxErMyvWjw2cvjo+Ktj6L7PVy3gRjDCx9U5Jti+ZsPQdBRbGSROgCBK8kiSjpp72X7bUQUmkFQM5CCBwCm9bVVatk1LazPM/bqi6rdRc3C62EEOv1mhGUUpubm0VRPHn
0+MuiSKO43+87Yxfr1Qfvvnf7rTubk41Hj55GcYxVOV3OE5PkvV7/fnZnc0dXoc/6/t7tgc5TEUcgubXeeqUkA3Q+IIQAgB2IIjh0Rj8CdiE454xz3La7g2HbtmVR1E1bmkYRey2IyLdN7VyepBerqY5VFEUt2BBY9no92XpoPPuwt7sLgU1VRzKKlAZmdl7EFCUaAYKxRWsaY5BBo3BSU6wFIVtnndNJrLy1EoMSs2X5+OTw++c/NMZcbyiUwmtsvW1lQFJt2c6WCyFwvV4PspwgQOBIKimjOI5TyrIsk1JVpSmaCjy3jeVQLGbrTz75Udvati6999vb22kav3jxYt1UnkPV1F00ggzlupg2jfd+jSSlXC2WL1++vL6+3tzczPM8oljFUd7vzYuFCX57Mu6peKM32b893o+Gd8c746yXYyI9gmNEwQD4imbLzPCK8WmtVYRKKam19j4Au+CDZWZWSsVJYpmtcx4YBEVSSGvBtXf2bh1eHAukKIqM80BC7m7vmGWxWJ6tl6t4tNNWNRsrdZrHSWd8Xmd93UlsmoYAjI48B8/BewjeW+fQESoJCgvXzqp1ESykkcqjWjVCYm3axtYBuHXWGCMJvHORUr00aZvGOK+TWCIZ74b9fi/vM6OplgKFlFrKmB308v7+3u3vvnvkHSdJur21u7k1cc59//TJYrHoeqM3xpOyLJk5z3PbtF3+ePutO7PrKRLt7O4KIc4Oz0IIg+EwGff6/f7OxqZyPKKoJ9PdydY4GVKgjifsQ0CSzrcAELzvwkqSwnsvWRpnQwjWe0RUkWaExhrr/Xq9juNYRjoKwdvWI3hrGxd8sM7Zg9099bVkZqEke5RKyV6vV9tw1bbOOUkCA2il8izz1nUNAc45Tn0X0jJz14smtSIlGcGH4DkwYWkaSqNAfLVcni9mNQXuxS74ZX1tC1sURRe3LFbLslxnWeYAN4fDwWBwcXEhdIj7ubVtVdTb/R2t4rpu0YEilSb9KIqDhx998Ml6sT4+PBoM+v3+cLVab2xsvPfeB4b94eFhV3i5e/fu119/LYTopRnlva4Nf3t7u6qqh999m2RpFEWr6+VstYwH6WBzMhgN8+FANK4vk/J8ZTITlGePDohZsGMmD694tX9bSeXQNde5EELTUAdqRlrjTamgw+dICgzEwNa7xpnClBZsnmaJjpZt65VywRNJmSSJHkAURaP+QCIRIPtg6sZ1GiRl169xk3cAykjrV42AhBRCYARGCAiMUJrmajm/XM3nTTktVqu6XJiFMaZpmvFwlKdZVawt0KQ/NMbs7G4rpaaLuc4iPUhtDUksb9868J6n10stCydAS51GPSAxHI4PD59XZVOW5ebmGGQw3mmhP/zww7Zt27btWsOVUm1VL9wihLC1tQUAWuudnZ2j0xMQdPft+xd0WpnWGFNV1aoqoyiKLfhUl0VRrFaUbSQ6gdaHEEhKDjfUfiLqoNzO+LRtm2RpcM44i951fgiJhJTokAkBIQiUcaS1EsG5quS2IEIMnGXZ9bJ0hmvTMJMkojhNlVKDwaBtW5BpCKGua4WkhFRav64a3qSCCUkpA7APgaUkRO99Y0xQcH0+O5pfvZifn82ur5bzs8V0VZdGGnhVLh71+ty2iVC7m1sBw+b2VtHUST/PB7lMYwC3v7u7t7HfNIYdleuaoIl00uv1sqxHQEVRjUaTF0fPkjzN81jrqK6r0dbwnXfeefbsWcej6vf713WzWq2EELPZTCn15NnTnZ2dvb29s7Oz999//+OPP46y+Gq9KG1blqUExMp4WopFfQWJmdyOenHTFMEDCWFdK9j9rf4RBWbnnDFGatXhAjZ423h8RYWy1hpjkEUIgQUG4LKurqbX09XMiPBydg7M1lpAyYjGWVlVFaI0xhBRXdeyn8s4fo1DCSWBKDgPhJ1+ORkAwFrrpGOhGME4VzV129gfnj/9/uTFlVnPQzsvV2VdmeC9dUopZHDGBOdjrUMcJ1E8mgyzYd9cX2V5PtycNM6CkTu3b4kKFYk0Sgf5gINK4nxna3dja2swGMxniyhKxoPxZDIpioX3fjQa1XV19+7d5XI5nU4PDw8//fTTpqy67Gm1WimllsX6zp0777333l/81V+21tR13SGX6/V6tl5OSdZX86Ry7w/3zxssb63D0DeNccbHjGVdaRk6Lj/+VpsRIzjngEgIYYNv2zYwd/3ZxhhhFIFsnDPsq7Z5cXL0w4vnq7b0Gn84f7lwRdu2uepnKqPQynEve/TF181s1kfcThIoy4R0nkYoBSopIq3jSJNUXWuj5wkzMvjGCldJIQyEtWjXfXhwfvIIL0/GZsVwPF+pcTw7nW5ubqZ1uLi4gFi99eF7xhjLkUqiwf4uIy6rtjVhe7wzSAZN06SRNserM0is9bv7u9EkVN+fDPv7HN6/u/M/LmZnae9Pw+pvNg/ksD8a9Q9OTl/evi/W82xva1uSWC2WOzs74/H4+vraeEdEOon39vbyPPfer9frzz/97Pz07DJaOmuZcSBTU1TDOB3vb2Jlrg/Pbg+2r8w6X55rQcwtVHWmI2dv+APE6BtjjKHgB1HatK1QEpSIhQoqtMZUTV23zfPRet2c1pW7WM6/e/7D4fmp9Q4AmqpWSCpSIhVZf2yMUYRKpLK8nrt1xa0VXkhJaaQHaW8yGhvv4IbdKQWS8EzMENgHn6eZTFMACAiNMVVTr9GeXZw33ub9/uHLJ8vlEmva3ty6/dadL/7qiw6gL4qiAxF7vd50Or1161Zd11EUZVnW6/Xatu1cxbDXb1s7nV4nOY7HG4NslMAwsKubEgBGo9Fivi7LcjIetV6s1+vz8/V4PB4MBkmSpGnakfeJKE3T5XJZVdXu7u7e3l7HnQaAumpNVcdRNJxstnGFJqRpOp5scWVH25tRL2vA2+ClIs9QtWU8HFVVVVUrY22n+x7Yc7DeAyN6AqKAXKNZrpZFUfzX/+aPDYSgRAlu1VSOgBHKskyiWJJQUgKRQgAIHgEI5fXT43a6ji3lIs50mlKUJ3ma5ip4FoSChBAigECmwAihXM46YjoKkUYyEK7b+rSYLeryfH1VKTg5P8tGA5Xq0WjUlFVVVZ0bFEJkWTYYDHq93rNnz7a3t7vaQ+dgsizrChRRFBVFcXx8dvvuKM8GcZRt9LaEAGanlNjZ2Wlq61u/sbFRtlVrFt7D6elpBw4bYy4vL7vy8nA47BBKY0xXVW+aZrFYDCnTQveSnACNXbvGVFSR8RRrr8XSNdW6VUH0klQylrYoVnUIwYvg2K3W68PT4ydPnx6fnrz77rtREsvohu/Utu2iE7dkQexE6U3FLhn0dBKDFMF7BrQdsAdsgrfB2+Dl4ukxls2Ioo2kN1RpBIKZm6YBQYGDYNEhsZJIAaKGKIqstau6VpGmTLXsT+fXD4+eLrk5X0yv2xKlGG9MlBJVUV5dXYUQJpPJZDIBgK44Xpblcrk8Pj6Oosg5570fDoeTySSKorIsu26x6+vrrZ2MSJmWs51MSFYRA/okzuI4dazH46GqF/O1vnfv1mw2W6/X3fSzrq
duf3+/aZo8z6Momk6nFxcX3QirsiwT47a3twcyXq1W5MLGeJJnWblav/3xh4PBqI3EuqzrssHVtWnaqigOzaps6sVqOV8sameEUlGeRD86+Jvz5x252hsbnEfE4HxLjdLSA4tYKZShLlZ1KZ3x3mNgidTRnUEQaQ3BsQUp53UEYpQkA5mkIlKMiGR9QABm5G4uGRMCEQkJuLW11VqDS/IEhW1P59cvL8+OppdLslYisLxz+6C1djVdzK6nxCCESJKkI2tmWTafz4+OjsqyfPHiRQghiqJ+vy+l7KpO3TMRGYmljDgoZwkRfWiEdADBGKtVHIs8ijSLvPW5wvTq6mq9Xo9Go+l02pUP33rrrUePHnVjeDogk4gmk4kQYqdUWZahEE40LUkEaI1ZNdWjw2ffhdDF3G1t6qI0xiDDmV/v7O1u3tvLxf6yKlZlUdrWepe9vae1ds77lgZpvr+9k0RxWzer0+PTqwuLPJ4MM54sirX1DhlM3ciukCVllCY6Sxxw3TZyG7NIqp5IoiAooJBKRbGKNBB1s2ckCYGCuu9IQYU0ykQStcFd18uTxfXSNtG4b9ZTkSTUhGzQX52czufzpqwG/X6SiI4kIqU8Pz9frVaz2QwAZrPZarXa3Nzc3t4WQlxeXiZJkuf5elUSYZrGcZQHF0k1BIDWrEia8WRQtotebxjLiZCYRVltek3psyzrUKCOa9TNBdzf31+v10mSDAaD0WhUVVUcxzs7O3trdXl5WdZVbc1yuXx6eOiQAVHGSWuNECLPcym1jUlkWZKmH0/ujicTmURXy3m5bkpuvQKR6P5oZFtj1m2c6Pvv3v87n34+7PVX88XF0fG//bM/+e7pE+WMTmLjPDMQYp73Jd7EUVrHWsWBnW9auaHSSGmttCKBiEBoOVhnUAoIgIiS0ZPwJANJgdT6lpREJTlWdeWvq/XKt7KX2nYeFHuLi9WSEbx1vTwXAZRSRVForfM8f/78udY6juO2bZk5TdPxeJymadetb4yZTqfIAtADQNt4RclwuCulsrbxobh9Z+fkbCnJJnoQgkuzmEhGUbS1tdWFyO+//z4Ree/btt3f37++vu71epPJZDQaNU2zXq/rul4+XxyfnrTWRkm8qstlXao0znu9xnkn0BGbtsS2CSEQ0dI2eHF01stJq3Vb1+yifj4a9qXWTVVRYA0YMSRIfakHMgJS8d23Dg6fX6+XhljFUSwyIYRgMHUjAL11znvbGhBUt816tZQxUyq1jmIg9AgtsTO1qZ3ngIjIoElEJGOptFSKhBpFjamCQ4lxCW7RlBfrhQt6ul7qUZb081VRoA9lWU529ti6yrrVarW7uzsajb755pudnZ1+vz+fz6WUe3t7BwcHXYl1MplorY+OjsbDSVUXTVPNZ6t+tpHtbkRRBNgsl9ODvcFskZm2IaKmLeNMNI1ZL2YdWlXX9f3799M0bZrmu+++Gw6Hw+Gw3+/HcZxl2Wg0WiwWZ2dnSWmdIhCqYtciZ+NBNuiTkvOraxVFRGScReQ4S4QQ3jodOApMAJQkw1iJJGqNXS0WTVUPsjzywayLo+8efd2GYd6rV8UPy+uXpydSiDiNWZDn4J1vmlYEEEjAgIyxUFpFANDoWEopp4s5ljKfDBtnbfAi1tPlfDwegw9KSpay8n5VVN57JWW9bPvDgU5iaNzcVD+cvCy9efLkWbQ1rK0pinp6eeFbM0rSqqrYOpaqW4jpdHrv3j3vfaeziLizsxPH8cnJiff+4cOHe3t73vurq6vp/CRN0+WyHPf6kR6sV2U+9M9+883Vwp+cHvXz7YP33kMK19NDBGlt03WPtm17fHx8586djqE1m806yuJkMjHG3L179/vvv2/bFiWFVK9Xq7KpZRzleVo5s1rO0jRvrdFSvP/ue1XVFEWxs7l1eXnZzqbL+UJnSTLosQvFfGm8Q8JUalvWKmA/ydi47x88tFXjjZ1HON7cICUL0yDhar5g59+9e98by9at1+uqrdD6RCgialUty9Cm455IY9CSGaUSOo6HuSIS4Ly1rnWVtdaDF1pEEVqhvj78QUW6tqb2rcwTs1jdvnfXZ2per1zptNZANyUkLaXMsm5IZxzHm5ub3WSXjkhb13XTNJ39ub6+7libbdMwWAC+vprf2pRZMpEqzIuHq+JiVRUX57MwyQC5btbWtd5zFEX37t0bjUZdU+qTJ0/29vbG47GUcjgc1nV9cnLStf0kSeKcG+wfaK3Hpr2+vl6uVy14UjIfDpSKpFHD/uBnP/lJnvb+zb/6175u/+l/+j/98pd/fnx8fHx+tpq2g8ko7+WNt3Vde+e0lJpk8LyqS+FCnqSDjbFtaw5grcMA5EELtbWx8/mPP5v0h+v54sWz5y+Pj1rvXG0a01SrUopJrvq9JriL5aywrYNQm5adX80WqY4SHXWDRUWshVDIYbqYf/3koZTy9Pw8UNh/+27R1KPN4WBnozxpOgwAiUzTEgWVpIN+v9uA4XA4Ho+bpunAgA5AXi6Xnef03hdFEcexdwawZWbThjzdVKIXxe3q/Gq2OLGuaFtprVdKXU/XQohBfzTsx9vb2zs7O2maMvP19XXbtnVd371794cffjDGdIMuHjx4kCTJzs7O6ew6TdMAXLh23dYAqLVWQl5eXu/v7H7+6ac//viTVCUvHv/wl3/25//tv/5vPvmDH0dbI3HYv57PPELLPhCCFFEcuaatnEl1lGYjwRB8WIFTUWScbduWCSEEJeTO1vb777zLrd3qDVMVmbp59OJpU67zQf/OwYF8PD12U54Vq6Pri5Z92dTL5XJjMLJlnSgtAL33ItbpsC/jyEJI0rRNpBfCJdKYJh8O8lWfiPI8z5I0ksrUlTcGvYul6uCRjsbSPegA247jtbGxUZbler0WQgyHww6MGg376/JSShqNNvb37jkrfbVcri6upycMLov3OZBzbja/HgyT0WgiKDLGzOdzRNze3u7ISCGEs7OzEEK/37+8vOyYW8PhcLFYrOpy3VTdoN60lyupIYS2bXd2dgaDwe3bt9fL1Q8njzfHk08/+fH15dXXL54hIuWZNO2yWIeqIiJiUCRNaAhR5nk6HAbvV/NFURTjtA8QSEnD3jnrgi+q8ujoaHMwGvcG+/v708X8YjE9m13XphVOy79++mBVlUFRFRwoYcDG4+yjn3324b13fNUcHb789vvvrlaLypKMgwnercosj0nKj25/vlzMVk0lpXTWVutCMGipvHWmaRMl814vHw26sc/dEJemabqIaDAYCCE6sl+XQHV745zTWvYoy/Ikgu1BbwtB13V9fnFUVjNmdM0aw3w6nc7nU6S+kpEQbV3Xy+VSCFHXdQghy7LhcPjixYvOuBFRFEVCCP+qqNL9XEkZRRECNVVVl5UEeXZ29qtf/cpUzeNvHxFDGieT0fjhk8dCCASw1iohN0aTrY3NQa93fX1d9SprLQqxLMuiKq21Iolqb1GRzBJvjfWOOZxdX/63f/5nP3r3/TxOkKFum+3d3WjQW7VV2TaySOGyqgbDiVJp651gPRlvfPTzT+9u7UUoB
[... base64-encoded image payloads elided: the tail of an "image/png" output plus added "image/jpeg" and "image/png" outputs for the input-image cell ...]
      "text/plain": [
-       ""
+       ""
      ]
     },
-    "execution_count": 5,
+    "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
-    "Image.fromarray((inputs[0] * 255).astype(\"uint8\"))"
+    "array_to_image(inputs[0])"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 69,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
-    "from tensorflow.keras.layers import Conv2D\n",
-    "network = nets.SimpleNetwork(\n",
+    "network = SequentialNetwork(\n",
     "    (128, 128, 3), \n",
-    "    Conv2D(2, 3, activation=\"relu\", input_shape=(128, 128, 3), name=\"conv2d\"),\n",
+    "    Conv2D(2, 3, activation=\"relu\", name=\"conv2d\"),\n",
     "    \"Flatten\", \n",
-    "    (100, \"tanh\"), \n",
-    "    (10, \"tanh\"), \n",
+    "    (100, \"relu\"), \n",
+    "    (10, \"relu\"), \n",
     "    (2, \"softmax\"),\n",
     "    loss=\"binary_crossentropy\",\n",
+    "    optimizer=\"adam\",\n",
     ")"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 70,
+   "execution_count": 20,
   "metadata": {},
-   "outputs": [],
-   "source": [
-    "network.set_learning_rate(.1)\n",
-    "network.set_momentum(.1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 71,
-   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
Model: \"SequentialNetwork\"\n",
 + "
\n" + ], + "text/plain": [ + "\u001b[1mModel: \"SequentialNetwork\"\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
+       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
+       "│ input (InputLayer)              │ (None, 128, 128, 3)    │             0 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ conv2d (Conv2D)                 │ (None, 126, 126, 2)    │            56 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ flatten_3 (Flatten)             │ (None, 31752)          │             0 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ hidden_3 (Dense)                │ (None, 100)            │     3,175,300 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ hidden_4 (Dense)                │ (None, 10)             │         1,010 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ output (Dense)                  │ (None, 2)              │            22 │\n",
+       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", + "│ input (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m, \u001b[38;5;34m128\u001b[0m, \u001b[38;5;34m3\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ conv2d (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m126\u001b[0m, \u001b[38;5;34m126\u001b[0m, \u001b[38;5;34m2\u001b[0m) │ \u001b[38;5;34m56\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ flatten_3 (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m31752\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ hidden_3 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m100\u001b[0m) │ \u001b[38;5;34m3,175,300\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ hidden_4 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m) │ \u001b[38;5;34m1,010\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ output (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m2\u001b[0m) │ \u001b[38;5;34m22\u001b[0m │\n", + "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
 Total params: 3,176,388 (12.12 MB)\n",
 + "
\n" + ], + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m3,176,388\u001b[0m (12.12 MB)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
 Trainable params: 3,176,388 (12.12 MB)\n",
 + "
\n" + ], + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m3,176,388\u001b[0m (12.12 MB)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
 Non-trainable params: 0 (0.00 B)\n",
 + "
\n" + ], + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "network.config[\"layers\"][\"conv2d\"][\"feature\"] = 0" + "network.summary()" ] }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ - "ds = network.input_to_dataset(inputs[0])" + "network.config[\"layers\"][\"conv2d\"][\"feature\"] = 1" ] }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[0.7334817 , 0.26651827]], dtype=float32)" + "[0.5129900574684143, 0.4870099127292633]" ] }, - "execution_count": 73, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "network.predict(ds)" + "network.propagate(inputs[0])" ] }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 24, "metadata": {}, "outputs": [ [... inline "Activations" SVG diagram elided. Old ("Activations for SimpleNetwork"): input InputLayer [(None, 128, 128, 3)]; conv2d Conv2D, relu, range (0, +Infinity), (None, 126, 126, 2); hidden_2 Flatten, (None, 31752); hidden_3 Dense, tanh, (-1, 1), (None, 100); hidden_4 Dense, tanh, (-1, 1), (None, 10); output Dense, softmax, (0, 1), (None, 2). New ("Activations for SequentialNetwork"): input InputLayer, (None, 128, 128, 3); conv2d Conv2D, relu, (None, 126, 126, 2); flatten_3 Flatten, (None, 31752); hidden_3 Dense, relu, (None, 100); hidden_4 Dense, relu, (None, 10); output Dense, softmax, (0.0, 1.0), (None, 2); every layer now also reports an "Actual minmax" range ...] [... later hunks condensed to their visible changes: execution counts renumbered 79 -> 25, 80 -> 26 (cell output "(20, 2)" unchanged), 81 -> 27 ...] [... inline matplotlib SVG scatter plot elided: old header "2021-04-27T19:27:38.154813", "Matplotlib v3.3.1"; new header "2024-10-20T11:42:03.268446", "Matplotlib v3.8.1"; the remaining SVG path/glyph data is truncated here ...] 
" \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", - " \n", - " \n", - " \n", + " \n", + "\" transform=\"scale(0.015625)\"/>\n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", + "\" style=\"fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square\"/>\n", " \n", " \n", - " \n", + "\" style=\"fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square\"/>\n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", + "\" style=\"fill: none; stroke: #000000; stroke-width: 0.8; stroke-linejoin: miter; stroke-linecap: square\"/>\n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -1751,54 +1779,55 @@ "L 399.363636 55.51625 \n", "Q 399.363636 57.51625 401.363636 57.51625 \n", "z\n", - "\" style=\"fill:#ffffff;opacity:0.8;stroke:#cccccc;stroke-linejoin:miter;\"/>\n", + "\" style=\"fill: #ffffff; opacity: 0.8; stroke: #cccccc; stroke-linejoin: miter\"/>\n", " \n", - " \n", + " \n", " \n", + "\" style=\"fill: none; stroke: #0000ff; stroke-width: 1.5; stroke-linecap: square\"/>\n", " \n", - " \n", - " \n", + " \n", " \n", - " \n", + " \n", " \n", - " \n", + "\" transform=\"scale(0.015625)\"/>\n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", "\n" @@ -1815,16 +1844,16 @@ "output_type": "stream", "text": [ "Stopped because accuracy beat goal of 1.0\n", - "Epoch 107/500 \n" + "Epoch 14/500 loss: 0.09768255800008774 - tolerance_accuracy: 1.0\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 81, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -1841,34 +1870,40 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "\n", + "\n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " Layer: output 'Dense'\n", + " Layer: output 'Dense'\n", "Act function: softmax\n", - "Act output range: (0, 1)\n", - "Shape = (None, 2)outputLayer: hidden_4 'Dense'\n", - "Act function: tanh\n", - "Act output range: (-1, 1)\n", - "Shape = (None, 10)hidden_4Layer: hidden_3 'Dense'\n", - "Act function: tanh\n", - "Act output range: (-1, 1)\n", - "Shape = (None, 100)hidden_3Layer: hidden_2 'Flatten'\n", - "Shape = (None, 31752)hidden_2Layer: conv2d 'Conv2D'\n", + "Act output range: (0.0, 1.0)\n", + "Actual minmax: (0.0, 1.0)\n", + "Shape = (None, 2)outputLayer: hidden_4 'Dense'\n", "Act function: relu\n", - "Act output range: (0, +Infinity)\n", - "Shape = (None, 126, 126, 2)conv2dLayer: input 'InputLayer'\n", - "Shape = [(None, 128, 128, 3)]inputActivations for SimpleNetwork" + "Act output range: (0.0, +Infinity)\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 10)hidden_4Layer: hidden_3 'Dense'\n", + "Act function: relu\n", + "Act output range: (0.0, +Infinity)\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 100)hidden_3Layer: flatten_3 'Flatten'\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 31752)flatten_3Layer: conv2d 'Conv2D'\n", + "Act function: relu\n", + "Act output range: (0.0, +Infinity)\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 126, 126, 2)conv2d21Layer: input 'InputLayer'\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 128, 128, 3)inputActivations for SequentialNetwork" ], "text/plain": [ "" @@ -1879,7 +1914,7 @@ } ], "source": [ - "network.display(inputs[-1], scale=0.75)" + "network.display(inputs[0], scale=0.75)" ] }, { @@ -1909,7 +1944,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.13" }, "widgets": { "application/vnd.jupyter.widget-state+json": { diff --git a/notebooks/NeuralNetworks/BasicNeuralNets.ipynb b/notebooks/NeuralNetworks/BasicNeuralNets.ipynb index 234d222..a680239 100644 --- a/notebooks/NeuralNetworks/BasicNeuralNets.ipynb +++ b/notebooks/NeuralNetworks/BasicNeuralNets.ipynb @@ -96,31 +96,21 @@ "id": "jndZgPkeaTwN", "outputId": "54305dbe-9044-48e1-96a9-fb57b461cc3d" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m454.2/454.2 kB\u001b[0m \u001b[31m2.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", - "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m16.0 MB/s\u001b[0m eta 
\u001b[36m0:00:00\u001b[0m\n", - "\u001b[?25h" - ] - } - ], + "outputs": [], "source": [ "%pip install aitk --quiet" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "id": "il-65yhPaYRZ" }, "outputs": [], "source": [ "from aitk.utils import array_to_image, get_dataset, gallery\n", - "from aitk.networks import SimpleNetwork\n", + "from aitk.networks import SequentialNetwork\n", "import numpy as np" ] }, @@ -137,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": { "id": "wqQfLfDZoHf9" }, @@ -164,7 +154,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -176,12 +166,13 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAAAAABVicqIAAAAYklEQVR4nO3VMQrAIAxAURXvf+V21lBScQvvj0F8kCW97T1hclrfB+P6yx9BIBAIpDwy0xfhBIXSM1dnXRAIBAL5KN6T/H4cV2ddEAgEAoFAIBAIBAKBQCAQCAQCgUAgkLUXA4UCzaT1sj8AAAAASUVORK5CYII=\n", + "image/jpeg": "/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCABkAGQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APn+ivt/wJ/yTzw1/wBgq1/9FLXQUUUUUUV8wftHf8lD0/8A7BUf/o2WvH6KKK+3/An/ACTzw1/2CrX/ANFLXQUUUUUUV8wftHf8lD0//sFR/wDo2WvH6KKK+3/An/JPPDX/AGCrX/0UtdBXx5408aeKrXx14ht7fxLrMMEWp3KRxx38qqiiVgAAGwABxisP/hO/GH/Q165/4MZv/iqP+E78Yf8AQ165/wCDGb/4qj/hO/GH/Q165/4MZv8A4qvov4BatqWs+Bb641TULu+nXU5EWS6maVgvlRHALEnGSTj3NeqV8wftHf8AJQ9P/wCwVH/6Nlrx+iiivt/wJ/yTzw1/2CrX/wBFLXQV8QeO/wDkofiX/sK3X/o1q5+iivp/9nH/AJJ5qH/YVk/9FRV7BXzB+0d/yUPT/wDsFR/+jZa8fooorcg8aeKrW3it7fxLrMMESBI447+VVRQMAABsAAcYqT/hO/GH/Q165/4MZv8A4qsOeea6uJbi4lkmnlcvJJIxZnYnJJJ5JJ5zUdFFfT/7OP8AyTzUP+wrJ/6Kir2CvmD9o7/koen/APYKj/8ARsteP0UUUUUUUUV9P/s4/wDJPNQ/7Csn/oqKvYK+YP2jv+Sh6f8A9gqP/wBGy14/RRRRRRRRRX0/+zj/AMk81D/sKyf+ioq9gr5g/aO/5KHp/wD2Co//AEbLXj9FFFFFFFFFfT/7OP8AyTzUP+wrJ/6Kir2CvmD9o7/koen/APYKj/8ARsteP0UUUUUUUUV9P/s4/wDJPNQ/7Csn/oqKvYK+YP2jv+Sh6f8A9gqP/wBGy14/RRRRRRRRRX0/+zj/AMk81D/sKyf+ioq9gr5g/aO/5KHp/wD2Co//AEbLXj9FFFFFFFFFfT/7OP8AyTzUP+wrJ/6Kir2CvmD9o7/koen/APYKj/8ARsteP0UUUUUUUUV9P/s4/wDJPNQ/7Csn/oqKvYK+YP2jv+Sh6f8A9gqP/wBGy14/RRRRRRRRRX0/+zj/AMk81D/sKyf+ioq9gr5g/aO/5KHp/wD2Co//AEbLXj9Ff//Z", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAAAAABVicqIAAAAwUlEQVR4Ae3VMQ7DIBTA0Kbq/a/c7nwbxNDN2TCQSC9IPK/1+a7hevysO95r+Me4j1ypxhXXlcDV4k5XXFcCV4s/x9XjCho7jtdcR3iY7UJcO50xF9cg2YW4djpjLq5BsgvzPjnfH7v34Vz/BFksxmUy2ONCFotxmQz2uJDFYlwmgz0uZLEYl8lgjwtZLMZlMtjjQhaLcZkM9riQxWJcJoM9LmSxGJfJYI8LWSzGZTLY40IWi3GZDPa4kMViXCaD/QcDhQLNQYGjsgAAAABJRU5ErkJggg==", "text/plain": [ "" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -201,7 +192,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { "id": "L1--TM14RsRP" }, @@ -228,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -240,12 +231,13 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAAAAABVicqIAAAAP0lEQVR4nO3NQQEAAAQEMBRXXYrz2gqst/Lm4ZBIJBKJRCKRSCQSiUQikUgkEolEIpFIJBKJRCKRSCQSiSTsACTeAUd6+u1NAAAAAElFTkSuQmCC\n", + "image/jpeg": 
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCABkAGQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/AEoooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooor/2Q==", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAAAAABVicqIAAAAf0lEQVR4Ae3SsQ0AIAwEscDirM4MV1Dh1CdFsn6deX/7/YsZT5IyLlxJIMXWhSsJpNi6cCWBFFsXriSQYuvClQRSbF24kkCKrQtXEkixdeFKAim2LlxJIMXWhSsJpNi6cCWBFFsXriSQYuvClQRSbF24kkCKrQtXEkixdX3KdQEk3gFH5//jEAAAAABJRU5ErkJggg==", "text/plain": [ "" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -271,7 +263,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -286,7 +278,7 @@ "68719476736" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -311,7 +303,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -319,22 +311,7 @@ "id": "fk8wYgrdarNm", "outputId": "4650c15c-6a04-4b65-c51c-c312b09f5110" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading data from https://raw.githubusercontent.com/ArtificialIntelligenceToolkit/datasets/master/digits6x6/digits6x6.zip\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "8192it [00:00, 14997703.35it/s]\n" - ] - } - ], + "outputs": [], "source": [ "inputs, targets = get_dataset(\"digits6x6\")" ] @@ -350,7 +327,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -370,7 +347,7 @@ " [0, 1, 1, 1, 1, 0]])" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -390,7 +367,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -405,7 +382,7 @@ "array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0])" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -425,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -445,7 +422,7 @@ " [1, 1, 1, 1, 1, 1]])" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -456,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -471,7 +448,7 @@ "array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0])" ] }, - "execution_count": 13, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -491,7 +468,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -504,7 +481,7 @@ { "data": { "text/html": [ - "
%s
' % (border_width, label) - table += '%s' % (image_to_data(images[index]), label, label) + label = str(labels[index]).format( + **{"count": index + 1, "index": index, "row": row, "col": col} + ) + table += '
%s
' % ( + border_width, + label, + ) + table += '%s' % ( + image_to_data(images[index]), + label, + label, + ) table += "
[gallery output: HTML table of 240 digit thumbnails labeled 0–239]
" + "
[gallery output: the same 240 digit thumbnails labeled 0–239, arranged 10 per row]
" ], "text/plain": [ "" @@ -517,7 +494,7 @@ "source": [ "images = [array_to_image(inputs[i]) for i in range(len(inputs))]\n", "bigger = [image.resize((36,36), resample=0) for image in images]\n", - "gallery(bigger)" + "gallery(bigger, gallery_shape=(10, None))" ] }, { @@ -537,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -545,29 +522,14 @@ "id": "qM8BYwXiWpCJ", "outputId": "3bd58799-663a-4349-eaf6-c2883f50de0a" }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading data from https://raw.githubusercontent.com/ArtificialIntelligenceToolkit/datasets/master/validate_6x6/validate_6x6.data\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "8192it [00:00, 13727422.44it/s]\n" - ] - } - ], + "outputs": [], "source": [ "test_inputs, test_targets = get_dataset(\"validate_6x6\")" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -616,14 +578,14 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": { "id": "teYgZ4_md74G" }, "outputs": [], "source": [ "def build_network(hidden_layer_size):\n", - " return SimpleNetwork(\n", + " return SequentialNetwork(\n", " (6,6),\n", " \"Flatten\",\n", " hidden_layer_size,\n", @@ -644,7 +606,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": { "id": "KxQLML_ZevzQ" }, @@ -670,7 +632,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -680,27 +642,89 @@ }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: \"SimpleNetwork\"\n", - "_________________________________________________________________\n", - " Layer (type) Output Shape Param # \n", - "=================================================================\n", - " input (InputLayer) [(None, 6, 6)] 0 \n", - " \n", - " flatten (Flatten) (None, 36) 0 \n", - " \n", - " hidden_2 (Dense) (None, 10) 370 \n", - " \n", - " output (Dense) (None, 10) 110 \n", - " \n", - "=================================================================\n", - "Total params: 480 (1.88 KB)\n", - "Trainable params: 480 (1.88 KB)\n", - "Non-trainable params: 0 (0.00 Byte)\n", - "_________________________________________________________________\n" - ] + "data": { + "text/html": [ + "
Model: \"SequentialNetwork\"\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1mModel: \"SequentialNetwork\"\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
+       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
+       "│ input (InputLayer)              │ (None, 6, 6)           │             0 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ flatten (Flatten)               │ (None, 36)             │             0 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ hidden_2 (Dense)                │ (None, 10)             │           370 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ output (Dense)                  │ (None, 10)             │           110 │\n",
+       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", + "│ input (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m6\u001b[0m, \u001b[38;5;34m6\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ flatten (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m36\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ hidden_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m) │ \u001b[38;5;34m370\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ output (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m) │ \u001b[38;5;34m110\u001b[0m │\n", + "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
 Total params: 480 (1.88 KB)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m480\u001b[0m (1.88 KB)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
 Trainable params: 480 (1.88 KB)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m480\u001b[0m (1.88 KB)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
 Non-trainable params: 0 (0.00 B)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ @@ -720,7 +744,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -740,19 +764,19 @@ " \n", " \n", " \n", - " Layer: output 'Dense'\n", + " </defs><rect x=\"99.0\" y=\"24\" width=\"202\" height=\"52\" style=\"fill:none;stroke:black;stroke-width:2\"/><image id=\"keras-network_output\" class=\"keras-network\" x=\"100.0\" y=\"25\" height=\"50\" width=\"200\" preserveAspectRatio=\"none\" image-rendering=\"optimizeSpeed\" xlink:href=\"data:image/gif;base64,R0lGODdhCgABAIIAAAsLCw8PDxISEhUVFRkZGR8fHycnJzs7OywAAAAACgABAAAIDgAFBABg4MCAAAUICAgIADs=\"><title>Layer: output 'Dense'\n", "Act function: softmax\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", + "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", "Act function: sigmoid\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)hidden_2Layer: flatten 'Flatten'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 36)flattenLayer: input 'InputLayer'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = [(None, 6, 6)]inputActivations for SimpleNetwork" + "Shape = (None, 10)hidden_2Layer: flatten 'Flatten'\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 36)flattenLayer: input 'InputLayer'\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 6, 6)inputActivations for SequentialNetwork" ], "text/plain": [ "" @@ -763,7 +787,7 @@ } ], "source": [ - "net.display(inputs[0])" + "net.display(my_digit)" ] }, { @@ -783,7 +807,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -804,11 +828,11 @@ " \n", " \n", " \n", - " 2024-06-27T19:28:10.202516\n", + " 2024-10-20T11:49:21.287464\n", " image/svg+xml\n", " \n", " \n", - " Matplotlib v3.7.1, https://matplotlib.org/\n", + " Matplotlib v3.8.1, https://matplotlib.org/\n", " \n", " \n", " \n", @@ -839,12 +863,12 @@ " \n", " \n", " \n", - " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -880,7 +904,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -921,7 +945,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -957,7 +981,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -1004,7 +1028,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -1060,7 +1084,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -1208,12 +1232,12 @@ " \n", " \n", " \n", - " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -1238,12 +1262,12 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -1254,12 +1278,12 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -1270,12 +1294,12 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -1286,12 +1310,12 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -1352,677 +1376,778 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", " 
\n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", @@ -2260,58 +2455,6 @@ "L -19 4666 \n", "z\n", "\" transform=\"scale(0.015625)\"/>\n", - " \n", - " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2395,11 +2542,26 @@ " \n", + "\" style=\"fill: none; stroke: #ff0000; stroke-width: 1.5; stroke-linecap: square\"/>\n", " \n", " \n", - " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -2459,7 +2606,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2472,7 +2619,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2487,7 +2634,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2502,7 +2649,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2517,7 +2664,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2532,7 +2679,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2560,7 +2707,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2575,7 +2722,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2590,7 +2737,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2605,7 +2752,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2620,7 +2767,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2635,7 +2782,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -2667,28 +2814,6 @@ "L 1831 4666 \n", "z\n", "\" transform=\"scale(0.015625)\"/>\n", - " \n", " \n", " \n", " \n", + "L 443.514481 216.28 \n", + "L 443.745291 216.28 \n", + "L 443.9761 217.82 \n", + "L 444.206909 216.28 \n", + "L 447.899859 216.28 \n", + "L 448.130668 215.510001 \n", + "L 448.361477 216.28 \n", + "L 448.823096 216.28 \n", + "L 449.053905 215.510001 \n", + "L 449.284714 215.510001 \n", + "L 449.515524 214.74 \n", + "L 449.746333 214.74 \n", + "L 449.977142 215.510001 \n", + "L 450.207952 214.74 \n", + "L 450.438761 214.74 \n", + "L 450.66957 215.510001 \n", + "L 450.90038 214.74 \n", + "L 451.131189 215.510001 \n", + "L 451.361998 215.510001 \n", + "L 451.823617 213.970001 \n", + "L 452.285236 213.970001 \n", + "L 452.516045 213.199999 \n", + "L 452.746854 213.199999 \n", + "L 452.977664 211.66 \n", + "L 454.593329 211.66 \n", + "L 454.824138 210.119999 \n", + "L 455.054948 211.66 \n", + "L 455.285757 211.66 \n", + "L 455.516566 210.119999 \n", + "L 455.978185 205.500001 \n", + "L 456.439803 205.500001 \n", + "L 456.670613 204.730001 \n", + "L 456.901422 204.730001 \n", + "L 457.132231 197.03 \n", + "L 457.363041 196.260001 \n", + "L 457.59385 197.03 \n", + "L 457.824659 192.409999 \n", + "L 458.055469 197.03 \n", + "L 458.286278 190.870001 \n", + "L 458.747897 190.870001 \n", + "L 458.978706 189.330002 \n", + "L 459.209515 188.559997 \n", 
+ "L 459.671134 188.559997 \n", + "L 459.901943 187.789998 \n", + "L 460.363562 187.789998 \n", + "L 460.594371 187.019999 \n", + "L 460.825181 187.019999 \n", + "L 461.05599 183.940002 \n", + "L 462.671655 183.940002 \n", + "L 462.902464 183.170003 \n", + "L 463.133274 183.940002 \n", + "L 463.364083 181.629999 \n", + "L 465.210558 181.629999 \n", + "L 465.441367 180.859999 \n", + "L 466.133795 180.859999 \n", + "L 466.364604 180.09 \n", + "L 466.595414 180.09 \n", + "L 466.826223 178.550001 \n", + "L 467.057032 177.780002 \n", + "L 467.287842 177.780002 \n", + "L 467.74946 176.239998 \n", + "L 468.672697 176.239998 \n", + "L 468.903507 175.469999 \n", + "L 470.288363 175.469999 \n", + "L 470.519172 174.7 \n", + "L 470.980791 174.7 \n", + "L 471.2116 173.93 \n", + "L 472.134837 173.93 \n", + "L 472.365647 173.160001 \n", + "L 472.596456 171.620003 \n", + "L 473.288884 169.309999 \n", + "L 473.519693 169.309999 \n", + "L 473.750503 168.54 \n", + "L 474.904549 168.54 \n", + "L 475.135358 164.689998 \n", + "L 475.366168 164.689998 \n", + "L 475.596977 163.919999 \n", + "L 475.827786 163.919999 \n", + "L 476.058596 163.15 \n", + "L 476.289405 163.919999 \n", + "L 476.751024 163.919999 \n", + "L 476.981833 163.15 \n", + "L 477.212642 163.919999 \n", + "L 477.443452 163.919999 \n", + "L 477.674261 163.15 \n", + "L 478.13588 163.15 \n", + "L 478.366689 163.919999 \n", + "L 478.828308 162.38 \n", + "L 479.289926 158.529999 \n", + "L 479.751545 156.99 \n", + "L 479.982354 156.99 \n", + "L 480.213164 156.220001 \n", + "L 480.443973 153.909997 \n", + "L 480.674782 149.290002 \n", + "L 480.905592 148.520003 \n", + "L 481.136401 149.290002 \n", + "L 481.36721 148.520003 \n", + "L 481.59802 148.520003 \n", + "L 481.828829 146.979999 \n", + "L 482.059638 143.900001 \n", + "L 483.675303 143.900001 \n", + "L 483.906113 142.360003 \n", + "L 484.136922 142.360003 \n", + "L 484.367731 143.900001 \n", + "L 484.82935 142.360003 \n", + "L 485.752587 142.360003 \n", + "L 485.983397 139.279995 \n", + "L 486.445015 137.739996 \n", + "L 486.675825 138.509996 \n", + "L 486.906634 137.739996 \n", + "L 487.137443 138.509996 \n", + "L 487.368253 137.739996 \n", + "L 487.599062 138.509996 \n", + "L 487.829871 136.969997 \n", + "L 488.060681 136.969997 \n", + "L 488.29149 135.429999 \n", + "L 488.522299 134.659999 \n", + "L 488.753108 136.199998 \n", + "L 488.983918 133.120001 \n", + "L 489.214727 133.89 \n", + "L 489.445536 132.350001 \n", + "L 489.676346 131.580002 \n", + "L 490.137964 131.580002 \n", + "L 490.368774 130.810003 \n", + "L 490.830392 130.810003 \n", + "L 491.061202 130.040004 \n", + "L 491.52282 130.040004 \n", + "L 491.75363 129.270004 \n", + "L 491.984439 129.270004 \n", + "L 492.215248 130.040004 \n", + "L 492.907676 130.040004 \n", + "L 493.138486 129.270004 \n", + "L 493.369295 130.040004 \n", + "L 493.600104 130.040004 \n", + "L 493.830914 129.270004 \n", + "L 494.754151 129.270004 \n", + "L 494.98496 128.500005 \n", + "L 495.446579 128.500005 \n", + "L 496.139007 126.189996 \n", + "L 497.293053 126.189996 \n", + "L 497.523863 125.419997 \n", + "L 497.754672 123.109999 \n", + "L 498.4471 123.109999 \n", + "L 498.677909 122.34 \n", + "L 499.139528 122.34 \n", + "L 499.370337 121.570001 \n", + "L 499.601147 121.570001 \n", + "L 499.831956 120.800001 \n", + "L 500.062765 121.570001 \n", + "L 500.293575 120.800001 \n", + "L 500.986003 120.800001 \n", + "L 501.216812 120.030002 \n", + "L 501.67843 120.030002 \n", + "L 501.90924 118.490004 \n", + "L 502.140049 120.030002 \n", + "L 502.370858 120.030002 
\n", + "L 502.601668 116.950005 \n", + "L 502.832477 119.260003 \n", + "L 503.063286 119.260003 \n", + "L 503.294096 118.490004 \n", + "L 503.524905 118.490004 \n", + "L 503.755714 116.950005 \n", + "L 503.986524 116.950005 \n", + "L 504.217333 116.179995 \n", + "L 504.909761 116.179995 \n", + "L 505.14057 114.639996 \n", + "L 505.37138 113.869997 \n", + "L 505.602189 114.639996 \n", + "L 505.832998 113.869997 \n", + "L 506.756236 113.869997 \n", + "L 506.987045 114.639996 \n", + "L 507.217854 113.869997 \n", + "L 507.448664 113.869997 \n", + "L 507.679473 113.099998 \n", + "L 507.910282 113.099998 \n", + "L 508.141092 112.329999 \n", + "L 508.371901 112.329999 \n", + "L 508.60271 110.79 \n", + "L 508.833519 111.559999 \n", + "L 509.064329 110.79 \n", + "L 509.987566 110.79 \n", + "L 510.449185 109.250001 \n", + "L 510.679994 109.250001 \n", + "L 510.910803 110.79 \n", + "L 511.141613 110.020001 \n", + "L 511.372422 110.020001 \n", + "L 511.603231 109.250001 \n", + "L 512.295659 109.250001 \n", + "L 512.757278 107.710003 \n", + "L 512.988087 109.250001 \n", + "L 513.218897 109.250001 \n", + "L 513.911325 106.940004 \n", + "L 514.603753 106.940004 \n", + "L 514.834562 108.480002 \n", + "L 515.29618 106.940004 \n", + "L 516.219418 106.940004 \n", + "L 516.450227 106.170004 \n", + "L 516.681036 106.940004 \n", + "L 516.911846 105.400005 \n", + "L 517.142655 106.170004 \n", + "L 517.373464 106.170004 \n", + "L 517.835083 104.629995 \n", + "L 520.604795 104.629995 \n", + "L 520.835604 103.859996 \n", + "L 521.066414 104.629995 \n", + "L 521.758841 104.629995 \n", + "L 521.989651 99.24 \n", + "L 522.22046 102.319997 \n", + "L 522.451269 102.319997 \n", + "L 522.682079 103.859996 \n", + "L 523.143697 102.319997 \n", + "L 523.605316 102.319997 \n", + "L 523.836125 99.24 \n", + "L 524.066935 94.620004 \n", + "L 524.297744 102.319997 \n", + "L 524.528553 102.319997 \n", + "L 524.759363 100.779999 \n", + "L 524.990172 94.620004 \n", + "L 525.6826 94.620004 \n", + "L 526.144219 102.319997 \n", + "L 526.375028 94.620004 \n", + "L 527.990693 94.620004 \n", + "L 528.221502 99.24 \n", + "L 528.452312 95.390004 \n", + "L 528.683121 94.620004 \n", + "L 530.760405 94.620004 \n", + "L 530.991214 93.850005 \n", + "L 531.222024 91.539996 \n", + "L 531.452833 93.850005 \n", + "L 531.683642 90.769997 \n", + "L 531.914452 91.539996 \n", + "L 532.60688 91.539996 \n", + "L 532.837689 90.769997 \n", + "L 533.530117 90.769997 \n", + "L 533.760926 89.999998 \n", + "L 533.991736 89.999998 \n", + "L 534.222545 89.229999 \n", + "L 534.453354 90.769997 \n", + "L 535.145782 88.459999 \n", + "L 535.376591 88.459999 \n", + "L 535.607401 87.69 \n", + "L 535.83821 87.69 \n", + "L 536.069019 86.920001 \n", + "L 536.299829 87.69 \n", + "L 536.761447 87.69 \n", + "L 536.992257 86.920001 \n", + "L 537.223066 85.380002 \n", + "L 537.684685 86.920001 \n", + "L 538.377113 84.610003 \n", + "L 538.838731 86.150001 \n", + "L 539.069541 86.150001 \n", + "L 539.30035 84.610003 \n", + "L 539.531159 84.610003 \n", + "L 539.761969 85.380002 \n", + "L 539.992778 84.610003 \n", + "L 540.223587 84.610003 \n", + "L 540.454397 85.380002 \n", + "L 540.685206 85.380002 \n", + "L 540.916015 84.610003 \n", + "L 542.070062 84.610003 \n", + "L 542.300871 83.840004 \n", + "L 542.53168 83.840004 \n", + "L 542.76249 83.070004 \n", + "L 544.839774 83.070004 \n", + "L 545.070583 82.300005 \n", + "L 545.301392 82.300005 \n", + "L 545.532202 81.529995 \n", + "L 545.763011 82.300005 \n", + "L 545.99382 81.529995 \n", + "L 546.22463 82.300005 \n", + "L 
546.686248 80.759996 \n", + "L 547.147867 80.759996 \n", + "L 547.840295 83.070004 \n", + "L 548.071104 81.529995 \n", + "L 548.301913 80.759996 \n", + "L 548.532723 80.759996 \n", + "L 548.763532 79.989996 \n", + "L 548.994341 79.989996 \n", + "L 549.45596 81.529995 \n", + "L 549.917579 79.989996 \n", + "L 553.148909 79.989996 \n", + "L 553.379719 78.449998 \n", + "L 553.610528 79.989996 \n", + "L 555.226193 79.989996 \n", + "L 555.457002 79.219997 \n", + "L 556.38024 79.219997 \n", + "L 556.611049 78.449998 \n", + "L 556.841858 79.989996 \n", + "L 557.072668 79.219997 \n", + "L 557.303477 79.989996 \n", + "L 557.534286 78.449998 \n", + "L 559.149952 78.449998 \n", + "L 559.380761 79.219997 \n", + "L 559.61157 78.449998 \n", + "L 561.227235 78.449998 \n", + "L 561.458045 77.679999 \n", + "L 561.688854 77.679999 \n", + "L 561.919663 78.449998 \n", + "L 562.150473 78.449998 \n", + "L 562.381282 77.679999 \n", + "L 562.612091 78.449998 \n", + "L 562.842901 77.679999 \n", + "L 564.689375 77.679999 \n", + "L 564.920185 76.909999 \n", + "L 565.150994 77.679999 \n", + "L 568.382324 77.679999 \n", + "L 568.613134 76.14 \n", + "L 568.843943 77.679999 \n", + "L 569.305562 77.679999 \n", + "L 569.536371 76.14 \n", + "L 569.99799 77.679999 \n", + "L 570.228799 76.909999 \n", + "L 570.459608 76.909999 \n", + "L 570.690418 77.679999 \n", + "L 570.921227 74.600001 \n", + "L 571.152036 74.600001 \n", + "L 571.382846 75.370001 \n", + "L 571.613655 74.600001 \n", + "L 571.844464 74.600001 \n", + "L 572.075274 76.14 \n", + "L 572.306083 75.370001 \n", + "L 572.536892 73.830002 \n", + "L 572.767702 73.830002 \n", + "L 572.998511 74.600001 \n", + "L 573.22932 73.830002 \n", + "L 574.152558 73.830002 \n", + "L 574.383367 72.290004 \n", + "L 574.844985 72.290004 \n", + "L 575.075795 73.060003 \n", + "L 575.537413 73.060003 \n", + "L 575.768223 72.290004 \n", + "L 576.69146 72.290004 \n", + "L 576.922269 71.520004 \n", + "L 577.153079 71.520004 \n", + "L 577.383888 70.750005 \n", + "L 577.614697 72.290004 \n", + "L 577.845507 69.979995 \n", + "L 578.076316 69.979995 \n", + "L 578.537935 71.520004 \n", + "L 578.768744 69.979995 \n", + "L 578.999553 70.750005 \n", + "L 579.230363 69.979995 \n", + "L 579.922791 69.979995 \n", + "L 580.1536 68.439996 \n", + "L 580.384409 69.979995 \n", + "L 580.615219 69.209996 \n", + "L 580.846028 69.979995 \n", + "L 581.076837 69.979995 \n", + "L 581.307646 70.750005 \n", + "L 581.538456 68.439996 \n", + "L 581.769265 69.209996 \n", + "L 582.461693 69.209996 \n", + "L 582.692502 69.979995 \n", + "L 582.923312 69.209996 \n", + "L 583.61574 69.209996 \n", + "L 583.846549 69.979995 \n", + "L 584.077358 69.209996 \n", + "L 584.538977 69.209996 \n", + "L 584.769786 68.439996 \n", + "L 585.000596 68.439996 \n", + "L 585.231405 69.209996 \n", + "L 585.462214 69.209996 \n", + "L 585.693024 68.439996 \n", + "L 585.923833 69.209996 \n", + "L 586.385452 69.209996 \n", + "L 586.616261 68.439996 \n", + "L 586.84707 69.209996 \n", + "L 587.07788 68.439996 \n", + "L 587.308689 69.209996 \n", + "L 587.539498 68.439996 \n", + "L 587.770307 68.439996 \n", + "L 588.001117 69.209996 \n", + "L 588.231926 68.439996 \n", + "L 589.616782 68.439996 \n", + "L 589.847591 69.209996 \n", + "L 590.078401 69.209996 \n", + "L 590.30921 68.439996 \n", + "L 593.540541 68.439996 \n", + "L 593.77135 69.209996 \n", + "L 594.002159 68.439996 \n", + "L 595.387015 68.439996 \n", + "L 595.617824 67.669997 \n", + "L 595.848634 68.439996 \n", + "L 598.156727 68.439996 \n", + "L 598.618346 66.899998 \n", + "L 598.849155 
67.669997 \n", + "L 599.310774 67.669997 \n", + "L 599.541583 66.899998 \n", + "L 599.772392 66.899998 \n", + "L 600.003202 68.439996 \n", + "L 600.234011 66.899998 \n", + "L 600.69563 66.899998 \n", + "L 600.926439 68.439996 \n", + "L 601.157248 68.439996 \n", + "L 601.388057 66.899998 \n", + "L 622.391706 66.899998 \n", + "L 622.622515 66.129999 \n", + "L 623.314943 66.129999 \n", + "L 623.545752 66.899998 \n", + "L 623.776562 66.129999 \n", + "L 624.007371 66.899998 \n", + "L 624.23818 66.899998 \n", + "L 624.46899 66.129999 \n", + "L 624.699799 64.59 \n", + "L 625.161418 66.129999 \n", + "L 625.392227 66.129999 \n", + "L 625.623036 64.59 \n", + "L 625.853846 64.59 \n", + "L 626.084655 63.820001 \n", + "L 626.315464 64.59 \n", + "L 626.546274 66.129999 \n", + "L 626.777083 66.129999 \n", + "L 627.007892 64.59 \n", + "L 627.469511 66.129999 \n", + "L 628.161939 66.129999 \n", + "L 628.392748 63.820001 \n", + "L 628.623557 65.359999 \n", + "L 628.854367 66.129999 \n", + "L 629.085176 64.59 \n", + "L 629.315985 63.820001 \n", + "L 629.777604 63.820001 \n", + "L 630.239223 65.359999 \n", + "L 630.470032 63.820001 \n", + "L 630.700841 63.820001 \n", + "L 630.931651 66.129999 \n", + "L 631.16246 64.59 \n", + "L 631.393269 63.820001 \n", + "L 631.854888 63.820001 \n", + "L 632.085697 65.359999 \n", + "L 632.316507 63.820001 \n", + "L 634.6246 63.820001 \n", + "L 634.855409 65.359999 \n", + "L 635.086218 63.820001 \n", + "L 636.471074 63.820001 \n", + "L 636.471074 63.820001 \n", + "\" clip-path=\"url(#p3f403f150e)\" style=\"fill: none; stroke: #0000ff; stroke-width: 1.5; stroke-linecap: square\"/>\n", " \n", " \n", " \n", + "\" clip-path=\"url(#p3f403f150e)\" style=\"fill: none; stroke: #00bfbf; stroke-width: 1.5; stroke-linecap: square\"/>\n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -3339,10 +3471,10 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -3359,16 +3491,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch 1000/1000 loss: 0.002076978562399745 - tolerance_accuracy: 0.9666666388511658 - val_loss: 0.02979998290538788 - val_tolerance_accuracy: 0.6000000238418579\n" + "Epoch 1000/1000 loss: 0.0023138849064707756 - tolerance_accuracy: 0.9416666626930237 - val_loss: 0.040301691740751266 - val_tolerance_accuracy: 0.6000000238418579\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -3398,7 +3530,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": { "id": "w4bVfKh7ZNoz" }, @@ -3415,7 +3547,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3435,19 +3567,19 @@ " \n", " \n", " \n", - " Layer: output 'Dense'\n", + " </defs><rect x=\"99.0\" y=\"24\" 
width=\"202\" height=\"52\" style=\"fill:none;stroke:black;stroke-width:2\"/><image id=\"keras-network_output\" class=\"keras-network\" x=\"100.0\" y=\"25\" height=\"50\" width=\"200\" preserveAspectRatio=\"none\" image-rendering=\"optimizeSpeed\" xlink:href=\"data:image/gif;base64,R0lGODdhCgABAIIAAAAAAAEBAQICAggICGNjY4uLiwAAAAAAACwAAAAACgABAAAIDQABCCQgYACAAAIKBAQAOw==\"><title>Layer: output 'Dense'\n", "Act function: softmax\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", + "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", "Act function: sigmoid\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", "Shape = (None, 10)hidden_2Layer: flatten 'Flatten'\n", - "Actual minmax: (0.0, 1.0)\n", + "Actual minmax: (0.0, +Infinity)\n", "Shape = (None, 36)flattenLayer: input 'InputLayer'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = [(None, 6, 6)]inputActivations for SimpleNetwork" + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 6, 6)inputActivations for SequentialNetwork" ], "text/plain": [ "" @@ -3460,7 +3592,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "0.0, 0.0, 0.0, 0.29, 0.01, 0.01, 0.0, 0.01, 0.02, 0.65\n" + "0.0, 0.0, 0.0, 0.39, 0.01, 0.03, 0.0, 0.01, 0.01, 0.55\n" ] } ], @@ -3485,7 +3617,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": { "id": "hoonJ4gWiYXn" }, @@ -3499,7 +3631,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3519,19 +3651,19 @@ " \n", " \n", " \n", - " Layer: output 'Dense'\n", + " </defs><rect x=\"99.0\" y=\"24\" width=\"202\" height=\"52\" style=\"fill:none;stroke:black;stroke-width:2\"/><image id=\"keras-network_output\" class=\"keras-network\" x=\"100.0\" y=\"25\" height=\"50\" width=\"200\" preserveAspectRatio=\"none\" image-rendering=\"optimizeSpeed\" xlink:href=\"data:image/gif;base64,R0lGODdhCgABAIIAAAAAAAEBAQcHBwkJCRISEikpKa6urgAAACwAAAAACgABAAAIDgAHABBgAEAAAAAIFAgIADs=\"><title>Layer: output 'Dense'\n", "Act function: softmax\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", + "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", "Act function: sigmoid\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", "Shape = (None, 10)hidden_2Layer: flatten 'Flatten'\n", - "Actual minmax: (0.0, 1.0)\n", + "Actual minmax: (0.0, +Infinity)\n", "Shape = (None, 36)flattenLayer: input 'InputLayer'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = [(None, 6, 6)]inputActivations for SimpleNetwork" + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 6, 6)inputActivations for SequentialNetwork" ], "text/plain": [ "" @@ -3544,7 +3676,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "0.06, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.0, 0.45, 0.48\n" + "0.04, 0.0, 0.03, 0.69, 0.0, 0.01, 0.0, 0.0, 0.07, 0.16\n" ] } ], @@ -3565,7 +3697,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3585,19 +3717,19 @@ " \n", " \n", " \n", - " Layer: output 'Dense'\n", + " </defs><rect x=\"99.0\" y=\"24\" width=\"202\" height=\"52\" style=\"fill:none;stroke:black;stroke-width:2\"/><image id=\"keras-network_output\" class=\"keras-network\" x=\"100.0\" y=\"25\" height=\"50\" width=\"200\" preserveAspectRatio=\"none\" image-rendering=\"optimizeSpeed\" 
xlink:href=\"data:image/gif;base64,R0lGODdhCgABAIMAAAAAAAQEBAYGBggICAoKChoaGioqKi0tLWRkZAAAAAAAAAAAAAAAAAAAAAAAAAAAACwAAAAACgABAAAIDgAJIBhwoAABAQYABAgIADs=\"><title>Layer: output 'Dense'\n", "Act function: softmax\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", + "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", "Act function: sigmoid\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", "Shape = (None, 10)hidden_2Layer: flatten 'Flatten'\n", - "Actual minmax: (0.0, 1.0)\n", + "Actual minmax: (0.0, +Infinity)\n", "Shape = (None, 36)flattenLayer: input 'InputLayer'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = [(None, 6, 6)]inputActivations for SimpleNetwork" + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 6, 6)inputActivations for SequentialNetwork" ], "text/plain": [ "" @@ -3610,7 +3742,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "0.06, 0.38, 0.03, 0.06, 0.05, 0.01, 0.01, 0.38, 0.0, 0.01\n" + "0.04, 0.39, 0.03, 0.18, 0.1, 0.04, 0.03, 0.17, 0.0, 0.02\n" ] } ], @@ -3635,7 +3767,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": { "id": "qN3gIVuCoUA5" }, @@ -3653,7 +3785,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 29, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3673,19 +3805,19 @@ " \n", " \n", " \n", - " Layer: output 'Dense'\n", + " </defs><rect x=\"99.0\" y=\"24\" width=\"202\" height=\"52\" style=\"fill:none;stroke:black;stroke-width:2\"/><image id=\"keras-network_output\" class=\"keras-network\" x=\"100.0\" y=\"25\" height=\"50\" width=\"200\" preserveAspectRatio=\"none\" image-rendering=\"optimizeSpeed\" xlink:href=\"data:image/gif;base64,R0lGODdhCgABAIEAAAAAAAEBAQwMDO7u7iwAAAAACgABAAAIDAABCAQwAEAAgQICAgA7\"><title>Layer: output 'Dense'\n", "Act function: softmax\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", + "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", "Act function: sigmoid\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", "Shape = (None, 10)hidden_2Layer: flatten 'Flatten'\n", - "Actual minmax: (0.0, 1.0)\n", + "Actual minmax: (0.0, +Infinity)\n", "Shape = (None, 36)flattenLayer: input 'InputLayer'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = [(None, 6, 6)]inputActivations for SimpleNetwork" + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 6, 6)inputActivations for SequentialNetwork" ], "text/plain": [ "" @@ -3698,7 +3830,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "0.0, 0.0, 0.0, 0.0, 0.98, 0.0, 0.01, 0.0, 0.0, 0.0\n" + "0.0, 0.0, 0.0, 0.0, 0.93, 0.0, 0.01, 0.0, 0.0, 0.05\n" ] } ], @@ -3730,7 +3862,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 30, "metadata": { "id": "oIVYN7qbCzaj" }, @@ -3741,7 +3873,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 31, "metadata": { "id": "k7FKqroNZl8l" }, @@ -3767,7 +3899,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 32, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3779,12 +3911,13 @@ "outputs": [ { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAAAAABVicqIAAAAe0lEQVR4nO3ZQQqAMAwEQOv//1zvhhKiYiHMnhMHclnUMY9qRnnjLG88CAQCgUDaI7GB0hYLK/eNMNDnXBAIBAJZZLyvjzARBvqcCwKBQCA7kfwlKC+t9Jl9zgWBQCCQncgXX+7S9DkXBAKBQBapl0OMP0EQCAQC+QW5ABUfCsKeZOCqAAAAAElFTkSuQmCC\n", + "image/jpeg": 
"/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAgGBgcGBQgHBwcJCQgKDBQNDAsLDBkSEw8UHRofHh0aHBwgJC4nICIsIxwcKDcpLDAxNDQ0Hyc5PTgyPC4zNDL/wAALCABkAGQBAREA/8QAHwAAAQUBAQEBAQEAAAAAAAAAAAECAwQFBgcICQoL/8QAtRAAAgEDAwIEAwUFBAQAAAF9AQIDAAQRBRIhMUEGE1FhByJxFDKBkaEII0KxwRVS0fAkM2JyggkKFhcYGRolJicoKSo0NTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqDhIWGh4iJipKTlJWWl5iZmqKjpKWmp6ipqrKztLW2t7i5usLDxMXGx8jJytLT1NXW19jZ2uHi4+Tl5ufo6erx8vP09fb3+Pn6/9oACAEBAAA/APn+iiivt/wJ/wAk88Nf9gq1/wDRS10FfIHxt/5K9rv/AG7/APpPHXn9fT/7OP8AyTzUP+wrJ/6Kir2CiiivgCiiivt/wJ/yTzw1/wBgq1/9FLXQV8gfG3/kr2u/9u//AKTx15/X0/8As4/8k81D/sKyf+ioq9gooorn/wDhBPB//QqaH/4Lof8A4mj/AIQTwf8A9Cpof/guh/8Aia+PPGkENr468Q29vFHDBFqdykccahVRRKwAAHAAHGKw6+3/AAJ/yTzw1/2CrX/0UtdBWPfeE/Dep3kl5f8Ah/Sru6kxvmnso5HbAAGWIycAAfhVf/hBPB//AEKmh/8Aguh/+JrU03SdN0a3a30vT7SxgZy7R2sKxKWwBkhQBnAAz7CrlFFFFFfEHjv/AJKH4l/7Ct1/6Naufr7f8Cf8k88Nf9gq1/8ARS10FFFFFFFFFFfEHjv/AJKH4l/7Ct1/6Naufrcg8aeKrW3it7fxLrMMESBI447+VVRQMAABsAAcYqT/AITvxh/0Neuf+DGb/wCKr6n+EF/ean8LdGvL+7nu7qTz9808hkdsTyAZY8nAAH4V3FFFfOnx98S69o3jqxt9L1vUrGBtMjdo7W6eJS3myjJCkDOABn2FeV/8J34w/wChr1z/AMGM3/xVH/Cd+MP+hr1z/wAGM3/xVfb9FfEHjv8A5KH4l/7Ct1/6Naufoor6/wDgl/ySHQv+3j/0okr0CiivmD9o7/koen/9gqP/ANGy14/RX3/RXD3/AMIPAmp6jc395oXmXV1K80z/AGucbnYkscB8DJJ6VX/4Ul8PP+he/wDJ24/+OV8seLLG30zxlrlhZx+Xa2uoXEMKbidqLIwUZPJwAOtY9dhonxS8ZeHNHg0nSdZ+z2MG7y4vssL7dzFjyyEnkk8mtD/hdvxD/wChh/8AJK3/APjdH/C7fiH/ANDD/wCSVv8A/G6P+F2/EP8A6GH/AMkrf/43Xp/w40TTvi34euNf8cW/9q6nb3bWUU+9oNsKojhdsRVT80jnJGeevArsP+FJfDz/AKF7/wAnbj/45R/wpL4ef9C9/wCTtx/8cr0CiiiviDx3/wAlD8S/9hW6/wDRrVz9FFFFfT/7OP8AyTzUP+wrJ/6Kir2CivmD/ho7xh/0DdD/AO/E3/x2j/ho7xh/0DdD/wC/E3/x2j/ho7xh/wBA3Q/+/E3/AMdo/wCGjvGH/QN0P/vxN/8AHa8r1bUptZ1m+1S4WNZ724kuJFjBChnYsQMknGT6mqde8fDv4KeG/F3gTTdcv73VY7q683ekEsYQbZXQYBjJ6KO9dP8A8M4+D/8AoJa5/wB/4f8A41XjnxZ8Fab4D8VWul6XPdzQS2SXDNdOrMGLuuBtVRjCDt61wdd54K+LOveA9Gm0vS7TTZoJbhrhmuo3ZgxVVwNrqMYQdvWuk/4aO8Yf9A3Q/wDvxN/8do/4aO8Yf9A3Q/8AvxN/8drx+iiiiivr/wCCX/JIdC/7eP8A0okr0CvmD9o7/koen/8AYKj/APRsteP0UUV9P/8ADOPg/wD6CWuf9/4f/jVH/DOPg/8A6CWuf9/4f/jVfOniXTYdG8Vavpdu0jQWV7NbxtIQWKo5UE4AGcD0FZdfRfhr4BeFdZ8K6RqlxqGsrPe2UNxIsc0QUM6BiBmMnGT6mtT/AIZx8H/9BLXP+/8AD/8AGq5DW/iPrHwk1ifwPoFtY3OmaZt8mW/R3mbzFErbijKp+aRgMKOAOvWs/wD4aO8Yf9A3Q/8AvxN/8drp/Dfhuz+O+nSeKPFEk9nfWsp09I9MYRxmNQJASJA53ZlbnOMAceux/wAM4+D/APoJa5/3/h/+NUf8M4+D/wDoJa5/3/h/+NUf8M4+D/8AoJa5/wB/4f8A41R/wzj4P/6CWuf9/wCH/wCNV7BRXxB47/5KH4l/7Ct1/wCjWrn6+3/An/JPPDX/AGCrX/0UtdBXyB8bf+Sva7/27/8ApPHXn9fT/wCzj/yTzUP+wrJ/6Kir2CiiiiiviDx3/wAlD8S/9hW6/wDRrVz9fb/gT/knnhr/ALBVr/6KWugr5A+Nv/JXtd/7d/8A0njrz+vp/wDZx/5J5qH/AGFZP/RUVewUUUV//9k=", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAAAAABVicqIAAAA6klEQVR4Ae3XQQ7CMBBD0QZx/yuD6GpkSx55QVefVZMxDbxEGuVc2+ejgaMTltDASyf+MWaRShUuuCqBKszpgqsSqMLHOo72JAtU77/DHOHKDC64KoEqzOmCqxKowu81rf3lWhuMfYMjvCrPAFxTY32GayWaAbimxvoM10o0A49w+SVo/oTfs7UgDdjYutoj/4RFbCPSBFxJx2pwGUmagCvpWA0uI0kTfUvym9b6DvYkbYHV4DKSNAFX0rEaXEaSJuBKOlbzS5B2B7vSaMAbjK7CnqhIHMMVebQIl4rEMVyRR4twqUgcP8L1BQaQC8NeWuBoAAAAAElFTkSuQmCC", "text/plain": [ "" ] }, - "execution_count": 31, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -3805,7 +3938,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 33, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3825,19 +3958,19 @@ " \n", " \n", " \n", - " Layer: output 'Dense'\n", + " </defs><rect x=\"99.0\" y=\"24\" width=\"202\" height=\"52\" style=\"fill:none;stroke:black;stroke-width:2\"/><image id=\"keras-network_output\" class=\"keras-network\" x=\"100.0\" y=\"25\" height=\"50\" width=\"200\" 
preserveAspectRatio=\"none\" image-rendering=\"optimizeSpeed\" xlink:href=\"data:image/gif;base64,R0lGODdhCgABAIEAAAAAAAEBAQICAvf39ywAAAAACgABAAAIDAAHAAAgYCDBAAACAgA7\"><title>Layer: output 'Dense'\n", "Act function: softmax\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", + "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", "Act function: sigmoid\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)hidden_2Layer: flatten 'Flatten'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 36)flattenLayer: input 'InputLayer'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = [(None, 6, 6)]inputActivations for SimpleNetwork" + "Shape = (None, 10)hidden_2Layer: flatten 'Flatten'\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 36)flattenLayer: input 'InputLayer'\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 6, 6)inputActivations for SequentialNetwork" ], "text/plain": [ "" @@ -3850,7 +3983,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "0.01, 0.0, 0.0, 0.0, 0.02, 0.0, 0.96, 0.0, 0.01, 0.0\n" + "0.97, 0.0, 0.0, 0.01, 0.0, 0.0, 0.0, 0.01, 0.0, 0.0\n" ] } ], @@ -3878,7 +4011,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 34, "metadata": { "id": "WS0lLtdtanY-" }, @@ -3899,7 +4032,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 41, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -3912,7 +4045,7 @@ { "data": { "text/html": [ - "
[previous gallery output: an HTML table of 240 image cells titled "0" through "239"; image markup and data stripped]
" + "
[updated gallery output: an HTML table of 240 image cells titled "0" through "239"; image markup and data stripped]
" ], "text/plain": [ "" @@ -3925,7 +4058,7 @@ "source": [ "images = [array_to_image(inputs[i]) for i in range(len(inputs))]\n", "bigger = [image.resize((36,36), resample=0) for image in images]\n", - "gallery(bigger)" + "gallery(bigger, gallery_shape=(10, None))" ] }, { @@ -3941,7 +4074,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 36, "metadata": { "id": "_k4uKVTTburh" }, @@ -3952,7 +4085,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 37, "metadata": { "colab": { "base_uri": "https://localhost:8080/" @@ -3962,36 +4095,98 @@ }, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: \"SimpleNetwork\"\n", - "_________________________________________________________________\n", - " Layer (type) Output Shape Param # \n", - "=================================================================\n", - " input (InputLayer) [(None, 6, 6)] 0 \n", - " \n", - " flatten_1 (Flatten) (None, 36) 0 \n", - " \n", - " hidden_2 (Dense) (None, 10) 370 \n", - " \n", - " output (Dense) (None, 10) 110 \n", - " \n", - "=================================================================\n", - "Total params: 480 (1.88 KB)\n", - "Trainable params: 480 (1.88 KB)\n", - "Non-trainable params: 0 (0.00 Byte)\n", - "_________________________________________________________________\n" - ] - } - ], - "source": [ - "net2.summary()" - ] - }, - { + "data": { + "text/html": [ + "
Model: \"SequentialNetwork\"\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1mModel: \"SequentialNetwork\"\u001b[0m\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
+       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
+       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
+       "│ input (InputLayer)              │ (None, 6, 6)           │             0 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ flatten_1 (Flatten)             │ (None, 36)             │             0 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ hidden_2 (Dense)                │ (None, 10)             │           370 │\n",
+       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
+       "│ output (Dense)                  │ (None, 10)             │           110 │\n",
+       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
+       "
\n" + ], + "text/plain": [ + "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", + "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", + "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", + "│ input (\u001b[38;5;33mInputLayer\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m6\u001b[0m, \u001b[38;5;34m6\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ flatten_1 (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m36\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ hidden_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m) │ \u001b[38;5;34m370\u001b[0m │\n", + "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", + "│ output (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m) │ \u001b[38;5;34m110\u001b[0m │\n", + "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
 Total params: 480 (1.88 KB)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m480\u001b[0m (1.88 KB)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
 Trainable params: 480 (1.88 KB)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m480\u001b[0m (1.88 KB)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
 Non-trainable params: 0 (0.00 B)\n",
+       "
\n" + ], + "text/plain": [ + "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "net2.summary()" + ] + }, + { "cell_type": "code", - "execution_count": 37, + "execution_count": 38, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -4012,11 +4207,11 @@ " \n", " \n", " \n", - " 2024-06-27T19:29:42.383101\n", + " 2024-10-20T11:50:40.070139\n", " image/svg+xml\n", " \n", " \n", - " Matplotlib v3.7.1, https://matplotlib.org/\n", + " Matplotlib v3.8.1, https://matplotlib.org/\n", " \n", " \n", " \n", @@ -4047,12 +4242,12 @@ " \n", " \n", " \n", - " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4088,7 +4283,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4129,7 +4324,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4165,7 +4360,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4212,7 +4407,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4268,7 +4463,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4416,12 +4611,12 @@ " \n", " \n", " \n", - " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4446,12 +4641,12 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4462,12 +4657,12 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4478,12 +4673,12 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4494,12 +4689,12 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -4560,548 +4755,795 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", " \n", - " \n", + " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -5474,11 +5938,26 @@ " \n", + "\" style=\"fill: none; stroke: #ff0000; stroke-width: 1.5; stroke-linecap: square\"/>\n", " \n", " \n", - " \n", + " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -5538,7 +6002,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5551,7 +6015,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5566,7 +6030,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5581,7 +6045,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5596,7 +6060,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5611,7 +6075,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", 
" \n", " \n", @@ -5639,7 +6103,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5654,7 +6118,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5669,7 +6133,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5684,7 +6148,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5699,7 +6163,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5714,7 +6178,7 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -5746,28 +6210,6 @@ "L 1831 4666 \n", "z\n", "\" transform=\"scale(0.015625)\"/>\n", - " \n", " \n", " \n", " \n", + "L 571.382846 67.669997 \n", + "L 587.539498 67.669997 \n", + "L 588.001117 66.129999 \n", + "L 588.231926 67.669997 \n", + "L 589.155163 67.669997 \n", + "L 589.385973 66.129999 \n", + "L 589.616782 66.899998 \n", + "L 589.847591 66.899998 \n", + "L 590.078401 67.669997 \n", + "L 590.30921 66.129999 \n", + "L 590.770829 66.129999 \n", + "L 591.001638 65.359999 \n", + "L 591.232447 66.129999 \n", + "L 591.463257 65.359999 \n", + "L 591.694066 65.359999 \n", + "L 591.924875 66.129999 \n", + "L 592.155685 65.359999 \n", + "L 594.002159 65.359999 \n", + "L 594.232969 66.129999 \n", + "L 594.463778 65.359999 \n", + "L 597.695108 65.359999 \n", + "L 597.925918 64.59 \n", + "L 598.156727 65.359999 \n", + "L 598.387536 65.359999 \n", + "L 598.618346 63.820001 \n", + "L 598.849155 63.820001 \n", + "L 599.310774 65.359999 \n", + "L 599.541583 63.820001 \n", + "L 599.772392 63.820001 \n", + "L 600.003202 65.359999 \n", + "L 600.234011 64.59 \n", + "L 600.46482 64.59 \n", + "L 600.69563 63.820001 \n", + "L 601.849676 63.820001 \n", + "L 602.080485 64.59 \n", + "L 602.311295 63.820001 \n", + "L 615.698235 63.820001 \n", + "L 616.159854 62.280002 \n", + "L 616.390663 63.820001 \n", + "L 617.083091 63.820001 \n", + "L 617.54471 62.280002 \n", + "L 632.316507 62.280002 \n", + "L 632.778125 60.740004 \n", + "L 633.008935 62.280002 \n", + "L 633.239744 62.280002 \n", + "L 633.470553 60.740004 \n", + "L 633.701363 59.970004 \n", + "L 634.162981 59.970004 \n", + "L 634.39379 61.510003 \n", + "L 634.6246 62.280002 \n", + "L 635.086218 62.280002 \n", + "L 635.317028 60.740004 \n", + "L 635.547837 60.740004 \n", + "L 635.778646 59.970004 \n", + "L 636.009456 59.970004 \n", + "L 636.240265 59.200005 \n", + "L 636.471074 59.200005 \n", + "L 636.471074 59.200005 \n", + "\" clip-path=\"url(#p1e00e9049d)\" style=\"fill: none; stroke: #0000ff; stroke-width: 1.5; stroke-linecap: square\"/>\n", " \n", " \n", " \n", + "L 467.057032 237.84 \n", + "L 467.287842 219.36 \n", + "L 473.288884 219.36 \n", + "L 473.519693 200.879999 \n", + "L 473.750503 219.36 \n", + "L 473.981312 200.879999 \n", + "L 558.688333 200.879999 \n", + "L 558.919142 182.399998 \n", + "L 559.149952 182.399998 \n", + "L 559.380761 200.879999 \n", + "L 559.84238 200.879999 \n", + "L 560.073189 182.399998 \n", + "L 576.69146 182.399998 \n", + "L 576.922269 163.919999 \n", + "L 577.153079 182.399998 \n", + "L 578.537935 182.399998 \n", + "L 578.768744 163.919999 \n", + "L 578.999553 182.399998 \n", + "L 579.461172 182.399998 \n", + "L 579.691981 163.919999 \n", + "L 579.922791 163.919999 \n", + "L 580.1536 182.399998 \n", + "L 580.384409 163.919999 \n", + "L 581.538456 163.919999 \n", + "L 581.769265 182.399998 \n", + "L 582.000074 163.919999 \n", + "L 583.38493 163.919999 \n", + "L 583.61574 182.399998 \n", + "L 583.846549 163.919999 \n", + "L 589.616782 163.919999 \n", + "L 589.847591 145.44 \n", + "L 590.078401 163.919999 \n", + "L 
591.001638 163.919999 \n", + "L 591.232447 145.44 \n", + "L 591.694066 145.44 \n", + "L 591.924875 163.919999 \n", + "L 592.155685 145.44 \n", + "L 592.386494 145.44 \n", + "L 592.617303 163.919999 \n", + "L 592.848113 145.44 \n", + "L 636.471074 145.44 \n", + "L 636.471074 145.44 \n", + "\" clip-path=\"url(#p1e00e9049d)\" style=\"fill: none; stroke: #00bfbf; stroke-width: 1.5; stroke-linecap: square\"/>\n", " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -6390,10 +6866,10 @@ " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", - " \n", + " \n", " \n", " \n", " \n", @@ -6410,16 +6886,16 @@ "name": "stdout", "output_type": "stream", "text": [ - "Epoch 1000/1000 loss: 0.0026288372464478016 - tolerance_accuracy: 0.949999988079071 - val_loss: 0.03410140797495842 - val_tolerance_accuracy: 0.6000000238418579\n" + "Epoch 1000/1000 loss: 0.003038567490875721 - tolerance_accuracy: 0.9666666388511658 - val_loss: 0.0379040353000164 - val_tolerance_accuracy: 0.5\n" ] }, { "data": { "text/plain": [ - "" + "" ] }, - "execution_count": 37, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -6447,7 +6923,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 39, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -6467,19 +6943,19 @@ " \n", " \n", " \n", - " Layer: output 'Dense'\n", + " </defs><rect x=\"99.0\" y=\"24\" width=\"202\" height=\"52\" style=\"fill:none;stroke:black;stroke-width:2\"/><image id=\"keras-network_output\" class=\"keras-network\" x=\"100.0\" y=\"25\" height=\"50\" width=\"200\" preserveAspectRatio=\"none\" image-rendering=\"optimizeSpeed\" xlink:href=\"data:image/gif;base64,R0lGODdhCgABAIIAAAAAAAoKCjMzM0ZGRnp6egAAAAAAAAAAACwAAAAACgABAAAIDQAFEAAAYACAAAQBBAQAOw==\"><title>Layer: output 'Dense'\n", "Act function: softmax\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", + "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", "Act function: sigmoid\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)hidden_2Layer: flatten_1 'Flatten'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 36)flatten_1Layer: input 'InputLayer'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = [(None, 6, 6)]inputActivations for SimpleNetwork" + "Shape = (None, 10)hidden_2Layer: flatten_1 'Flatten'\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 36)flatten_1Layer: input 'InputLayer'\n", + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 6, 6)inputActivations for SequentialNetwork" ], "text/plain": [ "" @@ -6492,7 +6968,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "0.01, 0.0, 0.0, 0.0, 0.01, 0.0, 0.85, 0.0, 0.12, 0.0\n" + "0.2, 0.48, 0.0, 0.0, 0.28, 0.0, 0.04, 0.0, 0.0, 0.0\n" ] } ], @@ -6544,7 +7020,7 @@ }, { "cell_type": "code", - "execution_count": 39, + 
"execution_count": 40, "metadata": { "colab": { "base_uri": "https://localhost:8080/", @@ -6564,19 +7040,19 @@ " \n", " \n", " \n", - " Layer: output 'Dense'\n", + " </defs><rect x=\"99.0\" y=\"24\" width=\"202\" height=\"52\" style=\"fill:none;stroke:black;stroke-width:2\"/><image id=\"keras-network_output\" class=\"keras-network\" x=\"100.0\" y=\"25\" height=\"50\" width=\"200\" preserveAspectRatio=\"none\" image-rendering=\"optimizeSpeed\" xlink:href=\"data:image/gif;base64,R0lGODdhCgABAIIAAAAAAAEBAQUFBXFxcYODgwAAAAAAAAAAACwAAAAACgABAAAIDQAHCAAAgACAAAQBBAQAOw==\"><title>Layer: output 'Dense'\n", "Act function: softmax\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", - "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", + "Shape = (None, 10)outputLayer: hidden_2 'Dense'\n", "Act function: sigmoid\n", "Act output range: (0.0, 1.0)\n", "Actual minmax: (0.0, 1.0)\n", "Shape = (None, 10)hidden_2Layer: flatten_1 'Flatten'\n", - "Actual minmax: (0.0, 1.0)\n", + "Actual minmax: (0.0, +Infinity)\n", "Shape = (None, 36)flatten_1Layer: input 'InputLayer'\n", - "Actual minmax: (0.0, 1.0)\n", - "Shape = [(None, 6, 6)]inputActivations for SimpleNetwork" + "Actual minmax: (0.0, +Infinity)\n", + "Shape = (None, 6, 6)inputActivations for SequentialNetwork" ], "text/plain": [ "" @@ -6589,7 +7065,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "0.7, 0.0, 0.0, 0.0, 0.28, 0.0, 0.0, 0.0, 0.0, 0.01\n" + "0.45, 0.02, 0.0, 0.0, 0.52, 0.0, 0.01, 0.0, 0.0, 0.0\n" ] } ], @@ -6654,7 +7130,14 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.12" + "version": "3.10.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } } }, "nbformat": 4, diff --git a/setup.py b/setup.py index b9919c2..d53efd0 100644 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ def get_version(file, name="__version__"): "aitk.utils": ["fonts/*.ttf"], "aitk.robots": ["worlds/*.json", "worlds/*.png"], }, - install_requires=["Pillow", "ipywidgets", "tqdm", "numpy<=1.26.4", "matplotlib", "tensorflow<=2.15.1"], + install_requires=["Pillow", "ipywidgets", "tqdm", "numpy", "matplotlib", "tensorflow>=2.17.0"], packages=setuptools.find_packages(), python_requires=">=3.9", license="BSD-3-Clause", diff --git a/tests/test_networks/test_network.py b/tests/test_networks/test_network.py new file mode 100644 index 0000000..37ca4e7 --- /dev/null +++ b/tests/test_networks/test_network.py @@ -0,0 +1,203 @@ +# -*- coding: utf-8 -*- +# ****************************************************** +# aitk.networks: Keras model wrapper with visualizations +# +# Copyright (c) 2024 Douglas S. 
Blank +# +# https://github.com/ArtificialIntelligenceToolkit/aitk.networks +# +# ****************************************************** + +from tensorflow.keras.layers import Dense, InputLayer + +from aitk.networks import Network, SimpleNetwork +from aitk.utils import get_dataset + + +def test_network_names(): + network = Network() + network.add(InputLayer([1])) + network.add(InputLayer([2])) + network.add(Dense(5)) + network.add(Dense(6)) + + assert network._layers[0].name.startswith("input") + assert network._layers[1].name.startswith("input_") + assert network._layers[2].name.startswith("dense") + assert network._layers[3].name.startswith("dense_") + + +def test_network_names_again(): + # Should still follow this pattern + network = Network() + network.add(InputLayer([1])) + network.add(InputLayer([2])) + network.add(Dense(5)) + network.add(Dense(6)) + + assert network._layers[0].name.startswith("input") + assert network._layers[1].name.startswith("input_") + assert network._layers[2].name.startswith("dense") + assert network._layers[3].name.startswith("dense_") + + +def test_network_sequential_1(): + network = Network() + network.add(InputLayer([2])) + network.add(Dense(5)) + network.add(Dense(10)) + + network.connect() + network.compile() + + output = network.propagate([1, 1]) + + assert len(output) == 10 + + +def test_network_sequential_2(): + network = SimpleNetwork( + InputLayer([2]), + Dense(5), + Dense(10), + ) + + network.connect() + network.compile() + + output = network.propagate([1, 1]) + + assert len(output) == 10 + + +def test_network_sequential_3(): + network = SimpleNetwork( + [2], + 5, + 10, + ) + + network.connect() + network.compile() + + output = network.propagate([1, 1]) + + assert len(output) == 10 + + +def test_network_sequential_4(): + network = SimpleNetwork( + 2, + 5, + 10, + ) + + network.connect() + network.compile() + + output = network.propagate([1, 1]) + + assert len(output) == 10 + + +def test_network_display(): + network = SimpleNetwork( + 2, + 5, + 10, + ) + + network.connect() + network.compile() + + output = network.display([1, 1], return_type="image") + + assert output.size[0] > 300 + assert output.size[1] > 200 + + +def test_network_multi_inputs(): + network = Network() + network.add(InputLayer([1], name="input-1")) + network.add(InputLayer([2], name="input-2")) + network.add(Dense(5, name="hidden")) + network.add(Dense(6, name="output")) + + network.connect("input-1", "hidden") + network.connect("input-2", "hidden") + network.connect("hidden", "output") + + network.compile() + + output = network.propagate([[1], [1, 2]]) + + assert len(output) == 6 + + +def test_network_multi_outputs(): + network = Network() + network.add(InputLayer([1], name="input-1")) + network.add(Dense(5, name="hidden")) + network.add(Dense(2, name="output-1")) + network.add(Dense(3, name="output-2")) + + network.connect("input-1", "hidden") + network.connect("hidden", "output-1") + network.connect("hidden", "output-2") + + network.compile() + + output = network.propagate([1]) + + assert len(output) == 2 + assert len(output[0]) == 2 + assert len(output[1]) == 3 + + +def test_network_multi_inputs_outputs(): + network = Network() + network.add(InputLayer([1], name="input-1")) + network.add(InputLayer([2], name="input-2")) + network.add(Dense(5, name="hidden")) + network.add(Dense(2, name="output-1")) + network.add(Dense(3, name="output-2")) + + network.connect("input-1", "hidden") + network.connect("input-2", "hidden") + network.connect("hidden", "output-1") + 
network.connect("hidden", "output-2") + + network.compile() + + output = network.propagate([[1], [0, 0.5]]) + + assert len(output) == 2 + assert len(output[0]) == 2 + assert len(output[1]) == 3 + + +def test_network_predict(): + network = Network() + network.add(InputLayer([2])) + network.add(Dense(5)) + network.add(Dense(10)) + + network.connect() + network.compile() + + output = network.predict([[1, 1]]) + + assert len(output) == 1 + assert len(output[0]) == 10 + + +def test_network_model(): + from tensorflow.keras.applications import VGG16 + + dataset = get_dataset("dogs-vs-cats-100") + cats = dataset["cats"] + dogs = dataset["dogs"] + + vgg16 = VGG16(weights="imagenet") + vgg16_network = Network(vgg16) + vgg16_network.display(cats[0], rotate=True, scale=1.5, return_type="image") diff --git a/tests/test_networks/test_network_methods.py b/tests/test_networks/test_network_methods.py new file mode 100644 index 0000000..849cea0 --- /dev/null +++ b/tests/test_networks/test_network_methods.py @@ -0,0 +1,275 @@ +# -*- coding: utf-8 -*- +# ****************************************************** +# aitk.networks: Keras model wrapper with visualizations +# +# Copyright (c) 2024 Douglas S. Blank +# +# https://github.com/ArtificialIntelligenceToolkit/aitk.networks +# +# ****************************************************** + +import numpy as np +from tensorflow.keras.layers import Dense, InputLayer + +from aitk.networks import Network, SimpleNetwork +from aitk.utils import get_dataset + + +def test_set_weights(): + network = SimpleNetwork(3, 2, 1) + network.set_weights([1, 1, 1, 1, 1, 1, -2.5, -1.5, -3, 2, 0]) + inputs = [[0, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 1]] + expected_outputs = [[0.53426534], [0.5517651], [0.5280447], [0.44220227]] + for i in range(len(inputs)): + output = network.propagate(inputs[i]) + assert np.allclose(output, expected_outputs[i]) + + +def test_get_weights(): + network = SimpleNetwork(3, 2, 1) + network.set_weights([1, 1, 1, 1, 1, 1, -2.5, -1.5, -3, 2, 0]) + # weights are returned in this order: + # input->hidden + # hidden->output + # hidden biases + # output biases + weights = network.get_weights() + assert len(weights[0]) == 3 + assert len(weights[1]) == 2 + assert len(weights[2]) == 2 + assert len(weights[3]) == 1 + + +def test_get_weights_flat(): + network = SimpleNetwork(3, 2, 1) + original = [1, 1, 1, 1, 1, 1, -2.5, -1.5, -3, 2, 0] + network.set_weights(original) + weights = network.get_weights(flat=True) + assert np.allclose(weights, original) + + +def test_propagate_to(): + network = SimpleNetwork(3, 2, 1) + network.set_weights([1, 1, 1, 1, 1, 1, -2.5, -1.5, -3, 2, 0]) + inputs = [[0, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 1]] + expected_activations = [ + [0.075858176, 0.18242551], + [0.18242551, 0.37754068], + [0.37754068, 0.62245935], + [0.62245935, 0.8175745], + ] + for i in range(len(inputs)): + actual_activations = list(network.propagate_to(inputs[i], "hidden")) + assert np.allclose(actual_activations, expected_activations[i]) + + +def test_predict(): + network = SimpleNetwork(3, 2, 1) + network.set_weights([1, 1, 1, 1, 1, 1, -2.5, -1.5, -3, 2, 0]) + inputs = [[0, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 1]] + expected_activations = [0.53426534, 0.5517651, 0.5280447, 0.44220227] + results = network.predict(np.array(inputs)) + actual_activations = list(np.array(results).flatten()) + assert np.allclose(actual_activations, expected_activations) + + +def test_predict_to(): + network = SimpleNetwork(3, 2, 1) + network.set_weights([1, 1, 1, 1, 1, 1, -2.5, -1.5, -3, 2, 
0]) + inputs = [[0, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 1]] + result = network.predict_to(np.array(inputs), "hidden") + expected_activations = [ + [0.075858176, 0.18242551], + [0.18242551, 0.37754068], + [0.37754068, 0.62245935], + [0.62245935, 0.8175745], + ] + for i in range(len(result)): + assert np.allclose(list(result[i]), expected_activations[i]) + + +def test_predict_from_simple_network(): + network = SimpleNetwork(3, 2, 1) + network.set_weights([1, 1, 1, 1, 1, 1, -2.5, -1.5, -3, 2, 0]) + expected_activations = [0.53426534, 0.5517651, 0.5280447, 0.44220227] + hiddens = [ + [0.07585818, 0.18242551], + [0.18242551, 0.37754068], + [0.37754068, 0.62245935], + [0.62245935, 0.8175745], + ] + results = network.predict_from(np.array(hiddens), "hidden", "output") + actual_activations = list(np.array(results).flatten()) + assert np.allclose(actual_activations, expected_activations) + + +def test_propagate(): + network = SimpleNetwork(3, 2, 1) + network.set_weights([1, 1, 1, 1, 1, 1, -2.5, -1.5, -3, 2, 0]) + inputs = [[0, 0, 0], [1, 0, 0], [1, 1, 0], [1, 1, 1]] + expected_activations = [[0.53426534], [0.5517651], [0.5280447], [0.44220227]] + for i in range(len(inputs)): + result = network.propagate(np.array(inputs[i])) + assert np.allclose(result, expected_activations[i]) + + +def test_topological_sort(): + # output + # / \ + # hiddenA hiddenB + # | | + # inputA inputB + network = Network() + network.add(InputLayer([2], name="inputA")) + network.add(InputLayer([3], name="inputB")) + network.add(Dense(2, name="hiddenA")) + network.add(Dense(3, name="hiddenB")) + network.add(Dense(1, name="output")) + network.connect("inputA", "hiddenA") + network.connect("inputB", "hiddenB") + network.connect("hiddenA", "output") + network.connect("hiddenB", "output") + network.compile() + result = network.topological_sort(network._layers, network._get_input_layers()) + names = [layer.name for layer in result] + assert names[0][:-1] == names[1][:-1] == "input" + assert names[2][:-1] == names[3][:-1] == "hidden" + assert names[4] == "output" + + +def test_predict_from_network(): + network = Network() + network.add(InputLayer([2], name="inputA")) + network.add(InputLayer([3], name="inputB")) + network.add(Dense(2, name="hiddenA")) + network.add(Dense(3, name="hiddenB")) + network.add(Dense(1, name="output")) + network.connect("inputA", "hiddenA") + network.connect("inputB", "hiddenB") + network.connect("hiddenA", "output") + network.connect("hiddenB", "output") + network.compile() + + output = network.propagate([[1, 1], [0, 0, 0]]) + hidden_a_activations = network.propagate_to([[1, 1], [0, 0, 0]], "hiddenA") + predict_from_outputs = network.predict_from( + np.array([hidden_a_activations.tolist() + [0, 0, 0]]), "hiddenA", "output" + ) + + assert np.allclose(output, predict_from_outputs[0]) + + +def test_get_input_from_dataset(): + network = SimpleNetwork((6, 6), "Flatten", 10, (10, "softmax")) + test_inputs, test_targets = get_dataset("validate_6x6") + result = network.get_input_from_dataset(0, test_inputs) + diff = result - test_inputs[0] + assert np.count_nonzero(diff) == 0 + + +def test_get_target_from_dataset(): + network = SimpleNetwork((6, 6), "Flatten", 10, (10, "softmax")) + test_inputs, test_targets = get_dataset("validate_6x6") + result = network.get_target_from_dataset(0, test_targets) + diff = result - test_targets[0] + assert np.count_nonzero(diff) == 0 + + +def test_get_input_from_banked_dataset(): + # outputA outputB + # \ / + # hidden + # / \ + # inputA inputB + network = Network() + 
network.add(InputLayer([2], name="inputA")) + network.add(InputLayer([3], name="inputB")) + network.add(Dense(4, name="hidden")) + network.add(Dense(1, name="outputA")) + network.add(Dense(2, name="outputB")) + network.connect("inputA", "hidden") + network.connect("inputB", "hidden") + network.connect("hidden", "outputA") + network.connect("hidden", "outputB") + network.compile() + inputs = [ + np.array([[0, 0], [1, 0], [1, 1]]), + np.array([[0, 0, 0], [1, 0, 1], [1, 1, 1]]), + ] + result = network.get_input_from_dataset(2, inputs) + diff = inputs[0][2] - result[0] + assert np.count_nonzero(diff) == 0 + diff = inputs[1][2] - result[1] + assert np.count_nonzero(diff) == 0 + + +def test_get_target_from_banked_dataset(): + # outputA outputB + # \ / + # hidden + # / \ + # inputA inputB + network = Network() + network.add(InputLayer([2], name="inputA")) + network.add(InputLayer([3], name="inputB")) + network.add(Dense(4, name="hidden")) + network.add(Dense(1, name="outputA")) + network.add(Dense(2, name="outputB")) + network.connect("inputA", "hidden") + network.connect("inputB", "hidden") + network.connect("hidden", "outputA") + network.connect("hidden", "outputB") + network.compile() + inputs = [ + np.array([[0, 0], [1, 0], [1, 1]]), + np.array([[0, 0, 0], [1, 0, 1], [1, 1, 1]]), + ] + result = network.get_target_from_dataset(2, inputs) + diff = inputs[0][2] - result[0] + assert np.count_nonzero(diff) == 0 + diff = inputs[1][2] - result[1] + assert np.count_nonzero(diff) == 0 + + +def test_train_from_set_weights(): + network = SimpleNetwork(3, 2, 1) + network.set_weights([1, 1, 1, 1, 1, 1, -2.5, -1.5, -3, 2, 0]) + train_inputs = [ + [0, 0, 0], + [1, 0, 0], + [0, 1, 0], + [0, 0, 1], + [1, 1, 0], + [1, 0, 1], + [0, 1, 1], + [1, 1, 1], + ] + train_targets = [[0], [0], [0], [0], [1], [1], [1], [0]] + history = network.fit( + train_inputs, + train_targets, + batch_size=8, + report_rate=100, + epochs=1000, + accuracy=1.0, + tolerance=0.2, + ) + assert len(history.history["tolerance_accuracy"]) == 874 + expected_weights = [ + 2.348937, + 4.2549586, + 2.348937, + 4.2549586, + 2.348937, + 4.2549586, + -5.95034, + -5.579458, + -6.8648214, + 7.5803447, + -3.8168766, + ] + weights = network.get_weights() + actual_weights = [] + for array in weights: + actual_weights += list(array.flatten()) + assert np.allclose(expected_weights, actual_weights)