From ec5ffb601d0bdb7c24a92b18e4d51e1f1b68284d Mon Sep 17 00:00:00 2001 From: daviddiazvico Date: Wed, 4 Sep 2019 00:34:50 +0200 Subject: [PATCH] Bug fixed --- .travis.yml | 2 +- README.md | 81 ++++---- azure-pipelines.yml | 31 ++++ setup.py | 20 +- skdatasets/repositories/__init__.py | 4 +- skdatasets/utils/experiment.py | 88 ++++----- skdatasets/utils/experiments.ipynb | 272 --------------------------- skdatasets/utils/scores.ipynb | 195 ++++++++++++++++++++ skdatasets/utils/scores.py | 121 ------------ skdatasets/utils/validation.ipynb | 276 ++++++++++++++++++++++++++++ skdatasets/utils/validation.py | 204 -------------------- tests/utils/MLPClassifier.json | 4 +- tests/utils/MLPRegressor.json | 4 +- tests/utils/test_run.py | 7 +- tests/utils/test_scores.py | 47 ----- tests/utils/test_validation.py | 45 ----- 16 files changed, 598 insertions(+), 803 deletions(-) create mode 100644 azure-pipelines.yml delete mode 100644 skdatasets/utils/experiments.ipynb create mode 100644 skdatasets/utils/scores.ipynb delete mode 100644 skdatasets/utils/scores.py create mode 100644 skdatasets/utils/validation.ipynb delete mode 100644 skdatasets/utils/validation.py delete mode 100644 tests/utils/test_scores.py delete mode 100644 tests/utils/test_validation.py diff --git a/.travis.yml b/.travis.yml index 524db85..26ea6a5 100755 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ script: - python setup.py test - mkdir docs - export PYTHONPATH=`pwd` - - sphinx-quickstart -q -p scikit-datasets -a "David Diaz Vico" -v 0.1 -r 0.1.30 -l en --ext-autodoc --ext-viewcode --ext-githubpages --extensions sphinxcontrib.napoleon --no-makefile --no-batchfile docs + - sphinx-quickstart -q -p scikit-datasets -a "David Diaz Vico" -v 0.1 -r 0.1.31 -l en --ext-autodoc --ext-viewcode --ext-githubpages --extensions sphinxcontrib.napoleon --no-makefile --no-batchfile docs - sphinx-apidoc -o docs/_static/ skdatasets -F -a -l - travis-sphinx -v build -s docs -n after_success: diff --git a/README.md b/README.md index 372ba6b..dff242f 100755 --- a/README.md +++ b/README.md @@ -1,41 +1,42 @@ -# scikit-datasets -Scikit-learn-compatible datasets - -## Status -[![Build Status](https://travis-ci.com/daviddiazvico/scikit-datasets.svg?branch=master)](https://travis-ci.com/daviddiazvico/scikit-datasets) -[![Maintainability](https://api.codeclimate.com/v1/badges/a37c9ee152b41a0cb577/maintainability)](https://codeclimate.com/github/daviddiazvico/scikit-datasets/maintainability) -[![Test Coverage](https://api.codeclimate.com/v1/badges/a37c9ee152b41a0cb577/test_coverage)](https://codeclimate.com/github/daviddiazvico/scikit-datasets/test_coverage) - -## Installation -Available in [PyPI](https://pypi.python.org/pypi?:action=display&name=scikit-datasets) -``` -pip install scikit-datasets -``` - -## Documentation -Autogenerated and hosted in [GitHub Pages](https://daviddiazvico.github.io/scikit-datasets/) - -## Distribution -Run the following command from the project home to create the distribution -``` -python setup.py sdist bdist_wheel -``` -and upload the package to [testPyPI](https://testpypi.python.org/) -``` -twine upload --repository-url https://test.pypi.org/legacy/ dist/* -``` -or [PyPI](https://pypi.python.org/) -``` -twine upload dist/* -``` - -## Citation -If you find scikit-datasets useful, please cite it in your publications. 
You can use this [BibTeX](http://www.bibtex.org/) entry: -``` -@misc{scikit-datasets, - title={scikit-datasets}, - author={Diaz-Vico, David}, - year={2017}, - publisher={GitHub}, - howpublished={\url{https://github.com/daviddiazvico/scikit-datasets}}} +# scikit-datasets +Scikit-learn-compatible datasets + +## Status +[![Build Status](https://travis-ci.com/daviddiazvico/scikit-datasets.svg?branch=master)](https://travis-ci.com/daviddiazvico/scikit-datasets) +[![Maintainability](https://api.codeclimate.com/v1/badges/a37c9ee152b41a0cb577/maintainability)](https://codeclimate.com/github/daviddiazvico/scikit-datasets/maintainability) +[![Test Coverage](https://api.codeclimate.com/v1/badges/a37c9ee152b41a0cb577/test_coverage)](https://codeclimate.com/github/daviddiazvico/scikit-datasets/test_coverage) +[![Build Status](https://dev.azure.com/daviddiazvico0337/daviddiazvico/_apis/build/status/daviddiazvico.scikit-datasets?branchName=master)](https://dev.azure.com/daviddiazvico0337/daviddiazvico/_build/latest?definitionId=1&branchName=master) + +## Installation +Available in [PyPI](https://pypi.python.org/pypi?:action=display&name=scikit-datasets) +``` +pip install scikit-datasets +``` + +## Documentation +Autogenerated and hosted in [GitHub Pages](https://daviddiazvico.github.io/scikit-datasets/) + +## Distribution +Run the following command from the project home to create the distribution +``` +python setup.py sdist bdist_wheel +``` +and upload the package to [testPyPI](https://testpypi.python.org/) +``` +twine upload --repository-url https://test.pypi.org/legacy/ dist/* +``` +or [PyPI](https://pypi.python.org/) +``` +twine upload dist/* +``` + +## Citation +If you find scikit-datasets useful, please cite it in your publications. You can use this [BibTeX](http://www.bibtex.org/) entry: +``` +@misc{scikit-datasets, + title={scikit-datasets}, + author={Diaz-Vico, David}, + year={2017}, + publisher={GitHub}, + howpublished={\url{https://github.com/daviddiazvico/scikit-datasets}}} ``` \ No newline at end of file diff --git a/azure-pipelines.yml b/azure-pipelines.yml new file mode 100644 index 0000000..ebcef56 --- /dev/null +++ b/azure-pipelines.yml @@ -0,0 +1,31 @@ +# Python package +# Create and test a Python package on multiple Python versions. 
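+# The pipeline defined below runs the test suite (python setup.py test) on a
+# Python 3.6/3.7 matrix and installs pytest-azurepipelines so that the test
+# results are published with the build.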
+# Add steps that analyze code, save the dist with the build record, publish to a PyPI-compatible index, and more: +# https://docs.microsoft.com/azure/devops/pipelines/languages/python + +trigger: +- master + +pool: + vmImage: 'ubuntu-latest' +strategy: + matrix: + Python36: + python.version: '3.6' + Python37: + python.version: '3.7' + +steps: +- task: UsePythonVersion@0 + inputs: + versionSpec: '$(python.version)' + displayName: 'Use Python $(python.version)' + +- script: | + python -m pip install --upgrade pip + displayName: 'Install dependencies' + +- script: | + pip install pytest-azurepipelines + python setup.py test + displayName: 'Test' diff --git a/setup.py b/setup.py index ce44d3a..6865ff6 100755 --- a/setup.py +++ b/setup.py @@ -9,30 +9,28 @@ setup(name='scikit-datasets', packages=find_packages(), - version='0.1.30', + version='0.1.31', description='Scikit-learn-compatible datasets', # long_description=open('README.md', 'r').read(), author='David Diaz Vico', author_email='david.diaz.vico@outlook.com', url='https://github.com/daviddiazvico/scikit-datasets', - download_url='https://github.com/daviddiazvico/scikit-datasets/archive/v0.1.30.tar.gz', + download_url='https://github.com/daviddiazvico/scikit-datasets/archive/v0.1.31.tar.gz', keywords=['scikit-learn'], classifiers=['Intended Audience :: Science/Research', 'Topic :: Scientific/Engineering', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6'], - install_requires=['scikit-learn'], + 'Programming Language :: Python :: 3.7'], + install_requires=['numpy', 'scipy', 'scikit-learn'], extras_require={'cran': ['rdata'], 'forex': ['forex_python'], 'keel': ['pandas'], 'keras': ['keras'], - 'utils.estimator': ['jsonpickle'], - 'utils.experiments': ['sacred'], - 'utils.scores': ['pandas', 'scipy', 'statsmodels'], - 'utils.validation': ['seaborn']}, + 'utils.estimator': ['jsonpickle==0.9.6'], + 'utils.experiments': ['sacred']}, setup_requires=['pytest-runner'], - tests_require=['coverage', 'forex_python', 'jsonpickle', 'keras', - 'pandas', 'pytest', 'pytest-cov', 'rdata', 'sacred', - 'scipy', 'seaborn', 'statsmodels', 'tensorflow'], + tests_require=['coverage', 'forex_python', 'jsonpickle==0.9.6', 'keras', + 'pandas', 'pymongo', 'pytest', 'pytest-cov', 'rdata', + 'sacred', 'tensorflow'], test_suite='tests') diff --git a/skdatasets/repositories/__init__.py b/skdatasets/repositories/__init__.py index 9fc3543..627c3fe 100644 --- a/skdatasets/repositories/__init__.py +++ b/skdatasets/repositories/__init__.py @@ -29,8 +29,8 @@ def fetch(repository, dataset, collection=None, **kwargs): - try: + if collection: data = repos[repository].fetch(collection, dataset, **kwargs) - except: + else: data = repos[repository].fetch(dataset, **kwargs) return data diff --git a/skdatasets/utils/experiment.py b/skdatasets/utils/experiment.py index b7b6e33..e89a181 100644 --- a/skdatasets/utils/experiment.py +++ b/skdatasets/utils/experiment.py @@ -4,11 +4,9 @@ """ import numpy as np -import os from sacred import Experiment, Ingredient from sklearn.model_selection import cross_validate, PredefinedSplit - -from skdatasets.utils.validation import scatter_plot, metaparameter_plot, history_plot +from tempfile import TemporaryFile def experiment(dataset, estimator): @@ -41,20 +39,17 @@ def experiment(dataset, estimator): experiment = Experiment(ingredients=(_dataset, _estimator)) @experiment.automain - def run(cross_validate=cross_validate, return_estimator=False): + def run(return_estimator=False, 
save_output=False): """Run the experiment. Run the experiment. Parameters ---------- - cross_validate : function, default=cross_validate - Function to evaluate metrics by cross-validation. Must receive the - estimator, X, y (migth be None) and cv (migth be None). Must return - a dictionary with the cross-validation score and maybe other info, - like a list of fitted estimators. return_estimator : boolean, default False Whether to return the estimator or estimators fitted. + save_output : boolean, default False + Whether to save the output as an artifact. """ data = dataset() @@ -62,17 +57,6 @@ def run(cross_validate=cross_validate, return_estimator=False): if a not in data: setattr(data, a, None) - def _explicit_folds(data): - """Prepare a dataset where the CV folds are explicit.""" - X = np.array([]).reshape((0, *data.inner_cv[0][0].shape[1:])) - y = np.array([]).reshape((0, *data.inner_cv[0][1].shape[1:])) - cv = [] - for i, (X_, y_, X_test_, y_test_) in enumerate(data.inner_cv): - X = np.concatenate((X, X_, X_test_)) - y = np.concatenate((y, y_, y_test_)) - cv = cv + [-1]*len(X_) + [i]*len(X_test_) - return X, y, cv - def _estimator(cv=None): """Create an estimator with or without hyperparameter search.""" try: @@ -81,46 +65,43 @@ def _estimator(cv=None): e = estimator() return e - def _plots(e, i, X, y): - """Create different descriptive plots.""" - # Metaparameter plots - image_files = metaparameter_plot(e, image_file=f'metaparameter_{i}.pdf') - for image_file in image_files: - experiment.add_artifact(image_file) - print("Removing " + image_file) - os.remove(image_file) - # Scatter plots - image_files = scatter_plot(X, y, e, image_file=f'scatter_{i}.pdf') - for image_file in image_files: - experiment.add_artifact(image_file) - print("Removing " + image_file) - os.remove(image_file) + def _output(e, X): + """Generate the outputs of an estimator.""" + outputs = dict() + for output in ('transform', 'predict'): + if hasattr(e, output): + outputs[output] = getattr(e, output)(X) + return outputs # Inner CV for metaparameter search - if hasattr(data.inner_cv, '__iter__'): - # Explicit CV folds - X, y, cv = _explicit_folds(data) + if hasattr(data.inner_cv, '__iter__'): # Explicit CV folds + X = np.array([]).reshape((0, *data.inner_cv[0][0].shape[1:])) + y = np.array([]).reshape((0, *data.inner_cv[0][1].shape[1:])) + cv = [] + for i, (X_, y_, X_test_, y_test_) in enumerate(data.inner_cv): + X = np.concatenate((X, X_, X_test_)) + y = np.concatenate((y, y_, y_test_)) + cv = cv + [-1]*len(X_) + [i]*len(X_test_) e = _estimator(cv=PredefinedSplit(cv)) e.fit(X, y=y) if hasattr(e, 'best_estimator_'): e.fit = e.best_estimator_.fit - else: - # Automatic/indexed CV folds + else: # Automatic/indexed CV folds e = _estimator(cv=data.inner_cv) # Outer CV/test partition for model assessment - if data.data_test is not None: - # Test partition + if data.data_test is not None: # Test partition e.fit(data.data, y=data.target) scores = {'test_score': [e.score(data.data_test, y=data.target_test)]} if return_estimator: scores['estimator'] = [e] - _plots(e, 0, data.data_test, data.target_test) - else: - # Outer CV - if hasattr(data.outer_cv, '__iter__'): - # Explicit CV folds + if save_output: + with TemporaryFile() as tmpfile: + np.save(tmpfile, _output(e, data.data_test)) + experiment.add_artifact(tmpfile, name='output.npy') + else: # Outer CV + if hasattr(data.outer_cv, '__iter__'): # Explicit CV folds scores = {'test_score': []} if return_estimator: scores['estimator'] = [] @@ -129,16 +110,15 @@ def _plots(e, i, 
X, y): scores['test_score'].append(e.score(X_test, y=y_test)) if return_estimator: scores['estimator'].append(e) - _plots(e, i, X_test, y_test) - else: - # Automatic/indexed CV folds + if save_output: + with TemporaryFile() as tmpfile: + np.save(tmpfile, _output(e, X_test)) + experiment.add_artifact(tmpfile, + name=f'output_{i}.npy') + else: # Automatic/indexed CV folds scores = cross_validate(e, data.data, y=data.target, cv=data.outer_cv, - return_estimator=True) - for i, e in enumerate(scores['estimator']): - _plots(e, i, data.data, data.target) - if not return_estimator: - scores.pop('estimator') + return_estimator=return_estimator) experiment.log_scalar('score_mean', np.nanmean(scores['test_score'])) experiment.log_scalar('score_std', np.nanstd(scores['test_score'])) experiment.info.update(scores) diff --git a/skdatasets/utils/experiments.ipynb b/skdatasets/utils/experiments.ipynb deleted file mode 100644 index e67211d..0000000 --- a/skdatasets/utils/experiments.ipynb +++ /dev/null @@ -1,272 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%matplotlib inline" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Experiments notebook" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import numpy as np\n", - "import os\n", - "import pandas as pd\n", - "import pickle\n", - "from sklearn.model_selection import cross_val_predict" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from skdatasets import load\n", - "from skdatasets.utils.scores import hypotheses_table, scores_table" - "from skdatasets.utils.validation import classifier_scatter, metaparameter_plot" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def parse_experiment_score(config, info=None):\n", - " repository = config['dataset']['repository']\n", - " dataset = config['dataset']['dataset']\n", - " predictor = config['estimator']['predictor']\n", - " score = np.nan\n", - " if info is not None:\n", - " score = info['score']['values'] if type(info['score']) == dict else info['score']\n", - " return repository, dataset, predictor, score" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def describe_dataset(X, y=None):\n", - " n_patterns = len(X)\n", - " dimension = X.shape[1]\n", - " n_classes = class_ratios = None\n", - " if (y is not None) and (y.dtype.kind in ('b', 'u', 'i')):\n", - " counts = np.unique(y, return_counts=True)[1]\n", - " n_classes = len(counts)\n", - " class_ratios = np.max(counts) / np.min(counts)\n", - " return n_patterns, dimension, n_classes, class_ratios" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def datasets_table(datasets):\n", - " table = pd.DataFrame(columns=('n. patterns', 'dimension', 'n. 
classes', 'class ratios'))\n", - " for repository, dataset, X, y in datasets:\n", - " table.at[repository + ':' + dataset] = describe_dataset(X, y=y)\n", - " return table.dropna(axis=1, how='all')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "folder = '../.results'" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Scores, hypotheses and datasets" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "scores = pd.DataFrame()\n", - "stds = pd.DataFrame()\n", - "for dirpath, dirnames, filenames in os.walk(folder):\n", - " try:\n", - " config = json.load(open(os.path.join(dirpath, 'config.json')))\n", - " info = json.load(open(os.path.join(dirpath, 'info.json')))\n", - " repository, dataset, predictor, score = parse_experiment_score(config, info=info)\n", - " scores.at[repository + ':' + dataset, predictor] = np.nanmean(score)\n", - " stds.at[repository + ':' + dataset, predictor] = np.nanstd(score)\n", - " except:\n", - " pass" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "scores_table(scores.index, scores.columns.values, scores.values, stds.values)\n", - "# TODO: scores_table(scores, stds)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hypotheses_table_ = hypotheses_table(scores.values, scores.columns.values, multitest='friedmanchisquare')\n", - "# TODO: hypotheses_table(scores, multitest='friedmanchisquare')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hypotheses_table_[0]\n", - "# TODO: all in one table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "hypotheses_table_[1]\n", - "# TODO: all in one table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "datasets = [row.split(':') for row in scores.index]\n", - "datasets = [(r, d, *load(r, d, return_X_y=True)[:2]) for r, d in datasets]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "datasets_table(datasets)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Meta-parameter search and prediction scatter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "experiment = '1'\n", - "param = 'classifier__C'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "config = json.load(open(os.path.join(folder, experiment, 'config.json')))\n", - "repository, dataset, predictor, _ = parse_experiment_score(config)\n", - "info = json.load(open(os.path.join(folder, experiment, 'info.json')))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "estimator = pickle.load(open(os.path.join(folder, experiment, 'estimator.pkl'), 'rb'))\n", - "metaparameter_plot(estimator, param, '/tmp/' + repository + '-' + dataset + '-' + predictor + '-' + param + '.png')\n", - "# TODO: metaparameter_plot(info, param)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "estimator = pickle.load(open(os.path.join(folder, 
experiment, 'estimator.pkl'), 'rb'))\n", - "X, y, X_test, _, _, outer_cv = load(repository, dataset, return_X_y=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "if X_test is not None:\n", - " classifier_scatter(X_test, estimator.predict(X_test), '/tmp/' + repository + '-' + dataset + '-' + estimator + '_scatter.png')\n", - "# TODO: classifier_scatter(X_test, estimator.predict(X_test))\n", - "else:\n", - " preds = cross_val_predict(estimator, X, y=y)\n", - " classifier_scatter(X, cross_val_predict(estimator, X, y=y, cv=outer_cv), '/tmp/' + repository + '-' + dataset + '-' + estimator + '_scatter.png')\n", - "# TODO: classifier_scatter(X, cross_val_predict(estimator, X, y=y, cv=outer_cv))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python [conda env:experiments]", - "language": "python", - "name": "conda-env-experiments-py" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/skdatasets/utils/scores.ipynb b/skdatasets/utils/scores.ipynb new file mode 100644 index 0000000..19989a3 --- /dev/null +++ b/skdatasets/utils/scores.ipynb @@ -0,0 +1,195 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import itertools as it\n", + "import numpy as np\n", + "import pandas as pd\n", + "from scipy.stats import kruskal, friedmanchisquare, mannwhitneyu, rankdata, wilcoxon\n", + "from statsmodels.sandbox.stats.multicomp import multipletests\n", + "\n", + "\n", + "def scores_table(datasets, estimators, scores, stds=None,\n", + " greater_is_better=True, method='average'):\n", + " \"\"\" Scores table.\n", + "\n", + " Prints a table where each row represents a dataset and each column\n", + " represents an estimator.\n", + "\n", + " Parameters\n", + " ----------\n", + " datasets: array-like\n", + " List of dataset names.\n", + " estimators: array-like\n", + " List of estimator names.\n", + " scores: array-like\n", + " Matrix of scores where each column represents a model.\n", + " stds: array_like, default=None\n", + " Matrix of standard deviations where each column represents a\n", + " model.\n", + " greater_is_better: boolean, default=True\n", + " Whether a greater score is better (score) or worse\n", + " (loss).\n", + " method: {'average', 'min', 'max', 'dense', 'ordinal'}, default='average'\n", + " Method used to solve ties.\n", + "\n", + " Returns\n", + " -------\n", + " table: array-like\n", + " Table of mean and standard deviation of each estimator-dataset\n", + " pair. 
A ranking of estimators is also generated.\n", + " \"\"\"\n", + " ranks = np.asarray([rankdata(-m, method=method) if greater_is_better else rankdata(m, method=method) for m in scores])\n", + " table = pd.DataFrame(data=scores, index=datasets, columns=estimators)\n", + " for i, d in enumerate(datasets):\n", + " for j, e in enumerate(estimators):\n", + " table.loc[d, e] = '{0:.2f}'.format(scores[i, j])\n", + " if stds is not None:\n", + " table.loc[d, e] += ' ±{0:.2f}'.format(stds[i, j])\n", + " table.loc[d, e] += ' ({0:.1f})'.format(ranks[i, j])\n", + " table.loc['rank mean'] = np.around(np.mean(ranks, axis=0), decimals=4)\n", + " return table\n", + "\n", + "\n", + "def hypotheses_table(samples, models, alpha=0.05, multitest=None,\n", + " test='wilcoxon', correction=None, multitest_args=dict(),\n", + " test_args=dict()):\n", + " \"\"\" Hypotheses table.\n", + "\n", + " Prints a hypothesis table with a selected test and correction.\n", + "\n", + " Parameters\n", + " ----------\n", + " samples: array-like\n", + " Matrix of samples where each column represent a model.\n", + " models: array-like\n", + " Model names.\n", + " alpha: float in [0, 1], default=0.05\n", + " Significance level.\n", + " multitest: {'kruskal', 'friedmanchisquare'}\n", + " default=None\n", + " Ranking multitest used.\n", + " test: {'mannwhitneyu', 'wilcoxon'},\n", + " default='wilcoxon'\n", + " Ranking test used.\n", + " correction: {'bonferroni', 'sidak', 'holm-sidak', 'holm',\n", + " 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by', 'fdr_tsbh',\n", + " 'fdr_tsbky'},\n", + " default=None\n", + " Method used to adjust the p-values.\n", + " multitest_args: dict\n", + " Optional ranking test arguments.\n", + " test_args: dict\n", + " Optional ranking test arguments.\n", + "\n", + " Returns\n", + " -------\n", + " multitest_table: array-like\n", + " Table of p-value and rejection/non-rejection for the\n", + " multitest hypothesis.\n", + " test_table: array-like\n", + " Table of p-values and rejection/non-rejection for each test\n", + " hypothesis.\n", + " \"\"\"\n", + " versus = list(it.combinations(range(len(models)), 2))\n", + " comparisons = [models[vs[0]] + \" vs \" + models[vs[1]] for vs in versus]\n", + " multitests = {'kruskal': kruskal, 'friedmanchisquare': friedmanchisquare}\n", + " tests = {'mannwhitneyu': mannwhitneyu, 'wilcoxon': wilcoxon}\n", + " multitest_table = None\n", + " if multitest is not None:\n", + " multitest_table = pd.DataFrame(index=[multitest], columns=['p-value',\n", + " 'Hypothesis'])\n", + " statistic, pvalue = multitests[multitest](*samples, **multitest_args)\n", + " reject = 'Rejected' if pvalue <= alpha else 'Not rejected'\n", + " multitest_table.loc[multitest] = ['{0:.2f}'.format(pvalue), reject]\n", + " if pvalue > alpha:\n", + " return multitest_table, None\n", + " pvalues = [tests[test](samples[:, vs[0]], samples[:, vs[1]], **test_args)[1] for vs in versus]\n", + " if correction is not None:\n", + " reject, pvalues, alphac_sidak, alphac_bonf = multipletests(pvalues,\n", + " alpha,\n", + " method=correction)\n", + " else:\n", + " reject = ['Rejected' if pvalue <= alpha else 'Not rejected' for pvalue in pvalues]\n", + " test_table = pd.DataFrame(index=comparisons, columns=['p-value',\n", + " 'Hypothesis'])\n", + " for i, d in enumerate(comparisons):\n", + " test_table.loc[d] = ['{0:.2f}'.format(pvalues[i]), reject[i]]\n", + " return multitest_table, test_table\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy 
as np\n", + "\n", + "from skdatasets.utils._scores import scores_table, hypotheses_table\n", + "\n", + "\n", + "datasets = ['a4a', 'a8a', 'combined', 'dna', 'ijcnn1', 'letter', 'pendigits',\n", + " 'satimage', 'shuttle', 'usps', 'w7a', 'w8a']\n", + "estimators = ['LogisticRegression', 'MLPClassifier0', 'MLPClassifier1',\n", + " 'MLPClassifier2', 'MLPClassifier3', 'MLPClassifier4',\n", + " 'MLPClassifier5']\n", + "scores = np.asarray(((89.79, 89.78, 89.76, 89.88, 89.85, 89.91, 89.93),\n", + " (90.73, 90.73, 90.73, 90.85, 90.83, 90.81, 90.80),\n", + " (92.36, 92.31, 94.58, 94.82, 94.84, 94.92, 94.89),\n", + " (99.28, 99.27, 99.28, 99.26, 99.27, 99.25, 99.25),\n", + " (91.34, 91.34, 99.29, 99.33, 99.34, 99.53, 99.54),\n", + " (98.07, 98.04, 99.94, 99.95, 99.96, 99.96, 99.95),\n", + " (99.17, 99.08, 99.87, 99.87, 99.88, 99.90, 99.89),\n", + " (96.67, 96.28, 98.84, 98.87, 98.90, 98.87, 98.92),\n", + " (95.85, 92.83, 99.88, 99.93, 99.96, 99.98, 99.99),\n", + " (99.12, 99.11, 99.65, 99.58, 99.58, 99.65, 99.60),\n", + " (95.93, 95.40, 94.58, 96.31, 96.34, 96.58, 96.50),\n", + " (95.80, 95.99, 95.35, 96.20, 96.22, 96.36, 96.71)))\n", + "\n", + "\n", + "def test_scores_table():\n", + " \"\"\"Tests scores table.\"\"\"\n", + " scores_table(datasets, estimators, scores)\n", + " scores_table(datasets, estimators, scores, stds=scores/10.0)\n", + "\n", + "\n", + "def test_hypotheses_table():\n", + " \"\"\"Tests hypotheses table.\"\"\"\n", + " for multitest in ('kruskal', 'friedmanchisquare', None):\n", + " for test in ('mannwhitneyu', 'wilcoxon'):\n", + " hypotheses_table(scores, estimators, multitest=multitest, test=test)\n", + " for correction in ('bonferroni', 'sidak', 'holm-sidak', 'holm',\n", + " 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by',\n", + " 'fdr_tsbh', 'fdr_tsbky'):\n", + " hypotheses_table(scores, estimators, multitest=multitest,\n", + " test=test, correction=correction)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/skdatasets/utils/scores.py b/skdatasets/utils/scores.py deleted file mode 100644 index b8613fb..0000000 --- a/skdatasets/utils/scores.py +++ /dev/null @@ -1,121 +0,0 @@ -""" -Scikit-learn-compatible visualizations for scores and hypothesis testing. - -@author: David Diaz Vico -@license: MIT -""" - -import itertools as it -import numpy as np -import pandas as pd -from scipy.stats import kruskal, friedmanchisquare, mannwhitneyu, rankdata, wilcoxon -from statsmodels.sandbox.stats.multicomp import multipletests - - -def scores_table(datasets, estimators, scores, stds=None, - greater_is_better=True, method='average'): - """ Scores table. - - Prints a table where each row represents a dataset and each column - represents an estimator. - - Parameters - ---------- - datasets: array-like - List of dataset names. - estimators: array-like - List of estimator names. - scores: array-like - Matrix of scores where each column represents a model. - stds: array_like, default=None - Matrix of standard deviations where each column represents a - model. - greater_is_better: boolean, default=True - Whether a greater score is better (score) or worse - (loss). 
- method: {'average', 'min', 'max', 'dense', 'ordinal'}, default='average' - Method used to solve ties. - - Returns - ------- - table: array-like - Table of mean and standard deviation of each estimator-dataset - pair. A ranking of estimators is also generated. - """ - ranks = np.asarray([rankdata(-m, method=method) if greater_is_better else rankdata(m, method=method) for m in scores]) - table = pd.DataFrame(data=scores, index=datasets, columns=estimators) - for i, d in enumerate(datasets): - for j, e in enumerate(estimators): - table.loc[d, e] = '{0:.2f}'.format(scores[i, j]) - if stds is not None: - table.loc[d, e] += ' ±{0:.2f}'.format(stds[i, j]) - table.loc[d, e] += ' ({0:.1f})'.format(ranks[i, j]) - table.loc['rank mean'] = np.around(np.mean(ranks, axis=0), decimals=4) - return table - - -def hypotheses_table(samples, models, alpha=0.05, multitest=None, - test='wilcoxon', correction=None, multitest_args=dict(), - test_args=dict()): - """ Hypotheses table. - - Prints a hypothesis table with a selected test and correction. - - Parameters - ---------- - samples: array-like - Matrix of samples where each column represent a model. - models: array-like - Model names. - alpha: float in [0, 1], default=0.05 - Significance level. - multitest: {'kruskal', 'friedmanchisquare'} - default=None - Ranking multitest used. - test: {'mannwhitneyu', 'wilcoxon'}, - default='wilcoxon' - Ranking test used. - correction: {'bonferroni', 'sidak', 'holm-sidak', 'holm', - 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by', 'fdr_tsbh', - 'fdr_tsbky'}, - default=None - Method used to adjust the p-values. - multitest_args: dict - Optional ranking test arguments. - test_args: dict - Optional ranking test arguments. - - Returns - ------- - multitest_table: array-like - Table of p-value and rejection/non-rejection for the - multitest hypothesis. - test_table: array-like - Table of p-values and rejection/non-rejection for each test - hypothesis. 
- """ - versus = list(it.combinations(range(len(models)), 2)) - comparisons = [models[vs[0]] + " vs " + models[vs[1]] for vs in versus] - multitests = {'kruskal': kruskal, 'friedmanchisquare': friedmanchisquare} - tests = {'mannwhitneyu': mannwhitneyu, 'wilcoxon': wilcoxon} - multitest_table = None - if multitest is not None: - multitest_table = pd.DataFrame(index=[multitest], columns=['p-value', - 'Hypothesis']) - statistic, pvalue = multitests[multitest](*samples, **multitest_args) - reject = 'Rejected' if pvalue <= alpha else 'Not rejected' - multitest_table.loc[multitest] = ['{0:.2f}'.format(pvalue), reject] - if pvalue > alpha: - return multitest_table, None - pvalues = [tests[test](samples[:, vs[0]], samples[:, vs[1]], **test_args)[1] for vs in versus] - if correction is not None: - reject, pvalues, alphac_sidak, alphac_bonf = multipletests(pvalues, - alpha, - method=correction) - else: - reject = ['Rejected' if pvalue <= alpha else 'Not rejected' for pvalue in pvalues] - test_table = pd.DataFrame(index=comparisons, columns=['p-value', - 'Hypothesis']) - for i, d in enumerate(comparisons): - test_table.loc[d] = ['{0:.2f}'.format(pvalues[i]), reject[i]] - return multitest_table, test_table diff --git a/skdatasets/utils/validation.ipynb b/skdatasets/utils/validation.ipynb new file mode 100644 index 0000000..db00883 --- /dev/null +++ b/skdatasets/utils/validation.ipynb @@ -0,0 +1,276 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import seaborn as sns\n", + "from sklearn.utils.multiclass import type_of_target\n", + "\n", + "\n", + "sns.set(style=\"white\", palette=\"muted\", color_codes=True)\n", + "\n", + "\n", + "def scatter_plot(X, y, estimator, image_file='scatter.pdf', max_features=10,\n", + " max_data=200, **kwargs):\n", + " \"\"\" Scatter plot.\n", + "\n", + " Scatter plot of the transformations or/and predictions of the estimator.\n", + "\n", + " Parameters\n", + " ----------\n", + " X : array-like, shape (n_samples, features_shape)\n", + " Input data.\n", + " y : numpy array of shape [n_samples]\n", + " Target values.\n", + " estimator : estimator\n", + " Fitted sklearn Transformer/Predictor object.\n", + " image_file: string, default=...\n", + " ...\n", + " max_features : integer, default=10\n", + " Maximum number of features to use in the plot\n", + " max_data : integer, default=200\n", + " Maximum number of data to use in the plot\n", + " **kwargs : optional savefig named args\n", + "\n", + " Returns\n", + " -------\n", + " List of image filenames.\n", + " \"\"\"\n", + " image_files = list()\n", + " max_data = min(X.shape[0], max_data)\n", + " max_features = min(X.shape[1], max_features)\n", + " target_type = type_of_target(y)\n", + " X = X[:max_data]\n", + " y = y[:max_data]\n", + " if hasattr(estimator, 'transform'):\n", + " # Transformer\n", + " plt.figure()\n", + " transfs = estimator.transform(X)\n", + " transfs = transfs[:max_data, :max_features]\n", + " if target_type in ('binary', 'multiclass'):\n", + " # Classification/clustering\n", + " names = list(range(transfs.shape[1]))\n", + " names.append('class')\n", + " data = pd.DataFrame(data=np.append(transfs,\n", + " np.reshape(y, (len(y), 1)),\n", + " axis=1),\n", + " columns=names)\n", + " sns.set()\n", + " sns.pairplot(data, hue='class', x_vars=names[:-1],\n", + " y_vars=names[:-1])\n", + " elif target_type == 'continuous':\n", + " # Regression\n", + 
" names = list(range(transfs.shape[1]))\n", + " names.append('y')\n", + " data = pd.DataFrame(data=np.append(transfs,\n", + " np.reshape(y, (-1, 1)),\n", + " axis=1),\n", + " columns=names)\n", + " sns.set()\n", + " sns.pairplot(data, hue='y', x_vars=names[:-1],\n", + " y_vars=names[:-1])\n", + " pass\n", + " transformer_image_file = 'transformer_' + image_file\n", + " plt.savefig(transformer_image_file, **kwargs)\n", + " image_files.append(transformer_image_file)\n", + " if hasattr(estimator, 'predict'):\n", + " # Predictor\n", + " plt.figure()\n", + " preds = estimator.predict(X)\n", + " try:\n", + " X = X[:, :max_features]\n", + " except:\n", + " X = X\n", + " try:\n", + " X = X.A\n", + " except:\n", + " X = X\n", + " preds = preds[:max_data]\n", + " if target_type in ('binary', 'multiclass'):\n", + " # Classification/clustering\n", + " names = list(range(X.shape[1]))\n", + " names.append('class')\n", + " diffs = y.flatten()\n", + " diffs[y.flatten() != preds.flatten()] = -1\n", + " data = pd.DataFrame(data=np.hstack((X, np.reshape(diffs, (-1, 1)))),\n", + " columns=names)\n", + " sns.set()\n", + " sns.pairplot(data, hue='class', x_vars=names[:-1],\n", + " y_vars=names[:-1])\n", + " elif target_type == 'continuous':\n", + " # Regression\n", + " data = pd.DataFrame(data=np.hstack((np.reshape(y, (-1, 1)),\n", + " np.reshape(preds, (-1, 1)),\n", + " np.reshape(y - preds, (-1, 1)))),\n", + " columns=('y', 'preds', 'error'))\n", + " sns.set()\n", + " sns.scatterplot(x='y', y='preds', hue='error', data=data)\n", + " predictor_image_file = 'predictor_' + image_file\n", + " plt.savefig(predictor_image_file, **kwargs)\n", + " image_files.append(predictor_image_file)\n", + " return image_files\n", + "\n", + "\n", + "def metaparameter_plot(estimator, image_file='metaparameter.pdf', **kwargs):\n", + " \"\"\" Metaparameter plot.\n", + "\n", + " Train and test metric plotted along a meta-parameter search space.\n", + "\n", + " Parameters\n", + " ----------\n", + " estimator : estimator\n", + " Fitted sklearn SearchCV object.\n", + " image_file: string, default=...\n", + " ...\n", + " **kwargs : optional savefig named args\n", + "\n", + " Returns\n", + " -------\n", + " List of image filenames.\n", + " \"\"\"\n", + " image_files = list()\n", + " if hasattr(estimator, 'cv_results_'):\n", + " for k, v in estimator.cv_results_.items():\n", + " if k[:6] == 'param_':\n", + " try:\n", + " param_range = v.data.astype('float32')\n", + " except:\n", + " continue\n", + " test_mean = estimator.cv_results_['mean_test_score']\n", + " test_std = estimator.cv_results_['std_test_score']\n", + " try:\n", + " train_mean = estimator.cv_results_['mean_train_score']\n", + " train_std = estimator.cv_results_['std_train_score']\n", + " except:\n", + " pass\n", + " plt.figure()\n", + " plt.autoscale(enable=True, axis='x')\n", + " plt.xlabel(k)\n", + " plt.ylabel('score')\n", + " plt.plot(param_range, test_mean, 'o', label='Test', color='g')\n", + " plt.fill_between(param_range, test_mean - test_std,\n", + " test_mean + test_std, alpha=0.2, color='g')\n", + " plt.plot(param_range[estimator.best_index_],\n", + " test_mean[estimator.best_index_], 'o', label='Best',\n", + " color='r')\n", + " try:\n", + " plt.plot(param_range, train_mean, 'o', label='Train',\n", + " color='b')\n", + " plt.fill_between(param_range, train_mean - train_std,\n", + " train_mean + train_std, alpha=0.2,\n", + " color='b')\n", + " plt.plot(param_range[estimator.best_index_],\n", + " train_mean[estimator.best_index_], 'o', color='r')\n", + " except:\n", 
+ " pass\n", + " plt.axvline(x=param_range[estimator.best_index_], color='r')\n", + " plt.legend(loc='best')\n", + " image_file = k + '_' + image_file\n", + " plt.savefig(image_file, **kwargs)\n", + " image_files.append(image_file)\n", + " return image_files\n", + "\n", + "\n", + "def history_plot(history, image_file='history.pdf', **kwargs):\n", + " \"\"\" History plot.\n", + "\n", + " Loss plotted for each training epoch.\n", + "\n", + " Parameters\n", + " ----------\n", + " history : history object\n", + " Keras-like history object returned from fit.\n", + " image_file: string, default=...\n", + " ...\n", + " **kwargs : optional savefig named args\n", + "\n", + " Returns\n", + " -------\n", + " None.\n", + " \"\"\"\n", + " image_file = None\n", + " plt.figure()\n", + " plt.xlabel('Epoch')\n", + " plt.ylabel('Loss')\n", + " for k, v in history.history.items():\n", + " plt.plot(v, label=k)\n", + " plt.legend(loc='best')\n", + " plt.savefig(image_file, **kwargs)\n", + " return image_file\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "plt.switch_backend('agg')\n", + "from sklearn.datasets import load_boston, load_iris\n", + "from sklearn.decomposition import PCA\n", + "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", + "from sklearn.dummy import DummyRegressor\n", + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "from skdatasets.utils._validation import scatter_plot, metaparameter_plot\n", + "\n", + "\n", + "def test_scatter_plot():\n", + " \"\"\"Tests scatter plot.\"\"\"\n", + " X, y = load_boston(return_X_y=True)\n", + " estimator = PCA(n_components=10)\n", + " estimator.fit(X, y)\n", + " image_files = scatter_plot(X, y, estimator)\n", + " assert len(image_files) == 1\n", + " estimator = DummyRegressor()\n", + " estimator.fit(X, y)\n", + " image_files = scatter_plot(X, y, estimator)\n", + " assert len(image_files) == 1\n", + " X, y = load_iris(return_X_y=True)\n", + " estimator = LinearDiscriminantAnalysis()\n", + " estimator.fit(X, y)\n", + " image_files = scatter_plot(X, y, estimator)\n", + " assert len(image_files) == 2\n", + "\n", + "\n", + "def test_metaparameter_plot():\n", + " \"\"\"Tests metaparameter plot.\"\"\"\n", + " X, y = load_boston(return_X_y=True)\n", + " estimator = GridSearchCV(DummyRegressor(),\n", + " {'strategy': ['mean', 'median', 'constant'],\n", + " 'constant': [1.0, 2.0, 3.0]})\n", + " estimator.fit(X, y)\n", + " image_files = metaparameter_plot(estimator)\n", + " assert len(image_files) == 1\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/skdatasets/utils/validation.py b/skdatasets/utils/validation.py deleted file mode 100644 index 307c8cf..0000000 --- a/skdatasets/utils/validation.py +++ /dev/null @@ -1,204 +0,0 @@ -""" -Scikit-learn-compatible visualizations for model validation. 
- -@author: David Diaz Vico -@license: MIT -""" - -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -import seaborn as sns -from sklearn.utils.multiclass import type_of_target - - -sns.set(style="white", palette="muted", color_codes=True) - - -def scatter_plot(X, y, estimator, image_file='scatter.pdf', max_features=10, - max_data=200, **kwargs): - """ Scatter plot. - - Scatter plot of the transformations or/and predictions of the estimator. - - Parameters - ---------- - X : array-like, shape (n_samples, features_shape) - Input data. - y : numpy array of shape [n_samples] - Target values. - estimator : estimator - Fitted sklearn Transformer/Predictor object. - image_file: string, default=... - ... - max_features : integer, default=10 - Maximum number of features to use in the plot - max_data : integer, default=200 - Maximum number of data to use in the plot - **kwargs : optional savefig named args - - Returns - ------- - List of image filenames. - """ - image_files = list() - max_data = min(X.shape[0], max_data) - max_features = min(X.shape[1], max_features) - target_type = type_of_target(y) - X = X[:max_data] - y = y[:max_data] - if hasattr(estimator, 'transform'): - # Transformer - plt.figure() - transfs = estimator.transform(X) - transfs = transfs[:max_data, :max_features] - if target_type in ('binary', 'multiclass'): - # Classification/clustering - names = list(range(transfs.shape[1])) - names.append('class') - data = pd.DataFrame(data=np.append(transfs, - np.reshape(y, (len(y), 1)), - axis=1), - columns=names) - sns.set() - sns.pairplot(data, hue='class', x_vars=names[:-1], - y_vars=names[:-1]) - elif target_type == 'continuous': - # Regression - names = list(range(transfs.shape[1])) - names.append('y') - data = pd.DataFrame(data=np.append(transfs, - np.reshape(y, (-1, 1)), - axis=1), - columns=names) - sns.set() - sns.pairplot(data, hue='y', x_vars=names[:-1], - y_vars=names[:-1]) - pass - transformer_image_file = 'transformer_' + image_file - plt.savefig(transformer_image_file, **kwargs) - image_files.append(transformer_image_file) - if hasattr(estimator, 'predict'): - # Predictor - plt.figure() - preds = estimator.predict(X) - try: - X = X[:, :max_features] - except: - X = X - try: - X = X.A - except: - X = X - preds = preds[:max_data] - if target_type in ('binary', 'multiclass'): - # Classification/clustering - names = list(range(X.shape[1])) - names.append('class') - diffs = y.flatten() - diffs[y.flatten() != preds.flatten()] = -1 - data = pd.DataFrame(data=np.hstack((X, np.reshape(diffs, (-1, 1)))), - columns=names) - sns.set() - sns.pairplot(data, hue='class', x_vars=names[:-1], - y_vars=names[:-1]) - elif target_type == 'continuous': - # Regression - data = pd.DataFrame(data=np.hstack((np.reshape(y, (-1, 1)), - np.reshape(preds, (-1, 1)), - np.reshape(y - preds, (-1, 1)))), - columns=('y', 'preds', 'error')) - sns.set() - sns.scatterplot(x='y', y='preds', hue='error', data=data) - predictor_image_file = 'predictor_' + image_file - plt.savefig(predictor_image_file, **kwargs) - image_files.append(predictor_image_file) - return image_files - - -def metaparameter_plot(estimator, image_file='metaparameter.pdf', **kwargs): - """ Metaparameter plot. - - Train and test metric plotted along a meta-parameter search space. - - Parameters - ---------- - estimator : estimator - Fitted sklearn SearchCV object. - image_file: string, default=... - ... - **kwargs : optional savefig named args - - Returns - ------- - List of image filenames. 
- """ - image_files = list() - if hasattr(estimator, 'cv_results_'): - for k, v in estimator.cv_results_.items(): - if k[:6] == 'param_': - try: - param_range = v.data.astype('float32') - except: - continue - test_mean = estimator.cv_results_['mean_test_score'] - test_std = estimator.cv_results_['std_test_score'] - try: - train_mean = estimator.cv_results_['mean_train_score'] - train_std = estimator.cv_results_['std_train_score'] - except: - pass - plt.figure() - plt.autoscale(enable=True, axis='x') - plt.xlabel(k) - plt.ylabel('score') - plt.plot(param_range, test_mean, 'o', label='Test', color='g') - plt.fill_between(param_range, test_mean - test_std, - test_mean + test_std, alpha=0.2, color='g') - plt.plot(param_range[estimator.best_index_], - test_mean[estimator.best_index_], 'o', label='Best', - color='r') - try: - plt.plot(param_range, train_mean, 'o', label='Train', - color='b') - plt.fill_between(param_range, train_mean - train_std, - train_mean + train_std, alpha=0.2, - color='b') - plt.plot(param_range[estimator.best_index_], - train_mean[estimator.best_index_], 'o', color='r') - except: - pass - plt.axvline(x=param_range[estimator.best_index_], color='r') - plt.legend(loc='best') - image_file = k + '_' + image_file - plt.savefig(image_file, **kwargs) - image_files.append(image_file) - return image_files - - -def history_plot(history, image_file='history.pdf', **kwargs): - """ History plot. - - Loss plotted for each training epoch. - - Parameters - ---------- - history : history object - Keras-like history object returned from fit. - image_file: string, default=... - ... - **kwargs : optional savefig named args - - Returns - ------- - None. - """ - image_file = None - plt.figure() - plt.xlabel('Epoch') - plt.ylabel('Loss') - for k, v in history.history.items(): - plt.plot(v, label=k) - plt.legend(loc='best') - plt.savefig(image_file, **kwargs) - return image_file diff --git a/tests/utils/MLPClassifier.json b/tests/utils/MLPClassifier.json index 6c8e2c8..c2f191a 100755 --- a/tests/utils/MLPClassifier.json +++ b/tests/utils/MLPClassifier.json @@ -21,7 +21,7 @@ { "py/object": "sklearn.neural_network.MLPClassifier", "py/state": { - "hidden_layer_sizes": [100], + "hidden_layer_sizes": [10], "activation": "relu", "solver": "adam", "alpha": 0.0001, @@ -29,7 +29,7 @@ "learning_rate": "constant", "learning_rate_init": 0.001, "power_t": 0.5, - "max_iter": 200, + "max_iter": 2, "shuffle": true, "random_state": null, "tol": 0.0001, diff --git a/tests/utils/MLPRegressor.json b/tests/utils/MLPRegressor.json index b206344..04fe687 100755 --- a/tests/utils/MLPRegressor.json +++ b/tests/utils/MLPRegressor.json @@ -21,7 +21,7 @@ { "py/object": "sklearn.neural_network.MLPRegressor", "py/state": { - "hidden_layer_sizes": [100], + "hidden_layer_sizes": [10], "activation": "relu", "solver": "adam", "alpha": 0.0001, @@ -29,7 +29,7 @@ "learning_rate": "constant", "learning_rate_init": 0.001, "power_t": 0.5, - "max_iter": 200, + "max_iter": 2, "shuffle": true, "random_state": null, "tol": 0.0001, diff --git a/tests/utils/test_run.py b/tests/utils/test_run.py index a9db7ae..c9ad463 100644 --- a/tests/utils/test_run.py +++ b/tests/utils/test_run.py @@ -22,10 +22,13 @@ def test_multiclass_classification(): assert ret == 0 ret = subprocess.call(['skdatasets/utils/run.py', '-r', 'uci', '-d', 'wine', '-e', 'tests/utils/MLPClassifier.json']) assert ret == 0 - + ret = subprocess.call(['skdatasets/utils/run.py', '-r', 'libsvm', '-c', 'multiclass', '-d', 'shuttle', '-e', 'tests/utils/MLPClassifier.json']) + assert 
ret == 0 + ret = subprocess.call(['skdatasets/utils/run.py', '-r', 'libsvm', '-c', 'multiclass', '-d', 'usps', '-e', 'tests/utils/MLPClassifier.json']) + assert ret == 0 + def test_regression(): """Tests regression experiment.""" ret = subprocess.call(['skdatasets/utils/run.py', '-r', 'libsvm', '-c', 'regression', '-d', 'housing', '-e', 'tests/utils/MLPRegressor.json']) assert ret == 0 - \ No newline at end of file diff --git a/tests/utils/test_scores.py b/tests/utils/test_scores.py deleted file mode 100644 index 2908654..0000000 --- a/tests/utils/test_scores.py +++ /dev/null @@ -1,47 +0,0 @@ -""" -Tests. - -@author: David Diaz Vico -@license: MIT -""" - -import numpy as np - -from skdatasets.utils.scores import scores_table, hypotheses_table - - -datasets = ['a4a', 'a8a', 'combined', 'dna', 'ijcnn1', 'letter', 'pendigits', - 'satimage', 'shuttle', 'usps', 'w7a', 'w8a'] -estimators = ['LogisticRegression', 'MLPClassifier0', 'MLPClassifier1', - 'MLPClassifier2', 'MLPClassifier3', 'MLPClassifier4', - 'MLPClassifier5'] -scores = np.asarray(((89.79, 89.78, 89.76, 89.88, 89.85, 89.91, 89.93), - (90.73, 90.73, 90.73, 90.85, 90.83, 90.81, 90.80), - (92.36, 92.31, 94.58, 94.82, 94.84, 94.92, 94.89), - (99.28, 99.27, 99.28, 99.26, 99.27, 99.25, 99.25), - (91.34, 91.34, 99.29, 99.33, 99.34, 99.53, 99.54), - (98.07, 98.04, 99.94, 99.95, 99.96, 99.96, 99.95), - (99.17, 99.08, 99.87, 99.87, 99.88, 99.90, 99.89), - (96.67, 96.28, 98.84, 98.87, 98.90, 98.87, 98.92), - (95.85, 92.83, 99.88, 99.93, 99.96, 99.98, 99.99), - (99.12, 99.11, 99.65, 99.58, 99.58, 99.65, 99.60), - (95.93, 95.40, 94.58, 96.31, 96.34, 96.58, 96.50), - (95.80, 95.99, 95.35, 96.20, 96.22, 96.36, 96.71))) - - -def test_scores_table(): - """Tests scores table.""" - scores_table(datasets, estimators, scores) - scores_table(datasets, estimators, scores, stds=scores/10.0) - - -def test_hypotheses_table(): - """Tests hypotheses table.""" - for multitest in ('kruskal', 'friedmanchisquare', None): - for test in ('mannwhitneyu', 'wilcoxon'): - hypotheses_table(scores, estimators, multitest=multitest, test=test) - for correction in ('bonferroni', 'sidak', 'holm-sidak', 'holm', - 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by', - 'fdr_tsbh', 'fdr_tsbky'): - hypotheses_table(scores, estimators, multitest=multitest, - test=test, correction=correction) diff --git a/tests/utils/test_validation.py b/tests/utils/test_validation.py deleted file mode 100644 index 9d5da03..0000000 --- a/tests/utils/test_validation.py +++ /dev/null @@ -1,45 +0,0 @@ -""" -Tests. 
- -@author: David Diaz Vico -@license: MIT -""" - -import matplotlib.pyplot as plt -plt.switch_backend('agg') -from sklearn.datasets import load_boston, load_iris -from sklearn.decomposition import PCA -from sklearn.discriminant_analysis import LinearDiscriminantAnalysis -from sklearn.dummy import DummyRegressor -from sklearn.model_selection import GridSearchCV - -from skdatasets.utils.validation import scatter_plot, metaparameter_plot - - -def test_scatter_plot(): - """Tests scatter plot.""" - X, y = load_boston(return_X_y=True) - estimator = PCA(n_components=10) - estimator.fit(X, y) - image_files = scatter_plot(X, y, estimator) - assert len(image_files) == 1 - estimator = DummyRegressor() - estimator.fit(X, y) - image_files = scatter_plot(X, y, estimator) - assert len(image_files) == 1 - X, y = load_iris(return_X_y=True) - estimator = LinearDiscriminantAnalysis() - estimator.fit(X, y) - image_files = scatter_plot(X, y, estimator) - assert len(image_files) == 2 - - -def test_metaparameter_plot(): - """Tests metaparameter plot.""" - X, y = load_boston(return_X_y=True) - estimator = GridSearchCV(DummyRegressor(), - {'strategy': ['mean', 'median', 'constant'], - 'constant': [1.0, 2.0, 3.0]}) - estimator.fit(X, y) - image_files = metaparameter_plot(estimator) - assert len(image_files) == 1
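
A minimal usage sketch of the `fetch` helper for reference, assuming it returns the Bunch-style object (with `data` and `target` attributes) that `skdatasets/utils/experiment.py` relies on; the repository, collection and dataset names are the ones exercised in `tests/utils/test_run.py`.
```
from skdatasets.repositories import fetch

# Without a collection, the dataset name is passed straight to the repository.
wine = fetch('uci', 'wine')

# With a collection, it is forwarded as the first positional argument.
shuttle = fetch('libsvm', 'shuttle', collection='multiclass')

# Bunch-style attributes (assumed, mirroring how experiment.py consumes them).
print(wine.data.shape, wine.target.shape)
print(shuttle.data.shape, shuttle.target.shape)
```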