diff --git a/.gitignore b/.gitignore
old mode 100755
new mode 100644
diff --git a/.travis.yml b/.travis.yml
old mode 100755
new mode 100644
index eb10833..73e002f
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,7 +16,7 @@ script:
   - python setup.py test
   - mkdir docs
   - export PYTHONPATH=`pwd`
-  - sphinx-quickstart -q -p scikit-datasets -a "David Diaz Vico" -v 0.1 -r 0.1.33 -l en --ext-autodoc --ext-viewcode --ext-githubpages --extensions sphinxcontrib.napoleon --no-makefile --no-batchfile docs
+  - sphinx-quickstart -q -p scikit-datasets -a "David Diaz Vico" -v 0.1 -r 0.1.34 -l en --ext-autodoc --ext-viewcode --ext-githubpages --extensions sphinxcontrib.napoleon --no-makefile --no-batchfile docs
   - sphinx-apidoc -o docs/_static/ skdatasets -F -a -l
   - travis-sphinx -v build -s docs -n
 after_success:
diff --git a/LICENSE b/LICENSE
old mode 100755
new mode 100644
diff --git a/README.md b/README.md
old mode 100755
new mode 100644
diff --git a/setup.cfg b/setup.cfg
old mode 100755
new mode 100644
diff --git a/setup.py b/setup.py
index 4382875..a89a65c 100755
--- a/setup.py
+++ b/setup.py
@@ -9,13 +9,13 @@
 setup(name='scikit-datasets',
       packages=find_packages(),
-      version='0.1.33',
+      version='0.1.34',
       description='Scikit-learn-compatible datasets',
 #     long_description=open('README.md', 'r').read(),
       author='David Diaz Vico',
       author_email='david.diaz.vico@outlook.com',
       url='https://github.com/daviddiazvico/scikit-datasets',
-      download_url='https://github.com/daviddiazvico/scikit-datasets/archive/v0.1.33.tar.gz',
+      download_url='https://github.com/daviddiazvico/scikit-datasets/archive/v0.1.34.tar.gz',
       keywords=['scikit-learn'],
       classifiers=['Intended Audience :: Science/Research',
                    'Topic :: Scientific/Engineering',
@@ -28,9 +28,10 @@
                      'keel': ['pandas'],
                      'keras': ['keras'],
                      'utils.estimator': ['jsonpickle==0.9.6'],
-                     'utils.experiments': ['sacred']},
+                     'utils.experiments': ['sacred'],
+                     'utils.scores': ['statsmodels']},
       setup_requires=['pytest-runner'],
       tests_require=['coverage', 'forex_python', 'jsonpickle==0.9.6',
                      'keras', 'pandas', 'pymongo', 'pytest', 'pytest-cov', 'rdata',
-                     'sacred', 'tensorflow'],
+                     'sacred', 'statsmodels', 'tensorflow'],
       test_suite='tests')
diff --git a/skdatasets/__init__.py b/skdatasets/__init__.py
old mode 100755
new mode 100644
diff --git a/skdatasets/utils/scores.ipynb b/skdatasets/utils/scores.ipynb
deleted file mode 100644
index 19989a3..0000000
--- a/skdatasets/utils/scores.ipynb
+++ /dev/null
@@ -1,195 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import itertools as it\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "from scipy.stats import kruskal, friedmanchisquare, mannwhitneyu, rankdata, wilcoxon\n",
-    "from statsmodels.sandbox.stats.multicomp import multipletests\n",
-    "\n",
-    "\n",
-    "def scores_table(datasets, estimators, scores, stds=None,\n",
-    "                 greater_is_better=True, method='average'):\n",
-    "    \"\"\" Scores table.\n",
-    "\n",
-    "    Prints a table where each row represents a dataset and each column\n",
-    "    represents an estimator.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    datasets: array-like\n",
-    "        List of dataset names.\n",
-    "    estimators: array-like\n",
-    "        List of estimator names.\n",
-    "    scores: array-like\n",
-    "        Matrix of scores where each column represents a model.\n",
-    "    stds: array_like, default=None\n",
-    "        Matrix of standard deviations where each column represents a\n",
-    "        model.\n",
-    "    greater_is_better: boolean, default=True\n",
-    "        Whether a greater score is better (score) or worse\n",
-    "        (loss).\n",
-    "    method: {'average', 'min', 'max', 'dense', 'ordinal'}, default='average'\n",
-    "        Method used to solve ties.\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    table: array-like\n",
-    "        Table of mean and standard deviation of each estimator-dataset\n",
-    "        pair. A ranking of estimators is also generated.\n",
-    "    \"\"\"\n",
-    "    ranks = np.asarray([rankdata(-m, method=method) if greater_is_better else rankdata(m, method=method) for m in scores])\n",
-    "    table = pd.DataFrame(data=scores, index=datasets, columns=estimators)\n",
-    "    for i, d in enumerate(datasets):\n",
-    "        for j, e in enumerate(estimators):\n",
-    "            table.loc[d, e] = '{0:.2f}'.format(scores[i, j])\n",
-    "            if stds is not None:\n",
-    "                table.loc[d, e] += ' ±{0:.2f}'.format(stds[i, j])\n",
-    "            table.loc[d, e] += ' ({0:.1f})'.format(ranks[i, j])\n",
-    "    table.loc['rank mean'] = np.around(np.mean(ranks, axis=0), decimals=4)\n",
-    "    return table\n",
-    "\n",
-    "\n",
-    "def hypotheses_table(samples, models, alpha=0.05, multitest=None,\n",
-    "                     test='wilcoxon', correction=None, multitest_args=dict(),\n",
-    "                     test_args=dict()):\n",
-    "    \"\"\" Hypotheses table.\n",
-    "\n",
-    "    Prints a hypothesis table with a selected test and correction.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    samples: array-like\n",
-    "        Matrix of samples where each column represent a model.\n",
-    "    models: array-like\n",
-    "        Model names.\n",
-    "    alpha: float in [0, 1], default=0.05\n",
-    "        Significance level.\n",
-    "    multitest: {'kruskal', 'friedmanchisquare'}\n",
-    "               default=None\n",
-    "        Ranking multitest used.\n",
-    "    test: {'mannwhitneyu', 'wilcoxon'},\n",
-    "          default='wilcoxon'\n",
-    "        Ranking test used.\n",
-    "    correction: {'bonferroni', 'sidak', 'holm-sidak', 'holm',\n",
-    "                 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by', 'fdr_tsbh',\n",
-    "                 'fdr_tsbky'},\n",
-    "                default=None\n",
-    "        Method used to adjust the p-values.\n",
-    "    multitest_args: dict\n",
-    "        Optional ranking test arguments.\n",
-    "    test_args: dict\n",
-    "        Optional ranking test arguments.\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    multitest_table: array-like\n",
-    "        Table of p-value and rejection/non-rejection for the\n",
-    "        multitest hypothesis.\n",
-    "    test_table: array-like\n",
-    "        Table of p-values and rejection/non-rejection for each test\n",
-    "        hypothesis.\n",
-    "    \"\"\"\n",
-    "    versus = list(it.combinations(range(len(models)), 2))\n",
-    "    comparisons = [models[vs[0]] + \" vs \" + models[vs[1]] for vs in versus]\n",
-    "    multitests = {'kruskal': kruskal, 'friedmanchisquare': friedmanchisquare}\n",
-    "    tests = {'mannwhitneyu': mannwhitneyu, 'wilcoxon': wilcoxon}\n",
-    "    multitest_table = None\n",
-    "    if multitest is not None:\n",
-    "        multitest_table = pd.DataFrame(index=[multitest], columns=['p-value',\n",
-    "                                                                   'Hypothesis'])\n",
-    "        statistic, pvalue = multitests[multitest](*samples, **multitest_args)\n",
-    "        reject = 'Rejected' if pvalue <= alpha else 'Not rejected'\n",
-    "        multitest_table.loc[multitest] = ['{0:.2f}'.format(pvalue), reject]\n",
-    "        if pvalue > alpha:\n",
-    "            return multitest_table, None\n",
-    "    pvalues = [tests[test](samples[:, vs[0]], samples[:, vs[1]], **test_args)[1] for vs in versus]\n",
-    "    if correction is not None:\n",
-    "        reject, pvalues, alphac_sidak, alphac_bonf = multipletests(pvalues,\n",
-    "                                                                   alpha,\n",
-    "                                                                   method=correction)\n",
-    "    else:\n",
-    "        reject = ['Rejected' if pvalue <= alpha else 'Not rejected' for pvalue in pvalues]\n",
-    "    test_table = pd.DataFrame(index=comparisons, columns=['p-value',\n",
-    "                                                          'Hypothesis'])\n",
-    "    for i, d in enumerate(comparisons):\n",
-    "        test_table.loc[d] = ['{0:.2f}'.format(pvalues[i]), reject[i]]\n",
-    "    return multitest_table, test_table\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import numpy as np\n",
-    "\n",
-    "from skdatasets.utils._scores import scores_table, hypotheses_table\n",
-    "\n",
-    "\n",
-    "datasets = ['a4a', 'a8a', 'combined', 'dna', 'ijcnn1', 'letter', 'pendigits',\n",
-    "            'satimage', 'shuttle', 'usps', 'w7a', 'w8a']\n",
-    "estimators = ['LogisticRegression', 'MLPClassifier0', 'MLPClassifier1',\n",
-    "              'MLPClassifier2', 'MLPClassifier3', 'MLPClassifier4',\n",
-    "              'MLPClassifier5']\n",
-    "scores = np.asarray(((89.79, 89.78, 89.76, 89.88, 89.85, 89.91, 89.93),\n",
-    "                     (90.73, 90.73, 90.73, 90.85, 90.83, 90.81, 90.80),\n",
-    "                     (92.36, 92.31, 94.58, 94.82, 94.84, 94.92, 94.89),\n",
-    "                     (99.28, 99.27, 99.28, 99.26, 99.27, 99.25, 99.25),\n",
-    "                     (91.34, 91.34, 99.29, 99.33, 99.34, 99.53, 99.54),\n",
-    "                     (98.07, 98.04, 99.94, 99.95, 99.96, 99.96, 99.95),\n",
-    "                     (99.17, 99.08, 99.87, 99.87, 99.88, 99.90, 99.89),\n",
-    "                     (96.67, 96.28, 98.84, 98.87, 98.90, 98.87, 98.92),\n",
-    "                     (95.85, 92.83, 99.88, 99.93, 99.96, 99.98, 99.99),\n",
-    "                     (99.12, 99.11, 99.65, 99.58, 99.58, 99.65, 99.60),\n",
-    "                     (95.93, 95.40, 94.58, 96.31, 96.34, 96.58, 96.50),\n",
-    "                     (95.80, 95.99, 95.35, 96.20, 96.22, 96.36, 96.71)))\n",
-    "\n",
-    "\n",
-    "def test_scores_table():\n",
-    "    \"\"\"Tests scores table.\"\"\"\n",
-    "    scores_table(datasets, estimators, scores)\n",
-    "    scores_table(datasets, estimators, scores, stds=scores/10.0)\n",
-    "\n",
-    "\n",
-    "def test_hypotheses_table():\n",
-    "    \"\"\"Tests hypotheses table.\"\"\"\n",
-    "    for multitest in ('kruskal', 'friedmanchisquare', None):\n",
-    "        for test in ('mannwhitneyu', 'wilcoxon'):\n",
-    "            hypotheses_table(scores, estimators, multitest=multitest, test=test)\n",
-    "            for correction in ('bonferroni', 'sidak', 'holm-sidak', 'holm',\n",
-    "                               'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by',\n",
-    "                               'fdr_tsbh', 'fdr_tsbky'):\n",
-    "                hypotheses_table(scores, estimators, multitest=multitest,\n",
-    "                                 test=test, correction=correction)\n"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.4"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/skdatasets/utils/scores.py b/skdatasets/utils/scores.py
new file mode 100644
index 0000000..93be91d
--- /dev/null
+++ b/skdatasets/utils/scores.py
@@ -0,0 +1,120 @@
+"""
+@author: David Diaz Vico
+@license: MIT
+"""
+
+import itertools as it
+import numpy as np
+import pandas as pd
+from scipy.stats import kruskal, friedmanchisquare, mannwhitneyu, rankdata, wilcoxon
+from statsmodels.sandbox.stats.multicomp import multipletests
+
+
+def scores_table(datasets, estimators, scores, stds=None,
+                 greater_is_better=True, method='average',
+                 score_decimals=2, rank_decimals=0):
+    """ Scores table.
+
+    Prints a table where each row represents a dataset and each column
+    represents an estimator.
+
+    Parameters
+    ----------
+    datasets: array-like
+        List of dataset names.
+    estimators: array-like
+        List of estimator names.
+    scores: array-like
+        Matrix of scores where each column represents a model.
+    stds: array_like, default=None
+        Matrix of standard deviations where each column represents a
+        model.
+    greater_is_better: boolean, default=True
+        Whether a greater score is better (score) or worse
+        (loss).
+    method: {'average', 'min', 'max', 'dense', 'ordinal'}, default='average'
+        Method used to solve ties.
+
+    Returns
+    -------
+    table: array-like
+        Table of mean and standard deviation of each estimator-dataset
+        pair. A ranking of estimators is also generated.
+    """
+    ranks = np.asarray([rankdata(-m, method=method) if greater_is_better else rankdata(m, method=method) for m in scores])
+    table = pd.DataFrame(data=scores, index=datasets, columns=estimators)
+    for i, d in enumerate(datasets):
+        for j, e in enumerate(estimators):
+            table.loc[d, e] = f'{scores[i, j]:.{score_decimals}f}'
+            if stds is not None:
+                table.loc[d, e] += f' ±{stds[i, j]:.{score_decimals}f}'
+            table.loc[d, e] += f' ({ranks[i, j]:.{rank_decimals}f})'
+    table.loc['rank mean'] = np.around(np.mean(ranks, axis=0), decimals=score_decimals)
+    return table
+
+
+def hypotheses_table(samples, models, alpha=0.05, multitest=None,
+                     test='wilcoxon', correction=None, multitest_args=dict(),
+                     test_args=dict()):
+    """ Hypotheses table.
+
+    Prints a hypothesis table with a selected test and correction.
+
+    Parameters
+    ----------
+    samples: array-like
+        Matrix of samples where each column represent a model.
+    models: array-like
+        Model names.
+    alpha: float in [0, 1], default=0.05
+        Significance level.
+    multitest: {'kruskal', 'friedmanchisquare'}
+               default=None
+        Ranking multitest used.
+    test: {'mannwhitneyu', 'wilcoxon'},
+          default='wilcoxon'
+        Ranking test used.
+    correction: {'bonferroni', 'sidak', 'holm-sidak', 'holm',
+                 'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by', 'fdr_tsbh',
+                 'fdr_tsbky'},
+                default=None
+        Method used to adjust the p-values.
+    multitest_args: dict
+        Optional ranking test arguments.
+    test_args: dict
+        Optional ranking test arguments.
+
+    Returns
+    -------
+    multitest_table: array-like
+        Table of p-value and rejection/non-rejection for the
+        multitest hypothesis.
+    test_table: array-like
+        Table of p-values and rejection/non-rejection for each test
+        hypothesis.
+    """
+    versus = list(it.combinations(range(len(models)), 2))
+    comparisons = [models[vs[0]] + " vs " + models[vs[1]] for vs in versus]
+    multitests = {'kruskal': kruskal, 'friedmanchisquare': friedmanchisquare}
+    tests = {'mannwhitneyu': mannwhitneyu, 'wilcoxon': wilcoxon}
+    multitest_table = None
+    if multitest is not None:
+        multitest_table = pd.DataFrame(index=[multitest], columns=['p-value',
+                                                                   'Hypothesis'])
+        statistic, pvalue = multitests[multitest](*samples, **multitest_args)
+        reject = 'Rejected' if pvalue <= alpha else 'Not rejected'
+        multitest_table.loc[multitest] = ['{0:.2f}'.format(pvalue), reject]
+        if pvalue > alpha:
+            return multitest_table, None
+    pvalues = [tests[test](samples[:, vs[0]], samples[:, vs[1]], **test_args)[1] for vs in versus]
+    if correction is not None:
+        reject, pvalues, alphac_sidak, alphac_bonf = multipletests(pvalues,
+                                                                   alpha,
+                                                                   method=correction)
+    else:
+        reject = ['Rejected' if pvalue <= alpha else 'Not rejected' for pvalue in pvalues]
+    test_table = pd.DataFrame(index=comparisons, columns=['p-value',
+                                                          'Hypothesis'])
+    for i, d in enumerate(comparisons):
+        test_table.loc[d] = ['{0:.2f}'.format(pvalues[i]), reject[i]]
+    return multitest_table, test_table
diff --git a/skdatasets/utils/validation.ipynb b/skdatasets/utils/validation.ipynb
deleted file mode 100644
index db00883..0000000
--- a/skdatasets/utils/validation.ipynb
+++ /dev/null
@@ -1,276 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "import numpy as np\n",
-    "import pandas as pd\n",
-    "import seaborn as sns\n",
-    "from sklearn.utils.multiclass import type_of_target\n",
-    "\n",
-    "\n",
-    "sns.set(style=\"white\", palette=\"muted\", color_codes=True)\n",
-    "\n",
-    "\n",
-    "def scatter_plot(X, y, estimator, image_file='scatter.pdf', max_features=10,\n",
-    "                 max_data=200, **kwargs):\n",
-    "    \"\"\" Scatter plot.\n",
-    "\n",
-    "    Scatter plot of the transformations or/and predictions of the estimator.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    X : array-like, shape (n_samples, features_shape)\n",
-    "        Input data.\n",
-    "    y : numpy array of shape [n_samples]\n",
-    "        Target values.\n",
-    "    estimator : estimator\n",
-    "        Fitted sklearn Transformer/Predictor object.\n",
-    "    image_file: string, default=...\n",
-    "        ...\n",
-    "    max_features : integer, default=10\n",
-    "        Maximum number of features to use in the plot\n",
-    "    max_data : integer, default=200\n",
-    "        Maximum number of data to use in the plot\n",
-    "    **kwargs : optional savefig named args\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    List of image filenames.\n",
-    "    \"\"\"\n",
-    "    image_files = list()\n",
-    "    max_data = min(X.shape[0], max_data)\n",
-    "    max_features = min(X.shape[1], max_features)\n",
-    "    target_type = type_of_target(y)\n",
-    "    X = X[:max_data]\n",
-    "    y = y[:max_data]\n",
-    "    if hasattr(estimator, 'transform'):\n",
-    "        # Transformer\n",
-    "        plt.figure()\n",
-    "        transfs = estimator.transform(X)\n",
-    "        transfs = transfs[:max_data, :max_features]\n",
-    "        if target_type in ('binary', 'multiclass'):\n",
-    "            # Classification/clustering\n",
-    "            names = list(range(transfs.shape[1]))\n",
-    "            names.append('class')\n",
-    "            data = pd.DataFrame(data=np.append(transfs,\n",
-    "                                               np.reshape(y, (len(y), 1)),\n",
-    "                                               axis=1),\n",
-    "                                columns=names)\n",
-    "            sns.set()\n",
-    "            sns.pairplot(data, hue='class', x_vars=names[:-1],\n",
-    "                         y_vars=names[:-1])\n",
-    "        elif target_type == 'continuous':\n",
-    "            # Regression\n",
-    "            names = list(range(transfs.shape[1]))\n",
-    "            names.append('y')\n",
-    "            data = pd.DataFrame(data=np.append(transfs,\n",
-    "                                               np.reshape(y, (-1, 1)),\n",
-    "                                               axis=1),\n",
-    "                                columns=names)\n",
-    "            sns.set()\n",
-    "            sns.pairplot(data, hue='y', x_vars=names[:-1],\n",
-    "                         y_vars=names[:-1])\n",
-    "            pass\n",
-    "        transformer_image_file = 'transformer_' + image_file\n",
-    "        plt.savefig(transformer_image_file, **kwargs)\n",
-    "        image_files.append(transformer_image_file)\n",
-    "    if hasattr(estimator, 'predict'):\n",
-    "        # Predictor\n",
-    "        plt.figure()\n",
-    "        preds = estimator.predict(X)\n",
-    "        try:\n",
-    "            X = X[:, :max_features]\n",
-    "        except:\n",
-    "            X = X\n",
-    "        try:\n",
-    "            X = X.A\n",
-    "        except:\n",
-    "            X = X\n",
-    "        preds = preds[:max_data]\n",
-    "        if target_type in ('binary', 'multiclass'):\n",
-    "            # Classification/clustering\n",
-    "            names = list(range(X.shape[1]))\n",
-    "            names.append('class')\n",
-    "            diffs = y.flatten()\n",
-    "            diffs[y.flatten() != preds.flatten()] = -1\n",
-    "            data = pd.DataFrame(data=np.hstack((X, np.reshape(diffs, (-1, 1)))),\n",
-    "                                columns=names)\n",
-    "            sns.set()\n",
-    "            sns.pairplot(data, hue='class', x_vars=names[:-1],\n",
-    "                         y_vars=names[:-1])\n",
-    "        elif target_type == 'continuous':\n",
-    "            # Regression\n",
-    "            data = pd.DataFrame(data=np.hstack((np.reshape(y, (-1, 1)),\n",
-    "                                                np.reshape(preds, (-1, 1)),\n",
-    "                                                np.reshape(y - preds, (-1, 1)))),\n",
-    "                                columns=('y', 'preds', 'error'))\n",
-    "            sns.set()\n",
-    "            sns.scatterplot(x='y', y='preds', hue='error', data=data)\n",
-    "        predictor_image_file = 'predictor_' + image_file\n",
-    "        plt.savefig(predictor_image_file, **kwargs)\n",
-    "        image_files.append(predictor_image_file)\n",
-    "    return image_files\n",
-    "\n",
-    "\n",
-    "def metaparameter_plot(estimator, image_file='metaparameter.pdf', **kwargs):\n",
-    "    \"\"\" Metaparameter plot.\n",
-    "\n",
-    "    Train and test metric plotted along a meta-parameter search space.\n",
-    "\n",
-    "    Parameters\n",
-    "    ----------\n",
-    "    estimator : estimator\n",
-    "        Fitted sklearn SearchCV object.\n",
-    "    image_file: string, default=...\n",
-    "        ...\n",
-    "    **kwargs : optional savefig named args\n",
-    "\n",
-    "    Returns\n",
-    "    -------\n",
-    "    List of image filenames.\n",
-    "    \"\"\"\n",
-    "    image_files = list()\n",
-    "    if hasattr(estimator, 'cv_results_'):\n",
-    "        for k, v in estimator.cv_results_.items():\n",
-    "            if k[:6] == 'param_':\n",
-    "                try:\n",
-    "                    param_range = v.data.astype('float32')\n",
-    "                except:\n",
-    "                    continue\n",
-    "                test_mean = estimator.cv_results_['mean_test_score']\n",
-    "                test_std = estimator.cv_results_['std_test_score']\n",
-    "                try:\n",
-    "                    train_mean = estimator.cv_results_['mean_train_score']\n",
-    "                    train_std = estimator.cv_results_['std_train_score']\n",
-    "                except:\n",
-    "                    pass\n",
-    "                plt.figure()\n",
-    "                plt.autoscale(enable=True, axis='x')\n",
-    "                plt.xlabel(k)\n",
-    "                plt.ylabel('score')\n",
-    "                plt.plot(param_range, test_mean, 'o', label='Test', color='g')\n",
-    "                plt.fill_between(param_range, test_mean - test_std,\n",
-    "                                 test_mean + test_std, alpha=0.2, color='g')\n",
-    "                plt.plot(param_range[estimator.best_index_],\n",
-    "                         test_mean[estimator.best_index_], 'o', label='Best',\n",
-    "                         color='r')\n",
-    "                try:\n",
-    "                    plt.plot(param_range, train_mean, 'o', label='Train',\n",
-    "                             color='b')\n",
-    "                    plt.fill_between(param_range, train_mean - train_std,\n",
-    "                                     train_mean + train_std, alpha=0.2,\n",
-    "                                     color='b')\n",
-    "                    plt.plot(param_range[estimator.best_index_],\n",
-    "                             train_mean[estimator.best_index_], 'o', color='r')\n",
- " except:\n", - " pass\n", - " plt.axvline(x=param_range[estimator.best_index_], color='r')\n", - " plt.legend(loc='best')\n", - " image_file = k + '_' + image_file\n", - " plt.savefig(image_file, **kwargs)\n", - " image_files.append(image_file)\n", - " return image_files\n", - "\n", - "\n", - "def history_plot(history, image_file='history.pdf', **kwargs):\n", - " \"\"\" History plot.\n", - "\n", - " Loss plotted for each training epoch.\n", - "\n", - " Parameters\n", - " ----------\n", - " history : history object\n", - " Keras-like history object returned from fit.\n", - " image_file: string, default=...\n", - " ...\n", - " **kwargs : optional savefig named args\n", - "\n", - " Returns\n", - " -------\n", - " None.\n", - " \"\"\"\n", - " image_file = None\n", - " plt.figure()\n", - " plt.xlabel('Epoch')\n", - " plt.ylabel('Loss')\n", - " for k, v in history.history.items():\n", - " plt.plot(v, label=k)\n", - " plt.legend(loc='best')\n", - " plt.savefig(image_file, **kwargs)\n", - " return image_file\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "plt.switch_backend('agg')\n", - "from sklearn.datasets import load_boston, load_iris\n", - "from sklearn.decomposition import PCA\n", - "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n", - "from sklearn.dummy import DummyRegressor\n", - "from sklearn.model_selection import GridSearchCV\n", - "\n", - "from skdatasets.utils._validation import scatter_plot, metaparameter_plot\n", - "\n", - "\n", - "def test_scatter_plot():\n", - " \"\"\"Tests scatter plot.\"\"\"\n", - " X, y = load_boston(return_X_y=True)\n", - " estimator = PCA(n_components=10)\n", - " estimator.fit(X, y)\n", - " image_files = scatter_plot(X, y, estimator)\n", - " assert len(image_files) == 1\n", - " estimator = DummyRegressor()\n", - " estimator.fit(X, y)\n", - " image_files = scatter_plot(X, y, estimator)\n", - " assert len(image_files) == 1\n", - " X, y = load_iris(return_X_y=True)\n", - " estimator = LinearDiscriminantAnalysis()\n", - " estimator.fit(X, y)\n", - " image_files = scatter_plot(X, y, estimator)\n", - " assert len(image_files) == 2\n", - "\n", - "\n", - "def test_metaparameter_plot():\n", - " \"\"\"Tests metaparameter plot.\"\"\"\n", - " X, y = load_boston(return_X_y=True)\n", - " estimator = GridSearchCV(DummyRegressor(),\n", - " {'strategy': ['mean', 'median', 'constant'],\n", - " 'constant': [1.0, 2.0, 3.0]})\n", - " estimator.fit(X, y)\n", - " image_files = metaparameter_plot(estimator)\n", - " assert len(image_files) == 1\n" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tests/__init__.py b/tests/__init__.py old mode 100755 new mode 100644 diff --git a/tests/repositories/__init__.py b/tests/repositories/__init__.py old mode 100755 new mode 100644 diff --git a/tests/utils/LinearRegression.json b/tests/utils/LinearRegression.json old mode 100755 new mode 100644 diff --git a/tests/utils/LinearRegressionCustom.json b/tests/utils/LinearRegressionCustom.json old mode 100755 new mode 100644 diff --git a/tests/utils/MLPClassifier.json 
old mode 100755
new mode 100644
diff --git a/tests/utils/MLPRegressor.json b/tests/utils/MLPRegressor.json
old mode 100755
new mode 100644
diff --git a/tests/utils/__init__.py b/tests/utils/__init__.py
old mode 100755
new mode 100644
diff --git a/tests/utils/test_scores.py b/tests/utils/test_scores.py
new file mode 100644
index 0000000..089e050
--- /dev/null
+++ b/tests/utils/test_scores.py
@@ -0,0 +1,45 @@
+"""
+@author: David Diaz Vico
+@license: MIT
+"""
+
+import numpy as np
+
+from skdatasets.utils.scores import scores_table, hypotheses_table
+
+
+datasets = ['a4a', 'a8a', 'combined', 'dna', 'ijcnn1', 'letter', 'pendigits',
+            'satimage', 'shuttle', 'usps', 'w7a', 'w8a']
+estimators = ['LogisticRegression', 'MLPClassifier0', 'MLPClassifier1',
+              'MLPClassifier2', 'MLPClassifier3', 'MLPClassifier4',
+              'MLPClassifier5']
+scores = np.asarray(((89.79, 89.78, 89.76, 89.88, 89.85, 89.91, 89.93),
+                     (90.73, 90.73, 90.73, 90.85, 90.83, 90.81, 90.80),
+                     (92.36, 92.31, 94.58, 94.82, 94.84, 94.92, 94.89),
+                     (99.28, 99.27, 99.28, 99.26, 99.27, 99.25, 99.25),
+                     (91.34, 91.34, 99.29, 99.33, 99.34, 99.53, 99.54),
+                     (98.07, 98.04, 99.94, 99.95, 99.96, 99.96, 99.95),
+                     (99.17, 99.08, 99.87, 99.87, 99.88, 99.90, 99.89),
+                     (96.67, 96.28, 98.84, 98.87, 98.90, 98.87, 98.92),
+                     (95.85, 92.83, 99.88, 99.93, 99.96, 99.98, 99.99),
+                     (99.12, 99.11, 99.65, 99.58, 99.58, 99.65, 99.60),
+                     (95.93, 95.40, 94.58, 96.31, 96.34, 96.58, 96.50),
+                     (95.80, 95.99, 95.35, 96.20, 96.22, 96.36, 96.71)))
+
+
+def test_scores_table():
+    """Tests scores table."""
+    scores_table(datasets, estimators, scores)
+    scores_table(datasets, estimators, scores, stds=scores/10.0)
+
+
+def test_hypotheses_table():
+    """Tests hypotheses table."""
+    for multitest in ('kruskal', 'friedmanchisquare', None):
+        for test in ('mannwhitneyu', 'wilcoxon'):
+            hypotheses_table(scores, estimators, multitest=multitest, test=test)
+            for correction in ('bonferroni', 'sidak', 'holm-sidak', 'holm',
+                               'simes-hochberg', 'hommel', 'fdr_bh', 'fdr_by',
+                               'fdr_tsbh', 'fdr_tsbky'):
+                hypotheses_table(scores, estimators, multitest=multitest,
+                                 test=test, correction=correction)
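
Usage sketch for the new public API introduced by this patch (not part of the diff above). The import path and call signatures come from skdatasets/utils/scores.py and tests/utils/test_scores.py as added here; the dataset names and score values below are made up for illustration.

import numpy as np

from skdatasets.utils.scores import scores_table, hypotheses_table

# Hypothetical accuracies: five datasets (rows) x three estimators (columns).
datasets = ['d1', 'd2', 'd3', 'd4', 'd5']
estimators = ['LogisticRegression', 'MLPClassifier', 'SVC']
scores = np.asarray(((89.7, 90.1, 88.9),
                     (95.5, 97.8, 96.2),
                     (92.1, 94.6, 93.3),
                     (98.0, 98.3, 98.1),
                     (85.4, 86.0, 85.9)))

# DataFrame of 'mean ±std (rank)' strings plus a final 'rank mean' row.
table = scores_table(datasets, estimators, scores, stds=scores / 100.0)
print(table)

# Friedman multitest first; the pairwise Wilcoxon tests are only run (and
# test_table is only non-None) if the multitest rejects at alpha=0.05.
multitest_table, test_table = hypotheses_table(scores, estimators,
                                               multitest='friedmanchisquare',
                                               test='wilcoxon')
print(multitest_table)
print(test_table)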
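One behavioral detail worth noting when reading hypotheses_table: if a correction is passed, reject comes straight from statsmodels' multipletests, so the Hypothesis column then holds booleans (True/False) rather than the 'Rejected'/'Not rejected' strings produced by the uncorrected branch. Callers that compare corrected and uncorrected runs should normalize those labels themselves.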