From 1e0dc41ed1a7346076f32741323049b631f93e6e Mon Sep 17 00:00:00 2001
From: Mikko Kotila
Date: Thu, 7 Mar 2019 17:24:45 +0200
Subject: [PATCH 01/21] Major overhaul of the codebase + ParamSpace API

This version introduces a self-contained ParamSpace API that gives Scan()
a single-line interface to qualified permutations. The change allows
significant streamlining of the main procedural code of Talos; these
simplifications are already reflected in this commit.

Other changes:

- a major overhaul of reducers; reduction strategies can now be very easily
  added in a single file/function
- moved all logging/results related code to /logging
- deals with #153
- major cleanup of Scan() arguments
- adds a reducer that takes in a metric, threshold, and loss flag,
  e.g. ['val_acc', 0.9, False], where the experiment ends once a model
  meets the given metric threshold
- /examples is now /templates
- many redundant functions and files were deleted
- ~100 lines of code were removed from the mainline code, which is now
  notably streamlined
- reduction no longer has separate prepare or finish steps
- learning entropy (metrics/entropy.py) is completely rewritten

There are many other changes, so do check it out for yourself.

NOTE: this version is still under testing.
---
 setup.py | 7 +-
 talos/__init__.py | 16 +-
 talos/commands/autom8.py | 3 -
 talos/commands/deploy.py | 18 +-
 talos/commands/evaluate.py | 27 ++-
 talos/commands/kerasmodel.py | 28 +--
 talos/commands/predict.py | 7 +-
 talos/commands/reporting.py | 51 ++--
 talos/commands/restore.py | 25 +-
 talos/logging/__init__.py | 0
 talos/logging/logging_finish.py | 9 +
 talos/logging/logging_run.py | 46 ++++
 talos/{utils => logging}/results.py | 58 +++--
 talos/metrics/entropy.py | 82 +++----
 talos/metrics/keras_metrics.py | 3 -
 talos/model/layers.py | 9 +-
 talos/parameters/ParamGrid.py | 144 ------------
 talos/parameters/ParamSpace.py | 259 +++++++++++++++++++++
 talos/parameters/round_params.py | 33 ---
 talos/reducers/correlation.py | 79 ++++---
 talos/reducers/limit_by_metric.py | 19 ++
 talos/reducers/reduce_finish.py | 19 --
 talos/reducers/reduce_prepare.py | 24 --
 talos/reducers/reduce_run.py | 76 ++++--
 talos/reducers/sample_reducer.py | 21 +-
 talos/scan/Scan.py | 44 ++--
 talos/scan/scan_addon.py | 22 +-
 talos/scan/scan_finish.py | 39 ++--
 talos/scan/scan_prepare.py | 66 +++---
 talos/scan/scan_round.py | 71 ++----
 talos/scan/scan_run.py | 38 +--
 talos/{examples => templates}/__init__.py | 0
 talos/{examples => templates}/datasets.py | 0
 talos/{examples => templates}/models.py | 9 +-
 talos/{examples => templates}/params.py | 0
 talos/{examples => templates}/pipelines.py | 0
 talos/utils/last_neuron.py | 16 --
 talos/utils/pred_class.py | 21 --
 talos/utils/validation_split.py | 17 +-
 test/core_tests/test_auto_scan.py | 3 +-
 test/core_tests/test_scan.py | 52 ++---
 41 files changed, 756 insertions(+), 705 deletions(-)
 create mode 100644 talos/logging/__init__.py
 create mode 100644 talos/logging/logging_finish.py
 create mode 100644 talos/logging/logging_run.py
 rename talos/{utils => logging}/results.py (55%)
 delete mode 100644 talos/parameters/ParamGrid.py
 create mode 100644 talos/parameters/ParamSpace.py
 delete mode 100644 talos/parameters/round_params.py
 create mode 100644 talos/reducers/limit_by_metric.py
 delete mode 100644 talos/reducers/reduce_finish.py
 delete mode 100644 talos/reducers/reduce_prepare.py
 rename talos/{examples => templates}/__init__.py (100%)
 rename talos/{examples => templates}/datasets.py (100%)
 rename talos/{examples => templates}/models.py
(97%) rename talos/{examples => templates}/params.py (100%) rename talos/{examples => templates}/pipelines.py (100%) delete mode 100644 talos/utils/last_neuron.py delete mode 100644 talos/utils/pred_class.py diff --git a/setup.py b/setup.py index 7ce22d89..85c86bbd 100755 --- a/setup.py +++ b/setup.py @@ -16,7 +16,7 @@ URL = 'http://autonom.io' LICENSE = 'MIT' DOWNLOAD_URL = 'https://github.com/autonomio/talos/' -VERSION = '0.5.0' +VERSION = '0.6.0' try: from setuptools import setup @@ -52,13 +52,14 @@ install_requires=install_requires, packages=['talos', 'talos.scan', - 'talos.examples', + 'talos.templates', 'talos.utils', 'talos.model', 'talos.parameters', 'talos.reducers', 'talos.metrics', - 'talos.commands'], + 'talos.commands', + 'talos.logging'], classifiers=['Intended Audience :: Science/Research', 'Programming Language :: Python :: 2.7', diff --git a/talos/__init__.py b/talos/__init__.py index 1df2c8e0..265d353f 100755 --- a/talos/__init__.py +++ b/talos/__init__.py @@ -9,18 +9,10 @@ from .commands.params import Params from .commands.kerasmodel import KerasModel from . import utils -from . import examples as templates +from . import templates # the purpose of everything below is to keep the namespace completely clean -del_from_utils = ['best_model', 'connection_check', 'detector', - 'exceptions', 'last_neuron', 'load_model', 'validation_split', - 'pred_class', 'results', 'string_cols_to_numeric'] - -for key in del_from_utils: - if key.startswith('__') is False: - delattr(utils, key) - template_sub = [templates.datasets, templates.models, templates.params, @@ -35,7 +27,7 @@ if key not in keep_from_templates: delattr(sub, key) -del commands, parameters, scan, reducers, model, metrics, key, del_from_utils -del examples, sub, keep_from_templates, template_sub +del commands, scan, model, metrics, key +del sub, keep_from_templates, template_sub -__version__ = "0.5.0" +__version__ = "0.6.0" diff --git a/talos/commands/autom8.py b/talos/commands/autom8.py index 5256af77..258dfa7d 100644 --- a/talos/commands/autom8.py +++ b/talos/commands/autom8.py @@ -1,6 +1,3 @@ -from ..scan.Scan import Scan - - def Autom8(scan_object, x_val, y_val, diff --git a/talos/commands/deploy.py b/talos/commands/deploy.py index 1e515070..c51311a5 100644 --- a/talos/commands/deploy.py +++ b/talos/commands/deploy.py @@ -1,11 +1,3 @@ -import os -import pandas as pd -import shutil -import numpy as np - -from ..utils.best_model import best_model, activate_model - - class Deploy: '''Functionality for deploying a model to a filename''' @@ -32,6 +24,8 @@ def __init__(self, scan_object, model_name, metric='val_acc', asc=False): ''' + import os + self.scan_object = scan_object os.mkdir(model_name) self.path = model_name + '/' + model_name @@ -39,6 +33,8 @@ def __init__(self, scan_object, model_name, metric='val_acc', asc=False): self.metric = metric self.asc = asc self.data = scan_object.data + + from ..utils.best_model import best_model, activate_model self.best_model = best_model(scan_object, metric, asc) self.model = activate_model(scan_object, self.best_model) @@ -71,6 +67,8 @@ def save_details(self): def save_data(self): + import pandas as pd + x = pd.DataFrame(self.scan_object.x[:100]) y = pd.DataFrame(self.scan_object.y[:100]) @@ -83,6 +81,8 @@ def save_results(self): def save_params(self): + import numpy as np + np.save(self.path + '_params', self.scan_object.params) def save_readme(self): @@ -95,5 +95,7 @@ def save_readme(self): def package(self): + import shutil + shutil.make_archive(self.model_name, 'zip', 
self.model_name) shutil.rmtree(self.model_name) diff --git a/talos/commands/evaluate.py b/talos/commands/evaluate.py index 8d11e9cc..e5a8ddc1 100644 --- a/talos/commands/evaluate.py +++ b/talos/commands/evaluate.py @@ -1,10 +1,3 @@ -from sklearn.metrics import mean_absolute_error, f1_score -from numpy import mean, std - -from ..utils.validation_split import kfold -from ..utils.best_model import best_model, activate_model - - class Evaluate: '''Class for evaluating models based on the Scan() object''' @@ -58,12 +51,18 @@ def evaluate(self, x, y, ''' + import numpy as np + import sklearn as sk + out = [] if model_id is None: + from ..utils.best_model import best_model model_id = best_model(self.scan_object, metric, asc) + from ..utils.best_model import activate_model model = activate_model(self.scan_object, model_id) + from ..utils.validation_split import kfold kx, ky = kfold(x, y, folds, shuffle) for i in range(folds): @@ -72,25 +71,25 @@ def evaluate(self, x, y, if mode == 'binary': y_pred = y_pred >= .5 - scores = f1_score(y_pred, ky[i], average='binary') + scores = sk.metrics.f1_score(y_pred, ky[i], average='binary') elif mode == 'multi_class': y_pred = y_pred.argmax(axis=-1) - scores = f1_score(y_pred, ky[i], average='macro') + scores = sk.metrics.f1_score(y_pred, ky[i], average='macro') if mode == 'multi_label': y_pred = model.predict(kx[i]).argmax(axis=1) - scores = f1_score(y_pred, - ky[i].argmax(axis=1), - average='macro') + scores = sk.metrics.f1_score(y_pred, + ky[i].argmax(axis=1), + average='macro') elif mode == 'regression': y_pred = model.predict(kx[i]) - scores = mean_absolute_error(y_pred, ky[i]) + scores = sk.metrics.mean_absolute_error(y_pred, ky[i]) out.append(scores) if print_out is True: - print("mean : %.2f \n std : %.2f" % (mean(out), std(out))) + print("mean : %.2f \n std : %.2f" % (np.mean(out), np.std(out))) return out diff --git a/talos/commands/kerasmodel.py b/talos/commands/kerasmodel.py index fb48764a..563096f1 100644 --- a/talos/commands/kerasmodel.py +++ b/talos/commands/kerasmodel.py @@ -1,18 +1,3 @@ -import numpy as np - -from talos.model.layers import hidden_layers -from talos.model.normalizers import lr_normalizer - -from keras.models import Sequential -from keras.layers import Dropout, Flatten -from keras.layers import LSTM, Conv1D, SimpleRNN, Dense, Bidirectional - -try: - from wrangle.reshape_to_conv1d import reshape_to_conv1d as array_reshape_conv1d -except ImportError: - from wrangle import array_reshape_conv1d - - class KerasModel: def __init__(self): @@ -31,11 +16,18 @@ def __init__(self): def _create_input_model(self, x_train, y_train, x_val, y_val, params): + import numpy as np + import wrangle as wr + + from keras.models import Sequential + from keras.layers import Dropout, Flatten + from keras.layers import LSTM, Conv1D, SimpleRNN, Dense, Bidirectional + model = Sequential() if params['network'] != 'dense': - x_train = array_reshape_conv1d(x_train) - x_val = array_reshape_conv1d(x_val) + x_train = wr.array_reshape_conv1d(x_train) + x_val = wr.array_reshape_conv1d(x_val) if params['network'] == 'conv1d': model.add(Conv1D(params['first_neuron'], x_train.shape[1])) @@ -58,6 +50,7 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): model.add(Dropout(params['dropout'])) # add hidden layers to the model + from talos.model.layers import hidden_layers hidden_layers(model, params, 1) # output layer (this is scetchy) @@ -73,6 +66,7 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): 
activation=params['last_activation'])) # bundle the optimizer with learning rate changes + from talos.model.normalizers import lr_normalizer optimizer = params['optimizer'](lr=lr_normalizer(params['lr'], params['optimizer'])) diff --git a/talos/commands/predict.py b/talos/commands/predict.py index e46926e2..ff9d7275 100644 --- a/talos/commands/predict.py +++ b/talos/commands/predict.py @@ -1,6 +1,3 @@ -from ..utils.best_model import best_model, activate_model - - class Predict: '''Class for making predictions on the models that are stored @@ -19,8 +16,10 @@ def predict(self, x, model_id=None, metric='val_acc', asc=False): is not given, then best_model will be used.''' if model_id is None: + from ..utils.best_model import best_model model_id = best_model(self.scan_object, metric, asc) + from ..utils.best_model import activate_model model = activate_model(self.scan_object, model_id) return model.predict(x) @@ -31,8 +30,10 @@ def predict_classes(self, x, model_id=None, metric='val_acc', asc=False): is not given, then best_model will be used.''' if model_id is None: + from ..utils.best_model import best_model model_id = best_model(self.scan_object, metric, asc) + from ..utils.best_model import activate_model model = activate_model(self.scan_object, model_id) return model.predict_classes(x) diff --git a/talos/commands/reporting.py b/talos/commands/reporting.py index e7d1485e..41718428 100644 --- a/talos/commands/reporting.py +++ b/talos/commands/reporting.py @@ -1,11 +1,3 @@ -from pandas import read_csv -from ..utils.connection_check import is_connected -from ..metrics.names import metric_names - -if is_connected() is True: - from astetik import line, hist, corr, regs, bargrid, kde, box - - class Reporting: '''A suite of commands that are useful for analyzing the results @@ -18,8 +10,10 @@ def __init__(self, source=None): '''Takes as input a filename to the experiment log or the Scan object''' + import pandas as pd + if isinstance(source, str): - self.data = read_csv(source) + self.data = pd.read_csv(source) else: self.data = source.data @@ -53,6 +47,7 @@ def correlate(self, metric='val_acc'): '''Returns a correlation table against a given metric. 
Drops all other metrics and correlates against hyperparameters only.''' + from ..metrics.names import metric_names columns = [c for c in self.data.columns if c not in metric_names()] out = self.data[columns] out.insert(0, metric, self.data[metric]) @@ -70,8 +65,8 @@ def plot_line(self, metric='val_acc'): metric :: the metric to correlate against ''' - - return line(self.data, metric) + import astetik as ast + return ast.line(self.data, metric) def plot_hist(self, metric='val_acc', bins=10): @@ -83,8 +78,8 @@ def plot_hist(self, metric='val_acc', bins=10): bins :: number of bins to use in histogram ''' - - return hist(self.data, metric, bins=bins) + import astetik as ast + return ast.hist(self.data, metric, bins=bins) def plot_corr(self, metric='val_acc', color_grades=5): @@ -95,9 +90,11 @@ def plot_corr(self, metric='val_acc', color_grades=5): metric :: the metric to correlate against color_grades :: number of colors to use in heatmap''' + import astetik as ast + cols = self._cols(metric) - return corr(self.data[cols], color_grades=color_grades) + return ast.corr(self.data[cols], color_grades=color_grades) def plot_regs(self, x='val_acc', y='val_loss'): @@ -107,7 +104,9 @@ def plot_regs(self, x='val_acc', y='val_loss'): y = data for the y axis ''' - return regs(self.data, x, y) + import astetik as ast + + return ast.regs(self.data, x, y) def plot_box(self, x, y='val_acc', hue=None): @@ -117,26 +116,31 @@ def plot_box(self, x, y='val_acc', hue=None): y = data for the y axis hue = data for the hue separation ''' + import astetik as ast - return box(self.data, x, y, hue) + return ast.box(self.data, x, y, hue) def plot_bars(self, x, y, hue, col): '''A comparison plot with 4 axis''' - return bargrid(self.data, - x=x, - y=y, - hue=hue, - col=col, - col_wrap=4) + import astetik as ast + + return ast.bargrid(self.data, + x=x, + y=y, + hue=hue, + col=col, + col_wrap=4) def plot_kde(self, x='val_acc', y=None): '''Kernel Destiny Estimation type histogram with support for 1 or 2 axis of data''' - return kde(self.data, x, y) + import astetik as ast + + return ast.kde(self.data, x, y) def table(self, metric='val_acc', sort_by=None, ascending=False): @@ -181,6 +185,7 @@ def _cols(self, metric): '''Helper to remove other than desired metric from data table''' + from ..metrics.names import metric_names cols = [col for col in self.data.columns if col not in metric_names()] if isinstance(metric, list) is False: diff --git a/talos/commands/restore.py b/talos/commands/restore.py index 1b185791..738b4063 100644 --- a/talos/commands/restore.py +++ b/talos/commands/restore.py @@ -1,11 +1,3 @@ -from zipfile import ZipFile - -from pandas import read_csv -from numpy import load - -from talos.utils.load_model import load_model - - class Restore: '''Restores the scan_object that had been stored locally as a result @@ -21,6 +13,11 @@ class Restore: def __init__(self, path_to_zip): + from zipfile import ZipFile + + import pandas as pd + import numpy as np + # create paths self.path_to_zip = path_to_zip self.extract_to = path_to_zip.replace('.zip', '') @@ -33,22 +30,24 @@ def __init__(self, path_to_zip): z.extractall(self.extract_to) # add params dictionary - self.params = load(self.file_prefix + '_params.npy').item() + self.params = np.load(self.file_prefix + '_params.npy').item() # add experiment details - self.details = read_csv(self.file_prefix + '_details.txt', header=None) + self.details = pd.read_csv(self.file_prefix + '_details.txt', + header=None) # add x data sample - self.x = read_csv(self.file_prefix + 
'_x.csv', header=None) + self.x = pd.read_csv(self.file_prefix + '_x.csv', header=None) # add y data sample - self.y = read_csv(self.file_prefix + '_y.csv', header=None) + self.y = pd.read_csv(self.file_prefix + '_y.csv', header=None) # add model + from talos.utils.load_model import load_model self.model = load_model(self.file_prefix + '_model') # add results - self.results = read_csv(self.file_prefix + '_results.csv') + self.results = pd.read_csv(self.file_prefix + '_results.csv') self.results.drop('Unnamed: 0', axis=1, inplace=True) # clean up diff --git a/talos/logging/__init__.py b/talos/logging/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/talos/logging/logging_finish.py b/talos/logging/logging_finish.py new file mode 100644 index 00000000..e28c978a --- /dev/null +++ b/talos/logging/logging_finish.py @@ -0,0 +1,9 @@ +def logging_finish(self): + + from .results import result_todf, peak_epochs_todf + + # save the results + self = result_todf(self) + self.peak_epochs = peak_epochs_todf(self) + + return self diff --git a/talos/logging/logging_run.py b/talos/logging/logging_run.py new file mode 100644 index 00000000..12d9491e --- /dev/null +++ b/talos/logging/logging_run.py @@ -0,0 +1,46 @@ +def logging_run(self, round_start, start, model_history): + + import time + + # count the duration of the round + self._round_seconds = time.time() - start + + # set end time and log + round_end = time.strftime('%D-%H%M%S') + self.round_times.append([round_start, round_end, self._round_seconds]) + + # handle first round only things + if self.param_object.round_counter == 1: + + # capture the history keys for later + self._all_keys = list(model_history.history.keys()) + self._metric_keys = [k for k in self._all_keys if 'val_' not in k] + self._val_keys = [k for k in self._all_keys if 'val_' in k] + + # create a header column for output + from .results import create_header + _results_header = create_header(self) + self.result.append(_results_header) + + # save the results + from .results import save_result + save_result(self) + + # avoid doing this again + self.first_round = False + + # create log and other stats + from ..metrics.entropy import epoch_entropy + self.epoch_entropy.append(epoch_entropy(self, model_history.history)) + + # get round results to the results table and save it + from .results import run_round_results + _round_results = run_round_results(self, model_history) + + self.result.append(_round_results) + + from .results import save_result + save_result(self) + + # return the Scan() self + return self diff --git a/talos/utils/results.py b/talos/logging/results.py similarity index 55% rename from talos/utils/results.py rename to talos/logging/results.py index d1d9fe8c..a79f479e 100644 --- a/talos/utils/results.py +++ b/talos/logging/results.py @@ -1,34 +1,34 @@ -from numpy import array, argpartition, savetxt -from pandas import DataFrame +def create_header(self): + '''Called from logging/logging_run.py -def create_header(self, out): - - '''Creates the Header column - On the first round creates the header columns - for the experiment output log. + Creates a header to the results table on + the first round of the experiment before + logging any results. 
''' - _rr_out = [] - - _rr_out.append('round_epochs') - [_rr_out.append(i) for i in list(out.history.keys())] - [_rr_out.append(key) for key in self.params.keys()] + _results_header = [] - self.peak_epochs.append(list(out.history.keys())) + _results_header.append('round_epochs') + [_results_header.append(i) for i in self._all_keys] + [_results_header.append(key) for key in self.params.keys()] - return ",".join(str(i) for i in _rr_out) + return ",".join(str(i) for i in _results_header) def run_round_results(self, out): - '''THE MAIN FUNCTION FOR CREATING RESULTS FOR EACH ROUNDself. + '''Called from logging/logging_run.py + + THE MAIN FUNCTION FOR CREATING RESULTS FOR EACH ROUNDself. Takes in the history object from model.fit() and handles it. NOTE: The epoch level data will be dropped here each round. ''' + import numpy as np + _rr_out = [] self._round_epochs = len(list(out.history.values())[0]) @@ -39,20 +39,20 @@ def run_round_results(self, out): # iterates through the keys and records last or peak for metrics for key in out.history.keys(): - t_t = array(out.history[key]) + t_t = np.array(out.history[key]) # this handles metrics (NOTE: 'acc' have to be in metric name) if 'acc' in key: - best_epoch = argpartition(t_t, len(t_t) - 1)[-1] + best_epoch = np.argpartition(t_t, len(t_t) - 1)[-1] # this handles losses (takes minimum value epoch) else: - best_epoch = argpartition(t_t, 0)[0] + best_epoch = np.argpartition(t_t, 0)[0] if self.last_epoch_value: value_to_report = out.history[key][-1] else: - value_to_report = array(out.history[key])[best_epoch] + value_to_report = np.array(out.history[key])[best_epoch] _rr_out.append(value_to_report) p_epochs.append(best_epoch) @@ -67,18 +67,24 @@ def run_round_results(self, out): def save_result(self): + '''SAVES THE RESULTS/PARAMETERS TO A CSV SPECIFIC TO THE EXPERIMENT''' - savetxt(self.experiment_name + '.csv', - self.result, - fmt='%s', - delimiter=',') + import numpy as np + + np.savetxt(self.experiment_name + '.csv', + self.result, + fmt='%s', + delimiter=',') def result_todf(self): + '''ADDS A DATAFRAME VERSION OF THE RESULTS TO THE CLASS OBJECT''' - self.result = DataFrame(self.result) + import pandas as pd + + self.result = pd.DataFrame(self.result) self.result.columns = self.result.iloc[0] self.result = self.result.drop(0) @@ -87,4 +93,6 @@ def result_todf(self): def peak_epochs_todf(self): - return DataFrame(self.peak_epochs, columns=self.peak_epochs[0]).drop(0) + import pandas as pd + + return pd.DataFrame(self.peak_epochs, columns=self.peak_epochs[0]).drop(0) diff --git a/talos/metrics/entropy.py b/talos/metrics/entropy.py index 08dfbd7f..80393595 100644 --- a/talos/metrics/entropy.py +++ b/talos/metrics/entropy.py @@ -1,54 +1,46 @@ -from scipy.stats import entropy -from numpy import nan +def epoch_entropy(self, history): + '''Called from logging/logging_run.py -def epoch_entropy(history): + Computes the entropy for epoch metric + variation. If validation is on, + then returns KL divergence instead of + simple Shannon entropy. When Keras + validation_freq is on, Shannon entropy + is returned. Basically, all experiments + should use validation, so Shannon is + provided mearly as a fallback. - '''MEASURE EPOCH ENTROPY + ''' - BINARY/CATEGORICAL: + import warnings + from scipy.stats import entropy - Measures the KL divergence of the acc and loss results - per epoch of a given permutation. + warnings.simplefilter('ignore') - CONTINUOUS: + out = [] - Measures shannon entropy for loss. 
+ # set the default entropy mode to shannon + mode = 'shannon' - # TODO Right now this does not handle all cases well and needs - to be thought about properly. - ''' + # try to make sure each metric has validation + if len(self._metric_keys) == len(self._val_keys): + # make sure that the length of the arrays are same + for i in range(len(self._metric_keys)): + if len(history[self._metric_keys[i]]) == len(history[self._val_keys[i]]): + mode = 'kl_divergence' + else: + break + + # handle the case where only shannon entropy can be used + if mode == 'shannon': + for i in range(len(self._metric_keys)): + out.append(entropy(history[self._metric_keys[i]])) + + # handle the case where kl divergence can be used + elif mode == 'kl_divergence': + for i in range(len(self._metric_keys)): + out.append(entropy(history[self._val_keys[i]], + history[self._metric_keys[i]])) - keys = list(history.history.keys()) - no_of_items = len(keys) - - if no_of_items == 1: - if 'loss' in keys: - loss_entropy = entropy(history.history['loss']) - acc_entropy = nan - else: - loss_entropy = nan - acc_entropy = nan - - elif no_of_items == 2: - if 'acc' in keys and 'loss' in keys: - loss_entropy = entropy(history.history['loss']) - acc_entropy = entropy(history.history['acc']) - else: - loss_entropy = nan - acc_entropy = nan - - elif no_of_items >= 4: - if 'acc' in keys: - acc_entropy = entropy(history.history['val_acc'], - history.history['acc']) - else: - acc_entropy = nan - - if 'loss' in keys: - loss_entropy = entropy(history.history['val_loss'], - history.history['loss']) - else: - loss_entropy = nan - - return [acc_entropy, loss_entropy] + return out diff --git a/talos/metrics/keras_metrics.py b/talos/metrics/keras_metrics.py index 5ce7f32a..65b4fb00 100644 --- a/talos/metrics/keras_metrics.py +++ b/talos/metrics/keras_metrics.py @@ -1,6 +1,3 @@ - - - def root_mean_squared_error(y_true, y_pred): from keras import backend as K return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) diff --git a/talos/model/layers.py b/talos/model/layers.py index 6577ce45..1349671b 100644 --- a/talos/model/layers.py +++ b/talos/model/layers.py @@ -1,8 +1,3 @@ -from keras.layers import Dense, Dropout -from .network_shape import network_shape -from ..utils.exceptions import TalosParamsError - - def hidden_layers(model, params, last_neuron): '''HIDDEN LAYER Generator @@ -15,6 +10,10 @@ def hidden_layers(model, params, last_neuron): Handles things in a way where any number of layers can be tried with matching hyperparameters.''' + from keras.layers import Dense, Dropout + from .network_shape import network_shape + from ..utils.exceptions import TalosParamsError + try: kernel_initializer = params['kernel_initializer'] except KeyError: diff --git a/talos/parameters/ParamGrid.py b/talos/parameters/ParamGrid.py deleted file mode 100644 index f3958749..00000000 --- a/talos/parameters/ParamGrid.py +++ /dev/null @@ -1,144 +0,0 @@ -import numpy as np - -from ..reducers.sample_reducer import sample_reducer -from ..reducers.permutation_filter import permutation_filter - - -class ParamGrid: - - '''Suite for handling parameters internally within Talos - - Takes as input the parameter dictionary from the user, and - returns a class object which can then be used to pick parameters - for each round together with other parameter related operations. 
- - ''' - - def __init__(self, main_self): - - self.main_self = main_self - - # creates a reference dictionary for column number to label - self.param_reference = {} - for i, col in enumerate(self.main_self.params.keys()): - self.param_reference[col] = i - - # convert the input to useful format - self._p = self._param_input_conversion() - - # create a list of lists, each list being a parameter sequence - ls = [list(self._p[key]) for key in self._p.keys()] - - # get the number of total dimensions / permutations - virtual_grid_size = 1 - for l in ls: - virtual_grid_size *= len(l) - final_grid_size = virtual_grid_size - - # calculate the size of the downsample - if self.main_self.grid_downsample is not None: - final_grid_size = int(virtual_grid_size * self.main_self.grid_downsample) - - # take round_limit into account - if self.main_self.round_limit is not None: - final_grid_size = min(final_grid_size, self.main_self.round_limit) - - # create the params grid - self.param_grid = self._create_param_grid(ls, - final_grid_size, - virtual_grid_size) - - # handle the case where permutation filter is provided - if self.main_self.permutation_filter is not None: - self = permutation_filter(self, - ls, - final_grid_size, - virtual_grid_size) - - # initialize with random shuffle if needed - if self.main_self.shuffle: - np.random.shuffle(self.param_grid) - - # create a index for logging purpose - self.param_log = list(range(len(self.param_grid))) - - # add the log index to param grid - self.param_grid = np.column_stack((self.param_grid, self.param_log)) - - def _create_param_grid(self, ls, final_grid_size, virtual_grid_size): - - # select permutations according to downsample - if final_grid_size < virtual_grid_size: - out = sample_reducer(self, final_grid_size, virtual_grid_size) - else: - out = range(0, final_grid_size) - - # build the parameter permutation grid - param_grid = self._create_param_permutations(ls, out) - - return param_grid - - def _create_param_permutations(self, ls, permutation_index): - - '''Expand params dictionary to permutations - - Takes the input params dictionary and expands it to - actual parameter permutations for the experiment. - ''' - - final_grid = [] - for i in permutation_index: - p = [] - for l in reversed(ls): - i, s = divmod(int(i), len(l)) - p.insert(0, l[s]) - final_grid.append(tuple(p)) - - _param_grid_out = np.array(final_grid, dtype='object') - - return _param_grid_out - - def _param_input_conversion(self): - - '''DETECT PARAM FORMAT - - Checks of the hyperparameter input format is list - or tupple in the params dictionary and expands accordingly. - - ''' - - out = {} - - for param in self.main_self.params.keys(): - - # for range/step style input - if isinstance(self.main_self.params[param], tuple): - out[param] = self._param_range(self.main_self.params[param][0], - self.main_self.params[param][1], - self.main_self.params[param][2]) - # all other input styles - else: - out[param] = self.main_self.params[param] - - return out - - def _param_range(self, start, end, n): - - '''Deal with ranged inputs in params dictionary - - A helper function to handle the cases where params - dictionary input is in the format (start, end, steps) - and is called internally through ParamGrid(). 
- ''' - - try: - out = np.arange(start, end, (end - start) / n, dtype=float) - # this is for python2 - except ZeroDivisionError: - out = np.arange(start, end, (end - start) / float(n), dtype=float) - - if type(start) == int and type(end) == int: - out = out.astype(int) - out = np.unique(out) - - return out diff --git a/talos/parameters/ParamSpace.py b/talos/parameters/ParamSpace.py new file mode 100644 index 00000000..37279ad4 --- /dev/null +++ b/talos/parameters/ParamSpace.py @@ -0,0 +1,259 @@ +import inspect + +import numpy as np +import itertools as it +from datetime import datetime + + +class ParamSpace: + + def __init__(self, + params, + random_method='uniform_mersenne', + fraction_limit=None, + round_limit=None, + time_limit=None, + boolean_limit=None): + + # set all the arguments + self.params = params + self.fraction_limit = fraction_limit + self.round_limit = round_limit + self.time_limit = time_limit + self.boolean_limit = boolean_limit + self.random_method = random_method + + # set a counter + self.round_counter = 0 + + # capture the parameter names for columns later + self.column_names = self._generate_column_names() + + # handle tuple conversion to discrete values + self.p = self._param_input_conversion() + + # create list of list from the params dictionary + self._params_temp = [list(self.p[key]) for key in self.p.keys()] + + # establish max dimensions + self.dimensions = np.prod([len(l) for l in self._params_temp]) + + # apply all the set limits + self.param_index = self._param_apply_limits() + + # create the parameter space + self.param_space = self._param_space_creation() + + # handle the boolean limits separately + if self.boolean_limit is not None: + index = self._convert_lambda(self.boolean_limit)(self.param_space) + self.param_space = self.param_space[index] + + # reset index + self.param_index = list(range(len(self.param_index))) + + def _generate_column_names(self): + + '''Used for storing the corresponding + column names based on the input parameters. + Returns a dictionary where label is paramater + name and value is integer index.''' + + out = [] + + for col in self.params.keys(): + out.append(col) + + return out + + def _param_input_conversion(self): + + '''Parameters may be input as lists of single or + multiple values (discrete values) or tuples + (range of values). This helper checks the format of + each input and handles it accordingly.''' + + out = {} + + # go through each parameter type + for param in self.params.keys(): + + # deal with range (tuple) values + if isinstance(self.params[param], tuple): + out[param] = self._param_range_expansion(self.params[param]) + + # deal with range (list) values + elif isinstance(self.params[param], list): + out[param] = self.params[param] + + return out + + def _param_apply_limits(self): + + from talos.reducers.sample_reducer import sample_reducer + + if self.boolean_limit is not None: + # NOTE: this is handled in __init__ + pass + + # a time limit is set + if self.time_limit is not None: + # NOTE: this is handled in _time_left + pass + + # a fractional limit is set + if self.fraction_limit is not None: + return sample_reducer(self.fraction_limit, + self.dimensions, + self.random_method) + + # a round limit is set + if self.round_limit is not None: + return sample_reducer(self.round_limit, + self.dimensions, + self.random_method) + + # no limits are set + return list(range(self.dimensions)) + + def _param_range_expansion(self, param_values): + + '''Expands a range (tuple) input into discrete + values. 
Helper for _param_input_conversion. + Expects to have a input as (start, end, steps). + ''' + + start = param_values[0] + end = param_values[1] + steps = param_values[2] + + out = np.arange(start, end, (end - start) / steps, dtype=float) + + # inputs are all ints + if isinstance(start, int) and isinstance(end, int): + out = out.astype(int) + out = np.unique(out) + + return out + + def _param_space_creation(self): + + '''Expand params dictionary to permutations + + Takes the input params dictionary and expands it to + actual parameter permutations for the experiment. + ''' + + # handle the cases where parameter space is still large + if len(self.param_index) > 100000: + + final_grid = list(it.product(*self._params_temp)) + out = np.array(final_grid, dtype='object') + + # handle the cases where parameter space is already smaller + else: + final_grid = [] + for i in self.param_index: + p = [] + for l in reversed(self._params_temp): + i, s = divmod(int(i), len(l)) + p.insert(0, l[s]) + final_grid.append(tuple(p)) + + out = np.array(final_grid, dtype='object') + + return out + + def _check_time_limit(self): + + if self.time_limit is None: + return True + + stop = datetime.strptime(self.time_limit, "%Y-%m-%d %H:%M") + + return stop > datetime.now() + + def round_parameters(self): + + if len(self.param_index) > 0: + if self._check_time_limit(): + self.round_counter += 1 + index = self.param_index.pop(0) + values = self.param_space[index] + return self._round_parameters_todict(values) + + return False + + def _round_parameters_todict(self, values): + + round_param_dict = {} + + for i, value in enumerate(values): + key = self.column_names[i] + round_param_dict[key] = value + + return round_param_dict + + def _convert_lambda(self, fn): + + '''Converts a lambda function into a format + where parameter labels are changed to the column + indexes in parameter space.''' + + # get the source code for the lambda function + fn_string = inspect.getsource(fn) + fn_string = fn_string.replace('"', '\'') + + # look for column/label names + for i, name in enumerate(self.column_names): + index = ':,' + str(i) + fn_string = fn_string.replace(name, index) + + # cleanup the string + fn_string = fn_string.split('lambda')[1] + fn_string = fn_string.replace('[\':', '[:') + fn_string = fn_string.replace('\']', ']') + fn_string = 'lambda ' + fn_string + + # pass it back as a function + return eval(fn_string) + + def remove_is_not(self, label, value): + + '''Removes baesd on exact match but reversed''' + + col = self.column_names.index(label) + self.param_space = self.param_space[self.param_space[:, col] == value] + self.param_index = list(range(len(self.param_space))) + + def remove_is(self, label, value): + + '''Removes based on exact match''' + + col = self.column_names.index(label) + self.param_space = self.param_space[self.param_space[:, col] != value] + self.param_index = list(range(len(self.param_space))) + + def remove_ge(self, label, value): + + '''Removes based on greater-or-equal''' + + col = self.column_names.index(label) + self.param_space = self.param_space[self.param_space[:, col] >= value] + self.param_index = list(range(len(self.param_space))) + + def remove_le(self, label, value): + + '''Removes based on lesser-or-equal''' + + col = self.column_names.index(label) + self.param_space = self.param_space[self.param_space[:, col] <= value] + self.param_index = list(range(len(self.param_space))) + + def remove_lambda(self, function): + + '''Removes based on a lambda function''' + + index = 
self._convert_lambda(function)(self.param_space) + self.param_space = self.param_space[index] + self.param_index = list(range(len(self.param_space))) diff --git a/talos/parameters/round_params.py b/talos/parameters/round_params.py deleted file mode 100644 index 52d860fa..00000000 --- a/talos/parameters/round_params.py +++ /dev/null @@ -1,33 +0,0 @@ -from numpy import random - - -def create_params_dict(self, _choice): - _round_params_dict = {} - x = 0 - for key in self.param_reference.keys(): - _round_params_dict[key] = self.param_grid[_choice][x] - x += 1 - - return _round_params_dict - - -def round_params(self): - - '''Picks the paramaters for a round based on the available - paramater permutations using the param_log index''' - - # pick the permutation for the round - if self.search_method == 'random': - _choice = random.choice(self.param_log) - - elif self.search_method == 'linear': - _choice = min(self.param_log) - - elif self.search_method == 'reverse': - _choice = max(self.param_log) - - # remove the current choice from permutations - self.param_log.remove(_choice) - - # create a dictionary for the current round - return create_params_dict(self, _choice) diff --git a/talos/reducers/correlation.py b/talos/reducers/correlation.py index 86da4735..62764228 100644 --- a/talos/reducers/correlation.py +++ b/talos/reducers/correlation.py @@ -1,43 +1,66 @@ -import pandas as pd - - def correlation(self): - '''Correlation Reducers + '''This is called from reduce_run.py. - Note that this set of reducers works only for the continuous - and stepped (e.g. batch size) hyperparameters. + Performs a spearman rank order correlation + based reduction. First looks for a parameter + that correlates with reduction_metric and + correlation meets reduction_threshold and + then converts the match parameter into + a 2d multilabel shape. Then new correlation + against reduction_metric is performed to identify + which particular value is to be dropped. 
''' + import pandas as pd + import wrangle as wr + + data = pd.read_csv(self.experiment_name + '.csv') + data = data[[self.reduction_metric] + self.param_object.column_names] + + corr = data.copy(deep=True) - out = self.param_table.corr(method='spearman')[self.reduction_metric] - out = out.dropna() + # drop the row for reduction metric and sort + corr = corr.dropna() + corr = data.corr('spearman') + corr = corr[self.reduction_metric] + corr = corr.apply(abs)[1:] + corr = corr.sort_values(ascending=False) - if len(out) <= 1: + # check if reduction threshold is met: + if corr.values[0] <= self.reduction_threshold is self.minimize_loss: + return False + + # filter out where only one value is present + if len(corr) <= 1: self._reduce_keys = None - return self + return False + + label = corr.index.values[0] + if label not in self.param_object.column_names: + return False + + # convert parameter values to multilabel (2d) + corr = wr.col_to_multilabel(data[[label]], label) - out = out[1:].sort_values(ascending=False) - out = out.index[-1], out[-1] + # combine the reduction_metric with the multilabel data + corr = wr.df_merge(corr, data[self.reduction_metric]) - if abs(out[1]) >= self.reduction_threshold: - dummy_cols = pd.get_dummies(self.param_table[out[0]]) - dummy_cols.insert(0, - self.reduction_metric, - self.param_table[self.reduction_metric]) + # repeate same as above + corr = corr.corr('spearman') + corr = corr[self.reduction_metric] + corr = corr.apply(abs)[1:] + corr = corr.sort_values(ascending=False) - # case where threshold is not met - else: + if len(corr) <= 1: self._reduce_keys = None - return self + return False - # all other cases continue - to_drop_temp = dummy_cols.corr(method='spearman')[self.reduction_metric] + label = corr.index.values[0] + if label not in self.param_object.column_names: + return False - # pick the drop method based on paramaters - if self.reduce_loss is False: - self._reduce_keys = to_drop_temp.sort_values().index[0], out[0] - else: - self._reduce_keys = to_drop_temp.sort_values().index[-2], out[0] + value = corr.values[0] + label = corr.index[0] - return self + return value, label diff --git a/talos/reducers/limit_by_metric.py b/talos/reducers/limit_by_metric.py new file mode 100644 index 00000000..e4414fac --- /dev/null +++ b/talos/reducers/limit_by_metric.py @@ -0,0 +1,19 @@ +def limit_by_metric(self): + + '''Takes as input metric, threshold, and loss and + and returs a True if metric threshold have been + met and False if not. + + USE: space.check_metric(model_history) + ''' + + temp = self.performance_target + + metric = temp[0] + threshold = temp[1] + loss = temp[2] + + if loss is True: + return min(self.model_history[metric]) < threshold + elif loss is False: + return max(self.model_history[metric]) > threshold diff --git a/talos/reducers/reduce_finish.py b/talos/reducers/reduce_finish.py deleted file mode 100644 index c5970175..00000000 --- a/talos/reducers/reduce_finish.py +++ /dev/null @@ -1,19 +0,0 @@ -def reduce_finish(self): - - '''Takes input from a Reducer in form of a tuple - where the values the hyperparamater name and the - value to drop. 
Returns self with a modified param_log.''' - - # get the column index - - to_remove_col = self.param_reference[self._reduce_keys[1]] - - value_to_remove = self._reduce_keys[0] - - # pick the index numbers for dropping available permutations - indexs_to_drop = self.param_grid[self.param_grid[:, to_remove_col] == value_to_remove][:,-1] - - # drop the index numbers - self.param_log = list(set(self.param_log).difference(set(indexs_to_drop))) - - return self diff --git a/talos/reducers/reduce_prepare.py b/talos/reducers/reduce_prepare.py deleted file mode 100644 index 22e91904..00000000 --- a/talos/reducers/reduce_prepare.py +++ /dev/null @@ -1,24 +0,0 @@ -import pandas as pd - -from ..metrics.names import metric_names - - -def reduce_prepare(self): - - ''' - Preparation procedures for applying a reduction algorithm. - ''' - - # load the data from the experiment log - self.data = pd.read_csv(self.experiment_name + '.csv') - self.names = metric_names() - - # apply the lookback window - if self.reduction_window is not None: - self.data = self.data.tail(self.reduction_window) - - self.param_columns = [col for col in self.data.columns if col not in metric_names()] - self.param_table = self.data[self.param_columns] - self.param_table.insert(0, self.reduction_metric, self.data[self.reduction_metric]) - - return self diff --git a/talos/reducers/reduce_run.py b/talos/reducers/reduce_run.py index 0930effd..80d88db2 100644 --- a/talos/reducers/reduce_run.py +++ b/talos/reducers/reduce_run.py @@ -1,8 +1,3 @@ -from .reduce_prepare import reduce_prepare -from .reduce_finish import reduce_finish -from .correlation import correlation - - def reduce_run(self): '''The process run script for reduce @@ -10,21 +5,68 @@ def reduce_run(self): related with reduction. When new reduction methods are added, they need to be added as options here. + + To add new reducers, create a file in /reducers + which is where this file is located. In that file, + take as input self from Scan() and give as output + either False, which does nothing, or a tuple of + 'value' and 'label' where value is a parameter + value and label is parameter name. For example + batch_size and 128. Then add a reference to + reduce_run.py and make sure that you process + the self.param_object.param_index there before + wrapping up. + ''' - # prepare log for reduction analysis - self = reduce_prepare(self) + from .correlation import correlation + from .limit_by_metric import limit_by_metric - # run the selected reduction method - if self.reduction_method == 'correlation': - self = correlation(self) + # check if performance target is met + if self.performance_target is not None: + status = limit_by_metric(self) - # TODO: the case where reduction_method - # is not selected or is wrong could be - # handled better. 
+ # handle the case where performance target is met + if status is True: + self.param_object.param_index = [] - # handle the dropping of permutations - if self._reduce_keys is None: + # stop here is no reduction method is set + if self.reduction_method is None: return self - else: - return reduce_finish(self) + + # setup what's required for updating progress bar + left = (self.param_object.round_counter + 1) + right = self.reduction_interval + index_len = len(self.param_object.param_index) + len_before_reduce = index_len + + # apply window based reducers + if left % right == 0: + + # check if correlation reducer can do something + if self.reduction_method == 'correlation': + label_and_value = correlation(self) + + # check if random forrest can do something + if self.reduction_method == 'random_forrest': + pass + + # check if random forrest can do something + if self.reduction_method == 'extra_trees': + pass + + # check if monte carlo can do something + if self.reduction_method == 'monte_carlo': + pass + + # modify the parameter space accordingly + if label_and_value is not False: + self.param_object.remove_is(label_and_value[0], + label_and_value[1]) + + # finish up by updating progress bar + total_reduced = len_before_reduce - index_len + total_reduced = max(0, total_reduced) + self.pbar.update(total_reduced) + + return self diff --git a/talos/reducers/sample_reducer.py b/talos/reducers/sample_reducer.py index e5cde054..aa3199e4 100644 --- a/talos/reducers/sample_reducer.py +++ b/talos/reducers/sample_reducer.py @@ -1,9 +1,4 @@ -import chances - -from ..utils.exceptions import TalosDataError - - -def sample_reducer(self, length, max_value): +def sample_reducer(limit, max_value, random_method): '''Sample Reducer (Helper) @@ -23,17 +18,21 @@ def sample_reducer(self, length, max_value): ''' - random_method = self.main_self.random_method + import chances as ch # calculate the size of the downsample - n = int(max_value * self.main_self.grid_downsample) + if isinstance(limit, float): + n = int(max_value * limit) + if isinstance(limit, int): + n = limit # throw an error if + from ..utils.exceptions import TalosDataError if n < 1: - raise TalosDataError("No permutations in grid. Incease grid_downsample") + raise TalosDataError("Limiters lead to < 1 permutations.") # Initialize Randomizer() - r = chances.Randomizer(max_value, length) + r = ch.Randomizer(max_value, n) # use the user selected method if random_method == 'sobol': @@ -57,7 +56,7 @@ def sample_reducer(self, length, max_value): elif random_method == 'ambience': out = r.ambience() else: - print('check random_method, no eligble method found. Using uniform mersenne.') + print('No eligble random_method found. 
Using uniform_mersenne.') out = r.uniform_mersenne() return out diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py index e2d557cd..07a179a5 100755 --- a/talos/scan/Scan.py +++ b/talos/scan/Scan.py @@ -1,9 +1,3 @@ -from collections import OrderedDict - -from .scan_prepare import scan_prepare -from .scan_run import scan_run - - class Scan: """Hyperparamater scanning and optimization @@ -118,62 +112,60 @@ def model(): """ - # TODO: refactor this so that we don't initialize global variables global self def __init__(self, x, y, params, model, - dataset_name=None, - experiment_no=None, experiment_name=None, x_val=None, y_val=None, val_split=.3, - shuffle=True, + random_method='uniform_mersenne', + performance_target=None, + fraction_limit=None, round_limit=None, time_limit=None, - grid_downsample=1.0, - random_method='uniform_mersenne', - seed=None, - search_method='random', - permutation_filter=None, + boolean_limit=None, reduction_method=None, reduction_interval=50, reduction_window=20, reduction_threshold=0.2, reduction_metric='val_acc', - reduce_loss=False, + minimize_loss=False, + seed=None, last_epoch_value=False, clear_tf_session=True, disable_progress_bar=False, print_params=False, debug=False): - # NOTE: these need to be follow the order from __init__ - # and all paramaters needs to be included here and only here. + from collections import OrderedDict self.x = x self.y = y self.params = OrderedDict(params) self.model = model - self.dataset_name = dataset_name - self.experiment_no = experiment_no self.experiment_name = experiment_name self.x_val = x_val self.y_val = y_val self.val_split = val_split - self.shuffle = shuffle self.random_method = random_method - self.search_method = search_method + + # reducers + self.performance_target = performance_target + self.fraction_limit = fraction_limit self.round_limit = round_limit self.time_limit = time_limit - self.permutation_filter = permutation_filter + self.boolean_limit = boolean_limit + + # reduction related self.reduction_method = reduction_method self.reduction_interval = reduction_interval self.reduction_window = reduction_window - self.grid_downsample = grid_downsample self.reduction_threshold = reduction_threshold self.reduction_metric = reduction_metric - self.reduce_loss = reduce_loss + self.minimize_loss = minimize_loss + + # other self.debug = debug self.seed = seed self.clear_tf_session = clear_tf_session @@ -186,5 +178,5 @@ def __init__(self, x, y, params, model, def runtime(self): - self = scan_prepare(self) + from .scan_run import scan_run self = scan_run(self) diff --git a/talos/scan/scan_addon.py b/talos/scan/scan_addon.py index a90981d5..218c253b 100644 --- a/talos/scan/scan_addon.py +++ b/talos/scan/scan_addon.py @@ -1,15 +1,3 @@ -# for func_best_model -from ..utils.best_model import best_model, activate_model - -# for func_evaluate -import warnings -from tqdm import tqdm -from numpy import mean, std -import numpy as np - -from ..commands.evaluate import Evaluate - - def func_best_model(scan_object, metric='val_acc', asc=False): '''Picks the best model based on a given metric and @@ -17,8 +5,11 @@ def func_best_model(scan_object, metric='val_acc', asc=False): NOTE: for loss 'asc' should be True''' + import warnings as warnings + warnings.simplefilter('ignore') + from ..utils.best_model import best_model, activate_model model_no = best_model(scan_object, metric, asc) out = activate_model(scan_object, model_no) @@ -40,6 +31,9 @@ def func_evaluate(scan_object, adding them to the data frame. 
''' + import warnings as warnings + from tqdm import tqdm + import numpy as np warnings.simplefilter('ignore') @@ -55,8 +49,8 @@ def func_evaluate(scan_object, pbar = tqdm(total=data_len) + from ..commands.evaluate import Evaluate for i in range(len(scan_object.data)): - if i in list(picks): evaluate_object = Evaluate(scan_object) temp = evaluate_object.evaluate(x_val, y_val, @@ -65,7 +59,7 @@ def func_evaluate(scan_object, folds=folds, shuffle=shuffle, asc=asc) - out.append([mean(temp), std(temp)]) + out.append([np.mean(temp), np.std(temp)]) pbar.update(1) else: out.append([np.nan, np.nan]) diff --git a/talos/scan/scan_finish.py b/talos/scan/scan_finish.py index 7355ffbf..570142fd 100644 --- a/talos/scan/scan_finish.py +++ b/talos/scan/scan_finish.py @@ -1,29 +1,24 @@ -import time -from pandas import Series, DataFrame - -from ..scan.scan_addon import func_best_model, func_evaluate -from ..utils.string_cols_to_numeric import string_cols_to_numeric - +def scan_finish(self): -attrs_final = ['data', 'x', 'y', 'peak_epochs_df', 'round_times', - 'params', 'saved_models', 'saved_weights'] + attrs_final = ['data', 'x', 'y', 'learning_entropy', 'round_times', + 'params', 'saved_models', 'saved_weights', 'round_history'] -attrs_to_keep = attrs_final + ['random_method', 'grid_downsample', - 'reduction_interval', 'reduce_loss', - 'reduction_method', 'reduction_metric', - 'reduction_threshold', 'reduction_window', - 'experiment_name'] + attrs_to_keep = attrs_final + ['random_method', 'grid_downsample', + 'reduction_interval', 'reduce_loss', + 'reduction_method', 'reduction_metric', + 'reduction_threshold', 'reduction_window', + 'experiment_name', 'round_history'] - -def scan_finish(self): + import time + import pandas as pd # create a dataframe with permutation times - self.round_times = DataFrame(self.round_times) + self.round_times = pd.DataFrame(self.round_times) self.round_times.columns = ['start', 'end', 'duration'] - # combine entropy tables - self.peak_epochs_df['acc_epoch'] = [i[0] for i in self.epoch_entropy] - self.peak_epochs_df['loss_epoch'] = [i[1] for i in self.epoch_entropy] + # combine epoch entropy tables + self.learning_entropy = pd.DataFrame(self.epoch_entropy) + self.learning_entropy.columns = self._metric_keys # clean the results into a dataframe self.data = self.result[self.result.columns[0]].str.split(',', expand=True) @@ -58,9 +53,13 @@ def scan_finish(self): delattr(self, key) # add details dictionary as series - self.details = Series(out) + self.details = pd.Series(out) # add best_model + + from ..scan.scan_addon import func_best_model, func_evaluate + from ..utils.string_cols_to_numeric import string_cols_to_numeric + self.best_model = func_best_model.__get__(self) self.evaluate_models = func_evaluate.__get__(self) diff --git a/talos/scan/scan_prepare.py b/talos/scan/scan_prepare.py index eed4997a..9458a42f 100644 --- a/talos/scan/scan_prepare.py +++ b/talos/scan/scan_prepare.py @@ -1,35 +1,13 @@ -from time import strftime -from datetime import datetime - -from ..utils.validation_split import validation_split -from ..utils.detector import prediction_type -from ..parameters.ParamGrid import ParamGrid -from ..utils.pred_class import classify -from ..utils.last_neuron import last_neuron - - def scan_prepare(self): '''Includes all preparation procedures up until starting the first scan through scan_run()''' - # create the name for the experiment - if self.dataset_name is None: - self.dataset_name = strftime('%D%H%M%S').replace('/', '') - - if self.experiment_no is None: - 
self.experiment_no = '' + import time as ti + # create the name for the experiment if self.experiment_name is None: - self.experiment_name = self.dataset_name + '_' + self.experiment_no - - # handle the case where a time limit is set - if self.time_limit is not None: - self._stoptime = datetime.strptime(self.time_limit, - "%Y-%m-%d %H:%M") - - # create the round times list - self.round_times = [] + self.experiment_name = ti.strftime('%D%H%M%S').replace('/', '') # for the case where x_val or y_val is missing when other is present self.custom_val_split = False @@ -41,29 +19,37 @@ def scan_prepare(self): self.custom_val_split = True # create the paramater object and move to self - self.paramgrid_object = ParamGrid(self) - self.param_log = self.paramgrid_object.param_log - self.param_grid = self.paramgrid_object.param_grid - self.param_reference = self.paramgrid_object.param_reference - del self.paramgrid_object - - self.round_counter = 0 + from ..parameters.ParamSpace import ParamSpace + self.param_object = ParamSpace(params=self.params, + random_method=self.random_method, + fraction_limit=self.fraction_limit, + round_limit=self.round_limit, + time_limit=self.time_limit, + boolean_limit=self.boolean_limit + ) + + # create various stores + self.round_history = [] self.peak_epochs = [] self.epoch_entropy = [] - self.round_models = [] + self.round_times = [] + self.result = [] + self.saved_models = [] + self.saved_weights = [] # create the data asset self.y_max = self.y.max() + + # handle validation split + from ..utils.validation_split import validation_split self = validation_split(self) - self.shape = classify(self.y) - self.last_neuron = last_neuron(self) + # set data and len + self.shape = [self.x.shape, self.y.shape] self._data_len = len(self.x) - self = prediction_type(self) - self.result = [] - # model saving - self.saved_models = [] - self.saved_weights = [] + # infer prediction type + from ..utils.detector import prediction_type + self = prediction_type(self) return self diff --git a/talos/scan/scan_round.py b/talos/scan/scan_round.py index 138f350f..9addb535 100644 --- a/talos/scan/scan_round.py +++ b/talos/scan/scan_round.py @@ -1,73 +1,30 @@ -from time import strftime, time - -from keras import backend as K - -from ..parameters.round_params import round_params -from ..utils.results import create_header -from ..metrics.entropy import epoch_entropy -from ..model.ingest_model import ingest_model -from ..utils.results import run_round_results, save_result -from ..reducers.reduce_run import reduce_run -from ..utils.exceptions import TalosReturnError, TalosTypeError - - def scan_round(self): '''The main operational function that manages the experiment on the level of execution of each round.''' - # determine the parameters for the particular execution - self.round_params = round_params(self) + import time as ti # print round params if self.print_params is True: print(self.round_params) # set start time - round_start = strftime('%D-%H%M%S') - start = time() + round_start = ti.strftime('%D-%H%M%S') + start = ti.time() # fit the model - try: - _hr_out, self.keras_model = ingest_model(self) - except TypeError as err: - if err.args[0] == "unsupported operand type(s) for +: 'int' and 'numpy.str_'": - raise TalosTypeError("Activation should be as object and not string in params") - else: - print('ERROR MESSAGE : ' + err.args[0]) - raise TalosReturnError("Make sure that input model returns 'out, model' where out is history object from model.fit()") - - # count the duration of the round - 
self._round_seconds = time() - start + from ..model.ingest_model import ingest_model + self.model_history, self.keras_model = ingest_model(self) + self.round_history.append(self.model_history.history) - # set end time and log - round_end = strftime('%D-%H%M%S') - self.round_times.append([round_start, round_end, self._round_seconds]) + # handle logging of results + from ..logging.logging_run import logging_run + self = logging_run(self, round_start, start, self.model_history) - # create log and other stats - try: - self.epoch_entropy.append(epoch_entropy(_hr_out)) - except (TypeError, AttributeError): - raise TalosReturnError("Make sure that input model returns in the order 'out, model'") - - if self.round_counter == 0: - _for_header = create_header(self, _hr_out) - self.result.append(_for_header) - save_result(self) - - _hr_out = run_round_results(self, _hr_out) - - self.result.append(_hr_out) - save_result(self) - - # apply reduction - if self.reduction_method is not None: - if (self.round_counter + 1) % self.reduction_interval == 0: - len_before_reduce = len(self.param_log) - self = reduce_run(self) - total_reduced = len_before_reduce - len(self.param_log) - # update the progress bar - self.pbar.update(total_reduced) + # apply reductions + from ..reducers.reduce_run import reduce_run + self = reduce_run(self) # save model and weights self.saved_models.append(self.keras_model.to_json()) @@ -75,9 +32,7 @@ def scan_round(self): # clear tensorflow sessions if self.clear_tf_session is True: + from keras import backend as K K.clear_session() - # round is completed - self.round_counter += 1 - return self diff --git a/talos/scan/scan_run.py b/talos/scan/scan_run.py index 25520426..1d8e71d4 100644 --- a/talos/scan/scan_run.py +++ b/talos/scan/scan_run.py @@ -1,32 +1,34 @@ -from tqdm import tqdm -from datetime import datetime - -from ..utils.results import result_todf, peak_epochs_todf -from .scan_round import scan_round -from .scan_finish import scan_finish - - def scan_run(self): '''The high-level management of the scan procedures onwards from preparation. 
Manages round_run()''' + from tqdm import tqdm + + from .scan_prepare import scan_prepare + self = scan_prepare(self) + # initiate the progress bar - self.pbar = tqdm(total=len(self.param_log), + self.pbar = tqdm(total=len(self.param_object.param_index), disable=self.disable_progress_bar) - # start the main loop of the program - while len(self.param_log) != 0: + # the main cycle of the experiment + while True: + self.round_params = self.param_object.round_parameters() + # break when there is no more permutations left + if self.round_params is False: + break + # otherwise proceed with next permutation + from .scan_round import scan_round self = scan_round(self) self.pbar.update(1) - if self.time_limit is not None: - if datetime.now() > self._stoptime: - print("Time limit reached, experiment finished") - break + + # close progress bar before finishing self.pbar.close() - # save the results - self = result_todf(self) - self.peak_epochs_df = peak_epochs_todf(self) + # finish + from ..logging.logging_finish import logging_finish + self = logging_finish(self) + from .scan_finish import scan_finish self = scan_finish(self) diff --git a/talos/examples/__init__.py b/talos/templates/__init__.py similarity index 100% rename from talos/examples/__init__.py rename to talos/templates/__init__.py diff --git a/talos/examples/datasets.py b/talos/templates/datasets.py similarity index 100% rename from talos/examples/datasets.py rename to talos/templates/datasets.py diff --git a/talos/examples/models.py b/talos/templates/models.py similarity index 97% rename from talos/examples/models.py rename to talos/templates/models.py index 0967bee8..89b3a329 100644 --- a/talos/examples/models.py +++ b/talos/templates/models.py @@ -1,13 +1,11 @@ -#!/usr/bin/env python - - def breast_cancer(x_train, y_train, x_val, y_val, params): from keras.models import Sequential from keras.layers import Dropout, Dense from talos.model import lr_normalizer, early_stopper, hidden_layers - from talos.metrics.keras_metrics import matthews_correlation_acc, precision_acc + from talos.metrics.keras_metrics import matthews_correlation_acc + from talos.metrics.keras_metrics import precision_acc from talos.metrics.keras_metrics import recall_acc, fmeasure_acc model = Sequential() @@ -49,7 +47,8 @@ def cervical_cancer(x_train, y_train, x_val, y_val, params): from keras.layers import Dropout, Dense from talos.model import lr_normalizer, early_stopper, hidden_layers - from talos.metrics.keras_metrics import matthews_correlation_acc, precision_acc + from talos.metrics.keras_metrics import matthews_correlation_acc + from talos.metrics.keras_metrics import precision_acc from talos.metrics.keras_metrics import recall_acc, fmeasure_acc model = Sequential() diff --git a/talos/examples/params.py b/talos/templates/params.py similarity index 100% rename from talos/examples/params.py rename to talos/templates/params.py diff --git a/talos/examples/pipelines.py b/talos/templates/pipelines.py similarity index 100% rename from talos/examples/pipelines.py rename to talos/templates/pipelines.py diff --git a/talos/utils/last_neuron.py b/talos/utils/last_neuron.py deleted file mode 100644 index 9926477a..00000000 --- a/talos/utils/last_neuron.py +++ /dev/null @@ -1,16 +0,0 @@ -def last_neuron(self): - - labels = list(set(self.y.flatten('F'))) - - try: - last_neuron = self.y.shape[1] - return last_neuron - except IndexError: - if len(labels) == 2 and max(labels) == 1: - last_neuron = 1 - elif len(labels) == 2 and max(labels) > 1: - last_neuron = 3 - elif 
len(labels) > 2: - last_neuron = len(labels) - - return last_neuron diff --git a/talos/utils/pred_class.py b/talos/utils/pred_class.py deleted file mode 100644 index 2a693bfc..00000000 --- a/talos/utils/pred_class.py +++ /dev/null @@ -1,21 +0,0 @@ -def classify(y): - - '''Detects if prediction is binary, multi-label or multi-class''' - - shape = detect_shape(y) - - if shape > 1: - return 'multi_class' - - elif y.max() <= 1: - return 'binary_class' - else: - return 'multi_label' - - -def detect_shape(y): - - try: - return y.shape[1] - except IndexError: - return 1 diff --git a/talos/utils/validation_split.py b/talos/utils/validation_split.py index f024c30a..feb2bf20 100644 --- a/talos/utils/validation_split.py +++ b/talos/utils/validation_split.py @@ -1,10 +1,3 @@ -import numpy as np -try: - from wrangle.array.array_random_shuffle import array_random_shuffle as shuffle -except ImportError: - from wrangle import shuffle - - def validation_split(self): """Defines the attributes `x_train`, `y_train`, `x_val` and `y_val`. The validation (cross-validation, aka development) sets are determined @@ -17,8 +10,8 @@ def validation_split(self): # self.x/y_val are already set else: - if self.shuffle: - random_shuffle(self) + # shuffle the data before splitting + random_shuffle(self) # deduce the midway point for input data limit = int(len(self.x) * (1 - self.val_split)) @@ -40,6 +33,8 @@ def random_shuffle(self): def randomize(x): + import numpy as np + '''Helper function to support the case where x consist of a list of arrays.''' @@ -69,8 +64,10 @@ def randomize(x): def kfold(x, y, folds=10, shuffled=True): + import wrangle as wr + if shuffled is True: - x, y = shuffle(x, y) + x, y = wr.array_random_shuffle(x, y) out_x = [] out_y = [] diff --git a/test/core_tests/test_auto_scan.py b/test/core_tests/test_auto_scan.py index eef41b74..48b78261 100644 --- a/test/core_tests/test_auto_scan.py +++ b/test/core_tests/test_auto_scan.py @@ -17,6 +17,7 @@ def test_auto_scan(): p[key] = [p[key][0]] ta.Scan(x, y, p, ta.KerasModel().model, - permutation_filter=lambda p: p['batch_size'] < 150,) + boolean_limit=lambda p: p['batch_size'] < 150 + ) return "Finished testing auto Scan()" diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index 74b08b5f..a0eb3d76 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -150,18 +150,16 @@ def values_list_test(self): y_val=self.y_val, params=self.values_list, round_limit=5, - dataset_name='BinaryTest', - experiment_no='000', + experiment_name='BinaryTest', model=ta.templates.models.cervical_cancer, random_method='crypto_uniform', seed=2423, - search_method='linear', reduction_method='correlation', - reduction_interval=2, - reduction_window=2, + reduction_interval=3, + reduction_window=3, reduction_threshold=0.2, reduction_metric='val_loss', - reduce_loss=True, + minimize_loss=True, last_epoch_value=True, clear_tf_session=False, disable_progress_bar=True, @@ -174,16 +172,17 @@ def values_range_test(self): self.y_train, params=self.values_range, model=ta.templates.models.cervical_cancer, - grid_downsample=0.0001, - permutation_filter=lambda p: p['first_neuron'] * p['hidden_layers'] < 220, + fraction_limit=0.0001, random_method='sobol', reduction_method='correlation', - reduction_interval=2, - reduction_window=2, + reduction_interval=3, + reduction_window=3, reduction_threshold=0.2, reduction_metric='val_acc', - reduce_loss=False, - debug=True) + minimize_loss=False, + debug=True, + boolean_limit=lambda p: p['first_neuron'] * 
p['hidden_layers'] < 220 + ) class MultiLabelTest: @@ -213,23 +212,22 @@ def values_list_test(self): y_val=self.y_val, params=self.values_list, round_limit=5, - dataset_name='MultiLabelTest', - experiment_no='000', + experiment_name='MultiLabelTest', model=ta.templates.models.iris, random_method='crypto_uniform', seed=2423, - search_method='linear', - permutation_filter=lambda p: p['first_neuron'] * p['hidden_layers'] < 9, reduction_method='correlation', - reduction_interval=2, - reduction_window=2, + reduction_interval=3, + reduction_window=3, reduction_threshold=0.2, reduction_metric='val_loss', - reduce_loss=True, + minimize_loss=True, last_epoch_value=True, clear_tf_session=False, disable_progress_bar=True, - debug=True) + debug=True, + boolean_limit=lambda p: p['first_neuron'] * p['hidden_layers'] < 9 + ) # comprehensive def values_range_test(self): @@ -238,14 +236,14 @@ def values_range_test(self): self.y, params=self.values_range, model=ta.templates.models.iris, - grid_downsample=0.0001, + fraction_limit=0.0001, random_method='sobol', reduction_method='correlation', - reduction_interval=2, - reduction_window=2, + reduction_interval=3, + reduction_window=3, reduction_threshold=0.2, reduction_metric='val_acc', - reduce_loss=False, + minimize_loss=False, debug=True) @@ -255,7 +253,9 @@ def __init__(self): print("ReportingTest : Running Binary test...") - r = Reporting('BinaryTest_000.csv') + r = Reporting('BinaryTest.csv') + + print(len(r.data.val_acc)) x = r.data x = r.correlate() @@ -269,7 +269,7 @@ def __init__(self): x = r.plot_line() print("ReportingTest : Running MultiLabel test...") - r = Reporting('MultiLabelTest_000.csv') + r = Reporting('MultiLabelTest.csv') x = r.data x = r.correlate() From 5e43f4b86154e999ecc9aa6dc89fce01a65a45d8 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Fri, 8 Mar 2019 11:16:47 +0200 Subject: [PATCH 02/21] many small improvements - KerasModel now accepts 'task' argument which can be either prediction task type or a list ot metrics. If prediction task name (e.g. binary) is passed, metric is automatically picked. - Params ranged values after editing the dictionary are now lists and not ranges - Removed permutation_filter as this is now handled in ParamSpace remove_lambda() - changed clear_tf_session to clear_session to support any backend - changed the previous tf specific session clearing to universal one - added a couple of metrics and cleaned up the metrics names --- talos/commands/kerasmodel.py | 44 ++++++++++++++++++++---- talos/commands/params.py | 2 +- talos/metrics/keras_metrics.py | 51 +++++++++++++++++++++------- talos/reducers/permutation_filter.py | 35 ------------------- talos/scan/Scan.py | 4 +-- talos/scan/scan_round.py | 15 ++++++-- talos/templates/models.py | 28 +++++++-------- test/core_tests/test_scan.py | 4 +-- 8 files changed, 105 insertions(+), 78 deletions(-) delete mode 100644 talos/reducers/permutation_filter.py diff --git a/talos/commands/kerasmodel.py b/talos/commands/kerasmodel.py index 563096f1..ce89f6d7 100644 --- a/talos/commands/kerasmodel.py +++ b/talos/commands/kerasmodel.py @@ -1,19 +1,49 @@ class KerasModel: - def __init__(self): + def __init__(self, task=None): - '''An input model for Scan(). Optimized for being used together with - Params(). For example: + ''' + + Creates an input model for Scan(). Optimized for being used together + with Params(). 
For example: - Scan(x=x, y=y, params=Params().params, model=KerasModel().model) + p = talos.Params().params + model = talos.KerasModel(task='binary').model - NOTE: the grid from Params() is very large, so grid_downsample or - round_limit accordingly in Scan(). + talos.Scan(x, y, p, model) + NOTE: the parameter space from Params() is very large, so use limits + in or reducers in Scan() accordingly. + + task : string or list + If 'continuous' then mae is used for metric, if 'binary', + 'multiclass', or 'multilabel', f1score is used. Accuracy is always + used. You can also input a list with one or more custom metrics or + names of Keras or Talos metrics. ''' + # pick the right metrics + self.metrics = self._set_metric(task) + + # create the model self.model = self._create_input_model + def _set_metric(self, task): + + """Sets the metric for the model based on the experiment type + or a list of metrics from user.""" + + import talos as ta + + if task is None: + return ['acc'] + elif task in ['binary', 'multiclass', 'multilabel']: + return [ta.utils.metric.f1score, 'acc'] + elif task == 'continuous': + return [ta.utils.metrics.mae, 'acc'] + elif isinstance(task, list): + return task + ['acc'] + def _create_input_model(self, x_train, y_train, x_val, y_val, params): import numpy as np @@ -73,7 +103,7 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): # compile the model model.compile(optimizer=optimizer, loss=params['losses'], - metrics=['acc']) + metrics=self.metrics) # fit the model out = model.fit(x_train, y_train, diff --git a/talos/commands/params.py b/talos/commands/params.py index 662c1392..a652a323 100644 --- a/talos/commands/params.py +++ b/talos/commands/params.py @@ -136,7 +136,7 @@ def batch_size(self, bottom_value=8, max_value=None, steps=None): if max_value is None and steps is None: values = [int(np.exp2(i/2)) for i in range(3, 15)] else: - values = range(bottom_value, max_value, steps) + values = list(range(bottom_value, max_value, steps)) self._append_params('batch_size', values) diff --git a/talos/metrics/keras_metrics.py b/talos/metrics/keras_metrics.py index 65b4fb00..1f99453f 100644 --- a/talos/metrics/keras_metrics.py +++ b/talos/metrics/keras_metrics.py @@ -1,13 +1,40 @@ -def root_mean_squared_error(y_true, y_pred): +def mae(y_true, y_pred): + from keras import backend as K + return K.mean(K.abs(y_pred - y_true), axis=-1) + + +def mse(y_true, y_pred): + from keras import backend as K + return K.mean(K.square(y_pred - y_true), axis=-1) + + +def rmae(y_true, y_pred): + from keras import backend as K + return K.sqrt(K.mean(K.abs(y_pred - y_true), axis=-1)) + + +def rmse(y_true, y_pred): from keras import backend as K return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) -def matthews_correlation_acc(y_true, y_pred): +def mape(y_true, y_pred): + from keras import backend as K + diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), + K.epsilon(), + None)) + return 100. * K.mean(diff, axis=-1) + + +def msle(y_true, y_pred): + from keras import backend as K + first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.) + second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.) + return K.mean(K.square(first_log - second_log), axis=-1) + + +def matthews(y_true, y_pred): - '''Calculates the Matthews correlation coefficient measure for quality - of binary classification problems. 
- ''' from keras import backend as K y_pred_pos = K.round(K.clip(y_pred, 0, 1)) y_pred_neg = 1 - y_pred_pos @@ -27,7 +54,7 @@ def matthews_correlation_acc(y_true, y_pred): return numerator / (denominator + K.epsilon()) -def precision_acc(y_true, y_pred): +def precision(y_true, y_pred): '''Calculates the precision, a metric for multi-label classification of how many selected items are relevant. @@ -39,7 +66,7 @@ def precision_acc(y_true, y_pred): return precision -def recall_acc(y_true, y_pred): +def recall(y_true, y_pred): '''Calculates the recall, a metric for multi-label classification of how many relevant items are selected. @@ -51,7 +78,7 @@ def recall_acc(y_true, y_pred): return recall -def fbeta_score_acc(y_true, y_pred, beta=1): +def fbeta(y_true, y_pred, beta=1): '''Calculates the F score, the weighted harmonic mean of precision and recall. This is useful for multi-label classification, where input samples can be @@ -73,14 +100,14 @@ def fbeta_score_acc(y_true, y_pred, beta=1): if K.sum(K.round(K.clip(y_true, 0, 1))) == 0: return 0 - p = precision_acc(y_true, y_pred) - r = recall_acc(y_true, y_pred) + p = precision(y_true, y_pred) + r = recall(y_true, y_pred) bb = beta ** 2 fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon()) return fbeta_score -def fmeasure_acc(y_true, y_pred): +def f1score(y_true, y_pred): '''Calculates the f-measure, the harmonic mean of precision and recall. ''' - return fbeta_score_acc(y_true, y_pred, beta=1) + return fbeta(y_true, y_pred, beta=1) diff --git a/talos/reducers/permutation_filter.py b/talos/reducers/permutation_filter.py deleted file mode 100644 index 9c26a41a..00000000 --- a/talos/reducers/permutation_filter.py +++ /dev/null @@ -1,35 +0,0 @@ -def permutation_filter(self, ls, final_grid_size, virtual_grid_size): - - '''Handles the filtering for ta.Scan(... 
permutation_filter= ...)''' - - from ..parameters.round_params import create_params_dict - - # handle the filtering with the current params grid - - def fn(i): - - params_dict = create_params_dict(self, i) - fn = self.main_self.permutation_filter(params_dict) - - return fn - - grid_indices = list(filter(fn, range(len(self.param_grid)))) - self.param_grid = self.param_grid[grid_indices] - final_expanded_grid_size = final_grid_size - - while len(self.param_grid) < final_grid_size and final_expanded_grid_size < virtual_grid_size: - final_expanded_grid_size *= 2 - - if final_expanded_grid_size > virtual_grid_size: - final_expanded_grid_size = virtual_grid_size - - self.param_grid = self._create_param_grid(ls, - final_expanded_grid_size, - virtual_grid_size) - - grid_indices = list(filter(fn, range(len(self.param_grid)))) - self.param_grid = self.param_grid[grid_indices] - - self.param_grid = self.param_grid[:final_grid_size] - - return self diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py index 07a179a5..9f7a310c 100755 --- a/talos/scan/Scan.py +++ b/talos/scan/Scan.py @@ -133,7 +133,7 @@ def __init__(self, x, y, params, model, minimize_loss=False, seed=None, last_epoch_value=False, - clear_tf_session=True, + clear_session=True, disable_progress_bar=False, print_params=False, debug=False): @@ -168,7 +168,7 @@ def __init__(self, x, y, params, model, # other self.debug = debug self.seed = seed - self.clear_tf_session = clear_tf_session + self.clear_session = clear_session self.disable_progress_bar = disable_progress_bar self.last_epoch_value = last_epoch_value self.print_params = print_params diff --git a/talos/scan/scan_round.py b/talos/scan/scan_round.py index 9addb535..073e926c 100644 --- a/talos/scan/scan_round.py +++ b/talos/scan/scan_round.py @@ -4,6 +4,7 @@ def scan_round(self): on the level of execution of each round.''' import time as ti + import gc # print round params if self.print_params is True: @@ -31,8 +32,16 @@ def scan_round(self): self.saved_weights.append(self.keras_model.get_weights()) # clear tensorflow sessions - if self.clear_tf_session is True: - from keras import backend as K - K.clear_session() + if self.clear_session is True: + + del self.keras_model + gc.collect() + + # try TF specific and pass for everyone else + try: + from keras import backend as K + K.clear_session() + except: + pass return self diff --git a/talos/templates/models.py b/talos/templates/models.py index 89b3a329..f72f9320 100644 --- a/talos/templates/models.py +++ b/talos/templates/models.py @@ -4,9 +4,7 @@ def breast_cancer(x_train, y_train, x_val, y_val, params): from keras.layers import Dropout, Dense from talos.model import lr_normalizer, early_stopper, hidden_layers - from talos.metrics.keras_metrics import matthews_correlation_acc - from talos.metrics.keras_metrics import precision_acc - from talos.metrics.keras_metrics import recall_acc, fmeasure_acc + from talos.metrics.keras_metrics import matthews, precision, recall, f1score model = Sequential() model.add(Dense(params['first_neuron'], @@ -24,10 +22,10 @@ def breast_cancer(x_train, y_train, x_val, y_val, params): params['optimizer'])), loss=params['losses'], metrics=['acc', - fmeasure_acc, - recall_acc, - precision_acc, - matthews_correlation_acc]) + f1score, + recall, + precision, + matthews]) results = model.fit(x_train, y_train, batch_size=params['batch_size'], @@ -36,7 +34,7 @@ def breast_cancer(x_train, y_train, x_val, y_val, params): validation_data=[x_val, y_val], callbacks=[early_stopper(params['epochs'], mode='moderate', - 
monitor='val_fmeasure')]) + monitor='val_f1score')]) return results, model @@ -47,9 +45,7 @@ def cervical_cancer(x_train, y_train, x_val, y_val, params): from keras.layers import Dropout, Dense from talos.model import lr_normalizer, early_stopper, hidden_layers - from talos.metrics.keras_metrics import matthews_correlation_acc - from talos.metrics.keras_metrics import precision_acc - from talos.metrics.keras_metrics import recall_acc, fmeasure_acc + from talos.metrics.keras_metrics import matthews, precision, recall, f1score model = Sequential() model.add(Dense(params['first_neuron'], @@ -67,10 +63,10 @@ def cervical_cancer(x_train, y_train, x_val, y_val, params): params['optimizer'])), loss=params['losses'], metrics=['acc', - fmeasure_acc, - recall_acc, - precision_acc, - matthews_correlation_acc]) + f1score, + recall, + precision, + matthews]) results = model.fit(x_train, y_train, batch_size=params['batch_size'], @@ -79,7 +75,7 @@ def cervical_cancer(x_train, y_train, x_val, y_val, params): validation_data=[x_val, y_val], callbacks=[early_stopper(params['epochs'], mode='moderate', - monitor='val_fmeasure')]) + monitor='val_f1score')]) return results, model diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index a0eb3d76..add036da 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -161,7 +161,7 @@ def values_list_test(self): reduction_metric='val_loss', minimize_loss=True, last_epoch_value=True, - clear_tf_session=False, + clear_session=False, disable_progress_bar=True, debug=True) @@ -223,7 +223,7 @@ def values_list_test(self): reduction_metric='val_loss', minimize_loss=True, last_epoch_value=True, - clear_tf_session=False, + clear_session=False, disable_progress_bar=True, debug=True, boolean_limit=lambda p: p['first_neuron'] * p['hidden_layers'] < 9 From a5cf5911849412e56788324fddb989eec0d56978 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Mon, 11 Mar 2019 16:19:58 +0200 Subject: [PATCH 03/21] fixes automl features for continuous predictions - fixes KerasModel in a way where all prediction task types are supported - added output_layer for infering the right activation and last neuron count - renamed layers.py to hidden_layers.py --- examples/models.py | 2 +- talos/commands/kerasmodel.py | 39 +++++++++---------- talos/commands/params.py | 2 +- talos/model/__init__.py | 2 +- talos/model/{layers.py => hidden_layers.py} | 0 talos/model/output_layer.py | 42 +++++++++++++++++++++ talos/utils/__init__.py | 2 +- test/core_tests/test_auto_scan.py | 2 +- 8 files changed, 65 insertions(+), 26 deletions(-) rename talos/model/{layers.py => hidden_layers.py} (100%) create mode 100644 talos/model/output_layer.py diff --git a/examples/models.py b/examples/models.py index 93cfd76d..b4ebadd1 100644 --- a/examples/models.py +++ b/examples/models.py @@ -1,7 +1,7 @@ from keras.models import Sequential from keras.layers import Dropout, Dense from ..model.normalizers import lr_normalizer -from ..model.layers import hidden_layers +from ..model.hidden_layers import hidden_layers from ..metrics.keras_metrics import fmeasure diff --git a/talos/commands/kerasmodel.py b/talos/commands/kerasmodel.py index ce89f6d7..7219d72f 100644 --- a/talos/commands/kerasmodel.py +++ b/talos/commands/kerasmodel.py @@ -1,6 +1,6 @@ class KerasModel: - def __init__(self, task=None): + def __init__(self, task): ''' @@ -22,31 +22,30 @@ def __init__(self, task=None): names of Keras or Talos metrics. 
''' + self.task = task + # pick the right metrics - self.metrics = self._set_metric(task) + self.metrics = self._set_metric() # create the model self.model = self._create_input_model - def _set_metric(self, task): + def _set_metric(self): """Sets the metric for the model based on the experiment type or a list of metrics from user.""" import talos as ta - if task is None: - return ['acc'] - elif task in ['binary', 'multiclass', 'multilabel']: - return [ta.utils.metric.f1score, 'acc'] - elif task == 'continuous': + if self.task in ['binary', 'multiclass', 'multilabel']: + return [ta.utils.metrics.f1score, 'acc'] + elif self.task == 'continuous': return [ta.utils.metrics.mae, 'acc'] - elif isinstance(task, list): - return task + ['acc'] + elif isinstance(self.task, list): + return self.task + ['acc'] def _create_input_model(self, x_train, y_train, x_val, y_val, params): - import numpy as np import wrangle as wr from keras.models import Sequential @@ -80,20 +79,18 @@ def _create_input_model(self, x_train, y_train, x_val, y_val, params): model.add(Dropout(params['dropout'])) # add hidden layers to the model - from talos.model.layers import hidden_layers + from talos.model.hidden_layers import hidden_layers hidden_layers(model, params, 1) - # output layer (this is scetchy) - try: - last_neuron = y_train.shape[1] - except IndexError: - if len(np.unique(y_train)) == 2: - last_neuron = 1 - else: - last_neuron = len(np.unique(y_train)) + # get the right activation and last_neuron based on task + from talos.model.output_layer import output_layer + activation, last_neuron = output_layer(self.task, + params['last_activation'], + y_train, + y_val) model.add(Dense(last_neuron, - activation=params['last_activation'])) + activation=activation)) # bundle the optimizer with learning rate changes from talos.model.normalizers import lr_normalizer diff --git a/talos/commands/params.py b/talos/commands/params.py index a652a323..b1fcd12d 100644 --- a/talos/commands/params.py +++ b/talos/commands/params.py @@ -124,7 +124,7 @@ def neurons(self, bottom_value=8, max_value=None, steps=None): if max_value is None and steps is None: values = [int(np.exp2(i)) for i in range(3, 11)] else: - values = range(bottom_value, max_value, steps) + values = list(range(bottom_value, max_value, steps)) self._append_params('first_neuron', values) diff --git a/talos/model/__init__.py b/talos/model/__init__.py index 7a3c79bb..dddd929c 100644 --- a/talos/model/__init__.py +++ b/talos/model/__init__.py @@ -1,3 +1,3 @@ from .early_stopper import early_stopper -from .layers import hidden_layers +from .hidden_layers import hidden_layers from .normalizers import lr_normalizer diff --git a/talos/model/layers.py b/talos/model/hidden_layers.py similarity index 100% rename from talos/model/layers.py rename to talos/model/hidden_layers.py diff --git a/talos/model/output_layer.py b/talos/model/output_layer.py new file mode 100644 index 00000000..bed4d89f --- /dev/null +++ b/talos/model/output_layer.py @@ -0,0 +1,42 @@ +def output_layer(task, last_activation, y_train, y_val): + + import numpy as np + + # output layer + if task == 'binary': + activation = last_activation + last_neuron = 1 + + elif task == 'multiclass': + activation = last_activation + last_neuron = np.unique(np.hstack((y_train, y_val))) + + elif task == 'multilabel': + activation = last_activation + last_neuron = y_train.shape[1] + + elif task == 'continuous': + activation = None + last_neuron = 1 + + elif isinstance(task, list): + try: + # multilabel + activation = last_activation + 
last_neuron = y_train.shape[1] + except IndexError: + uniques = np.unique(np.hstack((y_train, y_val))) + # binary + if uniques == 2: + activation = last_activation + last_neuron = 1 + # multiclass (note this supports only < 10 classes) + elif uniques <= 10: + activation = last_activation + last_neuron = np.unique(np.hstack((y_train, y_val))) + # continuous maybe, or too many classes + else: + activation = None + last_neuron = 1 + + return activation, last_neuron diff --git a/talos/utils/__init__.py b/talos/utils/__init__.py index 6e59bec5..d9d08023 100644 --- a/talos/utils/__init__.py +++ b/talos/utils/__init__.py @@ -6,7 +6,7 @@ print('Matplotlib backend loading failed') from ..model.normalizers import lr_normalizer -from ..model.layers import hidden_layers +from ..model.hidden_layers import hidden_layers from ..model.early_stopper import early_stopper from .generator import generator from . import gpu_utils diff --git a/test/core_tests/test_auto_scan.py b/test/core_tests/test_auto_scan.py index 48b78261..66bd609c 100644 --- a/test/core_tests/test_auto_scan.py +++ b/test/core_tests/test_auto_scan.py @@ -16,7 +16,7 @@ def test_auto_scan(): for key in p.keys(): p[key] = [p[key][0]] - ta.Scan(x, y, p, ta.KerasModel().model, + ta.Scan(x, y, p, ta.KerasModel('binary').model, boolean_limit=lambda p: p['batch_size'] < 150 ) From 247e50c6a9e33b8a57479dfaf258e27cb84e34b6 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Mon, 11 Mar 2019 17:59:13 +0200 Subject: [PATCH 04/21] added rmsle to metrics --- talos/metrics/keras_metrics.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/talos/metrics/keras_metrics.py b/talos/metrics/keras_metrics.py index 1f99453f..f6b71540 100644 --- a/talos/metrics/keras_metrics.py +++ b/talos/metrics/keras_metrics.py @@ -33,6 +33,13 @@ def msle(y_true, y_pred): return K.mean(K.square(first_log - second_log), axis=-1) +def rmsle(y_true, y_pred): + from keras import backend as K + first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.) + second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.) 
+ return K.sqrt(K.mean(K.square(first_log - second_log), axis=-1)) + + def matthews(y_true, y_pred): from keras import backend as K From 432f366dc129339b387c2673246de573a517b2df Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Wed, 10 Apr 2019 20:09:45 +0300 Subject: [PATCH 05/21] removed peak_epoch logging - removed the option for metric to be logged at peak epoch (now always last epoch is recorded for consistency with Keras) - this means that also the record of which epoch was the peak is removed (for now, later this will be added back) - made some changes to logging in an effort to remove all kinds of mystique that remains in it - cleaned up codes --- talos/logging/logging_finish.py | 3 +- talos/logging/logging_run.py | 7 ++--- talos/logging/results.py | 54 ++++----------------------------- talos/reducers/correlation.py | 10 +++--- talos/scan/Scan.py | 7 +---- talos/scan/scan_prepare.py | 10 ++++-- test/core_tests/test_scan.py | 2 -- 7 files changed, 25 insertions(+), 68 deletions(-) diff --git a/talos/logging/logging_finish.py b/talos/logging/logging_finish.py index e28c978a..b2228853 100644 --- a/talos/logging/logging_finish.py +++ b/talos/logging/logging_finish.py @@ -1,9 +1,8 @@ def logging_finish(self): - from .results import result_todf, peak_epochs_todf + from .results import result_todf # save the results self = result_todf(self) - self.peak_epochs = peak_epochs_todf(self) return self diff --git a/talos/logging/logging_run.py b/talos/logging/logging_run.py index 12d9491e..bf3cab09 100644 --- a/talos/logging/logging_run.py +++ b/talos/logging/logging_run.py @@ -10,7 +10,7 @@ def logging_run(self, round_start, start, model_history): self.round_times.append([round_start, round_end, self._round_seconds]) # handle first round only things - if self.param_object.round_counter == 1: + if self.first_round: # capture the history keys for later self._all_keys = list(model_history.history.keys()) @@ -18,9 +18,8 @@ def logging_run(self, round_start, start, model_history): self._val_keys = [k for k in self._all_keys if 'val_' in k] # create a header column for output - from .results import create_header - _results_header = create_header(self) - self.result.append(_results_header) + _results_header = ['round_epochs'] + self._all_keys + self._param_dict_keys + self.result.append(",".join(str(i) for i in _results_header)) # save the results from .results import save_result diff --git a/talos/logging/results.py b/talos/logging/results.py index a79f479e..45dc9b48 100644 --- a/talos/logging/results.py +++ b/talos/logging/results.py @@ -1,21 +1,3 @@ -def create_header(self): - - '''Called from logging/logging_run.py - - Creates a header to the results table on - the first round of the experiment before - logging any results. 
- ''' - - _results_header = [] - - _results_header.append('round_epochs') - [_results_header.append(i) for i in self._all_keys] - [_results_header.append(key) for key in self.params.keys()] - - return ",".join(str(i) for i in _results_header) - - def run_round_results(self, out): '''Called from logging/logging_run.py @@ -27,43 +9,19 @@ def run_round_results(self, out): ''' - import numpy as np - - _rr_out = [] - self._round_epochs = len(list(out.history.values())[0]) - # otherwise proceed to create the value row - _rr_out.append(self._round_epochs) - p_epochs = [] + _round_result_out = [self._round_epochs] - # iterates through the keys and records last or peak for metrics + # record the last epoch result for key in out.history.keys(): - t_t = np.array(out.history[key]) - - # this handles metrics (NOTE: 'acc' have to be in metric name) - if 'acc' in key: - best_epoch = np.argpartition(t_t, len(t_t) - 1)[-1] - - # this handles losses (takes minimum value epoch) - else: - best_epoch = np.argpartition(t_t, 0)[0] - - if self.last_epoch_value: - value_to_report = out.history[key][-1] - else: - value_to_report = np.array(out.history[key])[best_epoch] - - _rr_out.append(value_to_report) - p_epochs.append(best_epoch) - - # this takes care of the separate entity with just peak epoch data - self.peak_epochs.append(p_epochs) + _round_result_out.append(out.history[key][-1]) + # record the round hyper-parameters for key in self.round_params.keys(): - _rr_out.append(self.round_params[key]) + _round_result_out.append(self.round_params[key]) - return ",".join(str(i) for i in _rr_out) + return ",".join(str(i) for i in _round_result_out) def save_result(self): diff --git a/talos/reducers/correlation.py b/talos/reducers/correlation.py index 62764228..e546ea68 100644 --- a/talos/reducers/correlation.py +++ b/talos/reducers/correlation.py @@ -20,10 +20,12 @@ def correlation(self): corr = data.copy(deep=True) - # drop the row for reduction metric and sort corr = corr.dropna() - corr = data.corr('spearman') + corr = corr.corr('spearman') + corr = corr[self.reduction_metric] + + # drop the row for reduction metric and sort corr = corr.apply(abs)[1:] corr = corr.sort_values(ascending=False) @@ -44,9 +46,9 @@ def correlation(self): corr = wr.col_to_multilabel(data[[label]], label) # combine the reduction_metric with the multilabel data - corr = wr.df_merge(corr, data[self.reduction_metric]) + corr = wr.df_merge(corr, pd.DataFrame(data[self.reduction_metric])) - # repeate same as above + # repeat same as above corr = corr.corr('spearman') corr = corr[self.reduction_metric] corr = corr.apply(abs)[1:] diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py index 9f7a310c..03b9d375 100755 --- a/talos/scan/Scan.py +++ b/talos/scan/Scan.py @@ -99,9 +99,6 @@ def model(): Limits the number of rounds (permutations) in the experiment. reduction_metric : {'val_acc'} Metric used to tune the reductions. - last_epoch_value : bool - Set to True if the last epoch metric values are logged as opposed - to the default which is peak epoch values for each round. disable_progress_bar : bool Disable TQDM live progress bar. 
print_params : bool @@ -132,7 +129,6 @@ def __init__(self, x, y, params, model, reduction_metric='val_acc', minimize_loss=False, seed=None, - last_epoch_value=False, clear_session=True, disable_progress_bar=False, print_params=False, @@ -170,11 +166,10 @@ def __init__(self, x, y, params, model, self.seed = seed self.clear_session = clear_session self.disable_progress_bar = disable_progress_bar - self.last_epoch_value = last_epoch_value self.print_params = print_params # input parameters section ends - self._null = self.runtime() + self.runtime() def runtime(self): diff --git a/talos/scan/scan_prepare.py b/talos/scan/scan_prepare.py index 9458a42f..d4ebcf1c 100644 --- a/talos/scan/scan_prepare.py +++ b/talos/scan/scan_prepare.py @@ -15,10 +15,10 @@ def scan_prepare(self): (self.x_val is None and self.y_val is not None): raise RuntimeError("If x_val/y_val is inputted, other must as well.") - elif (self.x_val is not None and self.y_val is not None): + elif self.x_val is not None and self.y_val is not None: self.custom_val_split = True - # create the paramater object and move to self + # create the parameter object and move to self from ..parameters.ParamSpace import ParamSpace self.param_object = ParamSpace(params=self.params, random_method=self.random_method, @@ -28,6 +28,9 @@ def scan_prepare(self): boolean_limit=self.boolean_limit ) + # mark that it's a first round + self.first_round = True + # create various stores self.round_history = [] self.peak_epochs = [] @@ -37,6 +40,9 @@ def scan_prepare(self): self.saved_models = [] self.saved_weights = [] + # create reference for parameter keys + self._param_dict_keys = sorted(list(self.params.keys())) + # create the data asset self.y_max = self.y.max() diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index add036da..f80c5b74 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -160,7 +160,6 @@ def values_list_test(self): reduction_threshold=0.2, reduction_metric='val_loss', minimize_loss=True, - last_epoch_value=True, clear_session=False, disable_progress_bar=True, debug=True) @@ -222,7 +221,6 @@ def values_list_test(self): reduction_threshold=0.2, reduction_metric='val_loss', minimize_loss=True, - last_epoch_value=True, clear_session=False, disable_progress_bar=True, debug=True, From f6b6b187a65dfe138bf3b4c2df7e4c77ad935ada Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Wed, 10 Apr 2019 20:48:19 +0300 Subject: [PATCH 06/21] unifies the use of self._param_dict_keys This is related with the cases where < 3.6 python can cause various logging related problems, such as the #273 #267 and #255. 
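
Below is a minimal sketch of the idea (illustrative only, not code from this
patch; the parameter values and variable names are hypothetical): before
Python 3.6, dict iteration order was not guaranteed, so building the results
header and the per-round value rows from separate dict iterations could put
values under the wrong column. Keeping one sorted key reference and using it
everywhere avoids that.

    # hypothetical parameter dictionary for an experiment
    params = {'lr': [0.1, 0.01], 'batch_size': [16, 32], 'epochs': [50, 100]}

    # one deterministic key order, shared by the header and every value row
    param_dict_keys = sorted(list(params.keys()))

    # hypothetical results for a single permutation
    round_params = {'batch_size': 16, 'epochs': 50, 'lr': 0.1}
    header = ['round_epochs', 'val_acc', 'val_loss'] + param_dict_keys
    row = [50, 0.92, 0.21] + [round_params[k] for k in param_dict_keys]

Because the header and each row are built from the same param_dict_keys,
every value lands under the correct column regardless of how the Python
version orders dict keys.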
--- talos/parameters/ParamSpace.py | 54 ++++++++++++++++------------------ talos/reducers/correlation.py | 6 ++-- talos/scan/scan_prepare.py | 7 +++-- 3 files changed, 32 insertions(+), 35 deletions(-) diff --git a/talos/parameters/ParamSpace.py b/talos/parameters/ParamSpace.py index 37279ad4..a736c5a8 100644 --- a/talos/parameters/ParamSpace.py +++ b/talos/parameters/ParamSpace.py @@ -9,6 +9,7 @@ class ParamSpace: def __init__(self, params, + param_keys, random_method='uniform_mersenne', fraction_limit=None, round_limit=None, @@ -17,6 +18,7 @@ def __init__(self, # set all the arguments self.params = params + self.param_keys = param_keys self.fraction_limit = fraction_limit self.round_limit = round_limit self.time_limit = time_limit @@ -26,9 +28,6 @@ def __init__(self, # set a counter self.round_counter = 0 - # capture the parameter names for columns later - self.column_names = self._generate_column_names() - # handle tuple conversion to discrete values self.p = self._param_input_conversion() @@ -52,20 +51,6 @@ def __init__(self, # reset index self.param_index = list(range(len(self.param_index))) - def _generate_column_names(self): - - '''Used for storing the corresponding - column names based on the input parameters. - Returns a dictionary where label is paramater - name and value is integer index.''' - - out = [] - - for col in self.params.keys(): - out.append(col) - - return out - def _param_input_conversion(self): '''Parameters may be input as lists of single or @@ -76,7 +61,7 @@ def _param_input_conversion(self): out = {} # go through each parameter type - for param in self.params.keys(): + for param in self.param_keys: # deal with range (tuple) values if isinstance(self.params[param], tuple): @@ -175,13 +160,24 @@ def _check_time_limit(self): def round_parameters(self): + # permutations remain in index if len(self.param_index) > 0: - if self._check_time_limit(): - self.round_counter += 1 - index = self.param_index.pop(0) - values = self.param_space[index] - return self._round_parameters_todict(values) + # time limit has not been met yet + if self._check_time_limit(): + self.round_counter += 1 + + # get current index + index = self.param_index.pop(0) + + # get the values based on the index + values = self.param_space[index] + t = self._round_parameters_todict(values) + + # pass the parameters to Scan + return t + + # the experiment is finished return False def _round_parameters_todict(self, values): @@ -189,7 +185,7 @@ def _round_parameters_todict(self, values): round_param_dict = {} for i, value in enumerate(values): - key = self.column_names[i] + key = self.param_keys[i] round_param_dict[key] = value return round_param_dict @@ -205,7 +201,7 @@ def _convert_lambda(self, fn): fn_string = fn_string.replace('"', '\'') # look for column/label names - for i, name in enumerate(self.column_names): + for i, name in enumerate(self.param_keys): index = ':,' + str(i) fn_string = fn_string.replace(name, index) @@ -222,7 +218,7 @@ def remove_is_not(self, label, value): '''Removes baesd on exact match but reversed''' - col = self.column_names.index(label) + col = self.param_keys.index(label) self.param_space = self.param_space[self.param_space[:, col] == value] self.param_index = list(range(len(self.param_space))) @@ -230,7 +226,7 @@ def remove_is(self, label, value): '''Removes based on exact match''' - col = self.column_names.index(label) + col = self.param_keys.index(label) self.param_space = self.param_space[self.param_space[:, col] != value] self.param_index = 
list(range(len(self.param_space))) @@ -238,7 +234,7 @@ def remove_ge(self, label, value): '''Removes based on greater-or-equal''' - col = self.column_names.index(label) + col = self.param_keys.index(label) self.param_space = self.param_space[self.param_space[:, col] >= value] self.param_index = list(range(len(self.param_space))) @@ -246,7 +242,7 @@ def remove_le(self, label, value): '''Removes based on lesser-or-equal''' - col = self.column_names.index(label) + col = self.param_keys.index(label) self.param_space = self.param_space[self.param_space[:, col] <= value] self.param_index = list(range(len(self.param_space))) diff --git a/talos/reducers/correlation.py b/talos/reducers/correlation.py index e546ea68..66d23133 100644 --- a/talos/reducers/correlation.py +++ b/talos/reducers/correlation.py @@ -16,7 +16,7 @@ def correlation(self): import wrangle as wr data = pd.read_csv(self.experiment_name + '.csv') - data = data[[self.reduction_metric] + self.param_object.column_names] + data = data[[self.reduction_metric] + self._param_dict_keys] corr = data.copy(deep=True) @@ -39,7 +39,7 @@ def correlation(self): return False label = corr.index.values[0] - if label not in self.param_object.column_names: + if label not in self._param_dict_keys: return False # convert parameter values to multilabel (2d) @@ -59,7 +59,7 @@ def correlation(self): return False label = corr.index.values[0] - if label not in self.param_object.column_names: + if label not in self._param_dict_keys: return False value = corr.values[0] diff --git a/talos/scan/scan_prepare.py b/talos/scan/scan_prepare.py index d4ebcf1c..c7ea0fda 100644 --- a/talos/scan/scan_prepare.py +++ b/talos/scan/scan_prepare.py @@ -18,9 +18,13 @@ def scan_prepare(self): elif self.x_val is not None and self.y_val is not None: self.custom_val_split = True + # create reference for parameter keys + self._param_dict_keys = sorted(list(self.params.keys())) + # create the parameter object and move to self from ..parameters.ParamSpace import ParamSpace self.param_object = ParamSpace(params=self.params, + param_keys=self._param_dict_keys, random_method=self.random_method, fraction_limit=self.fraction_limit, round_limit=self.round_limit, @@ -40,9 +44,6 @@ def scan_prepare(self): self.saved_models = [] self.saved_weights = [] - # create reference for parameter keys - self._param_dict_keys = sorted(list(self.params.keys())) - # create the data asset self.y_max = self.y.max() From 82397d4976d693f99aa3e530f777b72448f88447 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Wed, 10 Apr 2019 21:23:10 +0300 Subject: [PATCH 07/21] fixes bug for python versions below <3.6 --- talos/parameters/ParamSpace.py | 17 +++++++++-------- talos/scan/Scan.py | 2 +- talos/scan/scan_run.py | 3 +++ talos/utils/__init__.py | 2 +- 4 files changed, 14 insertions(+), 10 deletions(-) diff --git a/talos/parameters/ParamSpace.py b/talos/parameters/ParamSpace.py index a736c5a8..5ae83f94 100644 --- a/talos/parameters/ParamSpace.py +++ b/talos/parameters/ParamSpace.py @@ -32,7 +32,7 @@ def __init__(self, self.p = self._param_input_conversion() # create list of list from the params dictionary - self._params_temp = [list(self.p[key]) for key in self.p.keys()] + self._params_temp = [list(self.p[key]) for key in self.param_keys] # establish max dimensions self.dimensions = np.prod([len(l) for l in self._params_temp]) @@ -172,23 +172,24 @@ def round_parameters(self): # get the values based on the index values = self.param_space[index] - t = self._round_parameters_todict(values) + round_parameters = 
self._round_parameters_todict(values) # pass the parameters to Scan - return t + return round_parameters # the experiment is finished return False def _round_parameters_todict(self, values): - round_param_dict = {} + round_parameters = {} - for i, value in enumerate(values): - key = self.param_keys[i] - round_param_dict[key] = value + for i, key in enumerate(self.param_keys): + round_parameters[key] = values[i] + print(values[i], key) - return round_param_dict + + return round_parameters def _convert_lambda(self, fn): diff --git a/talos/scan/Scan.py b/talos/scan/Scan.py index 03b9d375..c8f01926 100755 --- a/talos/scan/Scan.py +++ b/talos/scan/Scan.py @@ -138,7 +138,7 @@ def __init__(self, x, y, params, model, self.x = x self.y = y - self.params = OrderedDict(params) + self.params = params self.model = model self.experiment_name = experiment_name self.x_val = x_val diff --git a/talos/scan/scan_run.py b/talos/scan/scan_run.py index 1d8e71d4..2a2bfef4 100644 --- a/talos/scan/scan_run.py +++ b/talos/scan/scan_run.py @@ -14,7 +14,10 @@ def scan_run(self): # the main cycle of the experiment while True: + + # get the parameters self.round_params = self.param_object.round_parameters() + # break when there is no more permutations left if self.round_params is False: break diff --git a/talos/utils/__init__.py b/talos/utils/__init__.py index d9d08023..059bc3b5 100644 --- a/talos/utils/__init__.py +++ b/talos/utils/__init__.py @@ -2,7 +2,7 @@ try: from kerasplotlib import TrainingLog as live -except ImportError: +except RuntimeError: print('Matplotlib backend loading failed') from ..model.normalizers import lr_normalizer From d0f18bacbf87118ba071a3450b52a1778f8cdf55 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Wed, 10 Apr 2019 21:56:18 +0300 Subject: [PATCH 08/21] results are handled better now - handles the error resulting form matplotlib on some cases with mac os x and virtualenv - removes stringification of the results and resolve #271 - the above also makes type handling in scan_finish redundant and is removed --- talos/logging/logging_run.py | 2 +- talos/logging/results.py | 9 +++++---- talos/parameters/ParamSpace.py | 2 -- talos/scan/scan_finish.py | 7 +------ talos/utils/__init__.py | 2 +- ...ls_to_numeric.py => _string_cols_to_numeric.py} | 0 test.file | Bin 0 -> 28 bytes 7 files changed, 8 insertions(+), 14 deletions(-) rename talos/utils/{string_cols_to_numeric.py => _string_cols_to_numeric.py} (100%) create mode 100644 test.file diff --git a/talos/logging/logging_run.py b/talos/logging/logging_run.py index bf3cab09..bf60c660 100644 --- a/talos/logging/logging_run.py +++ b/talos/logging/logging_run.py @@ -19,7 +19,7 @@ def logging_run(self, round_start, start, model_history): # create a header column for output _results_header = ['round_epochs'] + self._all_keys + self._param_dict_keys - self.result.append(",".join(str(i) for i in _results_header)) + self.result.append(_results_header) # save the results from .results import save_result diff --git a/talos/logging/results.py b/talos/logging/results.py index 45dc9b48..df6a002c 100644 --- a/talos/logging/results.py +++ b/talos/logging/results.py @@ -21,7 +21,7 @@ def run_round_results(self, out): for key in self.round_params.keys(): _round_result_out.append(self.round_params[key]) - return ",".join(str(i) for i in _round_result_out) + return _round_result_out def save_result(self): @@ -42,9 +42,10 @@ def result_todf(self): import pandas as pd - self.result = pd.DataFrame(self.result) - self.result.columns = self.result.iloc[0] - self.result 
= self.result.drop(0) + # create dataframe for results + cols = self.result[0] + self.result = pd.DataFrame(self.result[1:]) + self.result.columns = cols return self diff --git a/talos/parameters/ParamSpace.py b/talos/parameters/ParamSpace.py index 5ae83f94..80beb436 100644 --- a/talos/parameters/ParamSpace.py +++ b/talos/parameters/ParamSpace.py @@ -186,8 +186,6 @@ def _round_parameters_todict(self, values): for i, key in enumerate(self.param_keys): round_parameters[key] = values[i] - print(values[i], key) - return round_parameters diff --git a/talos/scan/scan_finish.py b/talos/scan/scan_finish.py index 570142fd..ef8209a3 100644 --- a/talos/scan/scan_finish.py +++ b/talos/scan/scan_finish.py @@ -21,8 +21,7 @@ def scan_finish(self): self.learning_entropy.columns = self._metric_keys # clean the results into a dataframe - self.data = self.result[self.result.columns[0]].str.split(',', expand=True) - self.data.columns = self.result.columns[0].split(',') + self.data = self.result # remove redundant columns keys = list(self.__dict__.keys()) @@ -58,7 +57,6 @@ def scan_finish(self): # add best_model from ..scan.scan_addon import func_best_model, func_evaluate - from ..utils.string_cols_to_numeric import string_cols_to_numeric self.best_model = func_best_model.__get__(self) self.evaluate_models = func_evaluate.__get__(self) @@ -66,7 +64,4 @@ def scan_finish(self): # reset the index self.data.index = range(len(self.data)) - # convert to numeric - self.data = string_cols_to_numeric(self.data) - return self diff --git a/talos/utils/__init__.py b/talos/utils/__init__.py index 059bc3b5..1d74228f 100644 --- a/talos/utils/__init__.py +++ b/talos/utils/__init__.py @@ -2,7 +2,7 @@ try: from kerasplotlib import TrainingLog as live -except RuntimeError: +except (RuntimeError, ImportError): print('Matplotlib backend loading failed') from ..model.normalizers import lr_normalizer diff --git a/talos/utils/string_cols_to_numeric.py b/talos/utils/_string_cols_to_numeric.py similarity index 100% rename from talos/utils/string_cols_to_numeric.py rename to talos/utils/_string_cols_to_numeric.py diff --git a/test.file b/test.file new file mode 100644 index 0000000000000000000000000000000000000000..a54e264909a2e1f9d069125477e021cc63d4a4b2 GIT binary patch literal 28 jcmZo*jxA)+@J>liD$UF($;>O}Dk#d#E8!|+Ow|JbhGGdk literal 0 HcmV?d00001 From ac3da39c2f66231bfaeaa30d5bd03243bc9b9958 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Thu, 11 Apr 2019 10:41:21 +0300 Subject: [PATCH 09/21] removed redundant util --- talos/utils/_string_cols_to_numeric.py | 35 ------------------------ test/core_tests/test_reporting_object.py | 2 ++ 2 files changed, 2 insertions(+), 35 deletions(-) delete mode 100644 talos/utils/_string_cols_to_numeric.py diff --git a/talos/utils/_string_cols_to_numeric.py b/talos/utils/_string_cols_to_numeric.py deleted file mode 100644 index bb8494da..00000000 --- a/talos/utils/_string_cols_to_numeric.py +++ /dev/null @@ -1,35 +0,0 @@ -def isnumber(value): - - '''Checks if a string can be converted into - a float (or int as a by product). 
Helper function - for string_cols_to_numeric''' - - try: - float(value) - return True - except ValueError: - return False - - -def string_cols_to_numeric(data, destructive=False): - - '''Takes in a dataframe and attempts to convert numeric columns - into floats or ints respectively.''' - - if destructive is False: - data = data.copy(deep=True) - - for col in data.columns: - - if data[col].apply(isnumber).sum() == len(data): - try: - data[col] = data[col].astype(int) - except: # intentionally silent - try: - data[col] = data[col].astype(float) - except: # intentionally silent - data[col] = data[col] - else: - data[col] = data[col] - - return data diff --git a/test/core_tests/test_reporting_object.py b/test/core_tests/test_reporting_object.py index a33ee363..0a6cb281 100644 --- a/test/core_tests/test_reporting_object.py +++ b/test/core_tests/test_reporting_object.py @@ -14,6 +14,8 @@ def test_reporting_object(scan_object): r.high() r.low() + print(r.data) + r.plot_bars('first_neuron', 'val_acc', 'batch_size', 'hidden_layers') r.plot_box('first_neuron') r.plot_corr('val_loss') From a9fbe3550af3511ff53b51e3327ec9f090e46849 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Thu, 11 Apr 2019 10:57:44 +0300 Subject: [PATCH 10/21] added sample reference for citations --- README.md | 5 +++++ test/core_tests/test_scan_object.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 701eadb7..30594c7a 100755 --- a/README.md +++ b/README.md @@ -109,6 +109,11 @@ If you want ask a **"how can I use Talos to..."** question, the right place is [ If you found a bug or want to suggest a feature, check the [issues](https://github.com/autonomio/talos/issues) or [create](https://github.com/autonomio/talos/issues/new/choose) a new issue. +### Citations + +If you use Talos for published work, please cite: + +`Autonomio Talos [Computer software]. (2018). 
Retrieved from http://github.com/autonomio/talos.` ### License diff --git a/test/core_tests/test_scan_object.py b/test/core_tests/test_scan_object.py index a777868e..a0b9231b 100644 --- a/test/core_tests/test_scan_object.py +++ b/test/core_tests/test_scan_object.py @@ -7,7 +7,7 @@ def test_scan_object(): print("Running Scan object test...") # the create the test based on it - scan_object = ta.templates.pipelines.iris() + scan_object = ta.templates.pipelines.iris(round_limit=5) keras_model = scan_object.best_model() scan_object.evaluate_models(x_val=scan_object.x, y_val=scan_object.y) From b950869e0085553e75ea99881883cb7772d85c3d Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Thu, 11 Apr 2019 11:06:22 +0300 Subject: [PATCH 11/21] debugging --- test/core_tests/test_reporting_object.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/core_tests/test_reporting_object.py b/test/core_tests/test_reporting_object.py index 0a6cb281..944d67bd 100644 --- a/test/core_tests/test_reporting_object.py +++ b/test/core_tests/test_reporting_object.py @@ -15,6 +15,7 @@ def test_reporting_object(scan_object): r.low() print(r.data) + print(r.data.dtypes) r.plot_bars('first_neuron', 'val_acc', 'batch_size', 'hidden_layers') r.plot_box('first_neuron') From c1916430e6a89c7e68647586c107b2a1658a80dd Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Thu, 11 Apr 2019 18:39:21 +0300 Subject: [PATCH 12/21] handles the matplotlib framework issue on Mac OSX --- environment.yml | 95 ++++++++++++++++++++++++++++++++++++ talos/commands/reporting.py | 67 +++++++++++++++---------- talos/templates/params.py | 1 - test/core_tests/test_scan.py | 3 -- test_script.py | 12 ++--- 5 files changed, 142 insertions(+), 36 deletions(-) create mode 100644 environment.yml diff --git a/environment.yml b/environment.yml new file mode 100644 index 00000000..5d3061a9 --- /dev/null +++ b/environment.yml @@ -0,0 +1,95 @@ +name: talos_37 +channels: + - intel + - defaults +dependencies: + - _tflow_select=2.3.0=mkl + - absl-py=0.7.0=py37_0 + - astor=0.7.1=py37_0 + - blas=1.0=mkl + - c-ares=1.15.0=h1de35cc_1 + - ca-certificates=2019.1.23=0 + - certifi=2019.3.9=py37_0 + - gast=0.2.2=py37_0 + - grpcio=1.14.1=py37h9011c5e_0 + - h5py=2.8.0=py37h878fce3_3 + - keras-applications=1.0.7=py_0 + - keras-preprocessing=1.0.9=py_0 + - libcxx=4.0.1=hcfea43d_1 + - libcxxabi=4.0.1=hcfea43d_1 + - libgfortran=3.0.1=h93005f0_2 + - libprotobuf=3.6.1=hd9629dc_0 + - markdown=3.0.1=py37_0 + - mkl_fft=1.0.10=py37h5e564d8_0 + - mkl_random=1.0.2=py37h27c97d8_0 + - mock=2.0.0=py37_0 + - ncurses=6.1=h0a44026_1 + - numpy=1.16.2=py37hacdab7b_0 + - numpy-base=1.16.2=py37h6575580_0 + - pbr=5.1.3=py_0 + - pip=19.0.3=py37_0 + - protobuf=3.6.1=py37h0a44026_0 + - python=3.7.0=hc167b69_0 + - readline=7.0=h1de35cc_5 + - scipy=1.2.1=py37h1410ff5_0 + - setuptools=40.8.0=py37_0 + - six=1.12.0=py37_0 + - tensorboard=1.13.1=py37haf313ee_0 + - tensorflow=1.13.1=mkl_py37h70c3834_0 + - tensorflow-base=1.13.1=mkl_py37h66b1bf0_0 + - tensorflow-estimator=1.13.0=py_0 + - termcolor=1.1.0=py37_1 + - tk=8.6.8=ha441bb4_0 + - werkzeug=0.14.1=py37_0 + - wheel=0.33.1=py37_0 + - xz=5.2.4=h1de35cc_4 + - hdf5=1.10.2=2 + - intel-openmp=2019.3=intel_199 + - intelpython=2019.3=0 + - libffi=3.2.1=11 + - mkl=2019.3=intel_199 + - openssl=1.0.2r=0 + - sqlite=3.27.2=2 + - tbb=2019.4=intel_199 + - zlib=1.2.11=5 + - pip: + - appnope==0.1.0 + - astetik==1.9.8 + - backcall==0.1.0 + - chances==0.1.6 + - chardet==3.0.4 + - cycler==0.10.0 + - decorator==4.4.0 + - geonamescache==1.0.1 + - idna==2.8 + - 
ipython==7.4.0 + - ipython-genutils==0.2.0 + - jedi==0.13.3 + - keras==2.2.4 + - kerasplotlib==0.1.4 + - kiwisolver==1.0.1 + - matplotlib==2.2.3 + - pandas==0.24.2 + - parso==0.4.0 + - patsy==0.5.1 + - pexpect==4.7.0 + - pickleshare==0.7.5 + - prompt-toolkit==2.0.9 + - ptyprocess==0.6.0 + - pygments==2.3.1 + - pyparsing==2.4.0 + - python-dateutil==2.8.0 + - pytz==2019.1 + - pyyaml==5.1 + - requests==2.21.0 + - scikit-learn==0.20.3 + - seaborn==0.9.0 + - sklearn==0.0 + - statsmodels==0.9.0 + - tqdm==4.31.1 + - traitlets==4.3.2 + - urllib3==1.24.1 + - wcwidth==0.1.7 + - wrangle==0.6.2 +prefix: /Users/mikko/miniconda3/envs/talos_37 + diff --git a/talos/commands/reporting.py b/talos/commands/reporting.py index 41718428..6f0205c1 100644 --- a/talos/commands/reporting.py +++ b/talos/commands/reporting.py @@ -65,8 +65,11 @@ def plot_line(self, metric='val_acc'): metric :: the metric to correlate against ''' - import astetik as ast - return ast.line(self.data, metric) + try: + import astetik as ast + return ast.line(self.data, metric) + except: + print('Matplotlib Runtime Error. Plots will not work.') def plot_hist(self, metric='val_acc', bins=10): @@ -78,8 +81,11 @@ def plot_hist(self, metric='val_acc', bins=10): bins :: number of bins to use in histogram ''' - import astetik as ast - return ast.hist(self.data, metric, bins=bins) + try: + import astetik as ast + return ast.hist(self.data, metric, bins=bins) + except RuntimeError: + print('Matplotlib Runtime Error. Plots will not work.') def plot_corr(self, metric='val_acc', color_grades=5): @@ -90,11 +96,12 @@ def plot_corr(self, metric='val_acc', color_grades=5): metric :: the metric to correlate against color_grades :: number of colors to use in heatmap''' - import astetik as ast - - cols = self._cols(metric) - - return ast.corr(self.data[cols], color_grades=color_grades) + try: + import astetik as ast + cols = self._cols(metric) + return ast.corr(self.data[cols], color_grades=color_grades) + except RuntimeError: + print('Matplotlib Runtime Error. Plots will not work.') def plot_regs(self, x='val_acc', y='val_loss'): @@ -104,9 +111,11 @@ def plot_regs(self, x='val_acc', y='val_loss'): y = data for the y axis ''' - import astetik as ast - - return ast.regs(self.data, x, y) + try: + import astetik as ast + return ast.regs(self.data, x, y) + except RuntimeError: + print('Matplotlib Runtime Error. Plots will not work.') def plot_box(self, x, y='val_acc', hue=None): @@ -116,31 +125,37 @@ def plot_box(self, x, y='val_acc', hue=None): y = data for the y axis hue = data for the hue separation ''' - import astetik as ast - - return ast.box(self.data, x, y, hue) + try: + import astetik as ast + return ast.box(self.data, x, y, hue) + except RuntimeError: + print('Matplotlib Runtime Error. Plots will not work.') def plot_bars(self, x, y, hue, col): '''A comparison plot with 4 axis''' - import astetik as ast - - return ast.bargrid(self.data, - x=x, - y=y, - hue=hue, - col=col, - col_wrap=4) + try: + import astetik as ast + return ast.bargrid(self.data, + x=x, + y=y, + hue=hue, + col=col, + col_wrap=4) + except RuntimeError: + print('Matplotlib Runtime Error. Plots will not work.') def plot_kde(self, x='val_acc', y=None): '''Kernel Destiny Estimation type histogram with support for 1 or 2 axis of data''' - import astetik as ast - - return ast.kde(self.data, x, y) + try: + import astetik as ast + return ast.kde(self.data, x, y) + except RuntimeError: + print('Matplotlib Runtime Error. 
Plots will not work.') def table(self, metric='val_acc', sort_by=None, ascending=False): diff --git a/talos/templates/params.py b/talos/templates/params.py index 00b53c02..3b16ff99 100644 --- a/talos/templates/params.py +++ b/talos/templates/params.py @@ -28,7 +28,6 @@ def iris(): 'dropout': (0, 0.5, 5), 'weight_regulizer': [None], 'emb_output_dims': [None], - 'shape': ['brick', 'triangle', 0.2], 'shapes': ['brick', 'triangle', 0.2], 'optimizer': [Adam, Nadam], 'losses': [logcosh, categorical_crossentropy], diff --git a/test/core_tests/test_scan.py b/test/core_tests/test_scan.py index f80c5b74..81dee0a3 100644 --- a/test/core_tests/test_scan.py +++ b/test/core_tests/test_scan.py @@ -253,8 +253,6 @@ def __init__(self): r = Reporting('BinaryTest.csv') - print(len(r.data.val_acc)) - x = r.data x = r.correlate() x = r.high() @@ -266,7 +264,6 @@ def __init__(self): x = r.plot_hist() x = r.plot_line() - print("ReportingTest : Running MultiLabel test...") r = Reporting('MultiLabelTest.csv') x = r.data diff --git a/test_script.py b/test_script.py index 154506f5..d248b7c8 100644 --- a/test_script.py +++ b/test_script.py @@ -18,6 +18,12 @@ '''NOTE: test/core_tests/test_scan.py needs to be edited as well!''' + # reporting specific testing + from test.core_tests.test_scan import ReportingTest, DatasetTest + + ReportingTest() + DatasetTest() + # testing different model types from test.core_tests.test_scan import BinaryTest, MultiLabelTest @@ -29,12 +35,6 @@ MultiLabelTest().values_list_test() MultiLabelTest().values_range_test() - # reporting specific testing - from test.core_tests.test_scan import ReportingTest, DatasetTest - - ReportingTest() - DatasetTest() - # MOVE TO command specific tests # Scan() object tests From 606ed20858c53c5e8f33f6436282620ed102c011 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Thu, 11 Apr 2019 18:49:16 +0300 Subject: [PATCH 13/21] changed order of tests --- test_script.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test_script.py b/test_script.py index d248b7c8..83cea2d2 100644 --- a/test_script.py +++ b/test_script.py @@ -18,12 +18,6 @@ '''NOTE: test/core_tests/test_scan.py needs to be edited as well!''' - # reporting specific testing - from test.core_tests.test_scan import ReportingTest, DatasetTest - - ReportingTest() - DatasetTest() - # testing different model types from test.core_tests.test_scan import BinaryTest, MultiLabelTest @@ -40,6 +34,12 @@ # Scan() object tests scan_object = test_scan_object() + # reporting specific testing + from test.core_tests.test_scan import ReportingTest, DatasetTest + + ReportingTest() + DatasetTest() + # reporting tests test_reporting_object(scan_object) test_params_object() From d78fb1957541b9fd39daedc3d022f459e4353e8f Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Mon, 27 May 2019 08:58:07 +0300 Subject: [PATCH 14/21] Removed tests for python 2.7 From v.0.6 onwards, Talos requires python >= 3.5. For older versions of python use, Talos v.0.5 which is the previous LTS. 
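(For context: a minimum-Python floor like the one described above is typically enforced at install time through setuptools' `python_requires`. The sketch below is illustrative only and is not part of this patch, which only touches .travis.yml, environment.yml, and talos/templates/datasets.py.)

    # Illustrative sketch only -- not from this commit.
    # Declares the Python floor described above so pip refuses to
    # install the package on Python 2.7 / < 3.5.
    from setuptools import setup

    setup(
        name='talos',
        python_requires='>=3.5',
    )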
--- .travis.yml | 1 - environment.yml | 95 ------------------------------------- talos/templates/datasets.py | 3 +- 3 files changed, 2 insertions(+), 97 deletions(-) delete mode 100644 environment.yml diff --git a/.travis.yml b/.travis.yml index 41b4d2e3..de1a39a9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,7 +9,6 @@ notifications: on_success: never on_failure: always python: - - '2.7' - '3.5' - '3.6' install: diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 5d3061a9..00000000 --- a/environment.yml +++ /dev/null @@ -1,95 +0,0 @@ -name: talos_37 -channels: - - intel - - defaults -dependencies: - - _tflow_select=2.3.0=mkl - - absl-py=0.7.0=py37_0 - - astor=0.7.1=py37_0 - - blas=1.0=mkl - - c-ares=1.15.0=h1de35cc_1 - - ca-certificates=2019.1.23=0 - - certifi=2019.3.9=py37_0 - - gast=0.2.2=py37_0 - - grpcio=1.14.1=py37h9011c5e_0 - - h5py=2.8.0=py37h878fce3_3 - - keras-applications=1.0.7=py_0 - - keras-preprocessing=1.0.9=py_0 - - libcxx=4.0.1=hcfea43d_1 - - libcxxabi=4.0.1=hcfea43d_1 - - libgfortran=3.0.1=h93005f0_2 - - libprotobuf=3.6.1=hd9629dc_0 - - markdown=3.0.1=py37_0 - - mkl_fft=1.0.10=py37h5e564d8_0 - - mkl_random=1.0.2=py37h27c97d8_0 - - mock=2.0.0=py37_0 - - ncurses=6.1=h0a44026_1 - - numpy=1.16.2=py37hacdab7b_0 - - numpy-base=1.16.2=py37h6575580_0 - - pbr=5.1.3=py_0 - - pip=19.0.3=py37_0 - - protobuf=3.6.1=py37h0a44026_0 - - python=3.7.0=hc167b69_0 - - readline=7.0=h1de35cc_5 - - scipy=1.2.1=py37h1410ff5_0 - - setuptools=40.8.0=py37_0 - - six=1.12.0=py37_0 - - tensorboard=1.13.1=py37haf313ee_0 - - tensorflow=1.13.1=mkl_py37h70c3834_0 - - tensorflow-base=1.13.1=mkl_py37h66b1bf0_0 - - tensorflow-estimator=1.13.0=py_0 - - termcolor=1.1.0=py37_1 - - tk=8.6.8=ha441bb4_0 - - werkzeug=0.14.1=py37_0 - - wheel=0.33.1=py37_0 - - xz=5.2.4=h1de35cc_4 - - hdf5=1.10.2=2 - - intel-openmp=2019.3=intel_199 - - intelpython=2019.3=0 - - libffi=3.2.1=11 - - mkl=2019.3=intel_199 - - openssl=1.0.2r=0 - - sqlite=3.27.2=2 - - tbb=2019.4=intel_199 - - zlib=1.2.11=5 - - pip: - - appnope==0.1.0 - - astetik==1.9.8 - - backcall==0.1.0 - - chances==0.1.6 - - chardet==3.0.4 - - cycler==0.10.0 - - decorator==4.4.0 - - geonamescache==1.0.1 - - idna==2.8 - - ipython==7.4.0 - - ipython-genutils==0.2.0 - - jedi==0.13.3 - - keras==2.2.4 - - kerasplotlib==0.1.4 - - kiwisolver==1.0.1 - - matplotlib==2.2.3 - - pandas==0.24.2 - - parso==0.4.0 - - patsy==0.5.1 - - pexpect==4.7.0 - - pickleshare==0.7.5 - - prompt-toolkit==2.0.9 - - ptyprocess==0.6.0 - - pygments==2.3.1 - - pyparsing==2.4.0 - - python-dateutil==2.8.0 - - pytz==2019.1 - - pyyaml==5.1 - - requests==2.21.0 - - scikit-learn==0.20.3 - - seaborn==0.9.0 - - sklearn==0.0 - - statsmodels==0.9.0 - - tqdm==4.31.1 - - traitlets==4.3.2 - - urllib3==1.24.1 - - wcwidth==0.1.7 - - wrangle==0.6.2 -prefix: /Users/mikko/miniconda3/envs/talos_37 - diff --git a/talos/templates/datasets.py b/talos/templates/datasets.py index 706a101d..066568f9 100755 --- a/talos/templates/datasets.py +++ b/talos/templates/datasets.py @@ -29,9 +29,10 @@ def titanic(): left_index=True, right_index=True) - x = x.dropna() x = x.values + print('BE CAREFUL, this dataset has nan values.') + return x, y From 61498006b02ce801ed2a40051a4730b0a85375d2 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Mon, 27 May 2019 09:48:27 +0300 Subject: [PATCH 15/21] Changed order of deps --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 85c86bbd..47bdb466 100755 --- a/setup.py +++ b/setup.py @@ -24,7 +24,8 @@ except ImportError: 
from distutils.core import setup -install_requires = ['numpy', +install_requires = ['wrangle', + 'numpy', 'pandas', 'keras', 'astetik', @@ -32,7 +33,6 @@ 'tqdm', 'chances', 'kerasplotlib', - 'wrangle', 'requests'] From f8c87e6ead71104f7e7c4704323d959dbaad7280 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Mon, 27 May 2019 09:52:33 +0300 Subject: [PATCH 16/21] also changed the order of deps in requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index bd839f65..d0845f88 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +wrangle pandas numpy keras @@ -6,5 +7,4 @@ tqdm sklearn chances kerasplotlib -wrangle requests From 29ba33b60c05c3573d5f90ab99b5c130ac6f47b9 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Fri, 14 Jun 2019 08:39:48 +0300 Subject: [PATCH 17/21] fixed the issue in deploy.py causing error when x is not 2d --- talos/commands/deploy.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/talos/commands/deploy.py b/talos/commands/deploy.py index c51311a5..53b2e24b 100644 --- a/talos/commands/deploy.py +++ b/talos/commands/deploy.py @@ -69,8 +69,14 @@ def save_data(self): import pandas as pd - x = pd.DataFrame(self.scan_object.x[:100]) - y = pd.DataFrame(self.scan_object.y[:100]) + try: + x = pd.DataFrame(self.scan_object.x[:100]) + y = pd.DataFrame(self.scan_object.y[:100]) + + except ValueError: + t.x = np.zeros(500) + t.y = np.zeros(500) + print("data is not 2d, dummy data written instead.") x.to_csv(self.path + '_x.csv', header=None, index=None) y.to_csv(self.path + '_y.csv', header=None, index=None) From c960fc1eff34d33a377c8c5a2701877d55dda148 Mon Sep 17 00:00:00 2001 From: Mikko Kotila Date: Wed, 3 Jul 2019 09:50:29 +0300 Subject: [PATCH 18/21] improved README.md --- README.md | 33 +++++++++++++++++++-------------- test.file | Bin 28 -> 0 bytes 2 files changed, 19 insertions(+), 14 deletions(-) delete mode 100644 test.file diff --git a/README.md b/README.md index 30594c7a..81d70c3f 100755 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ Talos radically transforms ordinary Keras workflows without taking away any of K Talos is made for data scientists and data engineers that want to remain in **complete control of their Keras models**, but are tired of mindless parameter hopping and confusing optimization solutions that add complexity instead of reducing it. Within minutes, without learning any new syntax, Talos allows you to configure, perform, and evaluate hyperparameter optimization experiments that yield state-of-the-art results across a wide range of prediction tasks. Talos provides the **simplest and yet most powerful** available method for hyperparameter optimization with Keras. -### Key Features +### :wrench: Key Features Based on what no doubt constitutes a "biased" review (being our own) of more than ~30 hyperparameter tuning and optimization solutions, Talos comes on top in terms of intuitive, easy-to-learn, highly permissive access to critical hyperparameter optimization capabilities. Key features include: @@ -68,7 +68,7 @@ Based on what no doubt constitutes a "biased" review (being our own) of more tha Talos works on **Linux, Mac OSX**, and **Windows** systems and can be operated cpu, gpu, and multi-gpu systems. -### Examples +### ๐Ÿ“ˆ Examples Get the below code [here](https://gist.github.com/mikkokotila/4c0d6298ff0a22dc561fb387a1b4b0bb). More examples further below. 
@@ -88,7 +88,7 @@ For more information on how Talos can help with your Keras workflow, visit the [
 
 You may also want to check out a visualization of the [Talos Hyperparameter Tuning workflow](https://github.com/autonomio/talos/wiki/Workflow).
 
-### Install
+### 💾 Install
 
 Stable version:
 
@@ -98,23 +98,28 @@ Daily development version:
 
 #### `pip install git+https://github.com/autonomio/talos.git@daily-dev`
 
-### Support
+### 💬 How to get Support
 
-Check out [common errors](https://github.com/autonomio/talos/wiki/Troubleshooting) in the Wiki.
+| I want to... | Go to... |
+| -------------------------------- | ---------------------------------------------------------- |
+| ❓ **...troubleshoot** | [Docs] · [Wiki] · [GitHub Issue Tracker] |
+| 🐛 **...report a bug** | [GitHub Issue Tracker] |
+| 🎁 **...suggest a new feature** | [GitHub Issue Tracker] |
+| 💁 **...get support** | [Stack Overflow] · [Spectrum Chat] |
+| :thought_balloon: **...have a discussion** | [Spectrum Chat] |
 
+[github issue tracker]: https://github.com/automio/talos/issues
+[docs]: https://autonomio.github.io/docs_talos
+[wiki]: https://github.com/autonomio/talos/wiki
+[stack overflow]: https://stackoverflow.com/questions/tagged/talos
+[spectrum chat]: https://spectrum.chat/talos
 
-Check the [Docs](https://autonomio.github.io/docs_talos) which is generally keeping up with Master (and pip package).
-
-If you want ask a **"how can I use Talos to..."** question, the right place is [StackOverflow](https://stackoverflow.com/questions/ask).
-
-If you found a bug or want to suggest a feature, check the [issues](https://github.com/autonomio/talos/issues) or [create](https://github.com/autonomio/talos/issues/new/choose) a new issue.
-
-### Citations
+### 📢 Citations
 
 If you use Talos for published work, please cite:
 
-`Autonomio Talos [Computer software]. (2018). Retrieved from http://github.com/autonomio/talos.`
+`Autonomio Talos [Computer software]. (2019). Retrieved from http://github.com/autonomio/talos.`
 
-### License
+### 📃 License
 
 [MIT License](https://github.com/autonomio/talos/blob/master/LICENSE)
 
diff --git a/test.file b/test.file
deleted file mode 100644
index a54e264909a2e1f9d069125477e021cc63d4a4b2..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 28
jcmZo*jxA)+@J>liD$UF($;>O}Dk#d#E8!|+Ow|JbhGGdk


From 158aecb8028dd4a8dd1669858f8e501d29dca83c Mon Sep 17 00:00:00 2001
From: Mikko Kotila
Date: Wed, 3 Jul 2019 09:51:38 +0300
Subject: [PATCH 19/21] improved README.md

---
 README.md | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/README.md b/README.md
index 81d70c3f..ab56a441 100755
--- a/README.md
+++ b/README.md
@@ -50,6 +50,8 @@ Talos radically transforms ordinary Keras workflows without taking away any of K
 
 Talos is made for data scientists and data engineers that want to remain in **complete control of their Keras models**, but are tired of mindless parameter hopping and confusing optimization solutions that add complexity instead of reducing it. Within minutes, without learning any new syntax, Talos allows you to configure, perform, and evaluate hyperparameter optimization experiments that yield state-of-the-art results across a wide range of prediction tasks. Talos provides the **simplest and yet most powerful** available method for hyperparameter optimization with Keras.
 
+<br>
+
 ### :wrench: Key Features
@@ -68,6 +70,8 @@ Based on what no doubt constitutes a "biased" review (being our own) of more tha
 
 Talos works on **Linux, Mac OSX**, and **Windows** systems and can be operated cpu, gpu, and multi-gpu systems.
 
+<br>
+
 ### 📈 Examples
 
 Get the below code [here](https://gist.github.com/mikkokotila/4c0d6298ff0a22dc561fb387a1b4b0bb). More examples further below.
@@ -88,6 +92,8 @@ For more information on how Talos can help with your Keras workflow, visit the [
 
 You may also want to check out a visualization of the [Talos Hyperparameter Tuning workflow](https://github.com/autonomio/talos/wiki/Workflow).
 
+<br>
+
 ### 💾 Install
 
 Stable version:
@@ -98,6 +104,8 @@ Daily development version:
 
 #### `pip install git+https://github.com/autonomio/talos.git@daily-dev`
 
+<br>
+
 ### 💬 How to get Support
 
 | I want to... | Go to... |
@@ -120,6 +128,8 @@ If you use Talos for published work, please cite:
 
 `Autonomio Talos [Computer software]. (2019). Retrieved from http://github.com/autonomio/talos.`
 
+<br>
+
 ### 📃 License
 
 [MIT License](https://github.com/autonomio/talos/blob/master/LICENSE)

From 89e6c8804ebb9a3e0c3c74029bfca90be32ebd2f Mon Sep 17 00:00:00 2001
From: Mikko Kotila
Date: Wed, 3 Jul 2019 09:52:47 +0300
Subject: [PATCH 20/21] improved README.md

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index ab56a441..afb15c72 100755
--- a/README.md
+++ b/README.md
@@ -110,11 +110,11 @@ Daily development version:
 
 | I want to... | Go to... |
 | -------------------------------- | ---------------------------------------------------------- |
-| ❓ **...troubleshoot** | [Docs] · [Wiki] · [GitHub Issue Tracker] |
-| 🐛 **...report a bug** | [GitHub Issue Tracker] |
-| 🎁 **...suggest a new feature** | [GitHub Issue Tracker] |
-| 💁 **...get support** | [Stack Overflow] · [Spectrum Chat] |
-| :thought_balloon: **...have a discussion** | [Spectrum Chat] |
+| **...troubleshoot** | [Docs] · [Wiki] · [GitHub Issue Tracker] |
+| **...report a bug** | [GitHub Issue Tracker] |
+| **...suggest a new feature** | [GitHub Issue Tracker] |
+| **...get support** | [Stack Overflow] · [Spectrum Chat] |
+| **...have a discussion** | [Spectrum Chat] |
 
 [github issue tracker]: https://github.com/automio/talos/issues
 [docs]: https://autonomio.github.io/docs_talos
 [wiki]: https://github.com/autonomio/talos/wiki
 [stack overflow]: https://stackoverflow.com/questions/tagged/talos
 [spectrum chat]: https://spectrum.chat/talos

From 9aeef748afdb5063a310d4c2eba349aac2c0592b Mon Sep 17 00:00:00 2001
From: Mikko Kotila
Date: Wed, 3 Jul 2019 09:53:23 +0300
Subject: [PATCH 21/21] improved README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index afb15c72..4f966164 100755
--- a/README.md
+++ b/README.md
@@ -122,6 +122,8 @@ Daily development version:
 
 [stack overflow]: https://stackoverflow.com/questions/tagged/talos
 [spectrum chat]: https://spectrum.chat/talos
 
+<br>
+
 ### 📢 Citations
 
 If you use Talos for published work, please cite: