From 7645f0e7b92043c3edc0250fabe1fc23d6085654 Mon Sep 17 00:00:00 2001
From: Anna Bettina Ovsiyovna Steinberg
Date: Wed, 10 Jul 2019 19:38:19 +0200
Subject: [PATCH] adding hyper parameter script

---
 src/py-scripts/hyperopt.py        | 163 ++++++++++++++++++++++++++++++
 src/py-scripts/wrapper.py         |  40 ++++++++
 src/sh-scripts/hyper_atis_fofe.sh |   5 +
 3 files changed, 208 insertions(+)
 create mode 100755 src/py-scripts/hyperopt.py
 create mode 100644 src/py-scripts/wrapper.py
 create mode 100755 src/sh-scripts/hyper_atis_fofe.sh

diff --git a/src/py-scripts/hyperopt.py b/src/py-scripts/hyperopt.py
new file mode 100755
index 0000000..ed3ba31
--- /dev/null
+++ b/src/py-scripts/hyperopt.py
@@ -0,0 +1,163 @@
+#!/usr/bin/python3
+
+import random
+import sys
+import time
+import copy
+from wrapper import Wrapper
+import argparse
+import torch
+import pickle
+
+
+def extend_subrange(subrange, fullrange, best_config):
+    """Grows the sampling subrange; returns True if any value was added."""
+    is_extended = False
+    if subrange == {}:
+        # Initialize the subrange to the one or two values around the median.
+        for param, vals in fullrange.items():
+            if type(vals) is set:
+                subrange[param] = list(vals)
+            else:
+                median_index = (len(vals)-1)//2
+                subrange[param] = [vals[median_index]]
+                if len(vals) > 1:
+                    subrange[param].append(vals[median_index + 1])
+        is_extended = True
+    else:
+        # Extend the subrange where the best config sits on its boundary and
+        # the full range still has values beyond that boundary.
+        for param in fullrange.keys():
+            if type(fullrange[param]) is set:
+                continue
+            best_setting = best_config[param]
+            is_left_subrange = subrange[param][0] == best_setting
+            is_right_subrange = subrange[param][-1] == best_setting
+            is_left_fullrange = fullrange[param][0] == best_setting
+            is_right_fullrange = fullrange[param][-1] == best_setting
+            extend_index = fullrange[param].index(best_setting)
+            if is_left_subrange and not is_left_fullrange:
+                subrange[param].insert(0, fullrange[param][extend_index - 1])
+                is_extended = True
+            elif is_right_subrange and not is_right_fullrange:
+                subrange[param].append(fullrange[param][extend_index + 1])
+                is_extended = True
+    return is_extended
+
+
+def random_search(learner, params={}, seed=0, attempts_per_param=2):
+    """
+    Executes a random search over the parameters, given a learner (a wrapper around a learning
+    algorithm) and a dictionary mapping parameter names to their ranges (lists for ordered ranges,
+    sets for unordered value alternatives). The ranges in params are the maximal ranges; random
+    sampling draws from a smaller subrange of them, and that subrange is extended whenever the
+    best configuration lies on its boundary.
+
+    The learner needs to implement the following method:
+
+        epochs_to_model_costs = learner.learn(num_epochs=num_epochs, config_dict=config, seed=seed)
+
+    where num_epochs is the list of epochs/checkpoints to consider for optimization, and config is
+    a dictionary with a chosen (sampled) value for each hyper-parameter (the number of epochs is
+    not one of them). The returned epochs_to_model_costs maps each epoch number to a tuple
+    (model at that epoch, train loss, validation loss, test loss, accuracy, macro F1, weighted F1).
+
+    :param learner: Wrapper around the learning algorithm.
+    :param params: Maximal ranges to optimize over.
+    :param seed: Random seed passed to the learner.
+    :param attempts_per_param: Number of configurations sampled per tunable parameter in each round.
+    :return: Best model, best config (including the chosen number of epochs), and the test cost of
+        that config.
+    """
+    print("full parameter range:")
+    print(params)
+    print("===")
+
+    shuffle_seed = 0
+    random.seed(shuffle_seed)
+    params_subrange = {}
+
+    best_cost = sys.float_info.max
+    associated_test_cost = sys.float_info.max
+    best_config = {}
+    best_model = None
+    tried_configs = set()
+
+    params_copy = params.copy()
+    num_epochs = params_copy["num_epochs"]
+    del params_copy["num_epochs"]
+
+    # attempts_per_param samples per round for each parameter that actually offers a choice.
+    attempts_per_round = max(
+        1, attempts_per_param * sum([1 for l in params_copy.values() if len(l) > 1]))
+
+    while extend_subrange(params_subrange, params_copy, best_config):
+        print("params_subrange:")
+        print(params_subrange)
+        print("===")
+
+        for setting_nr in range(attempts_per_round):
+            start = time.time()
+
+            config = {}
+            for param, settings in params_subrange.items():
+                selection = random.choice(settings)
+                config[param] = selection
+
+            if frozenset(config.items()) not in tried_configs:
+                print(" === Running config: ===")
+                print(config)
+                tried_configs.add(frozenset(config.items()))
+                epochs_to_model_costs = learner.learn(
+                    num_epochs=num_epochs, config_dict=config, seed=seed)
+                shuffle_seed += 1
+                random.seed(shuffle_seed)
+                for num_epochs_selected, model_costs in epochs_to_model_costs.items():
+                    model, _, cost_valid, cost_test, _, _, _ = model_costs
+                    config["num_epochs"] = num_epochs_selected
+                    print(config)
+                    print("Cost (valid, test_info): %f, %s" %
+                          (cost_valid, str(cost_test)))
+                    if cost_valid < best_cost:
+                        best_config = copy.deepcopy(config)
+                        best_cost = cost_valid
+                        best_model = model
+                        associated_test_cost = cost_test
+                time_elapsed = time.time() - start
+                print("time (s):" + str(time_elapsed))
+                print("Best config and cost so far:")
+                print(best_config)
+                print(best_cost)
+                print(associated_test_cost)
+                print("===")
+            else:
+                print(" === already tried: ===")
+                print(config)
+                print("===")
+    print("Best config, dev cost, test cost:")
+    print(best_config)
+    print(best_cost)
+    print(associated_test_cost)
+    print("===")
+    return best_model, best_config, associated_test_cost
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description='Hyper-parameter search for the BIOS Tagger.')
+    parser.add_argument('modelname', type=str,
+                        help='type of model to be trained: FOFE encodings or Classic embeddings')
+    parser.add_argument('datafile', type=str,
+                        help='file or folder containing the data')
+    parser.add_argument('paramfile', type=str,
+                        help='file or folder to save the model and metrics')
+    parser.add_argument('--batch_size', type=int, default=8,
+                        help='size of the data batches')
+    parser.add_argument('--num_epochs', nargs='+', type=int, default=[0, 1, 5, 10],
+                        help='epoch checkpoints to evaluate during the search')
+
+    args = parser.parse_args()
+
+    learner = Wrapper(args.modelname, args.datafile,
+                      args.paramfile, args.batch_size)
+    params = {'num_epochs': args.num_epochs,
+              'embedding_size': [50],
+              'hidden_size': [50, 100, 200],
+              'dropout': {0.3, 0.5, 0.7},
+              'learn_rate': [0.001, 0.01, 0.1],
+              'reg_factor': [0.0, 0.001, 0.01]}
+
+    best_model, best_config, associated_test_cost = random_search(
+        learner, params, seed=0, attempts_per_param=2)
+    torch.save(best_model, "hyper_best_model_fofe.nnp")
+    with open("hyper_best_config_fofe.txt", "wb") as hyper:
+        pickle.dump(best_config, hyper)
+        pickle.dump(associated_test_cost, hyper)
diff --git a/src/py-scripts/wrapper.py b/src/py-scripts/wrapper.py
new file mode 100644
index 0000000..9d9e8c5
--- /dev/null
+++ b/src/py-scripts/wrapper.py
@@ -0,0 +1,40 @@
+from tagger import Tagger
+
+
+class Wrapper:
"""Wrapper for model to pass to hyper optimisation + + Arguments: + modelname {string} - either "FOFE" for Fofe character encoding or "Classic" for classic trainable embedding layer + datafile {string} - path to data + paramfile {string} - path to save model and metrics to + batch_size {number} - size of training batches + + Returns: + dictionary -- maps each evaluation epoch to tuple of model, train_loss, dev_loss, test_loss, + accuracy, macro and weighted F1 score + hyper optimisation script chooses best config based on this dictionary + """ + + def __init__(self, modelname, datafile, paramfile, batch_size): + self.modelname = modelname + self.datafile = datafile + self.batchsize = batch_size + self.paramfile = paramfile + + def learn(self, num_epochs, config_dict, seed): + # config_dict contains a chosen value for each parameter + model = Tagger(self.modelname, self.datafile, self.paramfile, + num_epochs, self.batchsize, **config_dict) + # train + metrics = model.train(num_epochs, seed, **config_dict) + # metrics is dict = {epoch: (model, train_loss, dev_loss,test_loss, acc, f1_macro, f1_weighted)} + return metrics + + +if __name__ == '__main__': + learner = Wrapper('FOFE', 'Atis.json', "hyper", 8) + config_dict = {'embedding_size': 100, 'hidden_size': 100, + 'dropout_rate': 0.5, 'learn_rate': 0.01, 'reg_factor': 0.0} + learner.learn([0, 1, 5, 10], config_dict, 0) diff --git a/src/sh-scripts/hyper_atis_fofe.sh b/src/sh-scripts/hyper_atis_fofe.sh new file mode 100755 index 0000000..cb71a4d --- /dev/null +++ b/src/sh-scripts/hyper_atis_fofe.sh @@ -0,0 +1,5 @@ +#!/bin/bash + +num_epochs=($(seq 0 5 30)) + +../hyperopt.py FOFE ../data/Atis.json hyper --num_epochs "${num_epochs[@]}"