Commit 7645f0e (1 parent: 2ac26ea)
Anna Bettina Ovsiyovna Steinberg committed on Jul 10, 2019
Showing 3 changed files with 208 additions and 0 deletions.
hyperopt.py (new file, +163 lines)
#!/usr/bin/python3

import random
import sys
import time
import copy
from wrapper import Wrapper
import argparse
import torch
import pickle

def extend_subrange(subrange, fullrange, best_config):
    """Grow the sampling subrange; returns True if anything was extended."""
    is_extended = False
    if subrange == {}:
        # Initialize subrange to median values.
        for param, vals in fullrange.items():
            if type(vals) is set:
                subrange[param] = list(vals)
            else:
                median_index = (len(vals) - 1) // 2
                subrange[param] = [vals[median_index]]
                if len(vals) > 1:
                    subrange[param].append(vals[median_index + 1])
        is_extended = True
    else:
        # Increase subrange if best config is on the corners and can be extended.
        for param in fullrange.keys():
            if type(fullrange[param]) is set:
                continue
            best_setting = best_config[param]
            is_left_subrange = subrange[param][0] == best_setting
            is_right_subrange = subrange[param][-1] == best_setting
            is_left_fullrange = fullrange[param][0] == best_setting
            is_right_fullrange = fullrange[param][-1] == best_setting
            extend_index = fullrange[param].index(best_setting)
            if is_left_subrange and not is_left_fullrange:
                subrange[param].insert(0, fullrange[param][extend_index - 1])
                is_extended = True
            elif is_right_subrange and not is_right_fullrange:
                subrange[param].append(fullrange[param][extend_index + 1])
                is_extended = True
    return is_extended
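

# A sketch of how extend_subrange behaves, with hypothetical values chosen for
# illustration: the first call seeds each ordered parameter with the two values
# around its median; later calls widen only parameters whose best setting sits
# on a subrange boundary that is not yet a fullrange boundary.
#
#   fullrange = {'hidden_size': [50, 100, 200], 'dropout': {0.3, 0.5}}
#   subrange = {}
#   extend_subrange(subrange, fullrange, {})
#   # subrange == {'hidden_size': [100, 200], 'dropout': list({0.3, 0.5})}
#   extend_subrange(subrange, fullrange, {'hidden_size': 100, 'dropout': 0.5})
#   # 100 was the left edge of the subrange but not of the full range, so it
#   # is pulled in: subrange['hidden_size'] == [50, 100, 200]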


def random_search(learner, params={}, seed=0, attempts_per_param=2):
    """
    Runs a random search over the parameters, given a learner (a wrapper over a
    learning algorithm) and a dictionary mapping parameter names to their
    ranges (lists for ordered ranges, sets for unordered value alternatives).
    The full ranges define the search space; random sampling draws from a
    smaller subrange, which is extended whenever an optimal configuration lies
    on its boundary.
    The learner needs to implement the following method:
        epochs_to_model_costs = learner.learn(num_epochs=num_epochs, config_dict=config, seed=seed)
    where num_epochs is the list of epochs/checkpoints to consider for
    optimization, and config is a dictionary with a chosen (sampled) value for
    each hyper-parameter (the number of epochs is not one of them).
    The returned epochs_to_model_costs maps epoch numbers to tuples of
    (model at epoch, train loss, validation loss, test loss, accuracy,
    macro F1, weighted F1).
    :param learner: wrapper for the learning algorithm.
    :param params: maximal ranges to optimize over.
    :param seed: random seed.
    :param attempts_per_param: samples drawn per tunable parameter in each round.
    :return: tuple of (best model, best config, test cost of the best config).
    """
print("full parameter range:") | ||
print(params) | ||
print("===") | ||
|
||
shuffle_seed = 0 | ||
random.seed(shuffle_seed) | ||
params_subrange = {} | ||
|
||
best_cost = sys.float_info.max | ||
associated_test_cost = sys.float_info.max | ||
best_config = {} | ||
tried_configs = set() | ||
|
||
params_copy = params.copy() | ||
num_epochs = params_copy["num_epochs"] | ||
del params_copy["num_epochs"] | ||
|
||
# Two samples for each parameter to optimize (only those that have a choice) | ||
attempts_per_round = max( | ||
1, attempts_per_param * sum([1 for l in params_copy.values() if len(l) > 1])) | ||
|
||
while extend_subrange(params_subrange, params_copy, best_config): | ||
print("params_subrange:") | ||
print(params_subrange) | ||
print("===") | ||
|
||
for setting_nr in range(attempts_per_round): | ||
start = time.time() | ||
|
||
config = {} | ||
for param, settings in params_subrange.items(): | ||
selection = random.choice(settings) | ||
config[param] = selection | ||
|
||
if frozenset(config.items()) not in tried_configs: | ||
print(" === Running config: ===") | ||
print(config) | ||
tried_configs.add(frozenset(config.items())) | ||
epochs_to_model_costs = learner.learn( | ||
num_epochs=num_epochs, config_dict=config, seed=seed) | ||
shuffle_seed += 1 | ||
random.seed(shuffle_seed) | ||
for num_epochs_selected, model_costs in epochs_to_model_costs.items(): | ||
model, _, cost_valid, cost_test, _, _, _ = model_costs | ||
config["num_epochs"] = num_epochs_selected | ||
print(config) | ||
print("Cost (valid, test_info): %f, %s" % | ||
(cost_valid, str(cost_test))) | ||
if cost_valid < best_cost: | ||
best_config = copy.deepcopy(config) | ||
best_cost = cost_valid | ||
best_model = model | ||
associated_test_cost = cost_test | ||
time_elapsed = time.time() - start | ||
print("time (s):" + str(time_elapsed)) | ||
print("Best config and cost so far:") | ||
print(best_config) | ||
print(best_cost) | ||
print(associated_test_cost) | ||
print("===") | ||
else: | ||
print(" === already tried: ===") | ||
print(config) | ||
print("===") | ||
print("Best config, dev cost, test cost:") | ||
print(best_config) | ||
print(best_cost) | ||
print(associated_test_cost) | ||
print("===") | ||
return best_model, best_config, associated_test_cost | ||
|
||
|
||
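

# A minimal sketch of the learner contract random_search expects; MockLearner
# is hypothetical, for illustration only. learn() must return a dict mapping
# each requested epoch to a 7-tuple that random_search unpacks as
# (model, train_loss, valid_loss, test_loss, accuracy, f1_macro, f1_weighted):
#
#   class MockLearner:
#       def learn(self, num_epochs, config_dict, seed):
#           return {e: (None, 0.0, 1.0 / (e + 1), 1.0 / (e + 1), 0.0, 0.0, 0.0)
#                   for e in num_epochs}
#
#   best_model, best_config, test_cost = random_search(
#       MockLearner(), {'num_epochs': [1, 5], 'learn_rate': [0.01, 0.1]})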


parser = argparse.ArgumentParser(
    description='Training program of the BIOS Tagger.')
parser.add_argument('modelname', type=str,
                    help='type of model to be trained: FOFE encodings or Classic embeddings')
parser.add_argument('datafile', type=str,
                    help='file or folder containing the data')
parser.add_argument('paramfile', type=str,
                    help='file or folder to save the model and metrics')
parser.add_argument('--batch_size', type=int, default=8,
                    help='size of the data batches')
parser.add_argument('--num_epochs', nargs='+', type=int, default=[0, 1, 5, 10],
                    help='number of epochs used for training')

args = parser.parse_args()

learner = Wrapper(args.modelname, args.datafile,
                  args.paramfile, args.batch_size)
# Lists are ordered ranges; the dropout set marks unordered alternatives.
params = {'num_epochs': args.num_epochs, 'embedding_size': [50],
          'hidden_size': [50, 100, 200], 'dropout': {0.3, 0.5, 0.7},
          'learn_rate': [0.001, 0.01, 0.1], 'reg_factor': [0.0, 0.001, 0.01]}

best_model, best_config, associated_test_cost = random_search(
    learner, params, seed=0, attempts_per_param=2)
torch.save(best_model, "hyper_best_model_fofe.nnp")
with open("hyper_best_config_fofe.txt", "wb") as hyper:
    pickle.dump(best_config, hyper)
    pickle.dump(associated_test_cost, hyper)
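
Note: the best config and its test cost are pickled sequentially, so despite
the .txt extension the file is binary. A minimal read-back sketch:

    with open("hyper_best_config_fofe.txt", "rb") as hyper:
        best_config = pickle.load(hyper)
        associated_test_cost = pickle.load(hyper)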
wrapper.py (new file, +40 lines)
from tagger import Tagger


class Wrapper:

    """Wrapper around the model to pass to the hyper-optimisation script.
    Arguments:
        modelname {string} -- either "FOFE" for FOFE character encoding or "Classic" for a classic trainable embedding layer
        datafile {string} -- path to the data
        paramfile {string} -- path to save the model and metrics to
        batch_size {number} -- size of the training batches
    learn() returns:
        dictionary -- maps each evaluation epoch to a tuple of (model, train loss,
        dev loss, test loss, accuracy, macro F1, weighted F1); the
        hyper-optimisation script chooses the best config based on this dictionary.
    """

    def __init__(self, modelname, datafile, paramfile, batch_size):
        self.modelname = modelname
        self.datafile = datafile
        self.batchsize = batch_size
        self.paramfile = paramfile

    def learn(self, num_epochs, config_dict, seed):
        # config_dict contains a chosen value for each hyper-parameter.
        model = Tagger(self.modelname, self.datafile, self.paramfile,
                       num_epochs, self.batchsize, **config_dict)
        # Train; metrics is a dict {epoch: (model, train_loss, dev_loss,
        # test_loss, acc, f1_macro, f1_weighted)}.
        metrics = model.train(num_epochs, seed, **config_dict)
        return metrics


if __name__ == '__main__':
    learner = Wrapper('FOFE', 'Atis.json', "hyper", 8)
    config_dict = {'embedding_size': 100, 'hidden_size': 100,
                   'dropout_rate': 0.5, 'learn_rate': 0.01, 'reg_factor': 0.0}
    learner.learn([0, 1, 5, 10], config_dict, 0)
Shell script (new file, +5 lines)
#!/bin/bash

# Sweep epoch checkpoints 0, 5, 10, ..., 30.
num_epochs=($(seq 0 5 30))

../hyperopt.py FOFE ../data/Atis.json hyper --num_epochs "${num_epochs[@]}"
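
For reference, seq 0 5 30 expands to 0 5 10 15 20 25 30, and the quoted array
expansion passes each value as a separate argument to the nargs='+'
--num_epochs option, so the call is equivalent to:

    ../hyperopt.py FOFE ../data/Atis.json hyper --num_epochs 0 5 10 15 20 25 30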