-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrain.py
144 lines (120 loc) · 5.34 KB
/
train.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
import argparse

# Command-line interface for the training script.
# Positional arguments pick the architecture and the dataset; optional flags
# control hyperparameters, regularization, logging, and model persistence.
# NOTE: description previously said "Download and transform EEG dataset",
# which belongs to the data-preparation script, not this trainer.
parser = argparse.ArgumentParser(description="Train a neural network model on an EEG dataset")
parser.add_argument("model",
                    type=str,
                    choices=["dense", "cnn", "lstm"],
                    help="Neural network model type")
parser.add_argument("dataset",
                    type=str,
                    help="Path to dataset file/folder")
parser.add_argument("--units",
                    type=str,
                    default="64,32",
                    help="Layer units separated by comma (e.g. 128,64,32)")
parser.add_argument("--epochs",
                    type=int,
                    default=20,
                    help="Number of epochs the neural network will be trained on")
parser.add_argument("--normalize",
                    action="store_true",
                    help="Enable batch normalization on each layer")
parser.add_argument("--dropout",
                    type=float,
                    help="Enable dropout regularization on each layer, the specified value determines dropout rate on each layer")
parser.add_argument("--logdir",
                    type=str,
                    help="Enable Tensorboard and save the log to the specified path")
parser.add_argument("--validation-split",
                    type=float,
                    default=0.0,
                    help="Run model validation while training, the specified value determines the validation size (0.0 - 1.0)")
parser.add_argument("--evaluation-split",
                    type=float,
                    help="Run model evaluation after training, the specified value determines the test size (0.0 - 1.0)")
parser.add_argument("--save",
                    type=str,
                    help="Save latest model to the specified folder as SavedModel")
parser.add_argument("--verbose",
                    action="store_true",
                    help="Enable default TensorFlow debug information")
# main app entry point: parse args, load data, build/train/evaluate a model
if __name__ == "__main__":
    import sys

    # parse arguments
    args = vars(parser.parse_args())

    # reduce TensorFlow verbosity unless --verbose was given; the environment
    # variable must be set BEFORE TensorFlow is imported to take effect
    if not args["verbose"]:
        import os
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # heavy imports are deferred until after argument parsing so that
    # `--help` and argument errors stay fast
    from termcolor import colored
    from typing import List
    import pandas as pd
    import tensorflow as tf
    from sklearn.model_selection import train_test_split
    from sibyl import deep_learning as dl
    from sibyl.util import filesystem as fs

    # validations: a supported extension and an existing file/folder.
    # Exit with a non-zero status so callers (shells, CI) see the failure;
    # the original bare exit() reported success (status 0).
    if not fs.is_file_extension(args["dataset"], [".csv", ".parquet"]):
        print(colored("Dataset is not in .csv or .parquet extension", "red"))
        sys.exit(1)
    if not fs.is_file_exists(args["dataset"]) and not fs.is_directory_exists(args["dataset"]):
        print(colored("Dataset file does not exist", "red"))
        sys.exit(1)

    # load dataset; choose the reader by file extension — the previous
    # substring test ("parquet" in path) misfired on any path that merely
    # contained the word, e.g. /data/parquet_sets/file.csv
    print(colored("Loading dataset...", "cyan"))
    if args["dataset"].endswith(".parquet"):
        df: pd.DataFrame = pd.read_parquet(args["dataset"])
    else:
        df = pd.read_csv(args["dataset"])

    # reshape the dataset into model inputs X and labels y
    # NOTE(review): exact shapes depend on dl.reshape_data; downstream code
    # indexes X.shape[1:3] and y.shape[1], so X is assumed 3-D and y 2-D — confirm
    X, y = dl.reshape_data(df)

    # prepare split data, if needed; a stratified split with a fixed seed
    # keeps class balance and makes runs reproducible
    if args["evaluation_split"] is not None:
        print(colored("\nSplitting dataset for evaluation...", "cyan"))
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=args["evaluation_split"], stratify=y, random_state=42)
        print("Train shape: ", X_train.shape, y_train.shape)
        print("Test shape: ", X_test.shape, y_test.shape)
    else:
        X_train, X_test, y_train, y_test = X, None, y, None
        print("Train shape: ", X_train.shape, y_train.shape)

    # build model sequence
    print(colored("\nBuilding model...", "cyan"))
    model_args = {
        "kind": args["model"],
        "units": [int(u) for u in args["units"].split(",")],
        "input_shape": (X_train.shape[1], X_train.shape[2]),
        "num_classes": y_train.shape[1],
        "normalize": args["normalize"],
        "dropout": args["dropout"],
    }
    model = dl.build_model(**model_args)

    # create tensorboard callback, if needed; the log prefix encodes model
    # type, regularization flags, layer units and epoch count for the run name
    callbacks: List[tf.keras.callbacks.Callback] = []
    if args["logdir"] is not None:
        print(colored("\nPreparing TensorBoard...", "cyan"))
        log_prefix = args["model"]
        if args["normalize"]:
            log_prefix = log_prefix + "norm"
        if args["dropout"] is not None:
            log_prefix = log_prefix + "dropout"
        log_prefix = log_prefix + "_" + args["units"] + "_" + str(args["epochs"])
        callbacks.append(dl.create_tensorboard(log_prefix, args["logdir"]))

    # compile model with an optimizer and loss function
    print(colored("\nFinalizing model...", "cyan"))
    dl.finalize_model(model)
    print(colored("\n{} --- Model Summary ---".format(" " * 20), "cyan"))
    print(model.summary())

    # start model training
    print(colored("\nTraining model...", "cyan"))
    dl.train_model(model, X_train, y_train, args["epochs"], args["validation_split"], callbacks)

    # save model, if needed
    if args["save"] is not None:
        print(colored("\nSaving model state...", "cyan"))
        tf.saved_model.save(model, args["save"])

    # perform evaluation on the held-out split, if one was made
    if args["evaluation_split"] is not None:
        print(colored("\nRunning model evaluation...", "cyan"))
        loss, acc = model.evaluate(X_test, y_test)
        print("Loss: {}\nAccuracy: {}".format(loss, acc))