class Config(object):
    """ Wrapper class for various (hyper)parameters. """
    def __init__(self):
        """Sets the default model hyperparameters."""
        # File pattern of the sharded TFRecord files containing
        # SequenceExample protos. Must be provided in training and
        # evaluation modes.
        self.input_file_pattern = None
        # Image format ("jpeg" or "png").
        self.image_format = "jpeg"
        # Dimensions of VGG-16 input images.
        self.image_height = 224
        self.image_width = 224
        # Approximate number of values per input shard. Used to ensure
        # sufficient mixing between shards in training.
        self.values_per_input_shard = 100
        # Minimum number of shards to keep in the input queue.
        self.input_queue_capacity_factor = 2
        # Number of threads for prefetching SequenceExample protos.
        self.num_input_reader_threads = 1
        # Number of threads for image preprocessing. Should be a multiple of 2.
        self.num_preprocess_threads = 2
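        # A sketch of how the two shard knobs above typically combine in an
        # im2txt-style input pipeline (an assumption, not confirmed by this
        # file):
        #
        #   min_queue_examples = (self.values_per_input_shard *
        #                         self.input_queue_capacity_factor)
        #   capacity = min_queue_examples + 100 * self.batch_size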
        # about the model architecture
        self.cnn = 'rpn' # 'rpn', 'vgg16' or 'resnet50'
        self.max_caption_length = 20
        self.dim_embedding = 512
        self.num_lstm_units = 512
        self.num_initalize_layers = 2 # 1 or 2
        self.dim_initalize_layer = 512
        self.num_decode_layers = 2 # 1 or 2
        self.dim_decode_layer = 1024
        # attention mechanism
        self.attention_mechanism = "fc2" # "fc1", "fc2", "rnn" or "bias"
        self.dim_attend_layer = 512 # for "fc1" and "fc2" only
        self.dim_rnn_att_state = 256 # for "rnn" only
        # about the weight initialization and regularization
        self.fc_kernel_initializer_scale = 0.08
        self.fc_kernel_regularizer_scale = 1e-4
        self.fc_activity_regularizer_scale = 0.0
        self.conv_kernel_regularizer_scale = 1e-4
        self.conv_activity_regularizer_scale = 0.0
        self.fc_drop_rate = 0.5
        self.lstm_drop_rate = 0.3
        self.attention_loss_factor = 0.01
        # about the optimization
        self.max_checkpoints_to_keep = 5
        self.log_every_n_steps = 10
        self.num_epochs = 100
        self.batch_size = 4
        self.optimizer = 'Adam' # 'Adam', 'RMSProp', 'Momentum' or 'SGD'
        self.initial_learning_rate = 0.0001
        # Learning rate decay: the rate is scaled by this factor every
        # num_steps_per_decay steps; a factor of 1.0 keeps it constant.
        self.learning_rate_decay_factor = 1.0
        self.num_steps_per_decay = 100000
        # Gradient clipping threshold.
        self.clip_gradients = 5.0
        self.momentum = 0.0
        self.use_nesterov = True
        self.decay = 0.9
        self.centered = True
        self.beta1 = 0.9
        self.beta2 = 0.999
        self.epsilon = 1e-6
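        # How the optimizer settings above are likely consumed (a sketch
        # assuming a TF 1.x-style trainer; not confirmed by this file):
        #
        #   if self.optimizer == 'Adam':
        #       opt = tf.train.AdamOptimizer(lr, beta1=self.beta1,
        #                                    beta2=self.beta2,
        #                                    epsilon=self.epsilon)
        #   elif self.optimizer == 'RMSProp':
        #       opt = tf.train.RMSPropOptimizer(lr, decay=self.decay,
        #                                       momentum=self.momentum,
        #                                       epsilon=self.epsilon,
        #                                       centered=self.centered)
        #   elif self.optimizer == 'Momentum':
        #       opt = tf.train.MomentumOptimizer(lr, self.momentum,
        #                                        use_nesterov=self.use_nesterov)
        #   else:  # 'SGD'
        #       opt = tf.train.GradientDescentOptimizer(lr)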
        # about the saver
        self.save_period = 1000
        # NOTE: absolute, machine-specific paths; point these at your own
        # output directories.
        self.save_dir = '/home/hillyess/ai/project-image-caption/output/model'
        self.eval_dir = '/home/hillyess/ai/project-image-caption/output/eval'
        # about the vocabulary
        self.vocabulary_file = './vocabulary.csv'
        # Number of unique words in the vocab (plus 1, for <UNK>).
        # The default value is larger than the expected actual vocab size to
        # allow for differences between tokenizer versions used in
        # preprocessing. There is no harm in using a value greater than the
        # actual vocab size, but using a value less than the actual vocab
        # size will result in an error.
        self.vocabulary_size = 12000
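        # A hypothetical sanity check for the setting above (sketch only;
        # assumes vocabulary.csv holds one entry per line):
        #
        #   with open(self.vocabulary_file) as f:
        #       actual_size = sum(1 for _ in f) + 1  # +1 for <UNK>
        #   assert self.vocabulary_size >= actual_size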
        # about the training
        # self.train_image_dir = './train/images/'
        # self.train_caption_file = './train/captions_train2014.json'
        # self.temp_annotation_file = './train/anns.csv'
        # self.temp_data_file = './train/data.npy'
        # about the evaluation
        # self.eval_image_dir = './val/images/'
        # self.eval_caption_file = './val/captions_val2014.json'
        # self.eval_result_dir = '../val/results/'
        # self.eval_result_file = '../val/results.json'
        # self.save_eval_result_as_image = False
        # about the testing
        # self.test_image_dir = './test/images/'
        # self.test_result_dir = './test/results/'
        # self.test_result_file = './test/results.csv'
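
# A minimal usage sketch (assumption: the training/evaluation scripts read
# these attributes directly; the override values below are illustrative):
if __name__ == '__main__':
    config = Config()
    # Override defaults before handing the config to the model/trainer.
    config.input_file_pattern = './train/shards/train-?????-of-00016'  # hypothetical path
    config.batch_size = 32
    print('cnn=%s, optimizer=%s, batch_size=%d'
          % (config.cnn, config.optimizer, config.batch_size))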