# Code to accompany the paper "Constructions in combinatorics via neural networks and LP solvers" by A Z Wagner
# Code template
#
# This code works on tensorflow version 1.14.0 and python version 3.6.3
#
# For later versions of tensorflow there seems to be a massive overhead in the predict function for some reason, and/or it produces mysterious errors.
# If the code doesn't work, make sure you are using these versions of tf and python.
#
# I used keras version 2.3.1; I am not sure whether this matters, but I recommend it just to be safe.
#
# Use this file as a template if numba njit() can be applied to the calc_score function for your problem.
# If that is not an option, modify the simpler code in the *demos* folder instead.

import networkx as nx #for various graph parameters, such as eigenvalues, matching number, etc. Does not work with numba (yet)
import random
import numpy as np
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import SGD, Adam
from keras.models import load_model
from statistics import mean
import pickle
import time
import math
import matplotlib.pyplot as plt
from numba import njit
N = 20 #number of vertices in the graph. Only used in the reward function, not directly relevant to the algorithm
DECISIONS = int(N*(N-1)/2) #The length of the word we are generating. If we are generating a graph, we create a 0-1 word of length (N choose 2)
LEARNING_RATE = 0.0001 #Increase this to make convergence faster, decrease if the algorithm gets stuck in local optima too often.
n_sessions = 1000 #number of new sessions per iteration
percentile = 93 #top 100-X percentile we are learning from
super_percentile = 94 #top 100-X percentile that survives to next iteration
FIRST_LAYER_NEURONS = 128 #Number of neurons in the hidden layers.
SECOND_LAYER_NEURONS = 64
THIRD_LAYER_NEURONS = 4
n_actions = 2 #The size of the alphabet. In this file we will assume this is 2. There are a few things we need to change when the alphabet size is larger,
#such as one-hot encoding the input, and using categorical_crossentropy as a loss function.
observation_space = 2*DECISIONS #Leave this at 2*DECISIONS. The input vector will have size 2*DECISIONS, where the first DECISIONS letters encode our partial word (with zeros on
#the positions we haven't considered yet), and the next DECISIONS bits one-hot encode which letter we are considering now.
#So e.g. [0,1,0,0, 0,0,1,0] means we have the partial word 01 and we are considering the third letter now.
#Is there a better way to format the input to make it easier for the neural network to understand things?
len_game = DECISIONS
state_dim = (observation_space,)
INF = 1000000
#Model structure: a sequential network with three hidden layers, sigmoid activation in the output.
#I usually used relu activation in the hidden layers but play around to see what activation function and what optimizer works best.
#It is important that the loss is binary cross-entropy if alphabet size is 2.
model = Sequential()
model.add(Dense(FIRST_LAYER_NEURONS, activation="relu"))
model.add(Dense(SECOND_LAYER_NEURONS, activation="relu"))
model.add(Dense(THIRD_LAYER_NEURONS, activation="relu"))
model.add(Dense(1, activation="sigmoid"))
model.build((None, observation_space))
model.compile(loss="binary_crossentropy", optimizer=SGD(learning_rate = LEARNING_RATE)) #Adam optimizer also works well, with lower learning rate
print(model.summary())
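# Note (illustrative sketch, an assumption rather than part of the original template): for an
# alphabet with n_actions > 2, as the comment next to n_actions above suggests, one would
# typically widen the output layer and switch the loss, roughly:
#
#   model.add(Dense(n_actions, activation="softmax"))
#   model.compile(loss="categorical_crossentropy", optimizer=SGD(learning_rate=LEARNING_RATE))
#
# and one-hot encode the sampled actions (e.g. with keras.utils.to_categorical, imported above)
# before fitting.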
def calc_score(state):
    """
    Reward function for your problem.
    Input: a 0-1 vector of length DECISIONS. It represents the graph (or other object) you have created.
    Output: the reward/score for your construction. See files in the *demos* folder for examples.
    """
    #
    #
    #
    # -----Your code goes here-----
    #
    #
    #
    return reward
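# --- Illustrative example only (an assumption, not from the original template) ---
# A toy reward that could be plugged into calc_score: it favours graphs with many
# edges while heavily penalising triangles. It only uses plain loops and numpy, so
# numba's njit() should be able to compile it just like calc_score is compiled below.
# For the rewards actually used in the paper, see the *demos* folder.
def example_calc_score(state):
    # The first DECISIONS entries of state list the upper-triangular entries of
    # the adjacency matrix row by row: (0,1), (0,2), ..., (0,N-1), (1,2), ...
    adjMat = np.zeros((N, N), dtype=np.int64)
    count = 0
    for i in range(N):
        for j in range(i+1, N):
            adjMat[i][j] = state[count]
            adjMat[j][i] = state[count]
            count += 1
    edges = 0
    triangles = 0
    for i in range(N):
        for j in range(i+1, N):
            edges += adjMat[i][j]
            for k in range(j+1, N):
                triangles += adjMat[i][j] * adjMat[j][k] * adjMat[i][k]
    # reward each edge, penalise each triangle heavily
    return edges - 5 * triangles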
####No need to change anything below here.
jitted_calc_score = njit()(calc_score)
def play_game(n_sessions, actions, state_next, states, prob, step, total_score):
    """
    Advance all n_sessions games by one step: sample an action for each session
    from the predicted probability prob[i], write it into the partial word, and
    move the one-hot "current position" marker forward. When the word is complete,
    compute its score.
    """
    for i in range(n_sessions):
        if np.random.rand() < prob[i]:
            action = 1
        else:
            action = 0
        actions[i][step-1] = action
        state_next[i] = states[i,:,step-1]
        if (action > 0):
            state_next[i][step-1] = action
        state_next[i][DECISIONS + step-1] = 0
        if (step < DECISIONS):
            state_next[i][DECISIONS + step] = 1
        #calculate final score
        terminal = step == DECISIONS
        if terminal:
            total_score[i] = jitted_calc_score(state_next[i])
        # record sessions
        if not terminal:
            states[i,:,step] = state_next[i]
    return actions, state_next, states, total_score, terminal
jitted_play_game = njit()(play_game)
def generate_session(agent, n_sessions, verbose = 1):
    """
    Play n_sessions games using the agent neural network.
    Terminate when the games finish.
    Code inspired by https://github.com/yandexdataschool/Practical_RL/blob/master/week01_intro/deep_crossentropy_method.ipynb
    """
    states = np.zeros([n_sessions, observation_space, len_game], dtype=int)
    actions = np.zeros([n_sessions, len_game], dtype=int)
    state_next = np.zeros([n_sessions, observation_space], dtype=int)
    prob = np.zeros(n_sessions)
    states[:, DECISIONS, 0] = 1
    step = 0
    total_score = np.zeros([n_sessions])
    pred_time = 0
    play_time = 0
    while (True):
        step += 1
        tic = time.time()
        prob = agent.predict(states[:,:,step-1], batch_size=n_sessions)
        pred_time += time.time()-tic
        tic = time.time()
        actions, state_next, states, total_score, terminal = jitted_play_game(n_sessions, actions, state_next, states, prob, step, total_score)
        play_time += time.time()-tic
        if terminal:
            break
    if (verbose):
        print("Predict: " + str(pred_time) + ", play: " + str(play_time))
    return states, actions, total_score
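# For reference: generate_session returns states of shape
# (n_sessions, observation_space, len_game), actions of shape (n_sessions, len_game),
# and total_score of shape (n_sessions,).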
def select_elites(states_batch, actions_batch, rewards_batch, percentile=50):
    """
    Select states and actions from games that have rewards >= percentile
    :param states_batch: list of lists of states, states_batch[session_i][t]
    :param actions_batch: list of lists of actions, actions_batch[session_i][t]
    :param rewards_batch: list of rewards, rewards_batch[session_i]
    :returns: elite_states, elite_actions, both 1D lists of states and respective actions from elite sessions

    This function was mostly taken from https://github.com/yandexdataschool/Practical_RL/blob/master/week01_intro/deep_crossentropy_method.ipynb
    If this function is the bottleneck, it can easily be sped up using numba
    """
    counter = n_sessions * (100.0 - percentile) / 100.0
    reward_threshold = np.percentile(rewards_batch, percentile)

    elite_states = []
    elite_actions = []
    elite_rewards = []
    for i in range(len(states_batch)):
        if rewards_batch[i] >= reward_threshold - 0.0000001:
            if (counter > 0) or (rewards_batch[i] >= reward_threshold + 0.0000001):
                for item in states_batch[i]:
                    elite_states.append(item.tolist())
                for item in actions_batch[i]:
                    elite_actions.append(item)
            counter -= 1
    elite_states = np.array(elite_states, dtype=int)
    elite_actions = np.array(elite_actions, dtype=int)
    return elite_states, elite_actions
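# A rough alternative (an assumption, not in the original code): since the loop above
# only filters whole sessions by reward, a mostly-vectorised version could look like
#
#   keep = rewards_batch >= reward_threshold - 0.0000001
#   elite_states = np.concatenate([s for s, k in zip(states_batch, keep) if k])
#   elite_actions = np.concatenate([a for a, k in zip(actions_batch, keep) if k])
#
# although this drops the `counter` cap that limits how many threshold-level sessions
# are taken, so it only matches the loop exactly when there are few ties at the threshold.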
def select_super_sessions(states_batch, actions_batch, rewards_batch, percentile=90):
    """
    Select all the sessions that will survive to the next generation
    Similar to select_elites function
    If this function is the bottleneck, it can easily be sped up using numba
    """
    counter = n_sessions * (100.0 - percentile) / 100.0
    reward_threshold = np.percentile(rewards_batch, percentile)

    super_states = []
    super_actions = []
    super_rewards = []
    for i in range(len(states_batch)):
        if rewards_batch[i] >= reward_threshold - 0.0000001:
            if (counter > 0) or (rewards_batch[i] >= reward_threshold + 0.0000001):
                super_states.append(states_batch[i])
                super_actions.append(actions_batch[i])
                super_rewards.append(rewards_batch[i])
                counter -= 1
    super_states = np.array(super_states, dtype=int)
    super_actions = np.array(super_actions, dtype=int)
    super_rewards = np.array(super_rewards)
    return super_states, super_actions, super_rewards
super_states = np.empty((0,len_game,observation_space), dtype = int)
super_actions = np.array([], dtype = int)
super_rewards = np.array([])
sessgen_time = 0
fit_time = 0
score_time = 0
myRand = random.randint(0,1000) #used in the filename
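#Overview of the loop below:
# 1. generate_session: roll out n_sessions new words with the current model.
# 2. Append the surviving "super" sessions from the previous iteration.
# 3. select_elites picks the top (100-percentile)% of sessions; the model is then
#    fit on their (state, action) pairs.
# 4. select_super_sessions picks the top (100-super_percentile)% of sessions, which
#    survive into the next iteration.
# 5. Every 20 iterations the best constructions and rewards are written to files.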
for i in range(1000000): #1000000 generations should be plenty
    #generate new sessions
    #performance can be improved with joblib
    tic = time.time()
    sessions = generate_session(model, n_sessions, 0) #change 0 to 1 to print out how much time each step in generate_session takes
    sessgen_time = time.time()-tic
    tic = time.time()

    states_batch = np.array(sessions[0], dtype=int)
    actions_batch = np.array(sessions[1], dtype=int)
    rewards_batch = np.array(sessions[2])
    states_batch = np.transpose(states_batch, axes=[0,2,1])

    states_batch = np.append(states_batch, super_states, axis=0)
    if i > 0:
        actions_batch = np.append(actions_batch, np.array(super_actions), axis=0)
    rewards_batch = np.append(rewards_batch, super_rewards)
    randomcomp_time = time.time()-tic
    tic = time.time()

    elite_states, elite_actions = select_elites(states_batch, actions_batch, rewards_batch, percentile=percentile) #pick the sessions to learn from
    select1_time = time.time()-tic
    tic = time.time()

    super_sessions = select_super_sessions(states_batch, actions_batch, rewards_batch, percentile=super_percentile) #pick the sessions to survive
    select2_time = time.time()-tic
    tic = time.time()

    super_sessions = [(super_sessions[0][i], super_sessions[1][i], super_sessions[2][i]) for i in range(len(super_sessions[2]))]
    super_sessions.sort(key=lambda session: session[2], reverse=True)
    select3_time = time.time()-tic
    tic = time.time()

    model.fit(elite_states, elite_actions) #learn from the elite sessions
    fit_time = time.time()-tic
    tic = time.time()

    super_states = [super_sessions[i][0] for i in range(len(super_sessions))]
    super_actions = [super_sessions[i][1] for i in range(len(super_sessions))]
    super_rewards = [super_sessions[i][2] for i in range(len(super_sessions))]

    rewards_batch.sort()
    mean_all_reward = np.mean(rewards_batch[-100:])
    mean_best_reward = np.mean(super_rewards)
    score_time = time.time()-tic

    print("\n" + str(i) + ". Best individuals: " + str(np.flip(np.sort(super_rewards))))

    #comment out the line below if you do not want to see how much time each step in this loop takes
    print("Mean reward: " + str(mean_all_reward) + "\nSessgen: " + str(sessgen_time) + ", other: " + str(randomcomp_time) + ", select1: " + str(select1_time) + ", select2: " + str(select2_time) + ", select3: " + str(select3_time) + ", fit: " + str(fit_time) + ", score: " + str(score_time))

    if (i % 20 == 1): #Write all important info to files every 20 iterations
        with open('best_species_pickle_' + str(myRand) + '.txt', 'wb') as fp:
            pickle.dump(super_actions, fp)
        with open('best_species_txt_' + str(myRand) + '.txt', 'w') as f:
            for item in super_actions:
                f.write(str(item))
                f.write("\n")
        with open('best_species_rewards_' + str(myRand) + '.txt', 'w') as f:
            for item in super_rewards:
                f.write(str(item))
                f.write("\n")
        with open('best_100_rewards_' + str(myRand) + '.txt', 'a') as f:
            f.write(str(mean_all_reward) + "\n")
        with open('best_elite_rewards_' + str(myRand) + '.txt', 'a') as f:
            f.write(str(mean_best_reward) + "\n")
    if (i % 200 == 2): #To create a timeline, like in Figure 3
        with open('best_species_timeline_txt_' + str(myRand) + '.txt', 'a') as f:
            f.write(str(super_actions[0]))
            f.write("\n")
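# How to inspect the saved constructions later (illustrative sketch, not part of the
# original script); the pickle file written above stores super_actions, a list of 0-1
# action vectors of length DECISIONS. Replace 123 with your myRand value:
#
#   with open('best_species_pickle_123.txt', 'rb') as fp:
#       best_constructions = pickle.load(fp)
#   print(best_constructions[0])  # highest-reward construction found so far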