forked from blakeMilner/DeepQLearning
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest.moon
49 lines (32 loc) · 1.11 KB
/
test.moon
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
require 'xlua'
Brain = require 'deepqlearn'
-- Build a 1-based array of `size` values, each produced by
-- `randf startnum, endnum`.
--
-- size:     number of entries to generate (0 yields an empty table)
-- startnum: first bound handed to randf
-- endnum:   second bound handed to randf
--
-- Returns the new table with entries at indices 1..size.
-- NOTE(review): randf is not defined in this file; presumably it is a global
-- provided by the deepqlearn module -- confirm.
randtable = (size, startnum, endnum) ->
  -- Fill indices 1..size. Writing to i + 1 left slot 1 empty, which is not a
  -- proper Lua sequence (the length operator and array consumers misbehave).
  [randf(startnum, endnum) for _ = 1, size]
-- Simple test adapted from readme.md: the brain receives reward 1 whenever the
-- action it returns equals the randomly drawn outcome used to build its input,
-- and reward 0 otherwise.
num_outcomes = 3

-- Both arguments to Brain.init are num_outcomes
-- (presumably the input count and the action count -- confirm in deepqlearn).
Brain.init num_outcomes, num_outcomes

nb_train = 1000
nb_test = 1000

-- Start at 1 so that exactly nb_train training steps run; starting at 0
-- performed nb_train + 1 steps and disagreed with the evaluation loop.
for step = 1, nb_train
  target = math.random 1, num_outcomes
  -- state is built by randtable from the bounds target and target + 1
  observation = randtable num_outcomes, target, target + 1
  xlua.progress step, nb_train

  -- Hand the brain its own copy of the state. table.copy is not standard Lua;
  -- presumably supplied by deepqlearn -- confirm.
  action = Brain.forward table.copy(observation) -- index of the chosen action
  reward = if action == target then 1 else 0
  Brain.backward reward -- feed the reward back so the brain can learn
-- Reconfigure the brain before measuring accuracy.
-- Original note: with epsilon_test_time at 0.0 no more random choices are made.
Brain.epsilon_test_time = 0.0
Brain.learning = false

-- Ask the brain for an action nb_test times and count how often the returned
-- action equals the outcome that was used to build the input state.
correct = 0
for trial = 1, nb_test
  xlua.progress trial, nb_test
  target = math.random 1, num_outcomes
  observation = randtable num_outcomes, target, target + 1
  -- the brain receives a copy of the state, as in the training loop
  chosen = Brain.forward table.copy(observation)
  if chosen == target
    correct += 1

-- Report the share of matching answers as a percentage.
percent = 100 * correct / nb_test
print "Test cases correct: #{percent} %"