-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathmain.py
executable file
·69 lines (55 loc) · 1.79 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#!/usr/bin/env python3
from pprint import pprint
import numpy as np
from game import Game
from players import RLPlayer
#from matplotlib import pyplot as plt
#plt.ion()
# Training driver: the learning player plays batches of games against a second
# RLPlayer, its weights are updated after every batch, and the per-epoch win
# counts are written to a CSV file once training finishes.

# Learning agent. Positional arguments follow the original note
# "qlr, gamma, netlr (0.03)".
player = RLPlayer(0.07, 0.99, 0.03)
# Opponent, built with zeros for its first two arguments. This script never
# calls updateWeights() on it.
rp = RLPlayer(0, 0)
match_size = 10  # games played per epoch before the weights are updated
n_epochs = 2000
player_wins = []  # one win count per epoch

for e in range(n_epochs):
    print("Epoch: %d" % e)
    player.wins = 0
    # Anneal the exploration rate: roughly 1.0 at epoch 0, decaying
    # exponentially towards 0.11 / 1.1 = 0.1.
    player.epsilon = (np.exp(-0.017 * e) + 0.11) / 1.1
    player_gameplay_history = []

    for _ in range(match_size):
        player.play_history = []
        # Initialize a new game
        g = Game()
        g.addPlayer(player)
        # Adds a player that won't log to its move history
        g.addPlayer(rp, False)
        g.run()

        final_score = sorted(g.getScore().items())
        ttl = sum(score for _, score in final_score)
        # Only deal with 1 of the players (the one we're updating the weights
        # for). NOTE(review): this relies on the learner's entry sorting
        # first among the getScore() items -- confirm against Game.
        if ttl:
            # Rescale the learner's share of the total score linearly so that
            # a share of 0.5 maps to 0 and a share of 1 maps to 1.
            player_score = (final_score[0][1] / ttl - 0.5) * 2
        else:
            # Nobody scored: count the game as a draw instead of crashing
            # the whole training run with a ZeroDivisionError.
            player_score = 0.0
        player.wins += int(player_score > 0)
        player_gameplay_history.append((player.play_history, player_score))

    print(player.epsilon, player.wins)
    player_wins.append(player.wins)

    # Replay every game of this epoch through the weight update, restoring
    # the move history that was recorded for that game first.
    for game, score in player_gameplay_history:
        player.play_history = game
        player.updateWeights(score)

# NOTE(review): the original indentation was lost; saving once after training
# (rather than once per epoch) is assumed here -- confirm.
suffix = "linear-0.03"
player.policy_net.save("best-%s.weights" % suffix)
print(sum(player_wins))
# One win count per line, in epoch order.
with open("%d-%d-%s.csv" % (n_epochs, match_size, suffix), "w", encoding="utf-8") as f:
    f.write("\n".join(map(str, player_wins)))
#plt.plot(player_wins)
#plt.draw()
#plt.ioff()
#plt.show()