-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathExpectimax1_v2.py
100 lines (95 loc) · 4.06 KB
/
Expectimax1_v2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from Game import Game
from math import pow
from copy import deepcopy
class Node:
    """One node of a depth-limited Max / Min / Chance search over a 4x4 grid.

    A ``"Max"`` node tries the four moves, a ``"Min"`` node picks which empty
    cells a new tile may appear in, and a ``"Chance"`` node averages over the
    value (2 or 4) of the tile placed at ``position``.

    Besides a score, every evaluation returns ``scn``: a list with one
    integer per threshold in ``t``.  At a leaf an entry is 0 when the leaf
    score reaches that threshold and a penalty otherwise; Max nodes keep the
    element-wise minimum over their children, Min and Chance nodes the
    element-wise sum.
    NOTE(review): this reads like a per-threshold count of failing leaves --
    confirm the intended meaning with the caller.
    """

    # Exponent of 4 used as the weight of each cell.  The exponents fall from
    # 15 to 0 along a serpentine path that starts in the top-left corner.
    _WEIGHT_EXPONENTS = (
        (15, 14, 13, 12),
        (8, 9, 10, 11),
        (7, 6, 5, 4),
        (0, 1, 2, 3),
    )
    # Built once for the whole class instead of once per node; every node
    # exposes this same table as ``self.weight``, so treat it as read-only.
    WEIGHTS = [[4 ** exponent for exponent in row] for row in _WEIGHT_EXPONENTS]

    # ``scn`` entry for a threshold missed at a depth-limit leaf.
    _MISS = 1
    # ``scn`` entry for a threshold missed at a finished game; also the
    # starting value of the element-wise minimum in a Max node.
    _DEAD_END = 1000000
    # Starting value for the minimum taken in a Min node.
    _SCORE_CEILING = 2 ** 50
    # (tile value, probability) pairs tried by a Chance node, in this order.
    _SPAWNS = ((2, 0.8), (4, 0.2))

    def __init__(self, grid, agent_depth, max_depth, node_type, t, position=None):
        """Store the search state for one node.

        Args:
            grid: 4x4 nested list of tile values; 0 marks an empty cell.
            agent_depth: Depth reached so far; only Chance nodes increase it
                when they create their Max children.
            max_depth: Depth at which evaluation stops and scores the grid.
            node_type: ``"Max"``, ``"Min"`` or ``"Chance"``.
            t: Score thresholds; copied with ``t.copy()`` so the caller's
                object is never shared with the node.
            position: ``(row, col)`` of the cell a Chance node fills.  It is
                stored for every node type (``None`` when not given) so the
                attribute always exists.
        """
        self.grid = grid
        self.max_depth = max_depth
        self.agent_depth = agent_depth
        self.node_type = node_type
        self.weight = self.WEIGHTS
        self.position = position
        self.t = t.copy()  # threshold
        # Not read by any method of this class; kept for external users.
        self.scn = []

    def evaluation(self):
        """Evaluate the subtree rooted at this node.

        Returns:
            A ``(value, scn)`` tuple.  ``value`` is the key of the best move
            (``"w"``, ``"a"``, ``"s"``, ``"d"``, or ``None`` when no move
            changes the grid) for a non-leaf Max node at ``agent_depth == 0``,
            and a numeric score in every other case -- including a depth-0
            node that is already a leaf.  ``scn`` holds one integer per
            threshold in ``self.t``.
        """
        game_over = Game.is_end(self.grid)
        if game_over or self.agent_depth >= self.max_depth:
            return self._evaluate_leaf(game_over)
        if self.node_type == "Max":
            return self._evaluate_max()
        if self.node_type == "Min":
            return self._evaluate_min()
        # Any other node type is handled as a Chance node.
        return self._evaluate_chance()

    def _evaluate_leaf(self, game_over):
        """Score the grid as the weighted sum of its tiles and grade each threshold."""
        score = sum(
            tile * weight
            for grid_row, weight_row in zip(self.grid, self.weight)
            for tile, weight in zip(grid_row, weight_row)
        )
        # A missed threshold costs far more when the game cannot continue.
        penalty = self._DEAD_END if game_over else self._MISS
        scn = [0 if score >= threshold else penalty for threshold in self.t]
        return score, scn

    def _evaluate_max(self):
        """Try every move that changes the grid and keep the best-scoring one."""
        moves = (
            ("w", Game.up),
            ("a", Game.left),
            ("s", Game.down),
            ("d", Game.right),
        )
        # Collect the candidate grids first; a move that leaves the grid
        # unchanged is not a legal action.
        candidates = []
        for key, move in moves:
            moved_grid = move(self.grid)[0]
            if moved_grid != self.grid:
                candidates.append((moved_grid, key))

        best_score = -1
        best_action = None
        scn = [self._DEAD_END] * len(self.t)
        for moved_grid, key in candidates:
            child = Node(moved_grid, self.agent_depth, self.max_depth, "Min", self.t)
            child_score, child_scn = child.evaluation()
            if child_score > best_score:
                best_score = child_score
                best_action = key
            # Per threshold, keep the smallest value seen over all children.
            scn = [min(kept, new) for kept, new in zip(scn, child_scn)]

        # Only the root reports the move itself; inner nodes report its score.
        if self.agent_depth == 0:
            return best_action, scn
        return best_score, scn

    def _evaluate_min(self):
        """Place the new tile in the heaviest empty cells and keep the lowest score."""
        empty_cells = sorted(
            (
                (self.weight[x][y], (x, y))
                for x in range(4)
                for y in range(4)
                if self.grid[x][y] == 0
            ),
            reverse=True,
        )
        # The number of cells tried shrinks as the search gets deeper.  It is
        # at least 0 here because evaluation() only dispatches to this helper
        # while agent_depth < max_depth.
        branch_count = min(self.max_depth - self.agent_depth, len(empty_cells))

        lowest_score = self._SCORE_CEILING
        scn = [0] * len(self.t)
        for _, cell in empty_cells[:branch_count]:
            child = Node(
                deepcopy(self.grid),
                self.agent_depth,
                self.max_depth,
                "Chance",
                self.t,
                cell,
            )
            child_score, child_scn = child.evaluation()
            lowest_score = min(lowest_score, child_score)
            # Per threshold, add up the values of all children.
            scn = [total + extra for total, extra in zip(scn, child_scn)]
        return lowest_score, scn

    def _evaluate_chance(self):
        """Average the child scores over the new tile being a 2 or a 4."""
        row, col = self.position[0], self.position[1]
        expected_score = 0
        scn = [0] * len(self.t)
        for tile, probability in self._SPAWNS:
            spawned_grid = deepcopy(self.grid)
            spawned_grid[row][col] = tile
            child = Node(
                spawned_grid, self.agent_depth + 1, self.max_depth, "Max", self.t
            )
            child_score, child_scn = child.evaluation()
            # Each weighted term is truncated to an int before it is added.
            expected_score += int(probability * child_score)
            # Per threshold, add up the values of both children (unweighted).
            scn = [total + extra for total, extra in zip(scn, child_scn)]
        return expected_score, scn
# Script entry point: running this module directly intentionally does nothing;
# the Node class above is meant to be imported and driven by other code.
if __name__ == '__main__':
    pass