-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathRockPaperScissorsEnv.py
71 lines (62 loc) · 2.01 KB
/
RockPaperScissorsEnv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""# The environment: RockPaperScissorsEnv class"""
import ray
from gym.spaces import Discrete
from ray.rllib.env import BaseEnv
from ray.rllib.env.multi_agent_env import MultiAgentEnv
ROCK = 0
PAPER = 1
SCISSORS = 2
class RockPaperScissorsEnv(MultiAgentEnv):
"""Two-player environment for rock paper scissors.
The observation is simply the last opponent action."""
def __init__(self, _, population_size):
#def __init__(self, population_size):
self.population_size = population_size
self.action_space = Discrete(3)
self.observation_space = Discrete(3)
self.player_A = None
self.player_B = None
#self.player_A = "agt_0"
#self.player_B = "agt_1"
self.last_move = None
self.num_moves = 0
def reset(self):
g_helper = ray.get_actor("g_helper")
agt_i, agt_j = ray.get(g_helper.get_pair.remote())
self.player_A = agt_i
self.player_B = agt_j
self.last_move = (0, 0)
self.num_moves = 0
return {
self.player_A: self.last_move[1],
self.player_B: self.last_move[0],
}
def step(self, action_dict):
move1 = action_dict[self.player_A]
move2 = action_dict[self.player_B]
self.last_move = (move1, move2)
obs = {
self.player_A: self.last_move[1],
self.player_B: self.last_move[0],
}
r1, r2 = {
(ROCK, ROCK): (0, 0),
(ROCK, PAPER): (-1, 1),
(ROCK, SCISSORS): (1, -1),
(PAPER, ROCK): (1, -1),
(PAPER, PAPER): (0, 0),
(PAPER, SCISSORS): (-1, 1),
(SCISSORS, ROCK): (-1, 1),
(SCISSORS, PAPER): (1, -1),
(SCISSORS, SCISSORS): (0, 0),
}[move1, move2]
rew = {
self.player_A: r1,
self.player_B: r2,
}
self.num_moves += 1
done = {
"__all__": self.num_moves >= 10,
}
#print('obs', obs)
return obs, rew, done, {}