# -*- coding: utf-8 -*-
import gym
from gym.utils import seeding
import numpy as np
from matplotlib import use as matplotlib_use
matplotlib_use('Agg', force=True)  # headless backend: render without a display
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
from matplotlib.patches import Circle, Rectangle
from matplotlib.collections import PatchCollection
from environments.car_controller.grid_drive.lib.road_grid import RoadGrid
from environments.car_controller.grid_drive.lib.road_cultures import *
import logging
logger = logging.getLogger(__name__)
class GridDrive(gym.Env):
	metadata = {'render.modes': ['human', 'rgb_array']}
	GRID_DIMENSION = 15
	MAX_SPEED = 120
	SPEED_GAP = 10
	MAX_GAPPED_SPEED = MAX_SPEED//SPEED_GAP  # number of discrete speed levels
	MAX_STEP = 2**5
	DIRECTIONS = 4  # N, S, W, E
	VISITED_CELL_GRID_IDX = -2  # grid_view channel marking visited cells
	AGENT_CELL_GRID_IDX = -1  # grid_view channel marking the agent's cell
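	# The observation tensor `grid_view` has shape
	# (GRID_DIMENSION, GRID_DIMENSION, obs_road_features+2): the first
	# obs_road_features channels hold the binary road features of each cell,
	# channel VISITED_CELL_GRID_IDX (-2) flags visited cells, and channel
	# AGENT_CELL_GRID_IDX (-1) flags the agent's current cell.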
	def get_state(self):
		fc_dict = {
			# "grid": self.grid_view,
			"neighbours": self.grid.neighbour_features(),
		}
		if self.obs_car_features > 0:
			fc_dict["agent_extra_properties"] = self.grid.agent.binary_features()
		# fc_dict["agent_speed"] = np.array([self.speed/self.MAX_SPEED], dtype=np.float32)
		return {
			"cnn": {
				"grid": self.grid_view,
			},
			"fc": fc_dict,
		}
	def seed(self, seed=None):
		logger.warning(f"Setting random seed to: {seed}")
		self.np_random, seed = seeding.np_random(seed)
		return [seed]
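	# NOTE: seed() must be called before the first reset(), since reset()
	# forwards self.np_random to the culture and self.np_random only exists
	# after seeding.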
	def __init__(self, config=None):
		logger.warning(f'Setting environment with reward_fn <{config["reward_fn"]}> and culture_level <{config["culture_level"]}>')
		# NOTE: eval() executes whatever string the config provides; the config is trusted.
		self.reward_fn = eval(f'self.{config["reward_fn"]}')
		self.culture = eval(f'{config["culture_level"]}RoadCulture')(road_options={
			'motorway': 1/2,
			'stop_sign': 1/2,
			'school': 1/2,
			'single_lane': 1/2,
			'town_road': 1/2,
			'roadworks': 1/8,
			'accident': 1/8,
			'heavy_rain': 1/2,
			'congestion_charge': 1/8,
		}, agent_options={
			'emergency_vehicle': 1/5,
			'heavy_vehicle': 1/4,
			'worker_vehicle': 1/3,
			'tasked': 1/2,
			'paid_charge': 1/2,
			'speed': self.MAX_SPEED,
		})
		self.obs_road_features = len(self.culture.properties)  # number of binary ROAD features in the selected culture
		self.obs_car_features = len(self.culture.agent_properties) - 1  # number of binary CAR features in the selected culture (speed excluded)
		# Action space: direction (N, S, W, E) x gapped speed, flattened into a single Discrete space
		# self.action_space = gym.spaces.MultiDiscrete([self.DIRECTIONS, self.MAX_GAPPED_SPEED])
		self.action_space = gym.spaces.Discrete(self.DIRECTIONS*self.MAX_GAPPED_SPEED)
		fc_dict = {
			# "grid": gym.spaces.MultiBinary([self.GRID_DIMENSION, self.GRID_DIMENSION, self.obs_road_features+2]),  # grid features + visited cells + current position
			"neighbours": gym.spaces.MultiBinary(self.obs_road_features * self.DIRECTIONS),  # neighbourhood view
		}
		if self.obs_car_features > 0:
			fc_dict["agent_extra_properties"] = gym.spaces.MultiBinary(self.obs_car_features)  # car features
		# fc_dict["agent_speed"] = gym.spaces.Box(low=0, high=1.0, shape=(1,), dtype=np.float32)
		self.observation_space = gym.spaces.Dict({
			"cnn": gym.spaces.Dict({
				"grid": gym.spaces.MultiBinary([self.GRID_DIMENSION, self.GRID_DIMENSION, self.obs_road_features+2]),  # grid features + visited cells + current position
			}),
			"fc": gym.spaces.Dict(fc_dict),
		})
		self.step_counter = 0
	def reset(self):
		self.is_over = False
		self.culture.np_random = self.np_random
		self.viewer = None
		self.step_counter = 0
		self.cumulated_return = 0
		self.sum_speed = 0
		self.grid = RoadGrid(self.GRID_DIMENSION, self.GRID_DIMENSION, self.culture)
		self.grid_features = np.array(self.grid.get_features(), ndmin=3, dtype=np.int8)
		self.grid_view = np.concatenate([
			self.grid_features,
			np.zeros((self.GRID_DIMENSION, self.GRID_DIMENSION, 2), dtype=np.int8),  # current position + visited cells
		], -1)
		x, y = self.grid.agent_position
		self.grid_view[x][y][self.AGENT_CELL_GRID_IDX] = 1  # set initial position
		self.grid_view[x][y][self.VISITED_CELL_GRID_IDX] = 1  # mark the starting cell as visited
		self.visited_cells = 1
		self.speed = self.grid.agent["Speed"]
		return self.get_state()
	def step(self, action_vector):
		self.step_counter += 1
		# Decode the flat action: direction in [0, DIRECTIONS), speed in {0, SPEED_GAP, ..., MAX_SPEED-SPEED_GAP}
		self.direction = action_vector//self.MAX_GAPPED_SPEED
		self.speed = (action_vector%self.MAX_GAPPED_SPEED)*self.SPEED_GAP
		self.sum_speed += self.speed
		old_x, old_y = self.grid.agent_position  # get this before moving the agent
		reward, dead, explanatory_labels = self.reward_fn(*self.grid.move_agent(self.direction, self.speed))
		self.cumulated_return += reward
		if not self.visiting_old_cell:
			self.visited_cells += 1  # increase it before marking the current position as visited, otherwise visiting_old_cell would always be true
		new_x, new_y = self.grid.agent_position  # get this after moving the agent
		# Do the following after moving the agent and computing the reward
		self.grid_view[old_x][old_y][self.AGENT_CELL_GRID_IDX] = 0  # remove old position
		self.grid_view[new_x][new_y][self.AGENT_CELL_GRID_IDX] = 1  # set new position
		self.grid_view[new_x][new_y][self.VISITED_CELL_GRID_IDX] = 1  # mark the current cell as visited
		info_dict = {'explanation': explanatory_labels}
		out_of_time = self.step_counter >= self.MAX_STEP
		terminated_episode = dead or out_of_time
		if terminated_episode:  # populate statistics
			self.is_over = True
			info_dict["stats_dict"] = {
				"avg_speed": self.sum_speed/self.step_counter,
				"out_of_time": 1 if out_of_time else 0,
				"visited_cells": self.visited_cells,
			}
		return [
			self.get_state(),  # observation
			reward,
			terminated_episode,
			info_dict,
		]
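	# Illustrative helper (not part of the original file): decodes a flat
	# Discrete action back into its (direction, speed) pair, mirroring the
	# arithmetic at the top of step(). The inverse encoding would be
	# action = direction*MAX_GAPPED_SPEED + speed//SPEED_GAP.
	def decode_action(self, action_vector):
		direction = action_vector//self.MAX_GAPPED_SPEED  # one of the DIRECTIONS
		speed = (action_vector%self.MAX_GAPPED_SPEED)*self.SPEED_GAP  # gapped speed
		return direction, speed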
	def get_screen(self):  # RGB array
		# Build the figure and axis directly on an Agg canvas
		# (matplotlib.pyplot.subplots is avoided here because it leaks memory)
		figure = Figure(figsize=(10, 10), tight_layout=True)
		canvas = FigureCanvas(figure)
		ax = figure.add_subplot(111)  # nrows=1, ncols=1, index=1
		cell_side = 20
		# Compute speed limits for all cells
		road_limits = {}
		columns = self.grid.width
		rows = self.grid.height
		for x in range(columns):
			for y in range(rows):
				road = self.grid.cells[x][y]
				road_limits[road] = self.grid.road_culture.get_speed_limits(road, self.grid.agent)  # (None, None) if road is unfeasible
		# Draw cells
		shapes = []
		for x in range(columns):
			for y in range(rows):
				road = self.grid.cells[x][y]
				# Draw rectangle
				left = x * cell_side
				right = left + cell_side
				bottom = y * cell_side
				top = bottom + cell_side
				if self.grid_view[x][y][self.VISITED_CELL_GRID_IDX] > 0:  # already visited cell
					cell_handle = Rectangle((left, bottom), cell_side, cell_side, color='gray', alpha=0.25)
				elif road_limits[road] == (None, None):  # unfeasible road
					cell_handle = Rectangle((left, bottom), cell_side, cell_side, color='red', alpha=0.25)
				else:  # new road with possible speed limits
					cell_handle = Rectangle((left, bottom), cell_side, cell_side, fill=False)
				shapes.append(cell_handle)
				# Do not add a label if the agent is on top of the cell
				if (x, y) == self.grid.agent_position:
					continue
				# Add speed limit label
				min_speed, max_speed = road_limits[road]
				label = f'{min_speed}-{max_speed}' if min_speed is not None else 'N/A'
				ax.text(0.5*(left + right), 0.5*(bottom + top), label,
					horizontalalignment='center', verticalalignment='center', size=18)
		# Draw agent and agent label
		agent_x, agent_y = self.grid.agent_position
		left = agent_x * cell_side
		bottom = agent_y * cell_side
		agent_circle = Circle((left + cell_side/2, bottom + cell_side/2), cell_side/2, color='b', alpha=0.5)
		shapes.append(agent_circle)
		patch_collection = PatchCollection(shapes, match_original=True)
		ax.add_collection(patch_collection)
		# Adjust view around agent
		zoom_factor = 3
		left_view = agent_x - zoom_factor
		right_view = agent_x + zoom_factor
		bottom_view = agent_y - zoom_factor
		top_view = agent_y + zoom_factor
		if agent_x > (columns - zoom_factor):  # too far right
			left_view -= (agent_x + zoom_factor) - columns
		elif agent_x < zoom_factor:  # too far left
			right_view += (zoom_factor - agent_x)
		if agent_y > (rows - zoom_factor):  # too far up
			bottom_view -= (agent_y + zoom_factor) - rows
		elif agent_y < zoom_factor:  # too far down
			top_view += (zoom_factor - agent_y)
		ax.set_xlim([max(0, left_view * cell_side),
			min(columns * cell_side, right_view * cell_side)])
		ax.set_ylim([max(0, bottom_view * cell_side),
			min(rows * cell_side, top_view * cell_side)])
		# Draw the agent's commanded speed on top of the circle
		label = str(self.grid.agent["Speed"])
		ax.text(left + cell_side/2, bottom + cell_side/2, label,
			horizontalalignment='center', verticalalignment='center', size=18)
		canvas.draw()
		# Convert the rendered canvas into an RGB array
		# (np.frombuffer replaces the deprecated np.fromstring)
		data = np.frombuffer(canvas.tostring_rgb(), dtype=np.uint8)
		data = data.reshape(canvas.get_width_height()[::-1] + (3,))
		figure.clear()
		return data  # RGB array
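	# Example (assumption: Pillow is available; it is not imported by this file):
	#   from PIL import Image
	#   Image.fromarray(env.get_screen()).save('frame.png')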
	def render(self, mode='human'):
		img = self.get_screen()
		if mode == 'rgb_array':
			return img
		elif mode == 'human':
			from gym.envs.classic_control import rendering
			if self.viewer is None:
				self.viewer = rendering.SimpleImageViewer()
			self.viewer.imshow(img)
			return self.viewer.isopen
	@property
	def visiting_old_cell(self):
		x, y = self.grid.agent_position
		return self.grid_view[x][y][self.VISITED_CELL_GRID_IDX] > 0
	def frequent_reward_default(self, following_regulation, explanation_list):
		def null_reward(is_terminal, label):
			return (0, is_terminal, label)
		def unitary_reward(is_positive, is_terminal, label):
			return (1 if is_positive else -1, is_terminal, label)
		def step_reward(is_positive, is_terminal, label):
			reward = (self.speed+1)/self.MAX_SPEED  # in (0,1]
			return (reward if is_positive else -reward, is_terminal, label)
		explanation_list_with_label = lambda _label, _explanation_list: list(map(lambda x: (_label, x), _explanation_list)) if _explanation_list else _label
		#######################################
		# "Follow regulation" rule: run dialogue against culture
		if not following_regulation:
			return unitary_reward(is_positive=False, is_terminal=True, label=explanation_list_with_label('not_following_regulation', explanation_list))
		#######################################
		# "Visit new roads" rule
		if self.visiting_old_cell:  # already visited cell
			return null_reward(is_terminal=False, label='not_visiting_new_roads')
		#######################################
		# "Move forward" rule
		return step_reward(is_positive=True, is_terminal=False, label='moving_forward')
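	# All reward functions share the same contract: they consume the
	# (following_regulation, explanation_list) pair produced by
	# RoadGrid.move_agent() and return a (reward, is_terminal, label) triple.
	# The variants below differ only in how explanations are attached to the
	# labels and in how the step reward is scaled or applied.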
	def frequent_reward_explanation_engineering_v1(self, following_regulation, explanation_list):
		def null_reward(is_terminal, label):
			return (0, is_terminal, label)
		def unitary_reward(is_positive, is_terminal, label):
			return (1 if is_positive else -1, is_terminal, label)
		def step_reward(is_positive, is_terminal, label):
			reward = (self.speed+1)/self.MAX_SPEED  # in (0,1]
			return (reward if is_positive else -reward, is_terminal, label)
		explanation_list_with_label = lambda _label, _explanation_list: list(map(lambda x: (_label, x), _explanation_list)) if _explanation_list else _label
		#######################################
		# "Follow regulation" rule: run dialogue against culture
		if not following_regulation:
			return unitary_reward(is_positive=False, is_terminal=True, label=explanation_list_with_label('not_following_regulation', explanation_list))
		#######################################
		# "Visit new roads" rule
		if self.visiting_old_cell:  # already visited cell
			return null_reward(is_terminal=False, label=explanation_list_with_label('not_visiting_new_roads', explanation_list))
		#######################################
		# "Move forward" rule
		return step_reward(is_positive=True, is_terminal=False, label=explanation_list_with_label('moving_forward', explanation_list))
	def frequent_reward_explanation_engineering_v2(self, following_regulation, explanation_list):
		def null_reward(is_terminal, label):
			return (0, is_terminal, label)
		def unitary_reward(is_positive, is_terminal, label):
			return (1 if is_positive else -1, is_terminal, label)
		def step_reward(is_positive, is_terminal, label):
			reward = (self.speed+1)/self.MAX_SPEED  # in (0,1]
			return (reward if is_positive else -reward, is_terminal, label)
		explanation_list_with_label = lambda _label, _explanation_list: list(map(lambda x: (_label, x), _explanation_list)) if _explanation_list else _label
		#######################################
		# "Follow regulation" rule: run dialogue against culture
		if not following_regulation:
			return unitary_reward(is_positive=False, is_terminal=True, label=explanation_list_with_label('not_following_regulation', explanation_list))
		#######################################
		# "Visit new roads" rule
		if self.visiting_old_cell:  # already visited cell
			return null_reward(is_terminal=False, label='not_visiting_new_roads')
		#######################################
		# "Move forward" rule
		return step_reward(is_positive=True, is_terminal=False, label=explanation_list_with_label('moving_forward', explanation_list))
	def frequent_reward_step_multiplied_by_junctions(self, following_regulation, explanation_list):
		def null_reward(is_terminal, label):
			return (0, is_terminal, label)
		def unitary_reward(is_positive, is_terminal, label):
			return (1 if is_positive else -1, is_terminal, label)
		def step_reward(is_positive, is_terminal, label):
			reward = (self.speed+1)/self.MAX_SPEED  # in (0,1]
			reward *= self.visited_cells  # scale the step reward by the number of visited cells
			return (reward if is_positive else -reward, is_terminal, label)
		explanation_list_with_label = lambda _label, _explanation_list: list(map(lambda x: (_label, x), _explanation_list)) if _explanation_list else _label
		#######################################
		# "Follow regulation" rule: run dialogue against culture
		if not following_regulation:
			return unitary_reward(is_positive=False, is_terminal=True, label=explanation_list_with_label('not_following_regulation', explanation_list))
		#######################################
		# "Visit new roads" rule
		if self.visiting_old_cell:  # already visited cell
			return null_reward(is_terminal=False, label='not_visiting_new_roads')
		#######################################
		# "Move forward" rule
		return step_reward(is_positive=True, is_terminal=False, label='moving_forward')
	def frequent_reward_full_step(self, following_regulation, explanation_list):
		def null_reward(is_terminal, label):
			return (0, is_terminal, label)
		def step_reward(is_positive, is_terminal, label):
			reward = (self.speed+1)/self.MAX_SPEED  # in (0,1]
			return (reward if is_positive else -reward, is_terminal, label)
		explanation_list_with_label = lambda _label, _explanation_list: list(map(lambda x: (_label, x), _explanation_list)) if _explanation_list else _label
		#######################################
		# "Follow regulation" rule: run dialogue against culture
		if not following_regulation:
			return step_reward(is_positive=False, is_terminal=True, label=explanation_list_with_label('not_following_regulation', explanation_list))
		#######################################
		# "Visit new roads" rule
		if self.visiting_old_cell:  # already visited cell
			return null_reward(is_terminal=False, label='not_visiting_new_roads')
		#######################################
		# "Move forward" rule
		return step_reward(is_positive=True, is_terminal=False, label='moving_forward')
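# Minimal usage sketch (assumptions: this runs from the repository root so the
# `environments` package is importable, and the culture_level/reward_fn values
# name an existing *RoadCulture class and reward method, e.g. 'Hard' and
# 'frequent_reward_default'). This block is illustrative, not part of the
# original module.
if __name__ == '__main__':
	env = GridDrive({'reward_fn': 'frequent_reward_default', 'culture_level': 'Hard'})
	env.seed(42)  # reset() reads self.np_random, so seed before resetting
	state = env.reset()
	done = False
	while not done:
		action = env.action_space.sample()  # random policy, for demonstration only
		state, reward, done, info = env.step(action)
		print(f'reward={reward:.3f} explanation={info["explanation"]}')
	print('cumulated return:', env.cumulated_return)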