-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathJacksCarRental.py
173 lines (145 loc) · 6.08 KB
/
JacksCarRental.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# Write a program for policy iteration and re-solve Jack's car rental problem with the following changes. One of Jack's
# employees at the first location rides a bus home each night and lives near the second location. She is happy to
# shuttle one car to the second location for free. Each additional car still costs $2, as do all cars moved in the other
# direction. In addition, Jack has limited parking space at each location. If more than 10 cars are kept overnight at a
# location (after any moving of cars), then an additional cost of $4 must be incurred to use a second parking lot
# (independent of how many cars are kept there). These sorts of nonlinearities and arbitrary dynamics often occur in
# real problems and cannot easily be handled by optimization methods other than dynamic programming. To check your
# program, first replicate the results given for the original problem. If your computer is too slow for the full
# problem, cut all the numbers of cars in half.
import numpy as np
import math
import matplotlib.pyplot as plt
class Params:
def __init__(self):
self.max_cars = 20
self.max_moves = 5
self.r_car = 10
self.c_night = 4
self.c_car = 2
self.discount = 0.9
self.request_first = 3
self.request_second = 4
self.return_first = 3
self.return_second = 2
self.theta = 0.01
class PolicyIteration:
def __init__(self, params):
self.params = Params()
self.S = [(x,y) for x in range(self.params.max_cars + 1) for y in range(self.params.max_cars + 1)]
self.V = np.zeros((self.params.max_cars + 1, self.params.max_cars + 1))
self.pi = np.zeros((self.params.max_cars + 1, self.params.max_cars + 1))
self.pif = []
def solve(self):
i = 0
while True:
print(f"Iteration: {i + 1}")
self.pif.append(self.pi.copy())
while True:
delta = 0
for s in self.S:
v = self.V[s]
self.V[s] = self.V_eval(s, self.pi[s])
delta = np.maximum(delta, abs(v - self.V[s]))
if delta < self.params.theta:
break
print(f"Delta: {delta}")
policy_stable = True
for s in self.S:
old_action = self.pi[s]
vals = {a : self.V_eval(s, a) for a in self.A(s)}
self.pi[s] = np.random.choice([a for a, value in vals.items() if value == np.max(list(vals.values()))])
if old_action != self.pi[s]:
policy_stable = False
if policy_stable:
break
i += 1
def A(self, s):
vals = []
A = [x for x in range(-self.params.max_moves, self.params.max_moves + 1)]
s_first, s_second = s
for a in A:
if s_first - a < 0 or s_first - a > self.params.max_cars:
continue
if s_second + a < 0 or s_second - a > self.params.max_cars:
continue
vals.append(a)
return vals
def V_eval(self, s, a):
val = 0
s_first, s_second = s
s_first -= int(a)
s_second += int(a)
cost = self.params.c_car * abs(a)
sum_prob_i = 0
for i in range(s_first + 1):
if i == s_first:
p_i = 1 - sum_prob_i
else:
p_i = PolicyIteration.poisson(self.params.request_first, i)
sum_prob_i += p_i
r_i = i * self.params.r_car
sum_prob_j = 0
for j in range(s_second + 1):
if j == s_second:
p_j = 1 - sum_prob_j
else:
p_j = PolicyIteration.poisson(self.params.request_second, j)
sum_prob_j += p_j
r_j = j * self.params.r_car
sum_prob_k = 0
for k in range(self.params.max_cars + i - s_first + 1):
if k == self.params.max_cars + i - s_first:
p_k = 1 - sum_prob_k
else:
p_k = PolicyIteration.poisson(self.params.return_first, k)
sum_prob_k += p_k
sum_prob_l = 0
for l in range(self.params.max_cars + j - s_second + 1):
if l == self.params.max_cars + j - s_second:
p_l = 1 - sum_prob_l
else:
p_l = PolicyIteration.poisson(self.params.return_second, l)
sum_prob_l += p_l
val += p_i * p_j * p_k * p_l * (
r_i + r_j - cost + self.params.discount * self.V[s_first - i + k, s_second - j + l])
return val
@staticmethod
def poisson(lamb, n):
"""
:param lamb: lambda parameter of poisson distribution, rate
:param n: n variable of poisson distribution, number of occurrences
:return: probability of the event
"""
return (lamb ** n) * math.exp(-lamb) / math.factorial(n)
def print_pif(self):
"""
Print policies
"""
for idx, pi in enumerate(self.pif):
plt.figure()
plt.imshow(pi, origin='lower', interpolation='none', vmin=-self.params.max_move, vmax=self.params.max_move)
plt.xlabel('#Cars at second location')
plt.ylabel('#Cars at first location')
plt.title('pi{:d} {:s}')
plt.colorbar()
def print_V(self):
"""
Print values
"""
fig = plt.figure()
ax = fig.gca(projection='3d')
X = np.arange(0, self.params.max_cars + 1)
Y = np.arange(0, self.params.max_cars + 1)
X, Y = np.meshgrid(X, Y)
ax.plot_surface(X, Y, self.V)
plt.title('V {:s}')
def main():
param = Params()
policy_iteration = PolicyIteration(param)
policy_iteration.solve()
policy_iteration.print_pif()
policy_iteration.print_V()
plt.show()
if __name__ == "__main__":
main()