# run_experiment.py
# Forked from cgumbsch/goal_anticipations_via_event-inference
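#
# This script runs the training and testing simulations of the event-inference
# system: in each of 20 simulations per setting, the model is trained for
# 30 epochs of 100 agent-patient interaction sequences and, after every epoch,
# tested on grasping events performed by a hand- and a claw-agent (10 runs
# each). The procedure is repeated for the time horizons tau = 2, 1, and 3;
# after the 30 training epochs, each tau = 2 system is additionally tested
# with extra noise on the agent's appearance (Experiment 4). Results are
# written as .txt log files into folder_name (see below).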
import gym
import interaction_gym
import numpy as np
import event_inference as event
import sys
import random


def test_run(directory_name, setting_name, event_system, interaction_env,
             claw, simulation_num, epoch_num, run_num, time_horizon,
             file_name_addition=''):
    """
    Performs one test run and logs the results
    :param directory_name: name of target folder for log files
    :param setting_name: name of this simulation setting
    :param event_system: instance of trained event inference system
    :param interaction_env: instance of agent-patient interaction gym
    :param claw: bool, whether to use claw- or hand-agent in this test run
    :param simulation_num: number of this simulation
    :param epoch_num: number of training phases
    :param run_num: number of runs in this testing phase
    :param time_horizon: tau
    :param file_name_addition: extra string added at the end of file name
    """
    entity_name = 'hand'
    if claw:
        entity_name = 'claw'
    filename = directory_name + '/' + setting_name + str(simulation_num) + '_epoch' + str(
        epoch_num) + "_" + entity_name + file_name_addition + "_run" + str(run_num) + '.txt'
    file = open(filename, 'w')
    file.write('t, Event, Policy, P(still), P(random), P(reach), P(transport), o(t) \n')
    o_t = interaction_env.reset_to_grasping(claw=claw)  # claw=False for hand agent, claw=True for claw agent
    pi_t = np.array([0.0, 0.0, 1.0])  # During testing the system starts with no fixation
    event_system.reset()
    for t in range(270):
        # 1. step: Get o(t)
        o_t, r_t, done_t, info_t = interaction_env.step(pi_t)
        # 2. step: Infer event model and next action
        pi_t, probs = event_system.step(o_t=o_t, pi_t=pi_t, training=False,
                                        done=done_t, e_i=info_t, tau=time_horizon)
        # 3. step: Log data
        obs_str = ', '.join(map(str, o_t))
        file.write(
            str(t) + ', ' + str(info_t) + ', ' + str(np.argmax(pi_t)) + ', ' + str(probs[0]) +
            ', ' + str(probs[1]) + ', ' + str(probs[2]) + ', ' + str(probs[3]) + ', ' + obs_str + '\n')
    file.close()
    interaction_env.close()


# Global parameter settings used for all experiments
epsilon_start = 0.01
epsilon_end = 0.001
epsilon_dynamics = 0.001
random_Colors = True
percentage_reaching = 1.0/3.0
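# Note: the target directory below is assumed to exist already; test_run()
# opens its log files with open(..., 'w'), which does not create missing folders.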
folder_name = 'Experiments/ResAblationTimeHorizon'

# EXPERIMENT 1, 2, and 3:
# tau = 2, 1.0/3.0 E_grasp events in training, randomized agent appearance
tau = 2
test_name = 'res_tau_2_sim'
for simulation in range(20):
    seed = simulation
    model = event.EventInferenceSystem(epsilon_start=epsilon_start, epsilon_dynamics=epsilon_dynamics,
                                       epsilon_end=epsilon_end, no_transition_prior=0.9, dim_observation=18,
                                       num_policies=3, num_models=4, r_seed=seed, sampling_rate=2)
    env = interaction_gym.InteractionEventGym(sensory_noise_base=1.0, sensory_noise_focus=0.01,
                                              r_seed=seed, randomize_colors=random_Colors,
                                              percentage_reaching=percentage_reaching)
    for epoch in range(30):
        # TRAINING PHASE:
        # do 100 training event sequences per phase
        for sequence in range(100):
            # reset environment to new event sequence
            observation = env.reset()
            # sample one-hot-encoding of policy pi(0)
            policy_t = np.array([0.0, 0.0, 0.0])
            policy_t[random.randint(0, 2)] = 1.0
            done = False
            while not done:
                # perform pi(t) and receive new observation o(t)
                observation, reward, done, info = env.step(policy_t)
                # update the event probabilities, event schemata, and infer next policy
                policy_t, P_ei = model.step(o_t=observation, pi_t=policy_t, training=True, done=done, e_i=info)
        # TESTING PHASE:
        # do 10 test runs for hand and claw agents
        for run in range(10):
            # hand:
            test_run(directory_name=folder_name, setting_name=test_name, event_system=model,
                     interaction_env=env, claw=False, simulation_num=simulation,
                     epoch_num=epoch, run_num=run, time_horizon=tau)
            # claw:
            test_run(directory_name=folder_name, setting_name=test_name, event_system=model,
                     interaction_env=env, claw=True, simulation_num=simulation,
                     epoch_num=epoch, run_num=run, time_horizon=tau)

    # EXPERIMENT 4:
    # after fully training the system on tau = 2 for 30 epochs, test how behavior
    # is altered when the agent's appearance receives systematically more noise
    sd_values = np.array([0.1, 0.5, 1.0, 5.0])  # possible noise values
    for s in sd_values:
        noise_per_dimension = np.zeros(18, dtype=np.float64)
        noise_per_dimension[3] = s  # agent's appearance receives extra noise
        extra_file_name = '_' + str(s)
        for run in range(10):
            # hand:
            env.set_other_noise(np.random.normal(0.0, 1.0, 18) * noise_per_dimension)
            test_run(directory_name=folder_name, setting_name=test_name, event_system=model,
                     interaction_env=env, claw=False, simulation_num=simulation,
                     epoch_num=30, run_num=run, time_horizon=tau, file_name_addition=extra_file_name)

# EXPERIMENT 3: Testing different time horizons (tau)
# tau = 1, 1.0/3.0 E_grasp events in training, randomized agent appearance
tau = 1
test_name = 'res_tau_1_sim'
for simulation in range(20):
    seed = simulation
    model = event.EventInferenceSystem(epsilon_start=epsilon_start, epsilon_dynamics=epsilon_dynamics,
                                       epsilon_end=epsilon_end, no_transition_prior=0.9, dim_observation=18,
                                       num_policies=3, num_models=4, r_seed=seed, sampling_rate=2)
    env = interaction_gym.InteractionEventGym(sensory_noise_base=1.0, sensory_noise_focus=0.01,
                                              r_seed=seed, randomize_colors=random_Colors,
                                              percentage_reaching=percentage_reaching)
    for epoch in range(30):
        # TRAINING PHASE:
        # do 100 training event sequences per phase
        for sequence in range(100):
            # reset environment to new event sequence
            observation = env.reset()
            # sample one-hot-encoding of policy pi(0)
            policy_t = np.array([0.0, 0.0, 0.0])
            policy_t[random.randint(0, 2)] = 1.0
            done = False
            while not done:
                # perform pi(t) and receive new observation o(t)
                observation, reward, done, info = env.step(policy_t)
                # update the event probabilities, event schemata, and infer next policy
                policy_t, P_ei = model.step(o_t=observation, pi_t=policy_t, training=True, done=done, e_i=info)
        # TESTING PHASE:
        # do 10 test runs for hand and claw agents
        for run in range(10):
            # hand:
            test_run(directory_name=folder_name, setting_name=test_name, event_system=model,
                     interaction_env=env, claw=False, simulation_num=simulation,
                     epoch_num=epoch, run_num=run, time_horizon=tau)
            # claw:
            test_run(directory_name=folder_name, setting_name=test_name, event_system=model,
                     interaction_env=env, claw=True, simulation_num=simulation,
                     epoch_num=epoch, run_num=run, time_horizon=tau)

# tau = 3, 1.0/3.0 E_grasp events in training, randomized agent appearance
tau = 3
test_name = 'res_tau_3_sim'
for simulation in range(20):
    seed = simulation
    model = event.EventInferenceSystem(epsilon_start=epsilon_start, epsilon_dynamics=epsilon_dynamics,
                                       epsilon_end=epsilon_end, no_transition_prior=0.9, dim_observation=18,
                                       num_policies=3, num_models=4, r_seed=seed, sampling_rate=2)
    env = interaction_gym.InteractionEventGym(sensory_noise_base=1.0, sensory_noise_focus=0.01,
                                              r_seed=seed, randomize_colors=random_Colors,
                                              percentage_reaching=percentage_reaching)
    for epoch in range(30):
        # TRAINING PHASE:
        # do 100 training event sequences per phase
        for sequence in range(100):
            # reset environment to new event sequence
            observation = env.reset()
            # sample one-hot-encoding of policy pi(0)
            policy_t = np.array([0.0, 0.0, 0.0])
            policy_t[random.randint(0, 2)] = 1.0
            done = False
            while not done:
                # perform pi(t) and receive new observation o(t)
                observation, reward, done, info = env.step(policy_t)
                # update the event probabilities, event schemata, and infer next policy
                policy_t, P_ei = model.step(o_t=observation, pi_t=policy_t, training=True, done=done, e_i=info)
        # TESTING PHASE:
        # do 10 test runs for hand and claw agents
        for run in range(10):
            # hand:
            test_run(directory_name=folder_name, setting_name=test_name, event_system=model,
                     interaction_env=env, claw=False, simulation_num=simulation,
                     epoch_num=epoch, run_num=run, time_horizon=tau)
            # claw:
            test_run(directory_name=folder_name, setting_name=test_name, event_system=model,
                     interaction_env=env, claw=True, simulation_num=simulation,
                     epoch_num=epoch, run_num=run, time_horizon=tau)