-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis.py
109 lines (98 loc) · 3.68 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import os
import numpy as np
from scipy.stats import bootstrap
from collections import namedtuple
from utils.plotter import Plotter
from utils.sweeper import unfinished_index, time_info, memory_info
def get_process_result_dict(result, config_idx, mode='Train'):
    """Summarize the results of a single run as one row.

    Args:
        result: Table indexed by column name with 'Env', 'Agent' and 'Return'
            columns (presumably a pandas DataFrame, since the 'Return' column
            is reduced with ``.mean(skipna=True)``).
        config_idx: Configuration index, stored under 'Config Index'.
        mode: Not used by this function; kept for signature compatibility.

    Returns:
        dict with keys 'Env', 'Agent', 'Config Index' and 'Return (mean)',
        where 'Return (mean)' is the NaN-skipping mean of the final 10% of
        the 'Return' column (at least one entry).
    """
    returns = result['Return']
    # With fewer than 10 entries int(len * 0.1) is 0, and a `[-0:]` slice
    # selects the whole series instead of its tail, so clamp to one entry.
    tail_len = max(1, int(len(returns) * 0.1))
    return {
        'Env': result['Env'][0],
        'Agent': result['Agent'][0],
        'Config Index': config_idx,
        'Return (mean)': returns.iloc[-tail_len:].mean(skipna=True),  # mean of last 10%
    }
def get_csv_result_dict(result, config_idx, mode='Train', ci=95, method='percentile'):
    """Aggregate the per-run rows of one configuration into a summary row.

    Args:
        result: Table indexed by column name with 'Env', 'Agent' and
            'Return (mean)' columns (presumably a pandas DataFrame with one
            row per run).
        config_idx: Configuration index, stored under 'Config Index'.
        mode: Not used by this function.
        ci: Confidence level in percent; also embedded in the last key name.
        method: Bootstrap method name forwarded to ``scipy.stats.bootstrap``.

    Returns:
        dict with the environment, agent, configuration index, the mean and
        standard error (ddof=0) of 'Return (mean)', plus the midpoint and
        half-width of the bootstrap confidence interval of the mean.
    """
    run_means = result['Return (mean)']
    samples = run_means.values.tolist()

    if len(samples) <= 1:
        # Nothing to resample from: the interval collapses onto the value.
        low = high = samples[0]
    else:
        interval = bootstrap(
            (samples,),
            np.mean,
            confidence_level=ci / 100,
            method=method,
        ).confidence_interval
        low, high = interval.low, interval.high

    summary = {}
    summary['Env'] = result['Env'][0]
    summary['Agent'] = result['Agent'][0]
    summary['Config Index'] = config_idx
    summary['Return (mean)'] = run_means.mean(skipna=True)
    summary['Return (se)'] = run_means.sem(ddof=0)
    summary['Return (bmean)'] = (high + low) / 2
    summary[f'Return (ci={ci})'] = (high - low) / 2
    return summary
# Module-level settings dict handed to Plotter by analyze().
# NOTE(review): the meaning of each key is defined by utils.plotter.Plotter,
# which is not visible here; the grouping comments below are best guesses.
cfg = {
    # Overwritten by analyze() on every call.
    'exp': 'exp_name',
    'merged': True,

    # Axis / curve settings (presumably).
    'x_label': 'Step',
    'y_label': 'Return',
    'rolling_score_window': 20,
    # 'rolling_score_window': -1,
    'hue_label': 'Agent',
    'show': False,
    'imgType': 'png',
    'estimator': 'mean',
    # 'estimator': 'median',
    'ci': 'se',
    # 'ci': ('ci', 95),
    'EMA': True,
    'loc': 'best',

    # Sweep and ranking settings (presumably).
    'sweep_keys': ['agent/name', 'optim/kwargs/learning_rate'],
    'sort_by': ['Return (mean)', 'Return (se)'],
    'ascending': [False, True],

    # Overwritten by analyze() on every call.
    'runs': 1,
}
def analyze(exp, runs=1):
    """Build the merged 'Train' result CSV and plots for one experiment.

    Stores ``exp`` and ``runs`` in the module-level ``cfg`` dict (so the
    change persists across calls) and passes that dict to ``Plotter``.

    Args:
        exp: Experiment name, written to ``cfg['exp']``.
        runs: Number of runs, written to ``cfg['runs']``.
    """
    cfg.update(exp=exp, runs=runs)

    # Disabled: pick the sweep keys from the algorithm suffix of `exp`.
    # sweep_keys_dict = dict(
    #     dqn = ['optim/kwargs/learning_rate'],
    #     ddqn = ['optim/kwargs/learning_rate'],
    #     maxmin = ['optim/kwargs/learning_rate', 'agent/critic_num'],
    #     td = ['optim/kwargs/learning_rate'],
    #     sac = ['optim/kwargs/learning_rate'],
    #     naf = ['optim/kwargs/learning_rate'],
    #     ppo = ['optim/kwargs/learning_rate'],
    #     ddpg = ['optim/kwargs/learning_rate'],
    # )
    # algo = exp.rstrip('0123456789').split('_')[-1]
    # cfg['sweep_keys'] = sweep_keys_dict[algo]

    plotter = Plotter(cfg)
    plotter.csv_merged_results(
        'Train',
        get_csv_result_dict,
        get_process_result_dict,
    )
    plotter.plot_results(mode='Train', indexes='all')

    # Disabled: unmerged results and top-1 selection.
    # plotter.csv_unmerged_results('Train', get_process_result_dict)
    # group_keys = ['optim/kwargs/learning_rate', 'agent/critic_num']
    # group_keys = ['Env']
    # plotter.get_top1_result(group_keys=group_keys, perf='Return (bmean)', errorbar='Return (ci=95)', mode='Train', nd=0, markdown=False)

    # Disabled: hyper-parameter comparison.
    # plotter.csv_unmerged_results('Train', get_process_result_dict)
    # plotter.csv_unmerged_results('Test', get_process_result_dict)
    # constraints = [('agent/name', ['NAF'])]
    # constraints = []
    # for param_name in cfg['sweep_keys']:
    #     plotter.compare_parameter(param_name=param_name, constraints=constraints, mode='Train', kde=False)
    #     plotter.compare_parameter(param_name=param_name, constraints=constraints, mode='Train', kde=True)
if __name__ == "__main__":
    runs = 10

    # Experiment names grouped by suite; only mujoco_list is processed below.
    mujoco_list = ['mujoco_sac', 'mujoco_ddpg', 'mujoco_td3', 'mujoco_ppo', 'mujoco_naf']
    dqn_list = ['classic_dqn', 'lunar_dqn', 'pygame_dqn', 'minatar_dqn']
    ddqn_list = ['classic_ddqn', 'lunar_ddqn', 'pygame_ddqn', 'minatar_ddqn']
    maxmin_list = ['classic_maxmin', 'lunar_maxmin', 'pygame_maxmin', 'minatar_maxmin']

    # Steps applied to each experiment, in this order.
    pipeline = (unfinished_index, memory_info, time_info, analyze)
    for exp in mujoco_list:
        for step in pipeline:
            step(exp, runs=runs)