keyword_explanation.json
{
"epochs": "Integer that determines the number of episodes to run before terminating training",
"batch_size": "Integer that determines the number of timesteps in a batch during training",
"model": "String: Keyword that determines whether to use PPO via StableBaselines3 or custom written DDPG",
"learning_rate": "Float: starting learning rate for training, typical value = 1e-4",
"discount": "Float: discount factor for rewards",
"epsilon": "Float: noise parameter to determine the percent of the time to randomly explore and the magnitude of this random exploration, unused if training with PPO",
"edecay": "Float: Rate at which epsilon decays every episode. Unused when using PPO",
"entropy": "Float: Entropy when using entropy regularization. Unused when using PPO",
"object": "String: Keyword that determines which shape to use. Currently only cube is supported",
"hand": "String: Keyword that determines which hand to use",
"task": "String: Keyworkd that determines which set of goal positions to use when training",
"evaluate": "Integer: determines the number of episodes to run before evaluating when training",
"sampling": "String: keyword to determine what sampling strategy we use when training. Unused when using PPO",
"reward": "String: keyword to determine which predefined reward strategy to use when training",
"action": "String: keyword to determine which action space to use",
"rollout_size": "Integer: determines number of timesteps to include in rollouts. Unused when using PPO",
"rollout_weight": "Float: scaling factor that is used to weigh rollout rewards relative to standard rewards. Unused when using PPO",
"tau": "Float: Fraction of actor-critic weights that get updated every step. Unused when using PPO",
"pv": "Integer: Number of previous timesteps to include in state space",
"viz": "Bool: Indicates if we should visualize the episodes when training",
"sr": "Int: Radius around goal position that we consider successful (mm)",
"success_reward": "Float: magnitude of reward given when trial is successful. Unused in some reward schemes",
"state_noise": "Float: variance of gaussian noise added to state space",
"start_noise": "Float: radius of potential object start location about 0,0.1",
"tsteps": "Int: Number of timesteps in a training episode",
"eval-tsteps": "Int: Number of timesteps in an evaluation episode",
"distance_scaling": "Float: Scaling factor that is used to weigh the distance portion of the reward",
"contact_scaling": "Float: Scaling factor that is used to weigh the finger contact portion of the reward",
"freq": "Int: frequency that the agent is queried to make a decision. Simulation physics is always updated at 240 hz",
"rstart": "Bool: do we randomize the start position",
"state_dim": "Int: size of the state space",
"state_mins": "List: contains the minium possible value for every state parameter",
"state_maxes": "List: contains the maximum possible value for every state parameter",
"state_list": "List: contains strings of keywords indicating which type of sensors to add to the simulator",
"action_dim": "Int: size of the action space",
"save_path": "String: full path to the folder where data will be saved",
"load_path": "String: (optional) full path to the folder where a previous policy is loaded from",
"hand_path": "String: full path to urdf file for hand",
"object_path": "String: full path to urdf file for object",
"max_action": "Float: maximum possible action. Assumes actions range from [-max_action, max_action]",
"points_path": "String: full path to csv file of goal positions for training",
"tname": "String: full path to folder for saving tensorboard files"
}
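
Below is a minimal sketch (not part of the repository's code) of how these keyword descriptions could be used to sanity-check a training configuration file before running an experiment. The config file name experiment_config.json and the validation logic are assumptions for illustration only.

import json

def load_documented_keywords(path="keyword_explanation.json"):
    # Return the mapping of config keyword -> description defined above.
    with open(path) as f:
        return json.load(f)

def check_config(config_path, keywords):
    # Warn about any config keys that are not documented in keyword_explanation.json.
    with open(config_path) as f:
        config = json.load(f)
    for key in config:
        if key not in keywords:
            print(f"Warning: '{key}' is not a documented keyword")
    return config

if __name__ == "__main__":
    keywords = load_documented_keywords()
    # "experiment_config.json" is a hypothetical file that uses the keys above,
    # e.g. {"epochs": 1000, "batch_size": 100, "model": "PPO", ...}
    config = check_config("experiment_config.json", keywords)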