keyword_explanation.json
{
"epochs": "Integer that determines the number of episodes to run before terminating training",
"batch_size": "Integer that determines the number of timesteps in a batch during training",
"model": "String: Keyword that determines whether to use PPO via StableBaselines3 or custom written DDPG",
"learning_rate": "Float: starting learning rate for training, typical value = 1e-4",
"discount": "Float: discount factor for rewards",
"epsilon": "Float: noise parameter to determine the percent of the time to randomly explore and the magnitude of this random exploration, unused if training with PPO",
"edecay": "Float: Rate at which epsilon decays every episode. Unused when using PPO",
"entropy": "Float: Entropy when using entropy regularization. Unused when using PPO",
"object": "String: Keyword that determines which shape to use. Currently only cube is supported",
"hand": "String: Keyword that determines which hand to use",
"task": "String: Keyworkd that determines which set of goal positions to use when training",
"evaluate": "Integer: determines the number of episodes to run before evaluating when training",
"sampling": "String: keyword to determine what sampling strategy we use when training. Unused when using PPO",
"reward": "String: keyword to determine which predefined reward strategy to use when training",
"action": "String: keyword to determine which action space to use",
"rollout_size": "Integer: determines number of timesteps to include in rollouts. Unused when using PPO",
"rollout_weight": "Float: scaling factor that is used to weigh rollout rewards relative to standard rewards. Unused when using PPO",
"tau": "Float: Fraction of actor-critic weights that get updated every step. Unused when using PPO",
"pv": "Integer: Number of previous timesteps to include in state space",
"viz": "Bool: Indicates if we should visualize the episodes when training",
"sr": "Int: Radius around goal position that we consider successful (mm)",
"success_reward": "Float: magnitude of reward given when trial is successful. Unused in some reward schemes",
"state_noise": "Float: variance of gaussian noise added to state space",
"start_noise": "Float: radius of potential object start location about 0,0.1",
"tsteps": "Int: Number of timesteps in a training episode",
"eval-tsteps": "Int: Number of timesteps in an evaluation episode",
"distance_scaling": "Float: Scaling factor that is used to weigh the distance portion of the reward",
"contact_scaling": "Float: Scaling factor that is used to weigh the finger contact portion of the reward",
"freq": "Int: frequency that the agent is queried to make a decision. Simulation physics is always updated at 240 hz",
"rstart": "Bool: do we randomize the start position",
"state_dim": "Int: size of the state space",
"state_mins": "List: contains the minium possible value for every state parameter",
"state_maxes": "List: contains the maximum possible value for every state parameter",
"state_list": "List: contains strings of keywords indicating which type of sensors to add to the simulator",
"action_dim": "Int: size of the action space",
"save_path": "String: full path to the folder where data will be saved",
"load_path": "String: (optional) full path to the folder where a previous policy is loaded from",
"hand_path": "String: full path to urdf file for hand",
"object_path": "String: full path to urdf file for object",
"max_action": "Float: maximum possible action. Assumes actions range from [-max_action, max_action]",
"points_path": "String: full path to csv file of goal positions for training",
"tname": "String: full path to folder for saving tensorboard files"
}
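
Below is a minimal sketch (not part of the repository's code) of how these keyword descriptions could be used to sanity-check a training configuration file before running an experiment. The config file name experiment_config.json and the validation logic are assumptions for illustration only.

import json

def load_documented_keywords(path="keyword_explanation.json"):
    # Return the mapping of config keyword -> description defined above.
    with open(path) as f:
        return json.load(f)

def check_config(config_path, keywords):
    # Warn about any config keys that are not documented in keyword_explanation.json.
    with open(config_path) as f:
        config = json.load(f)
    for key in config:
        if key not in keywords:
            print(f"Warning: '{key}' is not a documented keyword")
    return config

if __name__ == "__main__":
    keywords = load_documented_keywords()
    # "experiment_config.json" is a hypothetical file that uses the keys above,
    # e.g. {"epochs": 1000, "batch_size": 100, "model": "PPO", ...}
    config = check_config("experiment_config.json", keywords)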