Hyperparameter Optimization Module #164

Open
wants to merge 135 commits into base: main
Changes from 129 commits
Commits (135)
8d3be23
1. bug fixes. 2. kernel extension. 3. batch GP implementation.
middleyuan Jun 30, 2023
7f0e3ff
update dependencies
middleyuan Jul 10, 2023
cf3d4e8
explicitly import scipy.linalg
middleyuan Jul 10, 2023
359eecc
add cartpole configs for gpmpc
middleyuan Jul 10, 2023
89a29b8
add hyperparameter optimization module
middleyuan Jul 10, 2023
9e2a7ef
catch all exceptions in hpo for debugging purposes.
middleyuan Jul 10, 2023
27454da
put cartpole configs for gpmpc under the gpmpc folder
middleyuan Jul 10, 2023
17e408b
add hpo scripts
middleyuan Jul 10, 2023
bb4e1b0
1. include pandas 2. change rel import in gpmpc_experiment.py 3. remo…
middleyuan Jul 10, 2023
14a6db1
rename config to match default algo name.
middleyuan Jul 10, 2023
e6a2e3d
remove old configs
middleyuan Jul 10, 2023
0484388
add tests
middleyuan Jul 10, 2023
84830df
edit bash file with correct arg name
middleyuan Jul 10, 2023
e69e048
add another host in gpmpc_hpo.sh
middleyuan Jul 10, 2023
097e1c2
change to new dir in gpmpc_hpo.sh
middleyuan Jul 10, 2023
405dcea
1. fix a small bug 2. add test_train_gpmpc_cartpole
middleyuan Jul 11, 2023
549ff3e
add a hpo parallelism test
middleyuan Jul 11, 2023
81b5602
saving before running hpo
middleyuan Jul 11, 2023
a5ad5f2
I think the bug is that it reaches the goal in the first step.
middleyuan Jul 11, 2023
ce4d75e
1. PPO configs. 2. Make cartpole init states harder. 3. First version…
middleyuan Jul 18, 2023
b40566c
Re-organize a bit (file name, remove __init__.py in test folders).
middleyuan Jul 18, 2023
23f571d
1. HPO strategies. 2. test on hpo for ppo. 3. another way to save che…
middleyuan Jul 22, 2023
802edb6
update gitignore
middleyuan Jul 24, 2023
02d1c33
change configs
middleyuan Jul 24, 2023
20d3a7f
update bash for hpo on gpmpc
middleyuan Jul 24, 2023
ad96f6f
add prior arg in gpmpc_sampler
middleyuan Jul 24, 2023
5318c25
1. HPO effort evaluations. 2. Bash file for hpo strategy evaluation.
middleyuan Jul 24, 2023
924d3b3
update dependencies
middleyuan Jul 25, 2023
14ae2aa
add the freedom to choose between random sampler and TPE sampler.
middleyuan Jul 26, 2023
c0b1b34
1. add strategy 5. 2. add unit test accordingly.
middleyuan Aug 3, 2023
f5c3a5a
1. prior configs. 2. update eval.py, sen.sh, and .gitignore.
middleyuan Aug 3, 2023
0e1248a
gpmpc hpo strategy study
middleyuan Aug 4, 2023
a0feec7
refactor the code
middleyuan Aug 7, 2023
bd39347
1. hpo on sac. 2. add activation arg in sac and fix a small bug.
middleyuan Aug 8, 2023
4342b2a
fix typos
middleyuan Aug 8, 2023
1e1f7cf
change to two jobs
middleyuan Aug 9, 2023
b087c87
change num of repetitions to make sure it at least has same num of sa…
middleyuan Aug 9, 2023
59b4220
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Aug 9, 2023
4c22c86
reduce the budget
middleyuan Aug 9, 2023
fe02a65
toy example
middleyuan Aug 10, 2023
3d33487
consider 4 versions of noisy functions.
middleyuan Aug 11, 2023
714a76d
include var study
middleyuan Aug 13, 2023
2663cde
improve visualization in toy examples
middleyuan Aug 14, 2023
249a284
updated visualization improvement in toy examples.
middleyuan Aug 14, 2023
e03cb33
change naming
middleyuan Aug 14, 2023
ee29967
final experiment setup
middleyuan Aug 30, 2023
3a6448d
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Aug 30, 2023
831e186
final experiment setup
middleyuan Aug 30, 2023
452937d
modify seeding
middleyuan Aug 30, 2023
7b4a844
Ignore runtime error for hpo
middleyuan Aug 31, 2023
bfd6f21
Merge branch 'hpo-on-ppo' into hpo-on-gpmpc
middleyuan Aug 31, 2023
e310829
Merge branch 'hpo-on-sac' into hpo-on-gpmpc
middleyuan Aug 31, 2023
c45e3e4
merge from sac
middleyuan Aug 31, 2023
8596fc8
fix a bug in hpo_sampler.py
middleyuan Aug 31, 2023
df01b80
final design to show possible lower compute time.
middleyuan Aug 31, 2023
9faae33
1. hpo on ddpg. 2. fix a small bug in ddpg_utils.
middleyuan Aug 31, 2023
de90501
relax the threshold
middleyuan Aug 31, 2023
4322f67
relax the threshold
middleyuan Sep 1, 2023
038f046
make rl_hpo_strategy_eval.sh automatic.
middleyuan Sep 4, 2023
4703d8b
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Sep 4, 2023
807e358
Merge branch 'hpo-on-ppo' into hpo-on-ddpg
middleyuan Sep 4, 2023
6590134
fix a bug in rl_hpo_strategy_eval.sh
middleyuan Sep 4, 2023
8c594c3
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Sep 4, 2023
87cbd1e
Merge branch 'hpo-on-ppo' into hpo-on-ddpg
middleyuan Sep 4, 2023
8211c52
add gpmpc_hpo_strategy_eval.sh
middleyuan Sep 4, 2023
26536e9
fix a small bug
middleyuan Sep 4, 2023
5bca27a
fix the budget (trial) bug in configs.
middleyuan Sep 5, 2023
f083b64
prepare comparing hpo strategy on gpmpc
middleyuan Sep 5, 2023
bed32ce
fix a bug in gpmpc_hpo_strategy.sh
middleyuan Sep 5, 2023
9503398
fix bugs in bash files
middleyuan Sep 5, 2023
6cd934f
fix the trial bug in config
middleyuan Sep 5, 2023
ecb27b1
fix a function bug in eval.py
middleyuan Sep 5, 2023
69d416b
Merge branch 'hpo-on-ppo' into hpo-on-ddpg
middleyuan Sep 5, 2023
a1b0756
1. add hpo resume functionality. 2. make eval function more general.
middleyuan Sep 6, 2023
c88e7e7
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Sep 6, 2023
a9aa83b
update configs
middleyuan Sep 6, 2023
7f791aa
make main.sh general
middleyuan Sep 6, 2023
d8d047b
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Sep 6, 2023
1e16f06
Merge branch 'hpo-on-ppo' into hpo-on-ddpg
middleyuan Sep 6, 2023
c4a8ded
resume previous config with trials increased.
middleyuan Sep 6, 2023
80e59ec
fix the sorting bug.
middleyuan Sep 11, 2023
1a3a98a
Merge branch 'hpo-on-ppo' into hpo-on-ddpg
middleyuan Sep 11, 2023
43387d5
Merge branch 'hpo-on-ppo' into hpo-on-sac
middleyuan Sep 11, 2023
952b267
fix sorting bug
middleyuan Sep 11, 2023
7f62195
a small bug fixed
middleyuan Sep 12, 2023
d5109f4
fix a bug on computing reward
middleyuan Sep 12, 2023
1618395
Merge branch 'hpo-on-ppo' into hpo-on-gpmpc
middleyuan Sep 14, 2023
778128b
add resume functionality
middleyuan Sep 14, 2023
61ea053
edit main bash file and fix some typos
middleyuan Sep 14, 2023
1caae68
simply assign zero if numerical issues happen during HPO
middleyuan Sep 14, 2023
8583978
Merge branch 'hpo-on-ppo' into hpo-on-gpmpc
middleyuan Sep 14, 2023
f9f701d
adjust eval
middleyuan Sep 16, 2023
5caf0c6
change to boxenplot
middleyuan Sep 19, 2023
eea138a
fix typo
middleyuan Sep 21, 2023
362ea75
add reliable_stats
middleyuan Sep 24, 2023
3649c44
Merge branch 'hpo-on-gpmpc' into hpo-on-ppo
middleyuan Sep 24, 2023
d6e0b08
Merge branch 'hpo-on-ddpg' into hpo-on-ppo
middleyuan Sep 24, 2023
55624cd
update outdated configs
middleyuan Sep 24, 2023
6e9bbc9
Merge remote-tracking branch 'origin/hpo-on-sac' into hpo-on-ppo
middleyuan Sep 24, 2023
02d75ad
update jupyter notebooks
middleyuan Oct 5, 2023
00f6608
update jupyter notebooks.
middleyuan Oct 23, 2023
b5749d0
final update for appendix
middleyuan Oct 24, 2023
e4d616d
update readme
middleyuan Oct 24, 2023
ef29146
fix typo
middleyuan Oct 25, 2023
47f1598
1. clean up code for ppo controller, hyperparameter module. 2. Test o…
middleyuan Apr 10, 2024
83ec989
test training with given optimized hp files.
middleyuan Apr 10, 2024
e890c04
1. test hpo with and without MySQL. 2. update README.
middleyuan Apr 10, 2024
e9499cd
remove discrepancy in readme.
middleyuan Apr 10, 2024
c45a975
update readme
middleyuan Apr 10, 2024
8350b63
1. remove 'pandas' and 'seaborn' in package dependencies. 2. move tes…
middleyuan Apr 15, 2024
ee9ec34
-
middleyuan Apr 15, 2024
a260695
update config_overrides in examples of rl
middleyuan Apr 15, 2024
13ef164
run pre-commit hooks to improve linting
middleyuan Apr 16, 2024
3bbd0ba
1. ignore W503 and W504 as they conflict in pre-commit-config. 2. run…
middleyuan Apr 16, 2024
56f2738
add activation config to the examples that use RL.
middleyuan Apr 16, 2024
ba837c5
1. standardize hpo template in the examples. 2. remove _learn(). 3. a…
middleyuan Apr 17, 2024
51c601e
run pre-commit hooks.
middleyuan Apr 17, 2024
fb572f2
add gpmpc hpo test without using mysql
middleyuan Apr 17, 2024
c5fbeed
1. update config of cartpole task. 2. add max_steps and exponentiated…
middleyuan Apr 19, 2024
f8e3d0c
1. add bash files to automate hpo pipeline for gpmpc. 2. update gpmpc…
middleyuan Apr 22, 2024
1f62d3b
Merge remote-tracking branch 'upstream/main' into hpo
middleyuan Apr 22, 2024
ffb29de
match .gitignore to upstream/main.
middleyuan Apr 22, 2024
1508634
update for review
middleyuan Apr 24, 2024
896ac9f
update based on the review comments.
middleyuan May 13, 2024
3b49f7b
fix typo in readme.
middleyuan May 14, 2024
133dfee
make test_train.py faster.
middleyuan May 14, 2024
fc6fd06
Merge remote-tracking branch 'utiasDSL/main' into hpo
middleyuan Sep 23, 2024
2cf9424
1. add Vizier package. 2. update dependencies. 3. change database to …
middleyuan Sep 23, 2024
1828ff1
update readme.
middleyuan Sep 23, 2024
265d8df
commit for the second round review.
middleyuan Sep 27, 2024
32596c8
1. improve docstrings. 2. add a metric for hpo. 3. add hpo.sh bash.
middleyuan Sep 30, 2024
adbbb46
1. some bugs fixed. 2. add special handling. 3. make tests faster.
middleyuan Oct 1, 2024
f89197a
1. add hpo objective bounds in config. 2. improve logging.py. 3. all …
middleyuan Oct 2, 2024
2c046ef
fix minor config issue.
middleyuan Oct 2, 2024
67cb7e3
some bug fixes.
middleyuan Oct 8, 2024
68 changes: 68 additions & 0 deletions examples/hpo/cartpole/config_overrides/cartpole_stab.yaml
@@ -0,0 +1,68 @@
task_config:
  seed: 42
  info_in_reset: true
  ctrl_freq: 15
  pyb_freq: 750
  physics: pyb

  init_state_randomization_info:
    init_x:
      distrib: uniform
      high: 1.0
      low: 1.0
    init_x_dot:
      distrib: uniform
      high: 0.05
      low: -0.05
    init_theta:
      distrib: uniform
      high: 0.05
      low: -0.05
    init_theta_dot:
      distrib: uniform
      high: 0.05
      low: -0.05
  # disturbances:
  #   observation:
  #     - disturbance_func: white_noise
  #       std: 0.0001
  inertial_prop:
    cart_mass: 1.0
    pole_length: 0.5
    pole_mass: 0.1
  inertial_prop_randomization_info: null
  randomized_inertial_prop: false


  # normalized_rl_action_space: false

  task: stabilization
  task_info:
    stabilization_goal: [0.0, 0.0, 0.0, 0.0]
    # stabilization_goal_tolerance: 0.01
    stabilization_goal_tolerance: 0.0
    use_constraint_penalty: false

  episode_len_sec: 5
  cost: quadratic
  done_on_violation: false
  done_on_out_of_bound: false

  obs_goal_horizon: 1

  # RL Reward
  rew_state_weight: [1, 1, 1, 1]
  rew_act_weight: 0.1
  rew_exponential: True

  constraints:
    - constraint_form: default_constraint
      constrained_variable: input
      upper_bounds: [3.0]
      lower_bounds: [-3.0]
    - constraint_form: default_constraint
      constrained_variable: state
      # upper_bounds: [1.5, 0.45, 0.1, 0.3]
      # lower_bounds: [-1.5, -0.45, -0.1, -0.3]
      upper_bounds: [1.5, 0.45, 0.5, 0.5]
      lower_bounds: [-1.5, -0.45, -0.5, -0.5]
@@ -1,9 +1,10 @@
hpo_config:

hpo: True # do hyperparameter optimization
load_if_exists: True # this should be set to True if hpo is run in parallel
use_database: False # this is set to true if MySQL is used
objective: [exponentiated_avg_return] # [other metrics defined in base_experiment.py]
direction: [maximize] # [maximize, maximize]
use_database: True # this is set to true if MySQL is used
objective: [average_rmse] # [other metrics defined in base_experiment.py]
direction: [minimize] # [maximize, maximize]
dynamical_runs: False # if True, dynamically increase runs
warm_trials: 20 # number of trials to run before dynamical runs
approximation_threshold: 5 # this is only used when dynamical_runs is True
@@ -33,3 +34,5 @@ hpo_config:
n_ind_points: 35
num_epochs: 5
num_samples: 75
state_weight: [1, 1, 1, 1]
action_weight: [0.1]
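
The hpo_config block above pairs each objective with an optimization direction, allows parallel workers via load_if_exists, and can back the study with MySQL. The commit history mentions TPE and random samplers (and later a Vizier package), which suggests an Optuna-style study underneath; the sketch below is a hypothetical illustration of how these fields could be wired up, not the PR's actual code. The study name, storage URL, evaluate function and sampled hyperparameters are placeholders.

```python
# Hypothetical sketch (not the PR's code): how hpo_config fields such as
# direction, seed, trials and load_if_exists could map onto an Optuna study.
import optuna


def evaluate(trial):
    # Placeholder objective: the real module would train the controller
    # `repetitions` times and return the configured metric (e.g. average_rmse).
    num_epochs = trial.suggest_int('num_epochs', 2, 10)
    learning_rate = trial.suggest_float('learning_rate', 1e-4, 1e-2, log=True)
    return (num_epochs - 5) ** 2 + learning_rate  # dummy score to minimize


study = optuna.create_study(
    study_name='gpmpc_hpo',                        # placeholder name
    direction='minimize',                          # hpo_config: direction
    sampler=optuna.samplers.TPESampler(seed=24),   # or optuna.samplers.RandomSampler
    # storage='mysql://user:pw@host/hpo',          # only when use_database: True
    load_if_exists=True,                           # lets parallel workers share a study
)
study.optimize(evaluate, n_trials=40)              # hpo_config: trials
```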
@@ -4,6 +4,7 @@ algo_config:
deque_size: 10
eval_batch_size: 10
gp_approx: mean_eq
# gp_model_path: ./gp_mpc/results/
gp_model_path: null
horizon: 20
prior_info:
@@ -27,17 +28,17 @@ algo_config:
- 3000
- 3000
overwrite_saved_data: false
prior_param_coeff: 1.5
prior_param_coeff: 2.0
prob: 0.95
q_mpc:
- 1
- 0.1
- 1
- 1
- 1
- 0.1
r_mpc:
- 0.1
kernel: Matern
sparse_gp: True
sparse_gp: true
n_ind_points: 40
inducing_point_selection_method: 'kmeans'
recalc_inducing_points_at_every_step: false
@@ -49,18 +50,18 @@ algo_config:
target_mask: null
train_iterations: null
test_data_ratio: 0.2
use_prev_start: true
warmstart: true
num_epochs: 5
num_samples: 75
num_test_episodes_per_epoch: 2
num_train_episodes_per_epoch: 2
num_epochs: 2
num_samples: 50
num_test_episodes_per_epoch: 1
num_train_episodes_per_epoch: 1
same_test_initial_state: true
same_train_initial_state: false
rand_data_selection: false
terminate_train_on_done: True
terminate_test_on_done: False
# parallel: False
parallel: True

device: cpu
restore: null
device: cuda
restore: null
Collaborator:
There should be a newline at the end of the file. I think the pre-commit hooks handle that. If not, you can configure VS Code to always add one when a file is saved, which is what I do.

Collaborator:
Yeah, the pre-commit hook should fix this, which means you haven't yet run the hook. This should be done ASAP as it may change a lot.

Contributor Author:
Done

Collaborator:
I think the hooks have still not been run, since there are double quotes here and there.

27 changes: 27 additions & 0 deletions examples/hpo/cartpole/config_overrides/ilqr_cartpole_hpo.yaml
@@ -0,0 +1,27 @@
hpo_config:

  hpo: True # do hyperparameter optimization
  load_if_exists: True # this should be set to True if hpo is run in parallel
  use_database: True # this is set to true if MySQL is used
  objective: [average_rmse] # [other metrics defined in base_experiment.py]
  direction: [minimize] # [maximize, maximize]
  dynamical_runs: False # if True, dynamically increase runs
  warm_trials: 20 # number of trials to run before dynamical runs
  approximation_threshold: 5 # this is only used when dynamical_runs is True
  repetitions: 5 # number of samples of performance for each objective query
  alpha: 1 # significance level for CVaR
  use_gpu: True
  dashboard: False
  seed: 24
  save_n_best_hps: 3
  # budget
  trials: 40

  # hyperparameters
  hps_config:
    max_iterations: 15
    lamb_factor: 10
    lamb_max: 1000
    epsilon: 0.01
    state_weight: [1, 1, 1, 1]
    action_weight: [0.1]
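
In this config, repetitions: 5 means each candidate set of hyperparameters is evaluated several times, and alpha is described as a CVaR significance level, suggesting the noisy scores are reduced conservatively rather than simply averaged. Below is a minimal sketch of such a reduction, assuming CVaR over the worst-performing fraction of repetitions; the module's exact reduction may differ, and cvar_objective is a hypothetical helper.

```python
# Hypothetical aggregation of repeated objective evaluations: CVaR over the
# worst alpha-fraction of repetitions (alpha = 1 reduces to the plain mean).
import numpy as np


def cvar_objective(scores, alpha=1.0, minimize=True):
    """Mean of the worst alpha-fraction of scores."""
    scores = np.sort(np.asarray(scores, dtype=float))
    k = max(1, int(np.ceil(alpha * len(scores))))
    worst = scores[-k:] if minimize else scores[:k]  # large rmse is bad when minimizing
    return float(np.mean(worst))


# e.g. five repetitions of average_rmse for one candidate hyperparameter set
samples = [0.12, 0.15, 0.11, 0.30, 0.14]
print(cvar_objective(samples, alpha=1.0))  # plain mean over all repetitions
print(cvar_objective(samples, alpha=0.4))  # mean of the two worst repetitions
```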
24 changes: 24 additions & 0 deletions examples/hpo/cartpole/config_overrides/ilqr_cartpole_stab_100.yaml
@@ -0,0 +1,24 @@
algo: ilqr
algo_config:
  # Cost parameters
  q_lqr: [1, 1, 1, 1]
  r_lqr: [0.1]

  # Model arguments
  # Note: Higher simulation frequency is required if using a controller designed
  # based on the continuous-time model
  discrete_dynamics: True

  # iLQR arguments
  max_iterations: 15
  lamb_factor: 10
  lamb_max: 1000
  epsilon: 0.01

  # prior info
  prior_param_coeff: 1.0
  prior_info:
    prior_prop:
      cart_mass: 1.0
      pole_length: 0.5
      pole_mass: 0.1
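
The q_lqr and r_lqr vectors above are the diagonal weights of the quadratic cost that iLQR minimizes. As a worked illustration, assuming the conventional stage cost (the controller's own cost code is not shown in this diff):

```python
# Worked illustration of the quadratic cost implied by q_lqr / r_lqr above;
# the controller's actual cost implementation may differ.
import numpy as np

Q = np.diag([1.0, 1.0, 1.0, 1.0])  # q_lqr
R = np.diag([0.1])                 # r_lqr


def stage_cost(x, u, x_goal):
    """l(x, u) = (x - x_goal)^T Q (x - x_goal) + u^T R u."""
    dx = x - x_goal
    return float(dx @ Q @ dx + u @ R @ u)


print(stage_cost(np.array([0.1, 0.0, 0.05, 0.0]), np.array([0.5]), np.zeros(4)))
```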
@@ -0,0 +1,34 @@
safety_filter: linear_mpsc
sf_config:
  # LQR controller parameters
  r_lin:
    - 0.1
  q_lin:
    - 1
    - 0.1
    - 1
    - 0.1

  # MPC Parameters
  horizon: 20
  warmstart: True
  integration_algo: rk4
  # use_terminal_set: False
  use_terminal_set: True

  # Prior info
  prior_info:
    prior_prop: null
    randomize_prior_prop: False
    prior_prop_rand_info: null

  # Safe set calculation
  n_samples: 600
  n_samples_terminal_set: 100
  learn_terminal_set: False

  # Tau parameter for the calculation of the RPI
  tau: 0.95

  # Cost function
  cost_function: one_step_cost
@@ -1,9 +1,10 @@
hpo_config:

hpo: True # do hyperparameter optimization
load_if_exists: True # this should be set to True if hpo is run in parallel
use_database: False # this is set to true if MySQL is used
objective: [average_return] # [other metrics defined in base_experiment.py]
direction: [maximize] # [maximize, maximize]
use_database: True # this is set to true if MySQL is used
objective: [average_rmse] # [other metrics defined in base_experiment.py]
direction: [minimize] # [maximize, maximize]
dynamical_runs: False # if True, dynamically increase runs
warm_trials: 20 # number of trials to run before dynamical runs
approximation_threshold: 5 # this is only used when dynamical_runs is True
@@ -14,13 +15,13 @@ hpo_config:
seed: 24
save_n_best_hps: 3
# budget
trials: 80
trials: 40

# hyperparameters
hps_config:
# model args
hidden_dim: 64
activation: relu
activation: "relu"

# loss args
gamma: 0.99
@@ -37,5 +38,8 @@ hpo_config:
# max_grad_norm: 0.5 (currently not implemented in PPO controller)

# runner args
rollout_steps: 100
max_env_steps: 72000

# objective
state_weight: [1, 1, 1, 1]
action_weight: [0.1]
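
The state_weight and action_weight entries under the # objective comment suggest the HPO score weights state (and input) errors when computing metrics such as average_rmse. Purely as an illustration, with weighted_rmse as a hypothetical helper rather than the base_experiment.py implementation:

```python
# Illustrative weighted state RMSE over a rollout; weighted_rmse is a
# hypothetical helper, not the base_experiment.py implementation
# (action_weight would enter an analogous input-error term).
import numpy as np


def weighted_rmse(states, goals, state_weight):
    """Root mean square of the per-step, per-dimension weighted state error."""
    err = np.asarray(states, dtype=float) - np.asarray(goals, dtype=float)  # (T, n_x)
    w = np.asarray(state_weight, dtype=float)
    return float(np.sqrt(np.mean(np.sum(w * err ** 2, axis=1))))


# Two-step cartpole rollout scored against the origin goal.
states = np.array([[0.10, 0.0, 0.05, 0.0],
                   [0.05, 0.0, 0.02, 0.0]])
goals = np.zeros_like(states)
print(weighted_rmse(states, goals, state_weight=[1, 1, 1, 1]))
```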
@@ -2,7 +2,7 @@ algo: ppo
algo_config:
# model args
hidden_dim: 64
activation: relu
activation: "relu"
Federico-PizarroBejarano marked this conversation as resolved.
norm_obs: False
norm_reward: False
clip_obs: 10.0
@@ -38,4 +38,4 @@ algo_config:
eval_interval: 73000
num_checkpoints: 50
eval_save_best: True
tensorboard: False
tensorboard: False
67 changes: 0 additions & 67 deletions examples/hpo/gp_mpc/config_overrides/cartpole/cartpole_stab.yaml

This file was deleted.
