From 135c679bda0728f0376260c10b622ab80d70b344 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 01:04:59 -0500 Subject: [PATCH 01/38] spelling: algorithm Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/a2c/a2c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/a2c/a2c.py b/baselines/a2c/a2c.py index 12d5769e72..2a793663b0 100644 --- a/baselines/a2c/a2c.py +++ b/baselines/a2c/a2c.py @@ -150,7 +150,7 @@ def learn( env: RL environment. Should implement interface similar to VecEnv (baselines.common/vec_env) or be wrapped with DummyVecEnv (baselines.common/vec_env/dummy_vec_env.py) - seed: seed to make random number sequence in the alorightm reproducible. By default is None which means seed from system noise generator (not reproducible) + seed: seed to make random number sequence in the algorithm reproducible. By default is None which means seed from system noise generator (not reproducible) nsteps: int, number of steps of the vectorized environment per update (i.e. batch size is nsteps * nenv where nenv is number of environment copies simulated in parallel) From 6b1149a96d0b8ed55b867c3cafad75c146bf23fe Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:29 -0500 Subject: [PATCH 02/38] spelling: auxiliary Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/her/README.md | 6 +++--- baselines/her/ddpg.py | 6 +++--- baselines/her/experiment/config.py | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/baselines/her/README.md b/baselines/her/README.md index 232b5053bd..873afb1f07 100644 --- a/baselines/her/README.md +++ b/baselines/her/README.md @@ -48,7 +48,7 @@ python experiment/data_generation/fetch_data_generation.py ``` This outputs ```data_fetch_random_100.npz``` file which is our data file. -To launch training with demonstrations (more technically, with behaviour cloning loss as an auxilliary loss), run the following +To launch training with demonstrations (more technically, with behaviour cloning loss as an auxiliary loss), run the following ```bash python -m baselines.run --alg=her --env=FetchPickAndPlace-v1 --num_timesteps=2.5e6 --demo_file=/Path/to/demo_file.npz ``` @@ -58,12 +58,12 @@ To inspect what the agent has learned, use the `--play` flag as described above. #### Configuration The provided configuration is for training an agent with HER without demonstrations, we need to change a few paramters for the HER algorithm to learn through demonstrations, to do that, set: -* bc_loss: 1 - whether or not to use the behavior cloning loss as an auxilliary loss +* bc_loss: 1 - whether or not to use the behavior cloning loss as an auxiliary loss * q_filter: 1 - whether or not a Q value filter should be used on the Actor outputs * num_demo: 100 - number of expert demo episodes * demo_batch_size: 128 - number of samples to be used from the demonstrations buffer, per mpi thread * prm_loss_weight: 0.001 - Weight corresponding to the primary loss -* aux_loss_weight: 0.0078 - Weight corresponding to the auxilliary loss also called the cloning loss +* aux_loss_weight: 0.0078 - Weight corresponding to the auxiliary loss also called the cloning loss Apart from these changes the reported results also have the following configurational changes: diff --git a/baselines/her/ddpg.py b/baselines/her/ddpg.py index 988f14b015..ad2dc30c47 100644 --- a/baselines/her/ddpg.py +++ b/baselines/her/ddpg.py @@ -55,12 +55,12 @@ def __init__(self, input_dims, buffer_size, hidden, layers, network_class, polya sample_transitions (function) function that samples from the replay buffer gamma (float): gamma used for Q learning updates reuse (boolean): whether or not the networks should be reused - bc_loss: whether or not the behavior cloning loss should be used as an auxilliary loss + bc_loss: whether or not the behavior cloning loss should be used as an auxiliary loss q_filter: whether or not a filter on the q value update should be used when training with demonstartions num_demo: Number of episodes in to be used in the demonstration buffer demo_batch_size: number of samples to be used from the demonstrations buffer, per mpi thread prm_loss_weight: Weight corresponding to the primary loss - aux_loss_weight: Weight corresponding to the auxilliary loss also called the cloning loss + aux_loss_weight: Weight corresponding to the auxiliary loss also called the cloning loss """ if self.clip_return is None: self.clip_return = np.inf @@ -364,7 +364,7 @@ def _create_network(self, reuse=False): self.cloning_loss_tf = tf.reduce_sum(tf.square(tf.boolean_mask(tf.boolean_mask((self.main.pi_tf), mask), maskMain, axis=0) - tf.boolean_mask(tf.boolean_mask((batch_tf['u']), mask), maskMain, axis=0))) self.pi_loss_tf = -self.prm_loss_weight * tf.reduce_mean(self.main.Q_pi_tf) #primary loss scaled by it's respective weight prm_loss_weight self.pi_loss_tf += self.prm_loss_weight * self.action_l2 * tf.reduce_mean(tf.square(self.main.pi_tf / self.max_u)) #L2 loss on action values scaled by the same weight prm_loss_weight - self.pi_loss_tf += self.aux_loss_weight * self.cloning_loss_tf #adding the cloning loss to the actor loss as an auxilliary loss scaled by its weight aux_loss_weight + self.pi_loss_tf += self.aux_loss_weight * self.cloning_loss_tf #adding the cloning loss to the actor loss as an auxiliary loss scaled by its weight aux_loss_weight elif self.bc_loss == 1 and self.q_filter == 0: # train with demonstrations without q_filter self.cloning_loss_tf = tf.reduce_sum(tf.square(tf.boolean_mask((self.main.pi_tf), mask) - tf.boolean_mask((batch_tf['u']), mask))) diff --git a/baselines/her/experiment/config.py b/baselines/her/experiment/config.py index 6370505a84..d237eb6ba2 100644 --- a/baselines/her/experiment/config.py +++ b/baselines/her/experiment/config.py @@ -46,12 +46,12 @@ 'norm_eps': 0.01, # epsilon used for observation normalization 'norm_clip': 5, # normalized observations are cropped to this values - 'bc_loss': 0, # whether or not to use the behavior cloning loss as an auxilliary loss + 'bc_loss': 0, # whether or not to use the behavior cloning loss as an auxiliary loss 'q_filter': 0, # whether or not a Q value filter should be used on the Actor outputs 'num_demo': 100, # number of expert demo episodes 'demo_batch_size': 128, #number of samples to be used from the demonstrations buffer, per mpi thread 128/1024 or 32/256 'prm_loss_weight': 0.001, #Weight corresponding to the primary loss - 'aux_loss_weight': 0.0078, #Weight corresponding to the auxilliary loss also called the cloning loss + 'aux_loss_weight': 0.0078, #Weight corresponding to the auxiliary loss also called the cloning loss } From 7135c559b8078a515e65b5b38c321a3d09fe115a Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:30 -0500 Subject: [PATCH 03/38] spelling: coefficient Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/a2c/a2c.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/a2c/a2c.py b/baselines/a2c/a2c.py index 2a793663b0..ec972ad9cf 100644 --- a/baselines/a2c/a2c.py +++ b/baselines/a2c/a2c.py @@ -159,7 +159,7 @@ def learn( vf_coef: float, coefficient in front of value function loss in the total loss function (default: 0.5) - ent_coef: float, coeffictiant in front of the policy entropy in the total loss function (default: 0.01) + ent_coef: float, coefficient in front of the policy entropy in the total loss function (default: 0.01) max_gradient_norm: float, gradient is clipped to have global L2 norm no more than this value (default: 0.5) From 9b1868092db1b716e18359e9902da4349ea4e3d4 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:30 -0500 Subject: [PATCH 04/38] spelling: contrasting Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/her/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/her/README.md b/baselines/her/README.md index 873afb1f07..50cf4d19b9 100644 --- a/baselines/her/README.md +++ b/baselines/her/README.md @@ -80,6 +80,6 @@ Training with demonstrations helps overcome the exploration problem and achieves
-
Training results for Fetch Pick and Place task constrasting between training with and without demonstration data.
+
Training results for Fetch Pick and Place task contrasting between training with and without demonstration data.
From 7458d5933b4cca664cacc331de18087baf652b04 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:31 -0500 Subject: [PATCH 05/38] spelling: demonstrations Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/her/ddpg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/her/ddpg.py b/baselines/her/ddpg.py index ad2dc30c47..b41ac64d35 100644 --- a/baselines/her/ddpg.py +++ b/baselines/her/ddpg.py @@ -56,7 +56,7 @@ def __init__(self, input_dims, buffer_size, hidden, layers, network_class, polya gamma (float): gamma used for Q learning updates reuse (boolean): whether or not the networks should be reused bc_loss: whether or not the behavior cloning loss should be used as an auxiliary loss - q_filter: whether or not a filter on the q value update should be used when training with demonstartions + q_filter: whether or not a filter on the q value update should be used when training with demonstrations num_demo: Number of episodes in to be used in the demonstration buffer demo_batch_size: number of samples to be used from the demonstrations buffer, per mpi thread prm_loss_weight: Weight corresponding to the primary loss From f57a63c9be13ff033324700ccd31a416227cd71c Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:31 -0500 Subject: [PATCH 06/38] spelling: deterministic Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/gail/result/gail-result.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/gail/result/gail-result.md b/baselines/gail/result/gail-result.md index 8ecc9ed1eb..a13f657fd4 100644 --- a/baselines/gail/result/gail-result.md +++ b/baselines/gail/result/gail-result.md @@ -24,7 +24,7 @@ Hopper-v1, Walker2d-v1, HalfCheetah-v1, Humanoid-v1, HumanoidStandup-v1. Every i For details (e.g., adversarial loss, discriminator accuracy, etc.) about GAIL training, please see [here](https://drive.google.com/drive/folders/1nnU8dqAV9i37-_5_vWIspyFUJFQLCsDD?usp=sharing) -### Determinstic Policy (Set std=0) +### Deterministic Policy (Set std=0) | | Un-normalized | Normalized | |---|---|---| | Hopper-v1 | | | From 8033327e65d4c4ea3d37e625f318ebd868bfcfa7 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:32 -0500 Subject: [PATCH 07/38] spelling: directory Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/common/tests/envs/mnist_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/common/tests/envs/mnist_env.py b/baselines/common/tests/envs/mnist_env.py index cc0bde0619..94a73d70da 100644 --- a/baselines/common/tests/envs/mnist_env.py +++ b/baselines/common/tests/envs/mnist_env.py @@ -15,7 +15,7 @@ def __init__( import filelock from tensorflow.examples.tutorials.mnist import input_data # we could use temporary directory for this with a context manager and - # TemporaryDirecotry, but then each test that uses mnist would re-download the data + # TemporaryDirectory, but then each test that uses mnist would re-download the data # this way the data is not cleaned up, but we only download it once per machine mnist_path = osp.join(tempfile.gettempdir(), 'MNIST_data') with filelock.FileLock(mnist_path + '.lock'): From 6818376bd8fed80cb49283dd0423eb8550ac3599 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:32 -0500 Subject: [PATCH 08/38] spelling: evaluate Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/gail/gail-eval.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/gail/gail-eval.py b/baselines/gail/gail-eval.py index 1148cb309c..babee28749 100644 --- a/baselines/gail/gail-eval.py +++ b/baselines/gail/gail-eval.py @@ -1,5 +1,5 @@ ''' -This code is used to evalaute the imitators trained with different number of trajectories +This code is used to evaluate the imitators trained with different number of trajectories and plot the results in the same figure for easy comparison. ''' From 22b8c4f7ed394a0293c33800a271a905b2d9c542 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:33 -0500 Subject: [PATCH 09/38] spelling: evaluation Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/deepq/build_graph.py | 2 +- baselines/gail/behavior_clone.py | 2 +- baselines/gail/run_mujoco.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/baselines/deepq/build_graph.py b/baselines/deepq/build_graph.py index f7de316b29..e9bc7d9ee3 100644 --- a/baselines/deepq/build_graph.py +++ b/baselines/deepq/build_graph.py @@ -388,7 +388,7 @@ def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping= q_t = q_func(obs_t_input.get(), num_actions, scope="q_func", reuse=True) # reuse parameters from act q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name + "/q_func") - # target q network evalution + # target q network evaluation q_tp1 = q_func(obs_tp1_input.get(), num_actions, scope="target_q_func") target_q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=tf.get_variable_scope().name + "/target_q_func") diff --git a/baselines/gail/behavior_clone.py b/baselines/gail/behavior_clone.py index aab1bc6407..cb458efa32 100644 --- a/baselines/gail/behavior_clone.py +++ b/baselines/gail/behavior_clone.py @@ -32,7 +32,7 @@ def argsparser(): parser.add_argument('--traj_limitation', type=int, default=-1) # Network Configuration (Using MLP Policy) parser.add_argument('--policy_hidden_size', type=int, default=100) - # for evaluatation + # for evaluation boolean_flag(parser, 'stochastic_policy', default=False, help='use stochastic/deterministic policy to evaluate') boolean_flag(parser, 'save_sample', default=False, help='save the trajectories or not') parser.add_argument('--BC_max_iter', help='Max iteration for training BC', type=int, default=1e5) diff --git a/baselines/gail/run_mujoco.py b/baselines/gail/run_mujoco.py index e875c5ccd2..8e0f578912 100644 --- a/baselines/gail/run_mujoco.py +++ b/baselines/gail/run_mujoco.py @@ -30,7 +30,7 @@ def argsparser(): parser.add_argument('--load_model_path', help='if provided, load the model', type=str, default=None) # Task parser.add_argument('--task', type=str, choices=['train', 'evaluate', 'sample'], default='train') - # for evaluatation + # for evaluation boolean_flag(parser, 'stochastic_policy', default=False, help='use stochastic/deterministic policy to evaluate') boolean_flag(parser, 'save_sample', default=False, help='save the trajectories or not') # Mujoco Dataset Configuration From a7a7511756a595434c796cb2dd6585ef515fc595 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:34 -0500 Subject: [PATCH 10/38] spelling: gradient Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/acer/acer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/acer/acer.py b/baselines/acer/acer.py index 62764dbc61..3976f210fe 100644 --- a/baselines/acer/acer.py +++ b/baselines/acer/acer.py @@ -123,7 +123,7 @@ def custom_getter(getter, *args, **kwargs): # entropy = tf.reduce_mean(strip(train_model.pd.entropy(), nenvs, nsteps)) entropy = tf.reduce_mean(cat_entropy_softmax(f)) - # Policy Graident loss, with truncated importance sampling & bias correction + # Policy Gradient loss, with truncated importance sampling & bias correction v = strip(v, nenvs, nsteps, True) check_shape([qret, v, rho_i, f_i], [[nenvs * nsteps]] * 4) check_shape([rho, f, q], [[nenvs * nsteps, nact]] * 2) From 5498b5e986a1bab25f79789e85bb5ab746f6277e Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:35 -0500 Subject: [PATCH 11/38] spelling: graph Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/gail/adversary.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/gail/adversary.py b/baselines/gail/adversary.py index 96b8a4cb49..98be13322f 100644 --- a/baselines/gail/adversary.py +++ b/baselines/gail/adversary.py @@ -26,7 +26,7 @@ def __init__(self, env, hidden_size, entcoeff=0.001, lr_rate=1e-3, scope="advers self.num_actions = env.action_space.shape[0] self.hidden_size = hidden_size self.build_ph() - # Build grpah + # Build graph generator_logits = self.build_graph(self.generator_obs_ph, self.generator_acs_ph, reuse=False) expert_logits = self.build_graph(self.expert_obs_ph, self.expert_acs_ph, reuse=True) # Build accuracy From e9bf097cb6f02e41757d9bbb1af0a4843b0e6366 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:36 -0500 Subject: [PATCH 12/38] spelling: implementation Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/common/running_mean_std.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/common/running_mean_std.py b/baselines/common/running_mean_std.py index 963a57fbc2..45ad702186 100644 --- a/baselines/common/running_mean_std.py +++ b/baselines/common/running_mean_std.py @@ -36,7 +36,7 @@ def update_mean_var_count_from_moments(mean, var, count, batch_mean, batch_var, class TfRunningMeanStd(object): # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm ''' - TensorFlow variables-based implmentation of computing running mean and std + TensorFlow variables-based implementation of computing running mean and std Benefit of this implementation is that it can be saved / loaded together with the tensorflow model ''' def __init__(self, epsilon=1e-4, shape=(), scope=''): From a8d2ff14747e255c5b6ed0927ba5a85c2ff03cc9 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:36 -0500 Subject: [PATCH 13/38] spelling: importance Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/deepq/build_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/deepq/build_graph.py b/baselines/deepq/build_graph.py index e9bc7d9ee3..1c3292a8c0 100644 --- a/baselines/deepq/build_graph.py +++ b/baselines/deepq/build_graph.py @@ -72,7 +72,7 @@ obs_tp1 gets ignored, but must be of the valid shape. dtype must be float32 and shape must be (batch_size,) weight: np.array - imporance weights for every element of the batch (gradient is multiplied + importance weights for every element of the batch (gradient is multiplied by the importance weight) dtype must be float32 and shape must be (batch_size,) Returns From 6895b2254e4e9ad67b1932218c8e43d665f1a06f Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:37 -0500 Subject: [PATCH 14/38] spelling: independent Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/acktr/kfac.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/acktr/kfac.py b/baselines/acktr/kfac.py index 3d4a8c27ad..d245ebcbee 100644 --- a/baselines/acktr/kfac.py +++ b/baselines/acktr/kfac.py @@ -72,7 +72,7 @@ def searchFactors(gradient, graph): fTensors = [] # combining additive gradient, assume they are the same op type and - # indepedent + # independent if 'AddN' in bpropOp_name: factors = [] for g in gradient.op.inputs: From 1a933f050676ee846ea086264a2d282496b846cd Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:35 -0500 Subject: [PATCH 15/38] spelling: indexes Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/deepq/replay_buffer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/deepq/replay_buffer.py b/baselines/deepq/replay_buffer.py index 3ddf708601..73ff992144 100644 --- a/baselines/deepq/replay_buffer.py +++ b/baselines/deepq/replay_buffer.py @@ -148,7 +148,7 @@ def sample(self, batch_size, beta): denoting importance weight of each sampled transition idxes: np.array Array of shape (batch_size,) and dtype np.int32 - idexes in buffer of sampled experiences + indexes in buffer of sampled experiences """ assert beta > 0 From b8be389e4558ded63f714be88889a96d8620903d Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:37 -0500 Subject: [PATCH 16/38] spelling: intended Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/common/misc_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/common/misc_util.py b/baselines/common/misc_util.py index 48bc3dedc5..55e3c3c67f 100644 --- a/baselines/common/misc_util.py +++ b/baselines/common/misc_util.py @@ -192,7 +192,7 @@ def relatively_safe_pickle_dump(obj, path, compression=False): - it is sometimes possible that we end up with useless temp file which needs to be deleted manually (it will be removed automatically on the next function call) - The indended use case is periodic checkpoints of experiment state, such that we never + The intended use case is periodic checkpoints of experiment state, such that we never corrupt previous checkpoints if the current one fails. Parameters From 782bbdbaedd0bfd127f5f70577bf4376e83a927f Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 01:14:18 -0500 Subject: [PATCH 17/38] spelling: macos Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/common/mpi_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/common/mpi_util.py b/baselines/common/mpi_util.py index ca7044e488..2f7f903a6a 100644 --- a/baselines/common/mpi_util.py +++ b/baselines/common/mpi_util.py @@ -39,7 +39,7 @@ def setup_mpi_gpus(): Set CUDA_VISIBLE_DEVICES to MPI rank if not already set """ if 'CUDA_VISIBLE_DEVICES' not in os.environ: - if sys.platform == 'darwin': # This Assumes if you're on OSX you're just + if sys.platform == 'darwin': # This Assumes if you're on macOS you're just ids = [] # doing a smoke test and don't want GPUs else: lrank, _lsize = get_local_rank_size(MPI.COMM_WORLD) From 01f44c968d84bbeccd47fc06032eebbae5a88379 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:38 -0500 Subject: [PATCH 18/38] spelling: meaning Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/common/schedules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/common/schedules.py b/baselines/common/schedules.py index 9dfff50f95..168ac4800e 100644 --- a/baselines/common/schedules.py +++ b/baselines/common/schedules.py @@ -40,7 +40,7 @@ def __init__(self, endpoints, interpolation=linear_interpolation, outside_value= """Piecewise schedule. endpoints: [(int, int)] - list of pairs `(time, value)` meanining that schedule should output + list of pairs `(time, value)` meaning that schedule should output `value` when `t==time`. All the values for time must be sorted in an increasing order. When t is between two times, e.g. `(time_a, value_a)` and `(time_b, value_b)`, such that `time_a <= t < time_b` then value outputs From 9de11b239409feeabe7058091a5d097127b8f842 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 01:12:59 -0500 Subject: [PATCH 19/38] spelling: nothing Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/trpo_mpi/trpo_mpi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/trpo_mpi/trpo_mpi.py b/baselines/trpo_mpi/trpo_mpi.py index cd1e7eab77..7d2f9001d9 100644 --- a/baselines/trpo_mpi/trpo_mpi.py +++ b/baselines/trpo_mpi/trpo_mpi.py @@ -277,7 +277,7 @@ def allmean(x): rewbuffer = deque(maxlen=40) # rolling buffer for episode rewards if sum([max_iters>0, total_timesteps>0, max_episodes>0])==0: - # noththing to be done + # nothing to be done return pi assert sum([max_iters>0, total_timesteps>0, max_episodes>0]) < 2, \ From 80c5812b35a9e4f80622e78b1f8e53d261f48e0a Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 01:13:32 -0500 Subject: [PATCH 20/38] spelling: observed Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/common/misc_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/common/misc_util.py b/baselines/common/misc_util.py index 55e3c3c67f..7e91e35aaf 100644 --- a/baselines/common/misc_util.py +++ b/baselines/common/misc_util.py @@ -126,7 +126,7 @@ def update(self, new_val): Parameters ---------- new_val: float - new observated value of estimated quantity. + new observed value of estimated quantity. """ if self._value is None: self._value = new_val From 7f0bd2381b1d02a0136bef10d87991dc9e581427 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 01:14:01 -0500 Subject: [PATCH 21/38] spelling: optimize Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/trpo_mpi/trpo_mpi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/trpo_mpi/trpo_mpi.py b/baselines/trpo_mpi/trpo_mpi.py index 7d2f9001d9..0f7b4144a9 100644 --- a/baselines/trpo_mpi/trpo_mpi.py +++ b/baselines/trpo_mpi/trpo_mpi.py @@ -127,7 +127,7 @@ def learn(*, cg_damping conjugate gradient damping - vf_stepsize learning rate for adam optimizer used to optimie value function loss + vf_stepsize learning rate for adam optimizer used to optimize value function loss vf_iters number of iterations of value function optimization iterations per each policy optimization step From cd7aea9081325f75e3cef9535d03db0e359eeaa7 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:38 -0500 Subject: [PATCH 22/38] spelling: parameters Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/common/misc_util.py | 4 ++-- baselines/common/segment_tree.py | 4 ++-- baselines/her/README.md | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/baselines/common/misc_util.py b/baselines/common/misc_util.py index 7e91e35aaf..68d5d76f8c 100644 --- a/baselines/common/misc_util.py +++ b/baselines/common/misc_util.py @@ -70,8 +70,8 @@ def pretty_eta(seconds_left): 2 hours and 37 minutes less than a minute - Paramters - --------- + Parameters + ---------- seconds_left: int Number of seconds to be converted to the ETA Returns diff --git a/baselines/common/segment_tree.py b/baselines/common/segment_tree.py index cb386ecdb5..da921f441b 100644 --- a/baselines/common/segment_tree.py +++ b/baselines/common/segment_tree.py @@ -16,8 +16,8 @@ def __init__(self, capacity, operation, neutral_element): `reduce` operation which reduces `operation` over a contiguous subsequence of items in the array. - Paramters - --------- + Parameters + ---------- capacity: int Total size of the array - must be a power of two. operation: lambda obj, obj -> obj diff --git a/baselines/her/README.md b/baselines/her/README.md index 50cf4d19b9..a7f39d6aa7 100644 --- a/baselines/her/README.md +++ b/baselines/her/README.md @@ -56,7 +56,7 @@ This will train a DDPG+HER agent on the `FetchPickAndPlace` environment by using To inspect what the agent has learned, use the `--play` flag as described above. #### Configuration -The provided configuration is for training an agent with HER without demonstrations, we need to change a few paramters for the HER algorithm to learn through demonstrations, to do that, set: +The provided configuration is for training an agent with HER without demonstrations, we need to change a few parameters for the HER algorithm to learn through demonstrations, to do that, set: * bc_loss: 1 - whether or not to use the behavior cloning loss as an auxiliary loss * q_filter: 1 - whether or not a Q value filter should be used on the Actor outputs From cb1b27bb18dcc1dfe1fd229774669d852c295f8f Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:39 -0500 Subject: [PATCH 23/38] spelling: policy Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/acer/acer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/acer/acer.py b/baselines/acer/acer.py index 3976f210fe..35e9176ed9 100644 --- a/baselines/acer/acer.py +++ b/baselines/acer/acer.py @@ -99,7 +99,7 @@ def custom_getter(getter, *args, **kwargs): # Notation: (var) = batch variable, (var)s = seqeuence variable, (var)_i = variable index by action at step i # action probability distributions according to train_model, polyak_model and step_model - # poilcy.pi is probability distribution parameters; to obtain distribution that sums to 1 need to take softmax + # policy.pi is probability distribution parameters; to obtain distribution that sums to 1 need to take softmax train_model_p = tf.nn.softmax(train_model.pi) polyak_model_p = tf.nn.softmax(polyak_model.pi) step_model_p = tf.nn.softmax(step_model.pi) From b39692b62ee98787ab155a87ed782bef87bb8fbf Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:39 -0500 Subject: [PATCH 24/38] spelling: published Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/her/experiment/config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/her/experiment/config.py b/baselines/her/experiment/config.py index d237eb6ba2..839492ae39 100644 --- a/baselines/her/experiment/config.py +++ b/baselines/her/experiment/config.py @@ -84,7 +84,7 @@ def make_env(subrank=None): except ImportError: MPI = None mpi_rank = 0 - logger.warn('Running with a single MPI process. This should work, but the results may differ from the ones publshed in Plappert et al.') + logger.warn('Running with a single MPI process. This should work, but the results may differ from the ones published in Plappert et al.') max_episode_steps = env._max_episode_steps env = Monitor(env, From 4cc118a86cd74811d37c9863a409071da8599713 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:40 -0500 Subject: [PATCH 25/38] spelling: redundant Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/acktr/kfac.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/acktr/kfac.py b/baselines/acktr/kfac.py index d245ebcbee..6f7f795b5c 100644 --- a/baselines/acktr/kfac.py +++ b/baselines/acktr/kfac.py @@ -134,7 +134,7 @@ def searchFactors(gradient, graph): ######## # check associated weights and bias for homogeneous coordinate representation - # and check redundent factors + # and check redundant factors # TO-DO: there may be a bug to detect associate bias and weights for # forking layer, e.g. in inception models. for param in varlist: From 811937ac51ba4c0fb693764d6b640ea2ef103b48 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 01:16:13 -0500 Subject: [PATCH 26/38] spelling: rescale Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/acktr/kfac.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/acktr/kfac.py b/baselines/acktr/kfac.py index 6f7f795b5c..86fdd8fb22 100644 --- a/baselines/acktr/kfac.py +++ b/baselines/acktr/kfac.py @@ -785,7 +785,7 @@ def getKfacPrecondUpdates(self, gradlist, varlist): local_vg = tf.reduce_sum(grad * g * (self._lr * self._lr)) vg += local_vg - # recale everything + # rescale everything if KFAC_DEBUG: print('apply vFv clipping') From eff93cb567d06b020bfe05767e010de9ca8f7f4a Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:41 -0500 Subject: [PATCH 27/38] spelling: sequence Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/acer/acer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/acer/acer.py b/baselines/acer/acer.py index 35e9176ed9..222497b371 100644 --- a/baselines/acer/acer.py +++ b/baselines/acer/acer.py @@ -96,7 +96,7 @@ def custom_getter(getter, *args, **kwargs): with tf.variable_scope("acer_model", custom_getter=custom_getter, reuse=True): polyak_model = policy(nbatch=nbatch, nsteps=nsteps, observ_placeholder=train_ob_placeholder, sess=sess) - # Notation: (var) = batch variable, (var)s = seqeuence variable, (var)_i = variable index by action at step i + # Notation: (var) = batch variable, (var)s = sequence variable, (var)_i = variable index by action at step i # action probability distributions according to train_model, polyak_model and step_model # policy.pi is probability distribution parameters; to obtain distribution that sums to 1 need to take softmax From cf226edc83f1da3ad557809ee9a606dcf8b53a58 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:40 -0500 Subject: [PATCH 28/38] spelling: specified Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/common/schedules.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/common/schedules.py b/baselines/common/schedules.py index 168ac4800e..4e4205a7f8 100644 --- a/baselines/common/schedules.py +++ b/baselines/common/schedules.py @@ -51,7 +51,7 @@ def __init__(self, endpoints, interpolation=linear_interpolation, outside_value= to the `endpoints`. Alpha is the fraction of distance from left endpoint to right endpoint that t has covered. See linear_interpolation for example. outside_value: float - if the value is requested outside of all the intervals sepecified in + if the value is requested outside of all the intervals specified in `endpoints` this value is returned. If None then AssertionError is raised when outside value is requested. """ From 6f037c7d2544b6447c009b075184309225596a84 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:41 -0500 Subject: [PATCH 29/38] spelling: stabilize Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/deepq/build_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/deepq/build_graph.py b/baselines/deepq/build_graph.py index 1c3292a8c0..2edf52884c 100644 --- a/baselines/deepq/build_graph.py +++ b/baselines/deepq/build_graph.py @@ -88,7 +88,7 @@ Q(s,a) - (r + gamma * max_a' Q'(s', a')) - Where Q' is lagging behind Q to stablize the learning. For example for Atari + Where Q' is lagging behind Q to stabilize the learning. For example for Atari Q' is set to Q once every 10000 updates training steps. From 810b4d1adc0a743dd7eea4f5362b5a0505fcb5d0 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:42 -0500 Subject: [PATCH 30/38] spelling: stateful Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/acer/runner.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baselines/acer/runner.py b/baselines/acer/runner.py index afd19ce2eb..c5625ab6f1 100644 --- a/baselines/acer/runner.py +++ b/baselines/acer/runner.py @@ -34,7 +34,7 @@ def run(self): mb_mus.append(mus) mb_dones.append(self.dones) obs, rewards, dones, _ = self.env.step(actions) - # states information for statefull models like LSTM + # states information for stateful models like LSTM self.states = states self.dones = dones self.obs = obs @@ -51,7 +51,7 @@ def run(self): mb_dones = np.asarray(mb_dones, dtype=np.bool).swapaxes(1, 0) - mb_masks = mb_dones # Used for statefull models like LSTM's to mask state when done + mb_masks = mb_dones # Used for stateful models like LSTM's to mask state when done mb_dones = mb_dones[:, 1:] # Used for calculating returns. The dones array is now aligned with rewards # shapes are now [nenv, nsteps, []] From c3e81e3af5cb615e9b06e49125c5361f1de097da Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:43 -0500 Subject: [PATCH 31/38] spelling: stochastic Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/gail/result/gail-result.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/gail/result/gail-result.md b/baselines/gail/result/gail-result.md index a13f657fd4..17c5c64646 100644 --- a/baselines/gail/result/gail-result.md +++ b/baselines/gail/result/gail-result.md @@ -33,7 +33,7 @@ For details (e.g., adversarial loss, discriminator accuracy, etc.) about GAIL tr | Humanoid-v1 | | | | HumanoidStandup-v1 | | | -### Stochatic Policy +### Stochastic Policy | | Un-normalized | Normalized | |---|---|---| | Hopper-v1 | | | From df25a9ddfd613ea896724e3e0c1650b567682159 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:43 -0500 Subject: [PATCH 32/38] spelling: strategy Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/acktr/kfac.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/acktr/kfac.py b/baselines/acktr/kfac.py index 86fdd8fb22..9430c03c58 100644 --- a/baselines/acktr/kfac.py +++ b/baselines/acktr/kfac.py @@ -64,7 +64,7 @@ def getFactors(self, g, varlist): fops = [] def searchFactors(gradient, graph): - # hard coded search stratergy + # hard coded search strategy bpropOp = gradient.op bpropOp_name = bpropOp.name From 36f9c0ae455f92389af16e8576ab5f211ee30a49 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:44 -0500 Subject: [PATCH 33/38] spelling: subprocesses Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/common/vec_env/subproc_vec_env.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/common/vec_env/subproc_vec_env.py b/baselines/common/vec_env/subproc_vec_env.py index 8c74000889..47b8a372bd 100644 --- a/baselines/common/vec_env/subproc_vec_env.py +++ b/baselines/common/vec_env/subproc_vec_env.py @@ -38,7 +38,7 @@ def step_env(env, action): class SubprocVecEnv(VecEnv): """ - VecEnv that runs multiple environments in parallel in subproceses and communicates with them via pipes. + VecEnv that runs multiple environments in parallel in subprocesses and communicates with them via pipes. Recommended to use when num_envs > 1 and step() can be a bottleneck. """ def __init__(self, env_fns, spaces=None, context='spawn', in_series=1): From 294f2c046eb47e44e5b6bb5979841ed4e2f8f70d Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:47:56 -0500 Subject: [PATCH 34/38] spelling: to Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/her/ddpg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/her/ddpg.py b/baselines/her/ddpg.py index b41ac64d35..9cff557708 100644 --- a/baselines/her/ddpg.py +++ b/baselines/her/ddpg.py @@ -57,7 +57,7 @@ def __init__(self, input_dims, buffer_size, hidden, layers, network_class, polya reuse (boolean): whether or not the networks should be reused bc_loss: whether or not the behavior cloning loss should be used as an auxiliary loss q_filter: whether or not a filter on the q value update should be used when training with demonstrations - num_demo: Number of episodes in to be used in the demonstration buffer + num_demo: Number of episodes to be used in the demonstration buffer demo_batch_size: number of samples to be used from the demonstrations buffer, per mpi thread prm_loss_weight: Weight corresponding to the primary loss aux_loss_weight: Weight corresponding to the auxiliary loss also called the cloning loss From 663e81f0a506949c2302873bdf77d09925566ffb Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:44 -0500 Subject: [PATCH 35/38] spelling: training Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/gail/run_mujoco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baselines/gail/run_mujoco.py b/baselines/gail/run_mujoco.py index 8e0f578912..fc021fe829 100644 --- a/baselines/gail/run_mujoco.py +++ b/baselines/gail/run_mujoco.py @@ -46,7 +46,7 @@ def argsparser(): parser.add_argument('--max_kl', type=float, default=0.01) parser.add_argument('--policy_entcoeff', help='entropy coefficiency of policy', type=float, default=0) parser.add_argument('--adversary_entcoeff', help='entropy coefficiency of discriminator', type=float, default=1e-3) - # Traing Configuration + # Training Configuration parser.add_argument('--save_per_iter', help='save model every xx iterations', type=int, default=100) parser.add_argument('--num_timesteps', help='number of timesteps per episode', type=int, default=5e6) # Behavior Cloning From dce56c880aa260ee47d9d13b5970b52835953822 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:45 -0500 Subject: [PATCH 36/38] spelling: update Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/gail/trpo_mpi.py | 2 +- baselines/ppo1/pposgd_simple.py | 2 +- baselines/trpo_mpi/trpo_mpi.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/baselines/gail/trpo_mpi.py b/baselines/gail/trpo_mpi.py index 615a4326a7..d97df25588 100644 --- a/baselines/gail/trpo_mpi.py +++ b/baselines/gail/trpo_mpi.py @@ -248,7 +248,7 @@ def fisher_vector_product(p): add_vtarg_and_adv(seg, gamma, lam) # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets)) ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg["tdlamret"] - vpredbefore = seg["vpred"] # predicted value function before udpate + vpredbefore = seg["vpred"] # predicted value function before update atarg = (atarg - atarg.mean()) / atarg.std() # standardized advantage function estimate if hasattr(pi, "ob_rms"): pi.ob_rms.update(ob) # update running mean/std for policy diff --git a/baselines/ppo1/pposgd_simple.py b/baselines/ppo1/pposgd_simple.py index 7ecd48db58..cd52763e67 100644 --- a/baselines/ppo1/pposgd_simple.py +++ b/baselines/ppo1/pposgd_simple.py @@ -165,7 +165,7 @@ def learn(env, policy_fn, *, # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets)) ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg["tdlamret"] - vpredbefore = seg["vpred"] # predicted value function before udpate + vpredbefore = seg["vpred"] # predicted value function before update atarg = (atarg - atarg.mean()) / atarg.std() # standardized advantage function estimate d = Dataset(dict(ob=ob, ac=ac, atarg=atarg, vtarg=tdlamret), deterministic=pi.recurrent) optim_batchsize = optim_batchsize or ob.shape[0] diff --git a/baselines/trpo_mpi/trpo_mpi.py b/baselines/trpo_mpi/trpo_mpi.py index 0f7b4144a9..cb2f602008 100644 --- a/baselines/trpo_mpi/trpo_mpi.py +++ b/baselines/trpo_mpi/trpo_mpi.py @@ -299,7 +299,7 @@ def allmean(x): # ob, ac, atarg, ret, td1ret = map(np.concatenate, (obs, acs, atargs, rets, td1rets)) ob, ac, atarg, tdlamret = seg["ob"], seg["ac"], seg["adv"], seg["tdlamret"] - vpredbefore = seg["vpred"] # predicted value function before udpate + vpredbefore = seg["vpred"] # predicted value function before update atarg = (atarg - atarg.mean()) / atarg.std() # standardized advantage function estimate if hasattr(pi, "ret_rms"): pi.ret_rms.update(tdlamret) From 78d1551d2a6440b570e0d084d72c3a81c8587bbd Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:45 -0500 Subject: [PATCH 37/38] spelling: variable Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- baselines/results_plotter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baselines/results_plotter.py b/baselines/results_plotter.py index 66f09bd12d..273759f575 100644 --- a/baselines/results_plotter.py +++ b/baselines/results_plotter.py @@ -82,8 +82,8 @@ def main(): parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument('--dirs', help='List of log directories', nargs = '*', default=['./log']) parser.add_argument('--num_timesteps', type=int, default=int(10e6)) - parser.add_argument('--xaxis', help = 'Varible on X-axis', default = X_TIMESTEPS) - parser.add_argument('--yaxis', help = 'Varible on Y-axis', default = Y_REWARD) + parser.add_argument('--xaxis', help = 'Variable on X-axis', default = X_TIMESTEPS) + parser.add_argument('--yaxis', help = 'Variable on Y-axis', default = Y_REWARD) parser.add_argument('--task_name', help = 'Title of plot', default = 'Breakout') args = parser.parse_args() args.dirs = [os.path.abspath(dir) for dir in args.dirs] From d24c669946d0984718ce5716bb24e059f04172e4 Mon Sep 17 00:00:00 2001 From: Josh Soref <2119212+jsoref@users.noreply.github.com> Date: Tue, 24 Jan 2023 00:56:45 -0500 Subject: [PATCH 38/38] spelling: visualizing Signed-off-by: Josh Soref <2119212+jsoref@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 32680a672e..8cb0172c38 100644 --- a/README.md +++ b/README.md @@ -116,7 +116,7 @@ python -m baselines.run --alg=ppo2 --env=PongNoFrameskip-v4 --num_timesteps=0 -- *NOTE:* Mujoco environments require normalization to work properly, so we wrap them with VecNormalize wrapper. Currently, to ensure the models are saved with normalization (so that trained models can be restored and run without further training) the normalization coefficients are saved as tensorflow variables. This can decrease the performance somewhat, so if you require high-throughput steps with Mujoco and do not need saving/restoring the models, it may make sense to use numpy normalization instead. To do that, set 'use_tf=False` in [baselines/run.py](baselines/run.py#L116). -### Logging and vizualizing learning curves and other training metrics +### Logging and visualizing learning curves and other training metrics By default, all summary data, including progress, standard output, is saved to a unique directory in a temp folder, specified by a call to Python's [tempfile.gettempdir()](https://docs.python.org/3/library/tempfile.html#tempfile.gettempdir). The directory can be changed with the `--log_path` command-line option. ```bash