Update cleanrl/ppo_atari_accelerate.py

Co-authored-by: Costa Huang <[email protected]>
vwxyzjn · Feb 13, 2024 · 03d1a1c
1 parent ba8fbd8
commit 03d1a1c
Showing 1 changed file with 4 additions and 3 deletions.
diff --git a/cleanrl/ppo_atari_accelerate.py b/cleanrl/ppo_atari_accelerate.py
@@ -278,12 +278,13 @@ def get_action_and_value(self, x, action=None):
         b_values = values.reshape(-1)
 
         # Optimizing the policy and value network
-        b_inds = np.arange(args.batch_size)
+        b_inds = np.arange(args.local_batch_size)
         clipfracs = []
         for epoch in range(args.update_epochs):
             np.random.shuffle(b_inds)
-            for start in range(0, args.batch_size, args.minibatch_size):
-                end = start + args.minibatch_size
+            for start in range(0, args.local_batch_size, args.local_minibatch_size):
+                end = start + args.local_minibatch_size
+                mb_inds = b_inds[start:end]
                 mb_inds = b_inds[start:end]
 
                 _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions.long()[mb_inds])