Support parallel reward functions

CarperAI · Oct 25, 2023 · 42a91c4
1 parent 6068728
commit 42a91c4
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/trlx/trainer/accelerate_ppo_trainer.py b/trlx/trainer/accelerate_ppo_trainer.py
@@ -339,7 +339,7 @@ def make_experience(self, num_rollouts: int = 1024, iter_count: int = 0):  # noq
             else:
                 all_scores = None
                 max_len = torch.tensor(0, dtype=torch.long, device=device)
-            
+
             if self.config.train.reward_only_in_main_process:
                 if torch.distributed.is_initialized():
                     torch.distributed.broadcast(max_len, 0)