Skip to content

Commit

Permalink
fix model parallel (#481)
Browse files Browse the repository at this point in the history
fixes #447
  • Loading branch information
NathanHB authored Jan 2, 2025
1 parent a2541b1 commit 24afde2
Showing 1 changed file with 1 addition and 1 deletion.
2 changes: 1 addition & 1 deletion src/lighteval/models/transformers/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,7 @@ def init_model_parallel(self, model_parallel: bool | None = None) -> Tuple[bool,
return False, None, None

self.num_local_processes = int(os.environ.get("LOCAL_WORLD_SIZE", 1))
- self.num_machines = int(os.environ.get("WORLD_SIZE", 0)) // self.num_local_processes
+ self.num_machines = torch.cuda.device_count() // self.num_local_processes
if self.num_machines == 0:
logger.info("We are not in a distributed setting. Setting model_parallel to False.")
model_parallel = False
Expand Down

0 comments on commit 24afde2

Please sign in to comment.