#!/usr/bin/env python
# printflock allows one to print in a non-interleaved fashion when printing from multiple processes.
# Typically this is only an issue within a single node; when processes from different nodes print,
# their output doesn't get interleaved.
#
# This file includes the wrapper and a full example of how to use it.
#
# e.g., if you have 2 GPUs, run it as:
#
# python -m torch.distributed.run --nproc_per_node 2 multi-gpu-non-interleaved-print.py
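#
# or, equivalently, with the torchrun launcher that ships with recent PyTorch:
#
# torchrun --nproc_per_node 2 multi-gpu-non-interleaved-print.py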
#
import fcntl

def printflock(*args, **kwargs):
    """
    non-interleaved print function to use when printing concurrently from many processes,
    as is the case under torch.distributed
    """
    with open(__file__, "r") as fh:
        fcntl.flock(fh, fcntl.LOCK_EX)
        try:
            print(*args, **kwargs)
        finally:
            fcntl.flock(fh, fcntl.LOCK_UN)
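
# Note on the mechanism: every process opens this same file and takes an
# exclusive advisory lock (fcntl.flock) on it around the print call, so
# concurrent writes are serialized rather than interleaved.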

if __name__ == "__main__":
    import os

    import torch
    import torch.distributed as dist

    local_rank = int(os.environ["LOCAL_RANK"])
    torch.cuda.set_device(local_rank)
    dist.init_process_group("nccl")

    world_size = dist.get_world_size()
    rank = dist.get_rank()

    printflock(f"This is a very long message from rank {rank} (world_size={world_size})")