From e8a92fda47c40d8597e42ece5619b57a84decbc9 Mon Sep 17 00:00:00 2001 From: JalenCato Date: Tue, 21 Jan 2025 18:34:57 +0000 Subject: [PATCH 1/3] add spawn --- python/graphstorm/gconstruct/utils.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/graphstorm/gconstruct/utils.py b/python/graphstorm/gconstruct/utils.py index d2077f632e..7e7148ba77 100644 --- a/python/graphstorm/gconstruct/utils.py +++ b/python/graphstorm/gconstruct/utils.py @@ -217,6 +217,7 @@ def worker_fn(worker_id, task_queue, res_queue, user_parser, ext_mem_workspace): num_gpus = th.cuda.device_count() gpu = worker_id % num_gpus os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu) + multiprocessing.set_start_method("spawn", force=True) if worker_id >= num_gpus: logging.warning("There are more than 1 processes are attachd to GPU %d.", gpu) try: From b10f20216bcd68c40e8e45a622cc6f6da122362b Mon Sep 17 00:00:00 2001 From: jalencato Date: Tue, 21 Jan 2025 12:46:55 -0800 Subject: [PATCH 2/3] Update utils.py --- python/graphstorm/gconstruct/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/graphstorm/gconstruct/utils.py b/python/graphstorm/gconstruct/utils.py index 7e7148ba77..3aaa5b7c14 100644 --- a/python/graphstorm/gconstruct/utils.py +++ b/python/graphstorm/gconstruct/utils.py @@ -213,11 +213,11 @@ def worker_fn(worker_id, task_queue, res_queue, user_parser, ext_mem_workspace): """ # We need to set a GPU device for each worker process in case that # some transformations (e.g., computing BERT embeddings) require GPU computation. + multiprocessing.set_start_method("spawn", force=True) if th.cuda.is_available(): num_gpus = th.cuda.device_count() gpu = worker_id % num_gpus os.environ['CUDA_VISIBLE_DEVICES'] = str(gpu) - multiprocessing.set_start_method("spawn", force=True) if worker_id >= num_gpus: logging.warning("There are more than 1 processes are attachd to GPU %d.", gpu) try: From 6959b05d5e30ad6215d9b513a162f8a38418ace0 Mon Sep 17 00:00:00 2001 From: jalencato Date: Tue, 21 Jan 2025 14:23:07 -0800 Subject: [PATCH 3/3] Update utils.py --- python/graphstorm/gconstruct/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/graphstorm/gconstruct/utils.py b/python/graphstorm/gconstruct/utils.py index 3aaa5b7c14..9653dcb32d 100644 --- a/python/graphstorm/gconstruct/utils.py +++ b/python/graphstorm/gconstruct/utils.py @@ -213,7 +213,6 @@ def worker_fn(worker_id, task_queue, res_queue, user_parser, ext_mem_workspace): """ # We need to set a GPU device for each worker process in case that # some transformations (e.g., computing BERT embeddings) require GPU computation. - multiprocessing.set_start_method("spawn", force=True) if th.cuda.is_available(): num_gpus = th.cuda.device_count() gpu = worker_id % num_gpus @@ -296,6 +295,8 @@ def multiprocessing_data_read(in_files, num_processes, user_parser, ext_mem_work a dict : key is the file index, the value is processed data. """ if num_processes > 1 and len(in_files) > 1: + if th.cuda.is_available(): + multiprocessing.set_start_method("spawn", force=True) processes = [] manager = multiprocessing.Manager() task_queue = manager.Queue()