From e0ed0dc4e440dd79383a3dd5ed306d99a22d42a5 Mon Sep 17 00:00:00 2001 From: Julio Perez Date: Thu, 11 Jan 2024 18:04:27 -0500 Subject: [PATCH 1/6] use HAS_GPU to determine if cuda is available --- merlin/core/compat/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/merlin/core/compat/__init__.py b/merlin/core/compat/__init__.py index dc5ec84ed..5ce96e746 100644 --- a/merlin/core/compat/__init__.py +++ b/merlin/core/compat/__init__.py @@ -21,7 +21,7 @@ from merlin.core.has_gpu import HAS_GPU # noqa pylint: disable=unused-import -if not cuda.is_available(): +if not HAS_GPU: cuda = None try: From bd495d323adeab1087d755010cbb2add3927f474 Mon Sep 17 00:00:00 2001 From: Julio Perez Date: Fri, 12 Jan 2024 15:28:11 -0500 Subject: [PATCH 2/6] removed use of numba cuda context during init --- merlin/core/compat/__init__.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/merlin/core/compat/__init__.py b/merlin/core/compat/__init__.py index 5ce96e746..614124c52 100644 --- a/merlin/core/compat/__init__.py +++ b/merlin/core/compat/__init__.py @@ -21,8 +21,7 @@ from merlin.core.has_gpu import HAS_GPU # noqa pylint: disable=unused-import -if not HAS_GPU: - cuda = None +cuda = False if not HAS_GPU else cuda try: import psutil @@ -99,10 +98,7 @@ def device_mem_size(kind="total", cpu=False): if kind not in ["free", "total"]: raise ValueError(f"{kind} not a supported option for device_mem_size.") try: - if kind == "free": - return int(cuda.current_context().get_memory_info()[0]) - else: - return int(cuda.current_context().get_memory_info()[1]) + return pynvml_mem_size(kind=kind) except NotImplementedError: if kind == "free": # Not using NVML "free" memory, because it will not include RMM-managed memory From 8e6a737784319aead01609028ee81dedb6ff593a Mon Sep 17 00:00:00 2001 From: Julio Perez Date: Fri, 12 Jan 2024 16:30:58 -0500 Subject: [PATCH 3/6] removed excess logic now that using pynvml --- merlin/core/compat/__init__.py | 11 
+++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/merlin/core/compat/__init__.py b/merlin/core/compat/__init__.py index 614124c52..169629f7e 100644 --- a/merlin/core/compat/__init__.py +++ b/merlin/core/compat/__init__.py @@ -97,14 +97,9 @@ def device_mem_size(kind="total", cpu=False): if kind not in ["free", "total"]: raise ValueError(f"{kind} not a supported option for device_mem_size.") - try: - return pynvml_mem_size(kind=kind) - except NotImplementedError: - if kind == "free": - # Not using NVML "free" memory, because it will not include RMM-managed memory - warnings.warn("get_memory_info is not supported. Using total device memory from NVML.") - size = pynvml_mem_size(kind="total", index=0) - return size + + return pynvml_mem_size(kind=kind) + try: From 1d48e60c7d48c873f29a78d3bad706ba08875f46 Mon Sep 17 00:00:00 2001 From: Julio Perez Date: Wed, 17 Jan 2024 14:26:30 -0500 Subject: [PATCH 4/6] fix for merlin writer issues --- merlin/core/compat/__init__.py | 2 +- merlin/io/writer.py | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/merlin/core/compat/__init__.py b/merlin/core/compat/__init__.py index 169629f7e..8ff3eede1 100644 --- a/merlin/core/compat/__init__.py +++ b/merlin/core/compat/__init__.py @@ -21,7 +21,7 @@ from merlin.core.has_gpu import HAS_GPU # noqa pylint: disable=unused-import -cuda = False if not HAS_GPU else cuda +cuda = None if not HAS_GPU else cuda try: import psutil diff --git a/merlin/io/writer.py b/merlin/io/writer.py index 66c24170d..e26a702be 100644 --- a/merlin/io/writer.py +++ b/merlin/io/writer.py @@ -25,6 +25,7 @@ from merlin.core.compat import cupy as cp from merlin.core.dispatch import annotate from merlin.io.shuffle import shuffle_df +import logging class Writer: @@ -193,10 +194,13 @@ def _add_data_slice(self, df): # Pandas does not support the `scatter_by_map` method # used in `_add_data_scatter`. So, we manually shuffle # the df and write out slices. 
+ logging.error(f"df: {df.shape}, {self.num_out_files}") if self.shuffle: df = shuffle_df(df) + logging.error(f"df: {df.shape}, {self.num_out_files}") int_slice_size = df.shape[0] // self.num_out_files - slice_size = int_slice_size if df.shape[0] % int_slice_size == 0 else int_slice_size + 1 + logging.error(f"df: {df.shape}, {self.num_out_files}, {int_slice_size}") + slice_size = int_slice_size if int_slice_size > 0 and df.shape[0] % int_slice_size == 0 else int_slice_size + 1 for x in range(self.num_out_files): start = x * slice_size end = start + slice_size From 622a43dbb7d6fcbb2c5fe5c07fcc4f111b46fa05 Mon Sep 17 00:00:00 2001 From: Julio Perez Date: Wed, 17 Jan 2024 14:50:40 -0500 Subject: [PATCH 5/6] remove logging statements --- merlin/io/writer.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/merlin/io/writer.py b/merlin/io/writer.py index e26a702be..2acd75884 100644 --- a/merlin/io/writer.py +++ b/merlin/io/writer.py @@ -25,7 +25,6 @@ from merlin.core.compat import cupy as cp from merlin.core.dispatch import annotate from merlin.io.shuffle import shuffle_df -import logging class Writer: @@ -194,12 +193,9 @@ def _add_data_slice(self, df): # Pandas does not support the `scatter_by_map` method # used in `_add_data_scatter`. So, we manually shuffle # the df and write out slices. 
- logging.error(f"df: {df.shape}, {self.num_out_files}") if self.shuffle: df = shuffle_df(df) - logging.error(f"df: {df.shape}, {self.num_out_files}") int_slice_size = df.shape[0] // self.num_out_files - logging.error(f"df: {df.shape}, {self.num_out_files}, {int_slice_size}") slice_size = int_slice_size if int_slice_size > 0 and df.shape[0] % int_slice_size == 0 else int_slice_size + 1 for x in range(self.num_out_files): start = x * slice_size From 903f17fdfac1399c94e5c1aed8031db92adc4807 Mon Sep 17 00:00:00 2001 From: Julio Perez Date: Wed, 17 Jan 2024 16:55:42 -0500 Subject: [PATCH 6/6] fix linting errors --- merlin/core/compat/__init__.py | 1 - merlin/io/writer.py | 6 +++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/merlin/core/compat/__init__.py b/merlin/core/compat/__init__.py index 8ff3eede1..4d81d60b9 100644 --- a/merlin/core/compat/__init__.py +++ b/merlin/core/compat/__init__.py @@ -101,7 +101,6 @@ def device_mem_size(kind="total", cpu=False): return pynvml_mem_size(kind=kind) - try: import numpy except ImportError: diff --git a/merlin/io/writer.py b/merlin/io/writer.py index 2acd75884..c4a5aa788 100644 --- a/merlin/io/writer.py +++ b/merlin/io/writer.py @@ -196,7 +196,11 @@ def _add_data_slice(self, df): if self.shuffle: df = shuffle_df(df) int_slice_size = df.shape[0] // self.num_out_files - slice_size = int_slice_size if int_slice_size > 0 and df.shape[0] % int_slice_size == 0 else int_slice_size + 1 + slice_size = ( + int_slice_size + if int_slice_size > 0 and df.shape[0] % int_slice_size == 0 + else int_slice_size + 1 + ) for x in range(self.num_out_files): start = x * slice_size end = start + slice_size