From e0ed0dc4e440dd79383a3dd5ed306d99a22d42a5 Mon Sep 17 00:00:00 2001
From: Julio Perez <jperez@nvidia.com>
Date: Thu, 11 Jan 2024 18:04:27 -0500
Subject: [PATCH 1/6] use HAS_GPU to determine if cuda is available

---
 merlin/core/compat/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/merlin/core/compat/__init__.py b/merlin/core/compat/__init__.py
index dc5ec84ed..5ce96e746 100644
--- a/merlin/core/compat/__init__.py
+++ b/merlin/core/compat/__init__.py
@@ -21,7 +21,7 @@
 
 from merlin.core.has_gpu import HAS_GPU  # noqa pylint: disable=unused-import
 
-if not cuda.is_available():
+if not HAS_GPU:
     cuda = None
 
 try:

From bd495d323adeab1087d755010cbb2add3927f474 Mon Sep 17 00:00:00 2001
From: Julio Perez <jperez@nvidia.com>
Date: Fri, 12 Jan 2024 15:28:11 -0500
Subject: [PATCH 2/6] removed use of numba cuda context during init

---
 merlin/core/compat/__init__.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/merlin/core/compat/__init__.py b/merlin/core/compat/__init__.py
index 5ce96e746..614124c52 100644
--- a/merlin/core/compat/__init__.py
+++ b/merlin/core/compat/__init__.py
@@ -21,8 +21,7 @@
 
 from merlin.core.has_gpu import HAS_GPU  # noqa pylint: disable=unused-import
 
-if not HAS_GPU:
-    cuda = None
+cuda = False if not HAS_GPU else cuda
 
 try:
     import psutil
@@ -99,10 +98,7 @@ def device_mem_size(kind="total", cpu=False):
     if kind not in ["free", "total"]:
         raise ValueError(f"{kind} not a supported option for device_mem_size.")
     try:
-        if kind == "free":
-            return int(cuda.current_context().get_memory_info()[0])
-        else:
-            return int(cuda.current_context().get_memory_info()[1])
+        return pynvml_mem_size(kind=kind)
     except NotImplementedError:
         if kind == "free":
             # Not using NVML "free" memory, because it will not include RMM-managed memory

From 8e6a737784319aead01609028ee81dedb6ff593a Mon Sep 17 00:00:00 2001
From: Julio Perez <jperez@nvidia.com>
Date: Fri, 12 Jan 2024 16:30:58 -0500
Subject: [PATCH 3/6] removed excess logic now that using pynvml

---
 merlin/core/compat/__init__.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/merlin/core/compat/__init__.py b/merlin/core/compat/__init__.py
index 614124c52..169629f7e 100644
--- a/merlin/core/compat/__init__.py
+++ b/merlin/core/compat/__init__.py
@@ -97,14 +97,9 @@ def device_mem_size(kind="total", cpu=False):
 
     if kind not in ["free", "total"]:
         raise ValueError(f"{kind} not a supported option for device_mem_size.")
-    try:
-        return pynvml_mem_size(kind=kind)
-    except NotImplementedError:
-        if kind == "free":
-            # Not using NVML "free" memory, because it will not include RMM-managed memory
-            warnings.warn("get_memory_info is not supported. Using total device memory from NVML.")
-        size = pynvml_mem_size(kind="total", index=0)
-        return size
+
+    return pynvml_mem_size(kind=kind)
+
 
 
 try:

From 1d48e60c7d48c873f29a78d3bad706ba08875f46 Mon Sep 17 00:00:00 2001
From: Julio Perez <jperez@nvidia.com>
Date: Wed, 17 Jan 2024 14:26:30 -0500
Subject: [PATCH 4/6] fix for merlin writer issues

---
 merlin/core/compat/__init__.py | 2 +-
 merlin/io/writer.py            | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/merlin/core/compat/__init__.py b/merlin/core/compat/__init__.py
index 169629f7e..8ff3eede1 100644
--- a/merlin/core/compat/__init__.py
+++ b/merlin/core/compat/__init__.py
@@ -21,7 +21,7 @@
 
 from merlin.core.has_gpu import HAS_GPU  # noqa pylint: disable=unused-import
 
-cuda = False if not HAS_GPU else cuda
+cuda = None if not HAS_GPU else cuda
 
 try:
     import psutil
diff --git a/merlin/io/writer.py b/merlin/io/writer.py
index 66c24170d..e26a702be 100644
--- a/merlin/io/writer.py
+++ b/merlin/io/writer.py
@@ -25,6 +25,7 @@
 from merlin.core.compat import cupy as cp
 from merlin.core.dispatch import annotate
 from merlin.io.shuffle import shuffle_df
+import logging
 
 
 class Writer:
@@ -193,10 +194,13 @@ def _add_data_slice(self, df):
         # Pandas does not support the `scatter_by_map` method
         # used in `_add_data_scatter`. So, we manually shuffle
         # the df and write out slices.
+        logging.error(f"df: {df.shape}, {self.num_out_files}")
         if self.shuffle:
             df = shuffle_df(df)
+        logging.error(f"df: {df.shape}, {self.num_out_files}")
         int_slice_size = df.shape[0] // self.num_out_files
-        slice_size = int_slice_size if df.shape[0] % int_slice_size == 0 else int_slice_size + 1
+        logging.error(f"df: {df.shape}, {self.num_out_files}, {int_slice_size}")
+        slice_size = int_slice_size if int_slice_size > 0 and df.shape[0] % int_slice_size == 0 else int_slice_size + 1
         for x in range(self.num_out_files):
             start = x * slice_size
             end = start + slice_size

From 622a43dbb7d6fcbb2c5fe5c07fcc4f111b46fa05 Mon Sep 17 00:00:00 2001
From: Julio Perez <jperez@nvidia.com>
Date: Wed, 17 Jan 2024 14:50:40 -0500
Subject: [PATCH 5/6] remove logging statements

---
 merlin/io/writer.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/merlin/io/writer.py b/merlin/io/writer.py
index e26a702be..2acd75884 100644
--- a/merlin/io/writer.py
+++ b/merlin/io/writer.py
@@ -25,7 +25,6 @@
 from merlin.core.compat import cupy as cp
 from merlin.core.dispatch import annotate
 from merlin.io.shuffle import shuffle_df
-import logging
 
 
 class Writer:
@@ -194,12 +193,9 @@ def _add_data_slice(self, df):
         # Pandas does not support the `scatter_by_map` method
         # used in `_add_data_scatter`. So, we manually shuffle
         # the df and write out slices.
-        logging.error(f"df: {df.shape}, {self.num_out_files}")
         if self.shuffle:
             df = shuffle_df(df)
-        logging.error(f"df: {df.shape}, {self.num_out_files}")
         int_slice_size = df.shape[0] // self.num_out_files
-        logging.error(f"df: {df.shape}, {self.num_out_files}, {int_slice_size}")
         slice_size = int_slice_size if int_slice_size > 0 and df.shape[0] % int_slice_size == 0 else int_slice_size + 1
         for x in range(self.num_out_files):
             start = x * slice_size

From 903f17fdfac1399c94e5c1aed8031db92adc4807 Mon Sep 17 00:00:00 2001
From: Julio Perez <jperez@nvidia.com>
Date: Wed, 17 Jan 2024 16:55:42 -0500
Subject: [PATCH 6/6] fix linting errors

---
 merlin/core/compat/__init__.py | 1 -
 merlin/io/writer.py            | 6 +++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/merlin/core/compat/__init__.py b/merlin/core/compat/__init__.py
index 8ff3eede1..4d81d60b9 100644
--- a/merlin/core/compat/__init__.py
+++ b/merlin/core/compat/__init__.py
@@ -101,7 +101,6 @@ def device_mem_size(kind="total", cpu=False):
     return pynvml_mem_size(kind=kind)
 
 
-
 try:
     import numpy
 except ImportError:
diff --git a/merlin/io/writer.py b/merlin/io/writer.py
index 2acd75884..c4a5aa788 100644
--- a/merlin/io/writer.py
+++ b/merlin/io/writer.py
@@ -196,7 +196,11 @@ def _add_data_slice(self, df):
         if self.shuffle:
             df = shuffle_df(df)
         int_slice_size = df.shape[0] // self.num_out_files
-        slice_size = int_slice_size if int_slice_size > 0 and df.shape[0] % int_slice_size == 0 else int_slice_size + 1
+        slice_size = (
+            int_slice_size
+            if int_slice_size > 0 and df.shape[0] % int_slice_size == 0
+            else int_slice_size + 1
+        )
         for x in range(self.num_out_files):
             start = x * slice_size
             end = start + slice_size