From 1b83eb13ec7f913a98e297aaeb7a5ed7942e0cf0 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 6 Sep 2024 11:17:57 -0700
Subject: [PATCH 1/3] Use cuda-python to measure memory leak

---
 .../all_cuda-118_arch-x86_64.yaml             |  2 +-
 .../all_cuda-125_arch-x86_64.yaml             |  2 +-
 dependencies.yaml                             | 11 +++++++++-
 python/cucim/pyproject.toml                   |  2 +-
 .../clara/test_read_region_memory_usage.py    | 22 ++++++++++---------
 5 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 1b657fcd0..cf5f956ef 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -6,10 +6,10 @@ channels:
 - conda-forge
 - nvidia
 dependencies:
-- GPUtil>=1.4.0
 - c-compiler
 - click
 - cmake>=3.26.4,!=3.30.0
+- cuda-python>=11.7.1,<12.0a0
 - cuda-version=11.8
 - cudatoolkit
 - cupy>=12.0.0
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 2f5f50192..f4bd88a16 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -6,12 +6,12 @@ channels:
 - conda-forge
 - nvidia
 dependencies:
-- GPUtil>=1.4.0
 - c-compiler
 - click
 - cmake>=3.26.4,!=3.30.0
 - cuda-cudart-dev
 - cuda-nvcc
+- cuda-python>=12.0,<13.0a0
 - cuda-version=12.5
 - cupy>=12.0.0
 - cxx-compiler
diff --git a/dependencies.yaml b/dependencies.yaml
index e8d5f8aaf..67efbf600 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -295,7 +295,6 @@ dependencies:
     common:
       - output_types: [conda, requirements, pyproject]
         packages:
-          - GPUtil>=1.4.0
           - psutil>=5.8.0
           - pytest>=6.2.4,<8.0.0a0
           - pytest-cov>=2.12.1
@@ -323,3 +322,13 @@ dependencies:
         packages:
           # Already added to requirements via docs. This is for tests.
           - numpydoc>=1.5
+    specific:
+      - output_types: [conda, requirements, pyproject]
+        matrices:
+          - matrix: {cuda: "12.*"}
+            packages:
+              - cuda-python>=12.0,<13.0a0
+          - matrix: {cuda: "11.*"}
+            packages: &test_cuda_python_cu11
+              - cuda-python>=11.7.1,<12.0a0
+          - {matrix: null, packages: *test_cuda_python_cu11}
diff --git a/python/cucim/pyproject.toml b/python/cucim/pyproject.toml
index 4537f3df6..b8d1121b1 100644
--- a/python/cucim/pyproject.toml
+++ b/python/cucim/pyproject.toml
@@ -57,7 +57,7 @@ Tracker = "https://github.com/rapidsai/cucim/issues"
 
 [project.optional-dependencies]
 test = [
-    "GPUtil>=1.4.0",
+    "cuda-python>=11.7.1,<12.0a0",
     "imagecodecs>=2021.6.8; platform_machine=='x86_64'",
     "matplotlib",
     "numpydoc>=1.5",
diff --git a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py
index 8f259b910..48b7dc0b9 100644
--- a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py
+++ b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 #
 
+import cuda.cudart
 import pytest
 
 from ...util.io import open_image_cucim
@@ -22,23 +23,24 @@
 
 
 def test_read_region_cuda_memleak(testimg_tiff_stripe_4096x4096_256_jpeg):
-    import GPUtil
-
-    gpus = GPUtil.getGPUs()
-
-    if len(gpus) == 0:
+    def get_used_gpu_memory():
+        status, free, total = cuda.cudart.cudaMemGetInfo()
+        if status != cuda.cudart.cudaError_t.cudaSuccess:
+            raise RuntimeError("Failed to get GPU memory info.")
+        memory_used = (total - free) / (2**20)
+        return memory_used
+
+    status, num_gpus = cuda.cudart.cudaGetDeviceCount()
+    if status != cuda.cudart.cudaError_t.cudaSuccess or num_gpus == 0:
         pytest.skip("No gpu available")
 
     img = open_image_cucim(testimg_tiff_stripe_4096x4096_256_jpeg)
 
-    gpu = gpus[0]
-    mem_usage_history = [gpu.memoryUsed]
+    mem_usage_history = [get_used_gpu_memory()]
 
     for i in range(10):
         _ = img.read_region(device="cuda")
-        gpus = GPUtil.getGPUs()
-        gpu = gpus[0]
-        mem_usage_history.append(gpu.memoryUsed)
+        mem_usage_history.append(get_used_gpu_memory())
 
     print(mem_usage_history)
 

From 8075ba3cd304dd0e3300958d5b6fad2fb3baa5b6 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 6 Sep 2024 11:22:38 -0700
Subject: [PATCH 2/3] Clarify units are MiB.

---
 .../performance/clara/test_read_region_memory_usage.py     | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py
index 48b7dc0b9..33a5c3e0d 100644
--- a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py
+++ b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py
@@ -23,7 +23,8 @@
 
 
 def test_read_region_cuda_memleak(testimg_tiff_stripe_4096x4096_256_jpeg):
-    def get_used_gpu_memory():
+    def get_used_gpu_memory_mib():
+        """Get the used GPU memory in MiB."""
         status, free, total = cuda.cudart.cudaMemGetInfo()
         if status != cuda.cudart.cudaError_t.cudaSuccess:
             raise RuntimeError("Failed to get GPU memory info.")
@@ -36,11 +37,11 @@ def get_used_gpu_memory():
 
     img = open_image_cucim(testimg_tiff_stripe_4096x4096_256_jpeg)
 
-    mem_usage_history = [get_used_gpu_memory()]
+    mem_usage_history = [get_used_gpu_memory_mib()]
 
     for i in range(10):
         _ = img.read_region(device="cuda")
-        mem_usage_history.append(get_used_gpu_memory())
+        mem_usage_history.append(get_used_gpu_memory_mib())
 
     print(mem_usage_history)
 

From a509f2d433dbe87dcef47dd2e7481595c163aba7 Mon Sep 17 00:00:00 2001
From: Bradley Dice <bdice@bradleydice.com>
Date: Fri, 6 Sep 2024 12:39:24 -0700
Subject: [PATCH 3/3] Use cupy instead of cuda-python.

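---
Notes (review):
    The cuda-python version skipped the test when cudaGetDeviceCount
    returned a non-success status. cupy.cuda.runtime.getDeviceCount()
    reports a non-success status by raising CUDARuntimeError rather than
    returning 0, so on a machine with no GPU or driver the
    `num_gpus == 0` skip is likely never reached and the test errors
    instead of skipping. Confirm, and consider catching
    cp.cuda.runtime.CUDARuntimeError around the call and skipping there.
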
 conda/environments/all_cuda-118_arch-x86_64.yaml      |  1 -
 conda/environments/all_cuda-125_arch-x86_64.yaml      |  1 -
 dependencies.yaml                                     | 10 ----------
 python/cucim/pyproject.toml                           |  1 -
 .../clara/test_read_region_memory_usage.py            | 11 +++++------
 5 files changed, 5 insertions(+), 19 deletions(-)

diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index cf5f956ef..d657617b4 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -9,7 +9,6 @@ dependencies:
 - c-compiler
 - click
 - cmake>=3.26.4,!=3.30.0
-- cuda-python>=11.7.1,<12.0a0
 - cuda-version=11.8
 - cudatoolkit
 - cupy>=12.0.0
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index f4bd88a16..a193f5cdd 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -11,7 +11,6 @@ dependencies:
 - cmake>=3.26.4,!=3.30.0
 - cuda-cudart-dev
 - cuda-nvcc
-- cuda-python>=12.0,<13.0a0
 - cuda-version=12.5
 - cupy>=12.0.0
 - cxx-compiler
diff --git a/dependencies.yaml b/dependencies.yaml
index 67efbf600..62e2ddefe 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -322,13 +322,3 @@ dependencies:
         packages:
           # Already added to requirements via docs. This is for tests.
           - numpydoc>=1.5
-    specific:
-      - output_types: [conda, requirements, pyproject]
-        matrices:
-          - matrix: {cuda: "12.*"}
-            packages:
-              - cuda-python>=12.0,<13.0a0
-          - matrix: {cuda: "11.*"}
-            packages: &test_cuda_python_cu11
-              - cuda-python>=11.7.1,<12.0a0
-          - {matrix: null, packages: *test_cuda_python_cu11}
diff --git a/python/cucim/pyproject.toml b/python/cucim/pyproject.toml
index b8d1121b1..25248b3b2 100644
--- a/python/cucim/pyproject.toml
+++ b/python/cucim/pyproject.toml
@@ -57,7 +57,6 @@ Tracker = "https://github.com/rapidsai/cucim/issues"
 
 [project.optional-dependencies]
 test = [
-    "cuda-python>=11.7.1,<12.0a0",
     "imagecodecs>=2021.6.8; platform_machine=='x86_64'",
     "matplotlib",
     "numpydoc>=1.5",
diff --git a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py
index 33a5c3e0d..9b17b1549 100644
--- a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py
+++ b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 #
 
-import cuda.cudart
+import cupy as cp
 import pytest
 
 from ...util.io import open_image_cucim
@@ -25,14 +25,13 @@
 def test_read_region_cuda_memleak(testimg_tiff_stripe_4096x4096_256_jpeg):
     def get_used_gpu_memory_mib():
         """Get the used GPU memory in MiB."""
-        status, free, total = cuda.cudart.cudaMemGetInfo()
-        if status != cuda.cudart.cudaError_t.cudaSuccess:
-            raise RuntimeError("Failed to get GPU memory info.")
+        dev = cp.cuda.Device()
+        free, total = dev.mem_info
         memory_used = (total - free) / (2**20)
         return memory_used
 
-    status, num_gpus = cuda.cudart.cudaGetDeviceCount()
-    if status != cuda.cudart.cudaError_t.cudaSuccess or num_gpus == 0:
+    num_gpus = cp.cuda.runtime.getDeviceCount()
+    if num_gpus == 0:
         pytest.skip("No gpu available")
 
     img = open_image_cucim(testimg_tiff_stripe_4096x4096_256_jpeg)