From 1b83eb13ec7f913a98e297aaeb7a5ed7942e0cf0 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Sep 2024 11:17:57 -0700 Subject: [PATCH 1/3] Use cuda-python to measure memory leak --- .../all_cuda-118_arch-x86_64.yaml | 2 +- .../all_cuda-125_arch-x86_64.yaml | 2 +- dependencies.yaml | 11 +++++++++- python/cucim/pyproject.toml | 2 +- .../clara/test_read_region_memory_usage.py | 22 ++++++++++--------- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 1b657fcd0..cf5f956ef 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -6,10 +6,10 @@ channels: - conda-forge - nvidia dependencies: -- GPUtil>=1.4.0 - c-compiler - click - cmake>=3.26.4,!=3.30.0 +- cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 2f5f50192..f4bd88a16 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -6,12 +6,12 @@ channels: - conda-forge - nvidia dependencies: -- GPUtil>=1.4.0 - c-compiler - click - cmake>=3.26.4,!=3.30.0 - cuda-cudart-dev - cuda-nvcc +- cuda-python>=12.0,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/dependencies.yaml b/dependencies.yaml index e8d5f8aaf..67efbf600 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -295,7 +295,6 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - GPUtil>=1.4.0 - psutil>=5.8.0 - pytest>=6.2.4,<8.0.0a0 - pytest-cov>=2.12.1 @@ -323,3 +322,13 @@ dependencies: packages: # Already added to requirements via docs. This is for tests. - numpydoc>=1.5 + specific: + - output_types: [conda, requirements, pyproject] + matrices: + - matrix: {cuda: "12.*"} + packages: + - cuda-python>=12.0,<13.0a0 + - matrix: {cuda: "11.*"} + packages: &test_cuda_python_cu11 + - cuda-python>=11.7.1,<12.0a0 + - {matrix: null, packages: *test_cuda_python_cu11} diff --git a/python/cucim/pyproject.toml b/python/cucim/pyproject.toml index 4537f3df6..b8d1121b1 100644 --- a/python/cucim/pyproject.toml +++ b/python/cucim/pyproject.toml @@ -57,7 +57,7 @@ Tracker = "https://github.com/rapidsai/cucim/issues" [project.optional-dependencies] test = [ - "GPUtil>=1.4.0", + "cuda-python>=11.7.1,<12.0a0", "imagecodecs>=2021.6.8; platform_machine=='x86_64'", "matplotlib", "numpydoc>=1.5", diff --git a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py index 8f259b910..48b7dc0b9 100644 --- a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py +++ b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py @@ -13,6 +13,7 @@ # limitations under the License. # +import cuda.cudart import pytest from ...util.io import open_image_cucim @@ -22,23 +23,24 @@ def test_read_region_cuda_memleak(testimg_tiff_stripe_4096x4096_256_jpeg): - import GPUtil - - gpus = GPUtil.getGPUs() - - if len(gpus) == 0: + def get_used_gpu_memory(): + status, free, total = cuda.cudart.cudaMemGetInfo() + if status != cuda.cudart.cudaError_t.cudaSuccess: + raise RuntimeError("Failed to get GPU memory info.") + memory_used = (total - free) / (2**20) + return memory_used + + status, num_gpus = cuda.cudart.cudaGetDeviceCount() + if status != cuda.cudart.cudaError_t.cudaSuccess or num_gpus == 0: pytest.skip("No gpu available") img = open_image_cucim(testimg_tiff_stripe_4096x4096_256_jpeg) - gpu = gpus[0] - mem_usage_history = [gpu.memoryUsed] + mem_usage_history = [get_used_gpu_memory()] for i in range(10): _ = img.read_region(device="cuda") - gpus = GPUtil.getGPUs() - gpu = gpus[0] - mem_usage_history.append(gpu.memoryUsed) + mem_usage_history.append(get_used_gpu_memory()) print(mem_usage_history) From 8075ba3cd304dd0e3300958d5b6fad2fb3baa5b6 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Sep 2024 11:22:38 -0700 Subject: [PATCH 2/3] Clarify units are MiB. --- .../performance/clara/test_read_region_memory_usage.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py index 48b7dc0b9..33a5c3e0d 100644 --- a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py +++ b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py @@ -23,7 +23,8 @@ def test_read_region_cuda_memleak(testimg_tiff_stripe_4096x4096_256_jpeg): - def get_used_gpu_memory(): + def get_used_gpu_memory_mib(): + """Get the used GPU memory in MiB.""" status, free, total = cuda.cudart.cudaMemGetInfo() if status != cuda.cudart.cudaError_t.cudaSuccess: raise RuntimeError("Failed to get GPU memory info.") @@ -36,11 +37,11 @@ def get_used_gpu_memory(): img = open_image_cucim(testimg_tiff_stripe_4096x4096_256_jpeg) - mem_usage_history = [get_used_gpu_memory()] + mem_usage_history = [get_used_gpu_memory_mib()] for i in range(10): _ = img.read_region(device="cuda") - mem_usage_history.append(get_used_gpu_memory()) + mem_usage_history.append(get_used_gpu_memory_mib()) print(mem_usage_history) From a509f2d433dbe87dcef47dd2e7481595c163aba7 Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Fri, 6 Sep 2024 12:39:24 -0700 Subject: [PATCH 3/3] Use cupy instead of cuda-python. --- conda/environments/all_cuda-118_arch-x86_64.yaml | 1 - conda/environments/all_cuda-125_arch-x86_64.yaml | 1 - dependencies.yaml | 10 ---------- python/cucim/pyproject.toml | 1 - .../clara/test_read_region_memory_usage.py | 11 +++++------ 5 files changed, 5 insertions(+), 19 deletions(-) diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index cf5f956ef..d657617b4 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -9,7 +9,6 @@ dependencies: - c-compiler - click - cmake>=3.26.4,!=3.30.0 -- cuda-python>=11.7.1,<12.0a0 - cuda-version=11.8 - cudatoolkit - cupy>=12.0.0 diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index f4bd88a16..a193f5cdd 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -11,7 +11,6 @@ dependencies: - cmake>=3.26.4,!=3.30.0 - cuda-cudart-dev - cuda-nvcc -- cuda-python>=12.0,<13.0a0 - cuda-version=12.5 - cupy>=12.0.0 - cxx-compiler diff --git a/dependencies.yaml b/dependencies.yaml index 67efbf600..62e2ddefe 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -322,13 +322,3 @@ dependencies: packages: # Already added to requirements via docs. This is for tests. - numpydoc>=1.5 - specific: - - output_types: [conda, requirements, pyproject] - matrices: - - matrix: {cuda: "12.*"} - packages: - - cuda-python>=12.0,<13.0a0 - - matrix: {cuda: "11.*"} - packages: &test_cuda_python_cu11 - - cuda-python>=11.7.1,<12.0a0 - - {matrix: null, packages: *test_cuda_python_cu11} diff --git a/python/cucim/pyproject.toml b/python/cucim/pyproject.toml index b8d1121b1..25248b3b2 100644 --- a/python/cucim/pyproject.toml +++ b/python/cucim/pyproject.toml @@ -57,7 +57,6 @@ Tracker = "https://github.com/rapidsai/cucim/issues" [project.optional-dependencies] test = [ - "cuda-python>=11.7.1,<12.0a0", "imagecodecs>=2021.6.8; platform_machine=='x86_64'", "matplotlib", "numpydoc>=1.5", diff --git a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py index 33a5c3e0d..9b17b1549 100644 --- a/python/cucim/tests/performance/clara/test_read_region_memory_usage.py +++ b/python/cucim/tests/performance/clara/test_read_region_memory_usage.py @@ -13,7 +13,7 @@ # limitations under the License. # -import cuda.cudart +import cupy as cp import pytest from ...util.io import open_image_cucim @@ -25,14 +25,13 @@ def test_read_region_cuda_memleak(testimg_tiff_stripe_4096x4096_256_jpeg): def get_used_gpu_memory_mib(): """Get the used GPU memory in MiB.""" - status, free, total = cuda.cudart.cudaMemGetInfo() - if status != cuda.cudart.cudaError_t.cudaSuccess: - raise RuntimeError("Failed to get GPU memory info.") + dev = cp.cuda.Device() + free, total = dev.mem_info memory_used = (total - free) / (2**20) return memory_used - status, num_gpus = cuda.cudart.cudaGetDeviceCount() - if status != cuda.cudart.cudaError_t.cudaSuccess or num_gpus == 0: + num_gpus = cp.cuda.runtime.getDeviceCount() + if num_gpus == 0: pytest.skip("No gpu available") img = open_image_cucim(testimg_tiff_stripe_4096x4096_256_jpeg)