Make cudaMallocAsync logic non-optional as we require CUDA 11.2+ (#1667)

We can remove the optimizations around `CUDA_STATIC_RUNTIME` and instead check whether the function is already in the process space, so that RMM doesn't need any build context to run properly.

Fixes #1679

Authors:
- Robert Maynard (https://github.com/robertmaynard)

Approvers:
- Mark Harris (https://github.com/harrism)
- Vyas Ramasubramani (https://github.com/vyasr)

URL: #1667
rapidsai · Nov 19, 2024 · 929a595 · 929a595
1 parent c7fc017
commit 929a595
Show file tree

Hide file tree

Showing 16 changed files with 131 additions and 359 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -89,7 +89,6 @@ target_include_directories(rmm INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOUR
 if(CUDA_STATIC_RUNTIME)
   message(STATUS "RMM: Enabling static linking of cudart")
   target_link_libraries(rmm INTERFACE CUDA::cudart_static)
-  target_compile_definitions(rmm INTERFACE RMM_STATIC_CUDART)
 else()
   target_link_libraries(rmm INTERFACE CUDA::cudart)
 endif()

diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu
@@ -133,9 +133,7 @@ static void benchmark_range(benchmark::internal::Benchmark* bench)
 MRFactoryFunc get_mr_factory(std::string const& resource_name)
 {
   if (resource_name == "cuda") { return &make_cuda; }
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
   if (resource_name == "cuda_async") { return &make_cuda_async; }
-#endif
   if (resource_name == "pool") { return &make_pool; }
   if (resource_name == "arena") { return &make_arena; }
   if (resource_name == "binning") { return &make_binning; }
@@ -153,13 +151,11 @@ void declare_benchmark(std::string const& name)
     return;
   }
 
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
   if (name == "cuda_async") {
     BENCHMARK_CAPTURE(BM_MultiStreamAllocations, cuda_async, &make_cuda_async)  //
       ->Apply(benchmark_range);
     return;
   }
-#endif
 
   if (name == "pool") {
     BENCHMARK_CAPTURE(BM_MultiStreamAllocations, pool_mr, &make_pool)  //
@@ -248,9 +244,7 @@ int main(int argc, char** argv)
         resource_names.emplace_back(args["resource"].as<std::string>());
       } else {
         resource_names.emplace_back("cuda");
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
         resource_names.emplace_back("cuda_async");
-#endif
         resource_names.emplace_back("pool");
         resource_names.emplace_back("arena");
         resource_names.emplace_back("binning");

diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp
@@ -316,9 +316,7 @@ int main(int argc, char** argv)
       std::map<std::string, MRFactoryFunc> const funcs({{"arena", &make_arena},
                                                         {"binning", &make_binning},
                                                         {"cuda", &make_cuda},
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
                                                         {"cuda_async", &make_cuda_async},
-#endif
                                                         {"pool", &make_pool}});
       auto resource = args["resource"].as<std::string>();
 
@@ -340,11 +338,7 @@ int main(int argc, char** argv)
         std::string mr_name = args["resource"].as<std::string>();
         declare_benchmark(mr_name);
       } else {
-#ifdef RMM_CUDA_MALLOC_ASYNC_SUPPORT
         std::vector<std::string> mrs{"pool", "binning", "arena", "cuda_async", "cuda"};
-#else
-        std::vector<std::string> mrs{"pool", "binning", "arena", "cuda"};
-#endif
         std::for_each(
           std::cbegin(mrs), std::cend(mrs), [](auto const& mr) { declare_benchmark(mr); });
       }

diff --git a/include/rmm/detail/dynamic_load_runtime.hpp b/include/rmm/detail/dynamic_load_runtime.hpp
diff --git a/include/rmm/detail/runtime_async_alloc.hpp b/include/rmm/detail/runtime_async_alloc.hpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <rmm/cuda_device.hpp>
+#include <rmm/detail/export.hpp>
+
+#include <cuda_runtime_api.h>
+
+#include <dlfcn.h>
+
+#include <memory>
+#include <optional>
+
+namespace RMM_NAMESPACE {
+namespace detail {
+
+/**
+ * @brief Runtime queries about the stream-ordered memory allocator.
+ *
+ * RMM may be compiled and linked against CUDA 11.2+ yet run with an older driver, so
+ * support for the allocator functions is probed at runtime instead of being assumed.
+ */
+struct runtime_async_alloc {
+  /**
+   * @brief Report whether memory pools are supported.
+   *
+   * `cudaDevAttrMemoryPoolsSupported` is queried a single time, on the first call, for the
+   * device returned by `rmm::get_current_cuda_device()`. The answer is stored in a
+   * function-local static and handed back unchanged by every later call.
+   *
+   * @return true if the attribute query succeeded and reported a value of 1
+   * @return false if the query failed or reported any other value
+   */
+  static bool is_supported()
+  {
+    static bool const pools_available = [] {
+      int attribute_value{};
+      auto const status = cudaDeviceGetAttribute(
+        &attribute_value, cudaDevAttrMemoryPoolsSupported, rmm::get_current_cuda_device().value());
+      // A failed query is treated the same as "not supported".
+      if (status != cudaSuccess) { return false; }
+      return attribute_value == 1;
+    }();
+    return pools_available;
+  }
+
+  /**
+   * @brief Check whether the given `cudaMemAllocationHandleType` is supported by the present
+   * CUDA driver/runtime version.
+   *
+   * For `cudaMemHandleTypeNone` no query is issued and the comparison is made against an
+   * all-zero bitmask. For any other value the bitmask comes from the
+   * `cudaDevAttrMemoryPoolSupportedHandleTypes` attribute of the device returned by
+   * `rmm::get_current_cuda_device()`.
+   *
+   * Errors other than `cudaErrorInvalidValue` are passed to `RMM_CUDA_TRY`.
+   *
+   * @param handle_type An IPC export handle type to check for support.
+   * @return true if every bit of `handle_type` is present in the supported bitmask
+   * @return false if a bit is missing, or if the query returned `cudaErrorInvalidValue`
+   */
+  static bool is_export_handle_type_supported(cudaMemAllocationHandleType handle_type)
+  {
+    int supported_mask{};
+    if (handle_type != cudaMemHandleTypeNone) {
+      auto const status = cudaDeviceGetAttribute(&supported_mask,
+                                                 cudaDevAttrMemoryPoolSupportedHandleTypes,
+                                                 rmm::get_current_cuda_device().value());
+
+      // cudaErrorInvalidValue is reported as "unsupported" rather than thrown
+      // (presumably the runtime does not recognize the attribute).
+      if (status == cudaErrorInvalidValue) { return false; }
+      // Any other error that may have occurred is raised here.
+      RMM_CUDA_TRY(status);
+    }
+    return (supported_mask & handle_type) == handle_type;
+  }
+};
+
+}  // namespace detail
+}  // namespace RMM_NAMESPACE