From 285792af1669e18e0c601ea38ecaa3d630cd9e17 Mon Sep 17 00:00:00 2001 From: Bernhard Manfred Gruber Date: Fri, 17 May 2024 00:23:09 +0200 Subject: [PATCH] Enable CUB NVTX ranges only if the NVTX3 C++ API V1 is available --- cub/cub/detail/nvtx.cuh | 50 +++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 19 deletions(-) diff --git a/cub/cub/detail/nvtx.cuh b/cub/cub/detail/nvtx.cuh index cc8a0379b18..f86fd7f33bb 100644 --- a/cub/cub/detail/nvtx.cuh +++ b/cub/cub/detail/nvtx.cuh @@ -37,11 +37,11 @@ # pragma system_header #endif // no system header -// Enable the functionality of this header if +// Enable the functionality of this header if: // * The NVTX3 C API is available in CTK // * NVTX is not explicitly disabled // * C++14 is availabl for cuda::std::optional -#if __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014 +#if __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014 // Include our NVTX3 C++ wrapper if not available from the CTK # if __has_include(<nvtx3/nvtx3.hpp>) // TODO(bgruber): replace by a check for the first CTK version shipping the header # include <nvtx3/nvtx3.hpp> @@ -49,7 +49,9 @@ # include "nvtx3.hpp" # endif // __has_include(<nvtx3/nvtx3.hpp>) -# include <cuda/std/optional> +// Furthermore, we only support the NVTX3 C++ API V1 +# ifdef NVTX3_CPP_DEFINITIONS_V1_0 +# include <cuda/std/optional> CUB_NAMESPACE_BEGIN namespace detail @@ -62,27 +64,37 @@ struct NVTXCCCLDomain CUB_NAMESPACE_END // Hook for the NestedNVTXRangeGuard from the unit tests -# ifndef CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE -# define CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) -# endif // !CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE +# ifndef CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE +# define CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) +# endif // !CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE // Conditionally inserts a NVTX range starting here until the end of the current function scope in host code. Does // nothing in device code.
// The optional is needed to defer the construction of an NVTX range (host-only code) and message string registration // into a dispatch region running only on the host, while preserving the semantic scope where the range is declared. -# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name) \ - CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) \ - ::cuda::std::optional<::nvtx3::v1::scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain>> __cub_nvtx3_range; \ - NV_IF_TARGET( \ - NV_IS_HOST, \ - static const ::nvtx3::v1::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{ \ - name}; \ - static const ::nvtx3::v1::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \ - if (condition) __cub_nvtx3_range.emplace(__cub_nvtx3_func_attr); \ - (void) __cub_nvtx3_range;) +# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name) \ + CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) \ + ::cuda::std::optional<::nvtx3::v1::scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain>> __cub_nvtx3_range; \ + NV_IF_TARGET( \ + NV_IS_HOST, \ + static const ::nvtx3::v1::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{ \ + name}; \ + static const ::nvtx3::v1::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \ + if (condition) __cub_nvtx3_range.emplace(__cub_nvtx3_func_attr); \ + (void) __cub_nvtx3_range;) -# define CUB_DETAIL_NVTX_RANGE_SCOPE(name) CUB_DETAIL_NVTX_RANGE_SCOPE_IF(true, name) -#else // __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER > 2011 +# define CUB_DETAIL_NVTX_RANGE_SCOPE(name) CUB_DETAIL_NVTX_RANGE_SCOPE_IF(true, name) +# else // NVTX3_CPP_DEFINITIONS_V1_0 +// Tell the user we don't support their NVTX3 version. +# if defined(_CCCL_COMPILER_MSVC) +# pragma message("warning: CUB only supports NVTX3 V1. Please open an issue on GitHub.") +# else +# warning CUB only supports NVTX3 V1. Please open an issue on GitHub.
+# endif +# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name) +# define CUB_DETAIL_NVTX_RANGE_SCOPE(name) +# endif // NVTX3_CPP_DEFINITIONS_V1_0 +#else // __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014 # define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name) # define CUB_DETAIL_NVTX_RANGE_SCOPE(name) -#endif // __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER > 2011 +#endif // __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014