Skip to content

Commit

Permalink
Enable CUB NVTX ranges only if the NVTX3 C++ API V1 is available
Browse files Browse the repository at this point in the history
  • Loading branch information
bernhardmgruber committed May 16, 2024
1 parent 06de31f commit 285792a
Showing 1 changed file with 31 additions and 19 deletions.
50 changes: 31 additions & 19 deletions cub/cub/detail/nvtx.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,19 +37,21 @@
# pragma system_header
#endif // no system header

// Enable the functionality of this header if
// Enable the functionality of this header if:
// * The NVTX3 C API is available in CTK
// * NVTX is not explicitly disabled
// * C++14 is available for cuda::std::optional
#if __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
#if __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
// Include our NVTX3 C++ wrapper if not available from the CTK
# if __has_include(<nvtx3/nvtx3.hpp>) // TODO(bgruber): replace by a check for the first CTK version shipping the header
# include <nvtx3/nvtx3.hpp>
# else // __has_include(<nvtx3/nvtx3.hpp>)
# include "nvtx3.hpp"
# endif // __has_include(<nvtx3/nvtx3.hpp>)

# include <cuda/std/optional>
// Furthermore, we only support the NVTX3 C++ API V1
# ifdef NVTX3_CPP_DEFINITIONS_V1_0
# include <cuda/std/optional>

CUB_NAMESPACE_BEGIN
namespace detail
Expand All @@ -62,27 +64,37 @@ struct NVTXCCCLDomain
CUB_NAMESPACE_END

// Hook for the NestedNVTXRangeGuard from the unit tests
# ifndef CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
# define CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name)
# endif // !CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
// Defaults to an empty expansion unless the macro was already defined before this point.
# ifndef CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
# define CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name)
# endif // !CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE

// Conditionally inserts a NVTX range starting here until the end of the current function scope in host code. Does
// nothing in device code.
// The optional is needed to defer the construction of an NVTX range (host-only code) and message string registration
// into a dispatch region running only on the host, while preserving the semantic scope where the range is declared.
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name) \
CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) \
::cuda::std::optional<::nvtx3::v1::scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain>> __cub_nvtx3_range; \
NV_IF_TARGET( \
NV_IS_HOST, \
static const ::nvtx3::v1::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{ \
name}; \
static const ::nvtx3::v1::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \
if (condition) __cub_nvtx3_range.emplace(__cub_nvtx3_func_attr); \
(void) __cub_nvtx3_range;)
// Expansion, in order:
//  1. CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) is expanded first.
//  2. A cuda::std::optional wrapping an nvtx3::v1::scoped_range_in for NVTXCCCLDomain is declared as
//     __cub_nvtx3_range, outside of the NV_IF_TARGET region.
//  3. Inside NV_IF_TARGET(NV_IS_HOST, ...): `name` initializes a static const registered_string_in, which in turn
//     initializes static const event_attributes; the range is emplaced only if `condition` holds.
// The trailing (void) cast references __cub_nvtx3_range, presumably to silence unused-variable warnings (confirm).
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name) \
CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) \
::cuda::std::optional<::nvtx3::v1::scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain>> __cub_nvtx3_range; \
NV_IF_TARGET( \
NV_IS_HOST, \
static const ::nvtx3::v1::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{ \
name}; \
static const ::nvtx3::v1::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \
if (condition) __cub_nvtx3_range.emplace(__cub_nvtx3_func_attr); \
(void) __cub_nvtx3_range;)

# define CUB_DETAIL_NVTX_RANGE_SCOPE(name) CUB_DETAIL_NVTX_RANGE_SCOPE_IF(true, name)
#else // __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER > 2011
// Unconditional variant: forwards to CUB_DETAIL_NVTX_RANGE_SCOPE_IF with the condition fixed to true.
# define CUB_DETAIL_NVTX_RANGE_SCOPE(name) CUB_DETAIL_NVTX_RANGE_SCOPE_IF(true, name)
# else // NVTX3_CPP_DEFINITIONS_V1_0
// Tell the user we don't support their NVTX3 version.
// This branch is taken when NVTX3_CPP_DEFINITIONS_V1_0 is not defined after including the NVTX3 C++ header.
// When _CCCL_COMPILER_MSVC is defined the diagnostic is emitted via `#pragma message`, otherwise via `#warning`.
# if defined(_CCCL_COMPILER_MSVC)
# pragma message("warning: CUB only supports NVTX3 V1. Please open an issue on GitHub.")
# else
# warning CUB only supports NVTX3 V1. Please open an issue on GitHub.
# endif
// Both range macros expand to nothing in this configuration.
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name)
# define CUB_DETAIL_NVTX_RANGE_SCOPE(name)
# endif // NVTX3_CPP_DEFINITIONS_V1_0
#else // __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
// The enabling condition of this header is not met: both range macros expand to nothing.
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name)
# define CUB_DETAIL_NVTX_RANGE_SCOPE(name)
#endif // __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER > 2011
#endif // __has_include(<nvtx3/nvToolsExt.h> ) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014

0 comments on commit 285792a

Please sign in to comment.