Skip to content

Commit

Permalink
Add NVTX ranges for all CUB algorithms (#1657)
Browse files Browse the repository at this point in the history
Add NVTX ranges to all CUB algorithms

* Add a standalone test for NVTX ranges
* Document NVTX ranges in CUB device algorithms
* Let header checks tolerate the inclusion of Windows.h
* Avoid shadowing warning:
c:\cccl\cub\cub\detail\nvtx3.hpp(807): warning C4459: declaration of 'd' hides global declaration
C:/cccl/thrust/examples/cuda/global_device_vector.cu(30): note: see declaration of 'd'

Fixes: #719

Co-authored-by: Michael Schellenberger Costa <[email protected]>
  • Loading branch information
bernhardmgruber and miscco authored Apr 29, 2024
1 parent 7bfd940 commit dd6f124
Show file tree
Hide file tree
Showing 70 changed files with 3,976 additions and 180 deletions.
5 changes: 5 additions & 0 deletions cub/cmake/header_test.in
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@
//#define min(...) CUB_MACRO_CHECK('min', windows.h)
//#define max(...) CUB_MACRO_CHECK('max', windows.h)

#ifdef _WIN32
// On Windows, make sure any include of Windows.h (e.g. via NVTX) does not define the checked macros
// NOTE(review): WIN32_LEAN_AND_MEAN only has an effect if it is defined before the first include of Windows.h;
// confirm that no header is included above this point.
# define WIN32_LEAN_AND_MEAN
#endif // _WIN32

// termios.h conflicts (NVIDIA/thrust#1547)
// B0 is redefined to a CUB_MACRO_CHECK expansion so that any use of the identifier in the header under test is
// flagged. CUB_MACRO_CHECK is defined outside this view; presumably it produces a diagnostic -- TODO confirm.
#define B0 CUB_MACRO_CHECK("B0", termios.h)

Expand Down
87 changes: 87 additions & 0 deletions cub/cub/detail/nvtx.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/******************************************************************************
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#pragma once

#include <cub/config.cuh>

// Mark this header as a system header using the pragma spelling of the active compiler. Exactly one branch is
// taken, selected by which _CCCL_IMPLICIT_SYSTEM_HEADER_* macro is defined; if none is defined, no pragma is emitted.
#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

// Enable the functionality of this header if
// * The NVTX3 C API is available in CTK
// * NVTX is not explicitly disabled
// * C++14 is available for cuda::std::optional
#if __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
// Include our NVTX3 C++ wrapper if not available from the CTK
# if __has_include(<nvtx3/nvtx3.hpp>) // TODO(bgruber): replace by a check for the first CTK version shipping the header
# include <nvtx3/nvtx3.hpp>
# else // __has_include(<nvtx3/nvtx3.hpp>)
# include "nvtx3.hpp"
# endif // __has_include(<nvtx3/nvtx3.hpp>)

# include <cuda/std/optional>

CUB_NAMESPACE_BEGIN
namespace detail
{
// Domain tag type. It is passed as the template argument to the nvtx3 `*_in` types used by the NVTX range macros
// in this header; `name` holds the domain's name string.
struct NVTXCCCLDomain
{
  static constexpr char const* name{"CCCL"};
};
} // namespace detail
CUB_NAMESPACE_END

// Hook for the NestedNVTXRangeGuard from the unit tests.
// It is expanded with the range name as the first thing in every CUB_DETAIL_NVTX_RANGE_SCOPE_IF. Unless it has
// already been defined before this point, it defaults to an empty expansion.
# ifndef CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE
# define CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name)
# endif // !CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE

// Conditionally inserts an NVTX range starting here and lasting until the end of the enclosing scope in host code.
// Does nothing in device code.
//
// Arguments:
//   condition - expression tested on the host; the range is only opened when it evaluates to true
//   name      - label of the range; it is registered through a function-local static, so the registration and the
//               event attributes are constructed once per expansion site rather than on every call
//
// The optional is needed to defer the construction of an NVTX range (host-only code) and message string registration
// into a dispatch region running only on the host, while preserving the semantic scope where the range is declared.
//
// The macro declares the variable __cub_nvtx3_range in the enclosing scope, so it cannot be expanded twice in the
// same scope. CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) is expanded first, before the optional is declared.
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name) \
CUB_DETAIL_BEFORE_NVTX_RANGE_SCOPE(name) \
::cuda::std::optional<::nvtx3::scoped_range_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain>> __cub_nvtx3_range; \
NV_IF_TARGET( \
NV_IS_HOST, \
static const ::nvtx3::registered_string_in<CUB_NS_QUALIFIER::detail::NVTXCCCLDomain> __cub_nvtx3_func_name{name}; \
static const ::nvtx3::event_attributes __cub_nvtx3_func_attr{__cub_nvtx3_func_name}; \
if (condition) __cub_nvtx3_range.emplace(__cub_nvtx3_func_attr); \
(void) __cub_nvtx3_range;)

// Unconditional variant: forwards to CUB_DETAIL_NVTX_RANGE_SCOPE_IF with a condition that is always true.
# define CUB_DETAIL_NVTX_RANGE_SCOPE(name) CUB_DETAIL_NVTX_RANGE_SCOPE_IF(true, name)
#else // __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
// NVTX is unavailable, explicitly disabled, or the language standard is too old: both range macros expand to
// nothing, so call sites compile unchanged without emitting any NVTX code.
# define CUB_DETAIL_NVTX_RANGE_SCOPE_IF(condition, name)
# define CUB_DETAIL_NVTX_RANGE_SCOPE(name)
#endif // __has_include(<nvtx3/nvToolsExt.h>) && !defined(NVTX_DISABLE) && _CCCL_STD_VER >= 2014
Loading

0 comments on commit dd6f124

Please sign in to comment.