From 34f9cbce9012102ca76b9e4cda8149b42f5c95e6 Mon Sep 17 00:00:00 2001 From: Weiqun Zhang Date: Thu, 2 Nov 2023 11:34:57 -0700 Subject: [PATCH] Simplify clz --- Src/Base/AMReX_Algorithm.H | 124 +++++++------------------------------ 1 file changed, 23 insertions(+), 101 deletions(-) diff --git a/Src/Base/AMReX_Algorithm.H b/Src/Base/AMReX_Algorithm.H index 3cdeec47536..5acd104bbe5 100644 --- a/Src/Base/AMReX_Algorithm.H +++ b/Src/Base/AMReX_Algorithm.H @@ -240,24 +240,6 @@ int builtin_clz_wrapper (clzll_tag, T x) noexcept return static_cast(__builtin_clzll(x) - (sizeof(unsigned long long) * CHAR_BIT - sizeof(T) * CHAR_BIT)); } -#ifdef AMREX_USE_CUDA - -// likewise with CUDA, there are __clz functions that take (signed) int and long long int -template ::type> -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -int clz_wrapper (clz_tag, T x) noexcept -{ - return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT); -} - -template ::type> -AMREX_GPU_DEVICE AMREX_FORCE_INLINE -int clz_wrapper (clzll_tag, T x) noexcept -{ - return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT); -} -#endif - } AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE @@ -269,33 +251,6 @@ int clz_generic (std::uint8_t x) noexcept return upper ? clz_lookup[upper] : 4 + clz_lookup[lower]; } -#if defined AMREX_USE_CUDA - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint8_t x) noexcept -{ - AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);)) -#if AMREX_HAS_BUILTIN_CLZ - AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);)) -#else - AMREX_IF_ON_HOST((return clz_generic(x);)) -#endif -} - -#else // !defined AMREX_USE_CUDA - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint8_t x) noexcept -{ -#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ) - return detail::builtin_clz_wrapper(detail::clz_tag{}, x); -#else - return clz_generic(x); -#endif -} - -#endif // defined AMREX_USE_CUDA - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz_generic (std::uint16_t x) noexcept { @@ -304,33 +259,6 @@ int clz_generic (std::uint16_t x) noexcept return upper ? clz(upper) : 8 + clz(lower); } -#if defined AMREX_USE_CUDA - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint16_t x) noexcept -{ - AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);)) -#if AMREX_HAS_BUILTIN_CLZ - AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);)) -#else - AMREX_IF_ON_HOST((return clz_generic(x);)) -#endif -} - -#else // !defined AMREX_USE_CUDA - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint16_t x) noexcept -{ -#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ) - return detail::builtin_clz_wrapper(detail::clz_tag{}, x); -#else - return clz_generic(x); -#endif -} - -#endif // defined AMREX_USE_CUDA - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz_generic (std::uint32_t x) noexcept { @@ -339,33 +267,6 @@ int clz_generic (std::uint32_t x) noexcept return upper ? clz(upper) : 16 + clz(lower); } -#if defined AMREX_USE_CUDA - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint32_t x) noexcept -{ - AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);)) -#if AMREX_HAS_BUILTIN_CLZ - AMREX_IF_ON_HOST((return detail::builtin_clz_wrapper(detail::clz_tag{}, x);)) -#else - AMREX_IF_ON_HOST((return clz_generic(x);)) -#endif -} - -#else // !defined AMREX_USE_CUDA - -AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint32_t x) noexcept -{ -#if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ) - return detail::builtin_clz_wrapper(detail::clz_tag{}, x); -#else - return clz_generic(x); -#endif -} - -#endif // defined AMREX_USE_CUDA - AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE int clz_generic (std::uint64_t x) noexcept { @@ -376,8 +277,29 @@ int clz_generic (std::uint64_t x) noexcept #if defined AMREX_USE_CUDA +namespace detail { + // likewise with CUDA, there are __clz functions that take (signed) int and long long int + template ::type> + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + int clz_wrapper (clz_tag, T x) noexcept + { + return __clz((int) x) - (sizeof(int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } + + template ::type> + AMREX_GPU_DEVICE AMREX_FORCE_INLINE + int clz_wrapper (clzll_tag, T x) noexcept + { + return __clzll((long long int) x) - (sizeof(long long int) * CHAR_BIT - sizeof(T) * CHAR_BIT); + } +} + +template ,std::uint8_t> || + std::is_same_v,std::uint16_t> || + std::is_same_v,std::uint32_t> || + std::is_same_v,std::uint64_t>, int> = 0> AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint64_t x) noexcept +int clz (T x) noexcept { AMREX_IF_ON_DEVICE((return detail::clz_wrapper(detail::clz_tag{}, x);)) #if AMREX_HAS_BUILTIN_CLZ @@ -390,7 +312,7 @@ int clz (std::uint64_t x) noexcept #else // !defined AMREX_USE_CUDA AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE -int clz (std::uint64_t x) noexcept +int clz (T x) noexcept { #if (!AMREX_DEVICE_COMPILE && AMREX_HAS_BUILTIN_CLZ) return detail::builtin_clz_wrapper(detail::clz_tag{}, x);