Skip to content

Commit

Permalink
Try to work around an issue with NVHPC in conjunction with older CTK versions
Browse files Browse the repository at this point in the history
NVHPC can consume older CTK headers for stdpar, so we need to try and avoid using those
  • Loading branch information
miscco committed Nov 19, 2024
1 parent 8994dc4 commit 4b92514
Showing 1 changed file with 10 additions and 0 deletions.
10 changes: 10 additions & 0 deletions cub/cub/thread/thread_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -440,10 +440,15 @@ struct SimdMin<__half>

// Lane-wise minimum of two packed half-precision pairs.
//
// NVHPC paired with a CUDA toolkit older than 12.0 (NVHPC can consume older CTK headers for stdpar): each lane is
// widened to float, reduced with ::cuda::minimum<>, and the two floats are packed back by __floats2half2_rn.
//
// Everything else goes through NV_IF_TARGET: targets providing SM 8.0 call __hmin2 directly, while the remaining
// targets take the float round trip, narrowing each lane with __float2half and joining them with __halves2half2.
_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE __half2 operator()(__half2 a, __half2 b) const
{
# if _CCCL_CUDACC_BELOW(12.0) && defined(_CCCL_CUDA_COMPILER_NVHPC)
  const float min_x = ::cuda::minimum<>{}(__half2float(a.x), __half2float(b.x));
  const float min_y = ::cuda::minimum<>{}(__half2float(a.y), __half2float(b.y));
  return __floats2half2_rn(min_x, min_y);
# else // ^^^ _CCCL_CUDACC_BELOW(12.0) && _CCCL_CUDA_COMPILER_NVHPC ^^^ / vvv otherwise vvv
  NV_IF_TARGET(
    NV_PROVIDES_SM_80,
    (return __hmin2(a, b);),
    (const float min_x = ::cuda::minimum<>{}(__half2float(a.x), __half2float(b.x));
     const float min_y = ::cuda::minimum<>{}(__half2float(a.y), __half2float(b.y));
     return __halves2half2(__float2half(min_x), __float2half(min_y));));
# endif // !_CCCL_CUDACC_BELOW(12.0) || !_CCCL_CUDA_COMPILER_NVHPC
}
};

Expand All @@ -470,11 +475,16 @@ struct SimdMin<__nv_bfloat16>

// Lane-wise minimum of two packed bfloat16 pairs.
//
// NVHPC paired with a CUDA toolkit older than 12.0 (NVHPC can consume older CTK headers for stdpar): each lane is
// widened to float, reduced with ::cuda::minimum<>, and the two floats are packed back by __floats2bfloat162_rn.
//
// Everything else goes through NV_IF_TARGET: targets providing SM 8.0 call __hmin2 directly, while the remaining
// targets take the float round trip, narrowing each lane with __float2bfloat16 and joining them with
// cub::internal::halves2bfloat162.
_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE __nv_bfloat162 operator()(__nv_bfloat162 a, __nv_bfloat162 b) const
{
# if _CCCL_CUDACC_BELOW(12.0) && defined(_CCCL_CUDA_COMPILER_NVHPC)
  const float min_x = ::cuda::minimum<>{}(__bfloat162float(a.x), __bfloat162float(b.x));
  const float min_y = ::cuda::minimum<>{}(__bfloat162float(a.y), __bfloat162float(b.y));
  return __floats2bfloat162_rn(min_x, min_y);
# else // ^^^ _CCCL_CUDACC_BELOW(12.0) && _CCCL_CUDA_COMPILER_NVHPC ^^^ / vvv otherwise vvv
  NV_IF_TARGET(
    NV_PROVIDES_SM_80,
    (return __hmin2(a, b);),
    (const float min_x = ::cuda::minimum<>{}(__bfloat162float(a.x), __bfloat162float(b.x));
     const float min_y = ::cuda::minimum<>{}(__bfloat162float(a.y), __bfloat162float(b.y));
     return cub::internal::halves2bfloat162(__float2bfloat16(min_x), __float2bfloat16(min_y));));
# endif // !_CCCL_CUDACC_BELOW(12.0) || !_CCCL_CUDA_COMPILER_NVHPC
}
};

Expand Down

0 comments on commit 4b92514

Please sign in to comment.