Move std::midpoint to its own file

NVIDIA · Apr 30, 2024 · 6cbaa6e · 6cbaa6e
1 parent f4b1fcd
commit 6cbaa6e
Show file tree

Hide file tree

Showing 8 changed files with 531 additions and 89 deletions.
diff --git a/libcudacxx/include/cuda/std/__numeric/midpoint.h b/libcudacxx/include/cuda/std/__numeric/midpoint.h
@@ -0,0 +1,104 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCUDACXX___NUMERIC_MIDPOINT_H
+#define _LIBCUDACXX___NUMERIC_MIDPOINT_H
+
+#include <cuda/std/detail/__config>
+
+#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
+#  pragma GCC system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
+#  pragma clang system_header
+#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
+#  pragma system_header
+#endif // no system header
+
+#include <cuda/std/__type_traits/enable_if.h>
+#include <cuda/std/__type_traits/is_floating_point.h>
+#include <cuda/std/__type_traits/is_integral.h>
+#include <cuda/std/__type_traits/is_null_pointer.h>
+#include <cuda/std/__type_traits/is_object.h>
+#include <cuda/std/__type_traits/is_pointer.h>
+#include <cuda/std/__type_traits/is_same.h>
+#include <cuda/std/__type_traits/is_void.h>
+#include <cuda/std/__type_traits/make_unsigned.h>
+#include <cuda/std/__type_traits/remove_pointer.h>
+#include <cuda/std/cstddef>
+#include <cuda/std/limits>
+
+// comes last
+#include <cuda/std/detail/__pragma_push>
+
+_LIBCUDACXX_BEGIN_NAMESPACE_STD
+
+template <class _Tp>
+_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14
+  __enable_if_t<_LIBCUDACXX_TRAIT(is_integral, _Tp) && !_LIBCUDACXX_TRAIT(is_same, bool, _Tp)
+                  && !_LIBCUDACXX_TRAIT(is_null_pointer, _Tp),
+                _Tp>
+  midpoint(_Tp __a, _Tp __b) noexcept
+{
+  using _Up = __make_unsigned_t<_Tp>;
+
+  if (__a > __b)
+  {
+    const _Up __diff = _Up(__a) - _Up(__b);
+    return static_cast<_Tp>(__a - static_cast<_Tp>(__diff / 2));
+  }
+  else
+  {
+    const _Up __diff = _Up(__b) - _Up(__a);
+    return static_cast<_Tp>(__a + static_cast<_Tp>(__diff / 2));
+  }
+}
+
+template <
+  class _Tp,
+  __enable_if_t<_LIBCUDACXX_TRAIT(is_object, _Tp) && !_LIBCUDACXX_TRAIT(is_void, _Tp) && (sizeof(_Tp) > 0), int> = 0>
+_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Tp* midpoint(_Tp* __a, _Tp* __b) noexcept
+{
+  return __a + _CUDA_VSTD::midpoint(ptrdiff_t(0), __b - __a);
+}
+
+template <typename _Tp>
+_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 int __sign(_Tp __val)
+{
+  return (_Tp(0) < __val) - (__val < _Tp(0));
+}
+
+template <typename _Fp>
+_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Fp __fp_abs(_Fp __f)
+{
+  return __f >= 0 ? __f : -__f;
+}
+
+template <class _Fp>
+_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14
+  __enable_if_t<_LIBCUDACXX_TRAIT(is_floating_point, _Fp), _Fp>
+  midpoint(_Fp __a, _Fp __b) noexcept
+{
+  _CCCL_CONSTEXPR_CXX14 _Fp __lo = numeric_limits<_Fp>::min() * 2;
+  _CCCL_CONSTEXPR_CXX14 _Fp __hi = numeric_limits<_Fp>::max() / 2;
+  return _CUDA_VSTD::__fp_abs(__a) <= __hi && _CUDA_VSTD::__fp_abs(__b) <= __hi
+         ? // typical case: overflow is impossible
+           (__a + __b) / 2
+         : // always correctly rounded
+           _CUDA_VSTD::__fp_abs(__a) < __lo ? __a + __b / 2 : // not safe to halve a
+             _CUDA_VSTD::__fp_abs(__b) < __lo
+             ? __a / 2 + __b
+             : // not safe to halve b
+             __a / 2 + __b / 2; // otherwise correctly rounded
+}
+
+_LIBCUDACXX_END_NAMESPACE_STD
+
+#include <cuda/std/detail/__pragma_pop>
+
+#endif // _LIBCUDACXX___NUMERIC_MIDPOINT_H
diff --git a/libcudacxx/include/cuda/std/__type_traits/is_null_pointer.h b/libcudacxx/include/cuda/std/__type_traits/is_null_pointer.h
@@ -37,11 +37,9 @@ template <class _Tp>
 struct _LIBCUDACXX_TEMPLATE_VIS __is_nullptr_t : public __is_nullptr_t_impl<__remove_cv_t<_Tp>>
 {};
 
-#if _CCCL_STD_VER > 2011
 template <class _Tp>
 struct _LIBCUDACXX_TEMPLATE_VIS is_null_pointer : public __is_nullptr_t_impl<__remove_cv_t<_Tp>>
 {};
-#endif
 
 #if _CCCL_STD_VER > 2011 && !defined(_LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES)
 template <class _Tp>

diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/numeric b/libcudacxx/include/cuda/std/detail/libcxx/include/numeric
@@ -155,97 +155,22 @@ floating_point  midpoint(floating_point a, floating_point b);    // C++20
 #  pragma system_header
 #endif // no system header
 
-#include <cuda/std/__iterator/iterator_traits.h>
 #include <cuda/std/__numeric/accumulate.h>
 #include <cuda/std/__numeric/adjacent_difference.h>
 #include <cuda/std/__numeric/exclusive_scan.h>
 #include <cuda/std/__numeric/gcd_lcm.h>
 #include <cuda/std/__numeric/inclusive_scan.h>
 #include <cuda/std/__numeric/inner_product.h>
 #include <cuda/std/__numeric/iota.h>
+#include <cuda/std/__numeric/midpoint.h>
 #include <cuda/std/__numeric/partial_sum.h>
 #include <cuda/std/__numeric/reduce.h>
 #include <cuda/std/__numeric/transform_exclusive_scan.h>
 #include <cuda/std/__numeric/transform_inclusive_scan.h>
 #include <cuda/std/__numeric/transform_reduce.h>
-#include <cuda/std/__utility/move.h>
-#include <cuda/std/cmath> // for isnormal
 #include <cuda/std/detail/libcxx/include/__assert> // all public C++ headers provide the assertion handler
-#include <cuda/std/detail/libcxx/include/__pragma_push>
-#include <cuda/std/functional>
-#include <cuda/std/limits> // for numeric_limits
-#include <cuda/std/version>
-
-_LIBCUDACXX_BEGIN_NAMESPACE_STD
-
-#ifndef __cuda_std__
-#  if _CCCL_STD_VER > 2017
-template <class _Tp>
-_LIBCUDACXX_INLINE_VISIBILITY constexpr enable_if_t<
-  is_integral_v<_Tp> && !is_same_v<bool, _Tp> && !is_null_pointer_v<_Tp>,
-  _Tp>
-midpoint(_Tp __a, _Tp __b) noexcept _LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
-{
-  using _Up = std::make_unsigned_t<_Tp>;
-
-  int __sign = 1;
-  _Up __m    = __a;
-  _Up __M    = __b;
-  if (__a > __b)
-  {
-    __sign = -1;
-    __m    = __b;
-    __M    = __a;
-  }
-  return __a + __sign * _Tp(_Up(__M - __m) >> 1);
-}
-
-template <class _TPtr>
-_LIBCUDACXX_INLINE_VISIBILITY constexpr enable_if_t<
-  is_pointer_v<_TPtr> && is_object_v<remove_pointer_t<_TPtr>> && !is_void_v<remove_pointer_t<_TPtr>>
-    && (sizeof(remove_pointer_t<_TPtr>) > 0),
-  _TPtr>
-midpoint(_TPtr __a, _TPtr __b) noexcept
-{
-  return __a + _CUDA_VSTD::midpoint(ptrdiff_t(0), __b - __a);
-}
-
-template <typename _Tp>
-constexpr int __sign(_Tp __val)
-{
-  return (_Tp(0) < __val) - (__val < _Tp(0));
-}
 
-template <typename _Fp>
-constexpr _Fp __fp_abs(_Fp __f)
-{
-  return __f >= 0 ? __f : -__f;
-}
-
-template <class _Fp>
-_LIBCUDACXX_INLINE_VISIBILITY constexpr enable_if_t<is_floating_point_v<_Fp>, _Fp> midpoint(_Fp __a, _Fp __b) noexcept
-{
-  constexpr _Fp __lo = numeric_limits<_Fp>::min() * 2;
-  constexpr _Fp __hi = numeric_limits<_Fp>::max() / 2;
-  return __fp_abs(__a) <= __hi && __fp_abs(__b) <= __hi
-         ? // typical case: overflow is impossible
-           (__a + __b) / 2
-         : // always correctly rounded
-           __fp_abs(__a) < __lo ? __a + __b / 2 : // not safe to halve a
-             __fp_abs(__a) < __lo ? __a / 2 + __b
-                                  : // not safe to halve b
-             __a / 2 + __b / 2; // otherwise correctly rounded
-}
-
-#  endif // _CCCL_STD_VER > 2017
-#endif // __cuda_std__
-
-_LIBCUDACXX_END_NAMESPACE_STD
-
-#if defined(_LIBCUDACXX_HAS_PARALLEL_ALGORITHMS) && _CCCL_STD_VER >= 2017
-#  include <__pstl_numeric>
-#endif
-
-#include <cuda/std/detail/libcxx/include/__pragma_pop> //__cuda_std__
+// standard mandated include
+#include <cuda/std/version>
 
 #endif // _LIBCUDACXX_NUMERIC
diff --git a/...cxx/test/libcudacxx/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.float.pass.cpp b/...cxx/test/libcudacxx/std/numerics/numeric.ops/numeric.ops.midpoint/midpoint.float.pass.cpp
@@ -0,0 +1,130 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// <numeric>
+
+// template <class _Float>
+// _Tp midpoint(_Float __a, _Float __b) noexcept
+//
+
+#include <cuda/std/__numeric_>
+#include <cuda/std/cassert>
+#include <cuda/std/cmath>
+
+#include "fp_compare.h"
+#include "test_macros.h"
+
+//  Totally arbitrary picks for precision
+template <typename T>
+__host__ __device__ constexpr T fp_error_pct();
+
+template <>
+__host__ __device__ constexpr float fp_error_pct<float>()
+{
+  return 1.0e-4f;
+}
+
+template <>
+__host__ __device__ constexpr double fp_error_pct<double>()
+{
+  return 1.0e-12;
+}
+
+template <>
+__host__ __device__ constexpr long double fp_error_pct<long double>()
+{
+  return 1.0e-13l;
+}
+
+template <typename T>
+__host__ __device__ void fp_test()
+{
+  ASSERT_SAME_TYPE(T, decltype(cuda::std::midpoint(T(), T())));
+  ASSERT_NOEXCEPT(cuda::std::midpoint(T(), T()));
+
+  constexpr T maxV = cuda::std::numeric_limits<T>::max();
+  constexpr T minV = cuda::std::numeric_limits<T>::min();
+
+  //  Things that can be compared exactly
+  assert((cuda::std::midpoint(T(0), T(0)) == T(0)));
+  assert((cuda::std::midpoint(T(2), T(4)) == T(3)));
+  assert((cuda::std::midpoint(T(4), T(2)) == T(3)));
+  assert((cuda::std::midpoint(T(3), T(4)) == T(3.5)));
+  assert((cuda::std::midpoint(T(0), T(0.4)) == T(0.2)));
+
+  //  Things that can't be compared exactly
+  constexpr T pct = fp_error_pct<T>();
+  assert((fptest_close_pct(cuda::std::midpoint(T(1.3), T(11.4)), T(6.35), pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(T(11.33), T(31.45)), T(21.39), pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(T(-1.3), T(11.4)), T(5.05), pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(T(11.4), T(-1.3)), T(5.05), pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(T(0.1), T(0.4)), T(0.25), pct)));
+
+  assert((fptest_close_pct(cuda::std::midpoint(T(11.2345), T(14.5432)), T(12.88885), pct)));
+
+  //  From e to pi
+  assert((fptest_close_pct(cuda::std::midpoint(T(2.71828182845904523536028747135266249775724709369995),
+                                               T(3.14159265358979323846264338327950288419716939937510)),
+                           T(2.92993724102441923691146542731608269097720824653752),
+                           pct)));
+
+  assert((fptest_close_pct(cuda::std::midpoint(maxV, T(0)), maxV / 2, pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(T(0), maxV), maxV / 2, pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(minV, T(0)), minV / 2, pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(T(0), minV), minV / 2, pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(maxV, maxV), maxV, pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(minV, minV), minV, pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(maxV, minV), maxV / 2, pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(minV, maxV), maxV / 2, pct)));
+
+  //  Near the min and the max
+  assert((fptest_close_pct(cuda::std::midpoint(maxV * T(0.75), maxV * T(0.50)), maxV * T(0.625), pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(maxV * T(0.50), maxV * T(0.75)), maxV * T(0.625), pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(minV * T(2), minV * T(8)), minV * T(5), pct)));
+
+  //  Big numbers of different signs
+  assert((fptest_close_pct(cuda::std::midpoint(maxV * T(0.75), maxV * T(-0.5)), maxV * T(0.125), pct)));
+  assert((fptest_close_pct(cuda::std::midpoint(maxV * T(-0.75), maxV * T(0.5)), maxV * T(-0.125), pct)));
+
+#if !defined(TEST_COMPILER_NVRTC) // missing nextafter
+  //  Check two values "close to each other"
+  T d1 = T(3.14);
+  T d0 = cuda::std::nextafter(d1, T(2));
+  T d2 = cuda::std::nextafter(d1, T(5));
+  assert(d0 < d1); // sanity checking
+  assert(d1 < d2); // sanity checking
+
+  //  Since there's nothing in between, the midpoint has to be one or the other
+  T res;
+  res = cuda::std::midpoint(d0, d1);
+  assert(res == d0 || res == d1);
+  assert(d0 <= res);
+  assert(res <= d1);
+  res = cuda::std::midpoint(d1, d0);
+  assert(res == d0 || res == d1);
+  assert(d0 <= res);
+  assert(res <= d1);
+
+  res = cuda::std::midpoint(d1, d2);
+  assert(res == d1 || res == d2);
+  assert(d1 <= res);
+  assert(res <= d2);
+  res = cuda::std::midpoint(d2, d1);
+  assert(res == d1 || res == d2);
+  assert(d1 <= res);
+  assert(res <= d2);
+#endif // !TEST_COMPILER_NVRTC
+}
+
+int main(int, char**)
+{
+  fp_test<float>();
+  fp_test<double>();
+
+  return 0;
+}