Skip to content

Commit

Permalink
Move std::midpoint to its own file
Browse files Browse the repository at this point in the history
  • Loading branch information
miscco committed Apr 30, 2024
1 parent f4b1fcd commit 6cbaa6e
Show file tree
Hide file tree
Showing 8 changed files with 531 additions and 89 deletions.
104 changes: 104 additions & 0 deletions libcudacxx/include/cuda/std/__numeric/midpoint.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef _LIBCUDACXX___NUMERIC_MIDPOINT_H
#define _LIBCUDACXX___NUMERIC_MIDPOINT_H

#include <cuda/std/detail/__config>

#if defined(_CCCL_IMPLICIT_SYSTEM_HEADER_GCC)
# pragma GCC system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_CLANG)
# pragma clang system_header
#elif defined(_CCCL_IMPLICIT_SYSTEM_HEADER_MSVC)
# pragma system_header
#endif // no system header

#include <cuda/std/__type_traits/enable_if.h>
#include <cuda/std/__type_traits/is_floating_point.h>
#include <cuda/std/__type_traits/is_integral.h>
#include <cuda/std/__type_traits/is_null_pointer.h>
#include <cuda/std/__type_traits/is_object.h>
#include <cuda/std/__type_traits/is_pointer.h>
#include <cuda/std/__type_traits/is_same.h>
#include <cuda/std/__type_traits/is_void.h>
#include <cuda/std/__type_traits/make_unsigned.h>
#include <cuda/std/__type_traits/remove_pointer.h>
#include <cuda/std/cstddef>
#include <cuda/std/limits>

// comes last
#include <cuda/std/detail/__pragma_push>

_LIBCUDACXX_BEGIN_NAMESPACE_STD

template <class _Tp>
_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14
__enable_if_t<_LIBCUDACXX_TRAIT(is_integral, _Tp) && !_LIBCUDACXX_TRAIT(is_same, bool, _Tp)
&& !_LIBCUDACXX_TRAIT(is_null_pointer, _Tp),
_Tp>
midpoint(_Tp __a, _Tp __b) noexcept
{
using _Up = __make_unsigned_t<_Tp>;

if (__a > __b)
{
const _Up __diff = _Up(__a) - _Up(__b);
return static_cast<_Tp>(__a - static_cast<_Tp>(__diff / 2));
}
else
{
const _Up __diff = _Up(__b) - _Up(__a);
return static_cast<_Tp>(__a + static_cast<_Tp>(__diff / 2));
}
}

template <
class _Tp,
__enable_if_t<_LIBCUDACXX_TRAIT(is_object, _Tp) && !_LIBCUDACXX_TRAIT(is_void, _Tp) && (sizeof(_Tp) > 0), int> = 0>
_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Tp* midpoint(_Tp* __a, _Tp* __b) noexcept
{
return __a + _CUDA_VSTD::midpoint(ptrdiff_t(0), __b - __a);
}

template <typename _Tp>
_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 int __sign(_Tp __val)
{
return (_Tp(0) < __val) - (__val < _Tp(0));
}

template <typename _Fp>
_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14 _Fp __fp_abs(_Fp __f)
{
return __f >= 0 ? __f : -__f;
}

template <class _Fp>
_CCCL_NODISCARD _LIBCUDACXX_INLINE_VISIBILITY _CCCL_CONSTEXPR_CXX14
__enable_if_t<_LIBCUDACXX_TRAIT(is_floating_point, _Fp), _Fp>
midpoint(_Fp __a, _Fp __b) noexcept
{
_CCCL_CONSTEXPR_CXX14 _Fp __lo = numeric_limits<_Fp>::min() * 2;
_CCCL_CONSTEXPR_CXX14 _Fp __hi = numeric_limits<_Fp>::max() / 2;
return _CUDA_VSTD::__fp_abs(__a) <= __hi && _CUDA_VSTD::__fp_abs(__b) <= __hi
? // typical case: overflow is impossible
(__a + __b) / 2
: // always correctly rounded
_CUDA_VSTD::__fp_abs(__a) < __lo ? __a + __b / 2 : // not safe to halve a
_CUDA_VSTD::__fp_abs(__b) < __lo
? __a / 2 + __b
: // not safe to halve b
__a / 2 + __b / 2; // otherwise correctly rounded
}

_LIBCUDACXX_END_NAMESPACE_STD

#include <cuda/std/detail/__pragma_pop>

#endif // _LIBCUDACXX___NUMERIC_MIDPOINT_H
2 changes: 0 additions & 2 deletions libcudacxx/include/cuda/std/__type_traits/is_null_pointer.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,9 @@ template <class _Tp>
struct _LIBCUDACXX_TEMPLATE_VIS __is_nullptr_t : public __is_nullptr_t_impl<__remove_cv_t<_Tp>>
{};

#if _CCCL_STD_VER > 2011
template <class _Tp>
struct _LIBCUDACXX_TEMPLATE_VIS is_null_pointer : public __is_nullptr_t_impl<__remove_cv_t<_Tp>>
{};
#endif

#if _CCCL_STD_VER > 2011 && !defined(_LIBCUDACXX_HAS_NO_VARIABLE_TEMPLATES)
template <class _Tp>
Expand Down
81 changes: 3 additions & 78 deletions libcudacxx/include/cuda/std/detail/libcxx/include/numeric
Original file line number Diff line number Diff line change
Expand Up @@ -155,97 +155,22 @@ floating_point midpoint(floating_point a, floating_point b); // C++20
# pragma system_header
#endif // no system header

#include <cuda/std/__iterator/iterator_traits.h>
#include <cuda/std/__numeric/accumulate.h>
#include <cuda/std/__numeric/adjacent_difference.h>
#include <cuda/std/__numeric/exclusive_scan.h>
#include <cuda/std/__numeric/gcd_lcm.h>
#include <cuda/std/__numeric/inclusive_scan.h>
#include <cuda/std/__numeric/inner_product.h>
#include <cuda/std/__numeric/iota.h>
#include <cuda/std/__numeric/midpoint.h>
#include <cuda/std/__numeric/partial_sum.h>
#include <cuda/std/__numeric/reduce.h>
#include <cuda/std/__numeric/transform_exclusive_scan.h>
#include <cuda/std/__numeric/transform_inclusive_scan.h>
#include <cuda/std/__numeric/transform_reduce.h>
#include <cuda/std/__utility/move.h>
#include <cuda/std/cmath> // for isnormal
#include <cuda/std/detail/libcxx/include/__assert> // all public C++ headers provide the assertion handler
#include <cuda/std/detail/libcxx/include/__pragma_push>
#include <cuda/std/functional>
#include <cuda/std/limits> // for numeric_limits
#include <cuda/std/version>

_LIBCUDACXX_BEGIN_NAMESPACE_STD

#ifndef __cuda_std__
# if _CCCL_STD_VER > 2017
template <class _Tp>
_LIBCUDACXX_INLINE_VISIBILITY constexpr enable_if_t<
is_integral_v<_Tp> && !is_same_v<bool, _Tp> && !is_null_pointer_v<_Tp>,
_Tp>
midpoint(_Tp __a, _Tp __b) noexcept _LIBCUDACXX_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK
{
using _Up = std::make_unsigned_t<_Tp>;

int __sign = 1;
_Up __m = __a;
_Up __M = __b;
if (__a > __b)
{
__sign = -1;
__m = __b;
__M = __a;
}
return __a + __sign * _Tp(_Up(__M - __m) >> 1);
}

template <class _TPtr>
_LIBCUDACXX_INLINE_VISIBILITY constexpr enable_if_t<
is_pointer_v<_TPtr> && is_object_v<remove_pointer_t<_TPtr>> && !is_void_v<remove_pointer_t<_TPtr>>
&& (sizeof(remove_pointer_t<_TPtr>) > 0),
_TPtr>
midpoint(_TPtr __a, _TPtr __b) noexcept
{
return __a + _CUDA_VSTD::midpoint(ptrdiff_t(0), __b - __a);
}

template <typename _Tp>
constexpr int __sign(_Tp __val)
{
return (_Tp(0) < __val) - (__val < _Tp(0));
}

template <typename _Fp>
constexpr _Fp __fp_abs(_Fp __f)
{
return __f >= 0 ? __f : -__f;
}

template <class _Fp>
_LIBCUDACXX_INLINE_VISIBILITY constexpr enable_if_t<is_floating_point_v<_Fp>, _Fp> midpoint(_Fp __a, _Fp __b) noexcept
{
constexpr _Fp __lo = numeric_limits<_Fp>::min() * 2;
constexpr _Fp __hi = numeric_limits<_Fp>::max() / 2;
return __fp_abs(__a) <= __hi && __fp_abs(__b) <= __hi
? // typical case: overflow is impossible
(__a + __b) / 2
: // always correctly rounded
__fp_abs(__a) < __lo ? __a + __b / 2 : // not safe to halve a
__fp_abs(__a) < __lo ? __a / 2 + __b
: // not safe to halve b
__a / 2 + __b / 2; // otherwise correctly rounded
}

# endif // _CCCL_STD_VER > 2017
#endif // __cuda_std__

_LIBCUDACXX_END_NAMESPACE_STD

#if defined(_LIBCUDACXX_HAS_PARALLEL_ALGORITHMS) && _CCCL_STD_VER >= 2017
# include <__pstl_numeric>
#endif

#include <cuda/std/detail/libcxx/include/__pragma_pop> //__cuda_std__
// standard mandated include
#include <cuda/std/version>

#endif // _LIBCUDACXX_NUMERIC
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// <numeric>

// template <class _Float>
// _Tp midpoint(_Float __a, _Float __b) noexcept
//

#include <cuda/std/__numeric_>
#include <cuda/std/cassert>
#include <cuda/std/cmath>

#include "fp_compare.h"
#include "test_macros.h"

// Totally arbitrary picks for precision
template <typename T>
__host__ __device__ constexpr T fp_error_pct();

template <>
__host__ __device__ constexpr float fp_error_pct<float>()
{
return 1.0e-4f;
}

template <>
__host__ __device__ constexpr double fp_error_pct<double>()
{
return 1.0e-12;
}

template <>
__host__ __device__ constexpr long double fp_error_pct<long double>()
{
return 1.0e-13l;
}

template <typename T>
__host__ __device__ void fp_test()
{
ASSERT_SAME_TYPE(T, decltype(cuda::std::midpoint(T(), T())));
ASSERT_NOEXCEPT(cuda::std::midpoint(T(), T()));

constexpr T maxV = cuda::std::numeric_limits<T>::max();
constexpr T minV = cuda::std::numeric_limits<T>::min();

// Things that can be compared exactly
assert((cuda::std::midpoint(T(0), T(0)) == T(0)));
assert((cuda::std::midpoint(T(2), T(4)) == T(3)));
assert((cuda::std::midpoint(T(4), T(2)) == T(3)));
assert((cuda::std::midpoint(T(3), T(4)) == T(3.5)));
assert((cuda::std::midpoint(T(0), T(0.4)) == T(0.2)));

// Things that can't be compared exactly
constexpr T pct = fp_error_pct<T>();
assert((fptest_close_pct(cuda::std::midpoint(T(1.3), T(11.4)), T(6.35), pct)));
assert((fptest_close_pct(cuda::std::midpoint(T(11.33), T(31.45)), T(21.39), pct)));
assert((fptest_close_pct(cuda::std::midpoint(T(-1.3), T(11.4)), T(5.05), pct)));
assert((fptest_close_pct(cuda::std::midpoint(T(11.4), T(-1.3)), T(5.05), pct)));
assert((fptest_close_pct(cuda::std::midpoint(T(0.1), T(0.4)), T(0.25), pct)));

assert((fptest_close_pct(cuda::std::midpoint(T(11.2345), T(14.5432)), T(12.88885), pct)));

// From e to pi
assert((fptest_close_pct(cuda::std::midpoint(T(2.71828182845904523536028747135266249775724709369995),
T(3.14159265358979323846264338327950288419716939937510)),
T(2.92993724102441923691146542731608269097720824653752),
pct)));

assert((fptest_close_pct(cuda::std::midpoint(maxV, T(0)), maxV / 2, pct)));
assert((fptest_close_pct(cuda::std::midpoint(T(0), maxV), maxV / 2, pct)));
assert((fptest_close_pct(cuda::std::midpoint(minV, T(0)), minV / 2, pct)));
assert((fptest_close_pct(cuda::std::midpoint(T(0), minV), minV / 2, pct)));
assert((fptest_close_pct(cuda::std::midpoint(maxV, maxV), maxV, pct)));
assert((fptest_close_pct(cuda::std::midpoint(minV, minV), minV, pct)));
assert((fptest_close_pct(cuda::std::midpoint(maxV, minV), maxV / 2, pct)));
assert((fptest_close_pct(cuda::std::midpoint(minV, maxV), maxV / 2, pct)));

// Near the min and the max
assert((fptest_close_pct(cuda::std::midpoint(maxV * T(0.75), maxV * T(0.50)), maxV * T(0.625), pct)));
assert((fptest_close_pct(cuda::std::midpoint(maxV * T(0.50), maxV * T(0.75)), maxV * T(0.625), pct)));
assert((fptest_close_pct(cuda::std::midpoint(minV * T(2), minV * T(8)), minV * T(5), pct)));

// Big numbers of different signs
assert((fptest_close_pct(cuda::std::midpoint(maxV * T(0.75), maxV * T(-0.5)), maxV * T(0.125), pct)));
assert((fptest_close_pct(cuda::std::midpoint(maxV * T(-0.75), maxV * T(0.5)), maxV * T(-0.125), pct)));

#if !defined(TEST_COMPILER_NVRTC) // missing nextafter
// Check two values "close to each other"
T d1 = T(3.14);
T d0 = cuda::std::nextafter(d1, T(2));
T d2 = cuda::std::nextafter(d1, T(5));
assert(d0 < d1); // sanity checking
assert(d1 < d2); // sanity checking

// Since there's nothing in between, the midpoint has to be one or the other
T res;
res = cuda::std::midpoint(d0, d1);
assert(res == d0 || res == d1);
assert(d0 <= res);
assert(res <= d1);
res = cuda::std::midpoint(d1, d0);
assert(res == d0 || res == d1);
assert(d0 <= res);
assert(res <= d1);

res = cuda::std::midpoint(d1, d2);
assert(res == d1 || res == d2);
assert(d1 <= res);
assert(res <= d2);
res = cuda::std::midpoint(d2, d1);
assert(res == d1 || res == d2);
assert(d1 <= res);
assert(res <= d2);
#endif // !TEST_COMPILER_NVRTC
}

int main(int, char**)
{
fp_test<float>();
fp_test<double>();

return 0;
}
Loading

0 comments on commit 6cbaa6e

Please sign in to comment.