Skip to content

Commit

Permalink
Merge pull request #1196 from boostorg/GPU_Batch_11
Browse files Browse the repository at this point in the history
GPU Batch 11
  • Loading branch information
mborland authored Sep 16, 2024
2 parents 937107a + b5214b5 commit 885fd95
Show file tree
Hide file tree
Showing 15 changed files with 3,831 additions and 7 deletions.
25 changes: 25 additions & 0 deletions doc/quadrature/double_exponential.qbk
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
[/
Copyright (c) 2017 Nick Thompson
Copyright (c) 2024 Matt Borland
Use, modification and distribution are subject to the
Boost Software License, Version 1.0. (See accompanying file
LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
Expand Down Expand Up @@ -538,6 +539,30 @@ This form integrates just fine over (-log([pi]/2), +[infin]) using either the `t

[endsect] [/section:de_caveats Caveats]

[section:gpu_usage GPU Usage]

``
#include <boost/math/quadrature/exp_sinh.hpp>

namespace boost{ namespace math{ namespace quadrature {

template <class F, class Real, class Policy = policies::policy<> >
__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)

template <class F, class Real, class Policy = policies::policy<> >
__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)

}}}
``

Quadrature is additionally able to run on CUDA (NVCC and NVRTC) platforms.
The major difference is outlined in the above function signatures.
When used on device these are free standing functions instead of using OOP like on the host.
The tables of abscissas and weights are stored in shared read only memory on the device instead of being initialized when the class is constructed.
An example use case would be in the finite elements method computing a stiffness matrix since it would consist of many different functions.

[endsect] [/section:gpu_usage Usage]

[section:de_refes References]

* Hidetosi Takahasi and Masatake Mori, ['Double Exponential Formulas for Numerical Integration] Publ. Res. Inst. Math. Sci., 9 (1974), pp. 721-741.
Expand Down
1,460 changes: 1,459 additions & 1 deletion include/boost/math/quadrature/detail/exp_sinh_detail.hpp

Large diffs are not rendered by default.

869 changes: 867 additions & 2 deletions include/boost/math/quadrature/detail/sinh_sinh_detail.hpp

Large diffs are not rendered by default.

83 changes: 81 additions & 2 deletions include/boost/math/quadrature/exp_sinh.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,15 @@
#ifndef BOOST_MATH_QUADRATURE_EXP_SINH_HPP
#define BOOST_MATH_QUADRATURE_EXP_SINH_HPP

#include <boost/math/tools/config.hpp>
#include <boost/math/quadrature/detail/exp_sinh_detail.hpp>

#ifndef BOOST_MATH_HAS_NVRTC

#include <cmath>
#include <limits>
#include <memory>
#include <string>
#include <boost/math/quadrature/detail/exp_sinh_detail.hpp>

namespace boost{ namespace math{ namespace quadrature {

Expand Down Expand Up @@ -98,4 +102,79 @@ auto exp_sinh<Real, Policy>::integrate(const F& f, Real tolerance, Real* error,


}}}
#endif

#endif // BOOST_MATH_HAS_NVRTC

#ifdef BOOST_MATH_ENABLE_CUDA

#include <boost/math/tools/type_traits.hpp>
#include <boost/math/tools/cstdint.hpp>
#include <boost/math/tools/precision.hpp>
#include <boost/math/policies/error_handling.hpp>
#include <boost/math/constants/constants.hpp>

namespace boost {
namespace math {
namespace quadrature {

template <class F, class Real, class Policy = policies::policy<> >
__device__ auto exp_sinh_integrate(const F& f, Real a, Real b, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
{
BOOST_MATH_STD_USING

using K = decltype(f(a));
static_assert(!boost::math::is_integral<K>::value,
"The return type cannot be integral, it must be either a real or complex floating point type.");

constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate";

// Neither limit may be a NaN:
if((boost::math::isnan)(a) || (boost::math::isnan)(b))
{
return static_cast<K>(policies::raise_domain_error(function, "NaN supplied as one limit of integration - sorry I don't know what to do", a, Policy()));
}
// Right limit is infinite:
if ((boost::math::isfinite)(a) && (b >= boost::math::tools::max_value<Real>()))
{
// If a = 0, don't use an additional level of indirection:
if (a == static_cast<Real>(0))
{
return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels);
}
const auto u = [&](Real t)->K { return f(t + a); };
return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels);
}

if ((boost::math::isfinite)(b) && a <= -boost::math::tools::max_value<Real>())
{
const auto u = [&](Real t)->K { return f(b-t);};
return detail::exp_sinh_integrate_impl(u, tolerance, error, L1, levels);
}

// Infinite limits:
if ((a <= -boost::math::tools::max_value<Real>()) && (b >= boost::math::tools::max_value<Real>()))
{
return static_cast<K>(policies::raise_domain_error(function, "Use sinh_sinh quadrature for integration over the whole real line; exp_sinh is for half infinite integrals.", a, Policy()));
}
// If we get to here then both ends must necessarily be finite:
return static_cast<K>(policies::raise_domain_error(function, "Use tanh_sinh quadrature for integration over finite domains; exp_sinh is for half infinite integrals.", a, Policy()));
}

template <class F, class Real, class Policy = policies::policy<> >
__device__ auto exp_sinh_integrate(const F& f, Real tolerance, Real* error, Real* L1, boost::math::size_t* levels)
{
BOOST_MATH_STD_USING
constexpr auto function = "boost::math::quadrature::exp_sinh<%1%>::integrate";
if (abs(tolerance) > 1) {
return policies::raise_domain_error(function, "The tolerance provided (%1%) is unusually large; did you confuse it with a domain bound?", tolerance, Policy());
}
return detail::exp_sinh_integrate_impl(f, tolerance, error, L1, levels);
}

} // namespace quadrature
} // namespace math
} // namespace boost

#endif // BOOST_MATH_ENABLE_CUDA

#endif // BOOST_MATH_QUADRATURE_EXP_SINH_HPP
33 changes: 31 additions & 2 deletions include/boost/math/quadrature/sinh_sinh.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// Copyright Nick Thompson, 2017
// Copyright Matt Borland, 2024
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt
Expand All @@ -15,10 +16,17 @@
#ifndef BOOST_MATH_QUADRATURE_SINH_SINH_HPP
#define BOOST_MATH_QUADRATURE_SINH_SINH_HPP

#include <boost/math/tools/config.hpp>
#include <boost/math/tools/precision.hpp>
#include <boost/math/tools/cstdint.hpp>
#include <boost/math/quadrature/detail/sinh_sinh_detail.hpp>
#include <boost/math/policies/error_handling.hpp>

#ifndef BOOST_MATH_HAS_NVRTC

#include <cmath>
#include <limits>
#include <memory>
#include <boost/math/quadrature/detail/sinh_sinh_detail.hpp>

namespace boost{ namespace math{ namespace quadrature {

Expand All @@ -40,4 +48,25 @@ class sinh_sinh
};

}}}
#endif

#endif // BOOST_MATH_HAS_NVRTC

#ifdef BOOST_MATH_ENABLE_CUDA

namespace boost {
namespace math {
namespace quadrature {

template <class F, class Real, class Policy = boost::math::policies::policy<> >
__device__ auto sinh_sinh_integrate(const F& f, Real tol = boost::math::tools::root_epsilon<Real>(), Real* error = nullptr, Real* L1 = nullptr, boost::math::size_t* levels = nullptr)
{
return detail::sinh_sinh_integrate_impl(f, tol, error, L1, levels);
}

} // namespace quadrature
} // namespace math
} // namespace boost

#endif // BOOST_MATH_ENABLE_CUDA

#endif // BOOST_MATH_QUADRATURE_SINH_SINH_HPP
6 changes: 6 additions & 0 deletions test/cuda_jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ project : requirements
[ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
;

# Quad
run test_exp_sinh_quad_float.cu ;
run test_exp_sinh_quad_double.cu ;
run test_sinh_sinh_quad_float.cu ;
run test_sinh_sinh_quad_double.cu ;

# Distributions
run test_arcsine_cdf_double.cu ;
run test_arcsine_cdf_float.cu ;
Expand Down
6 changes: 6 additions & 0 deletions test/nvrtc_jamfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ project : requirements
[ requires cxx14_decltype_auto cxx14_generic_lambdas cxx14_return_type_deduction cxx14_variable_templates cxx14_constexpr ]
;

# Quad
run test_exp_sinh_quad_nvrtc_float.cpp ;
run test_exp_sinh_quad_nvrtc_double.cpp ;
run test_sinh_sinh_quad_nvrtc_float.cpp ;
run test_sinh_sinh_quad_nvrtc_double.cpp ;

# Distributions
run test_arcsine_cdf_nvrtc_double.cpp ;
run test_arcsine_cdf_nvrtc_float.cpp ;
Expand Down
133 changes: 133 additions & 0 deletions test/test_exp_sinh_quad_double.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@

// Copyright John Maddock 2016.
// Copyright Matt Borland 2024.
// Use, modification and distribution are subject to the
// Boost Software License, Version 1.0. (See accompanying file
// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <iostream>
#include <iomanip>
#include <vector>
#include <boost/math/quadrature/exp_sinh.hpp>
#include <boost/math/special_functions.hpp>
#include <boost/math/tools/precision.hpp>
#include "cuda_managed_ptr.hpp"
#include "stopwatch.hpp"

// For the CUDA runtime routines (prefixed with "cuda_")
#include <cuda_runtime.h>

typedef double float_type;

__host__ __device__ float_type func(float_type x)
{
BOOST_MATH_STD_USING
return 1/(1+x*x);
}

/**
* CUDA Kernel Device code
*
*/
__global__ void cuda_test(float_type *out, int numElements)
{
int i = blockDim.x * blockIdx.x + threadIdx.x;
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::size_t levels;

if (i < numElements)
{
out[i] = boost::math::quadrature::exp_sinh_integrate(func, tol, &error, &L1, &levels);
}
}

/**
* Host main routine
*/
int main(void)
{
// Error code to check return values for CUDA calls
cudaError_t err = cudaSuccess;

// Print the vector length to be used, and compute its size
int numElements = 50000;
std::cout << "[Vector operation on " << numElements << " elements]" << std::endl;

// Allocate the managed input vector A
cuda_managed_ptr<float_type> input_vector(numElements);

// Allocate the managed output vector C
cuda_managed_ptr<float_type> output_vector(numElements);

// Initialize the input vectors
for (int i = 0; i < numElements; ++i)
{
input_vector[i] = M_PI * (static_cast<float_type>(i) / numElements);
}

// Launch the Vector Add CUDA Kernel
int threadsPerBlock = 512;
int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
std::cout << "CUDA kernel launch with " << blocksPerGrid << " blocks of " << threadsPerBlock << " threads" << std::endl;

watch w;

cuda_test<<<blocksPerGrid, threadsPerBlock>>>(output_vector.get(), numElements);
cudaDeviceSynchronize();

std::cout << "CUDA kernal done in: " << w.elapsed() << "s" << std::endl;

err = cudaGetLastError();

if (err != cudaSuccess)
{
std::cerr << "Failed to launch vectorAdd kernel (error code " << cudaGetErrorString(err) << ")!" << std::endl;
return EXIT_FAILURE;
}

// Verify that the result vector is correct
std::vector<float_type> results;
results.reserve(numElements);
w.reset();
float_type tol = boost::math::tools::root_epsilon<float_type>();
float_type error;
float_type L1;
boost::math::quadrature::exp_sinh<float_type> integrator;
for(int i = 0; i < numElements; ++i)
{
results.push_back(integrator.integrate(func, tol, &error, &L1));
}
double t = w.elapsed();
// check the results
int failed_count = 0;
for(int i = 0; i < numElements; ++i)
{
const auto eps = boost::math::epsilon_difference(output_vector[i], results[i]);
if (eps > 10)
{
std::cerr << std::setprecision(std::numeric_limits<float_type>::digits10)
<< "Result verification failed at element " << i << "!\n"
<< "Device: " << output_vector[i]
<< "\n Host: " << results[i]
<< "\n Eps: " << eps << "\n";
failed_count++;
}
if (failed_count > 100)
{
break;
}
}

if (failed_count != 0)
{
std::cout << "Test FAILED" << std::endl;
return EXIT_FAILURE;
}

std::cout << "Test PASSED, normal calculation time: " << t << "s" << std::endl;
std::cout << "Done\n";

return 0;
}
Loading

0 comments on commit 885fd95

Please sign in to comment.