From 455a84460365d4cc8a87807b6ba611913f5fbb34 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 18 Jan 2024 03:52:35 +0900 Subject: [PATCH 1/6] add SYCL back with oneMKL for Intel GPUs --- common/src/CMakeLists.txt | 8 + common/src/KokkosFFT_SYCL_types.hpp | 256 +++++++++++++++++++++++++++ fft/src/KokkosFFT_Plans.hpp | 14 +- fft/src/KokkosFFT_SYCL_plans.hpp | 179 +++++++++++++++++++ fft/src/KokkosFFT_SYCL_transform.hpp | 67 +++++++ fft/src/KokkosFFT_Transform.hpp | 7 + 6 files changed, 527 insertions(+), 4 deletions(-) create mode 100644 common/src/KokkosFFT_SYCL_types.hpp create mode 100644 fft/src/KokkosFFT_SYCL_plans.hpp create mode 100644 fft/src/KokkosFFT_SYCL_transform.hpp diff --git a/common/src/CMakeLists.txt b/common/src/CMakeLists.txt index 9f59f4d3..de5ffe6e 100644 --- a/common/src/CMakeLists.txt +++ b/common/src/CMakeLists.txt @@ -21,6 +21,14 @@ elseif(Kokkos_ENABLE_HIP) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE) endif() +elseif(Kokkos_ENABLE_SYCL) +find_package(MKL REQUIRED COMPONENTS SYCL) + target_link_libraries(common INTERFACE MKL::MKL_SYCL) + if(KokkosFFT_ENABLE_HOST_AND_DEVICE) + find_package(FFTW MODULE REQUIRED) + target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) + target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE) + endif() elseif(Kokkos_ENABLE_OPENMP) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) diff --git a/common/src/KokkosFFT_SYCL_types.hpp b/common/src/KokkosFFT_SYCL_types.hpp new file mode 100644 index 00000000..bdcead04 --- /dev/null +++ b/common/src/KokkosFFT_SYCL_types.hpp @@ -0,0 +1,256 @@ +#ifndef KOKKOSFFT_SYCL_TYPES_HPP +#define KOKKOSFFT_SYCL_TYPES_HPP + +#include +#include +#include +#include +#include "KokkosFFT_utils.hpp" + +// Check the size of complex type +// [TO DO] I guess this kind of test is already made by Kokkos itself +static_assert(sizeof(std::complex) == sizeof(Kokkos::complex)); +static_assert(alignof(std::complex) <= alignof(Kokkos::complex)); + +static_assert(sizeof(std::complex) == sizeof(Kokkos::complex)); +static_assert(alignof(std::complex) <= + alignof(Kokkos::complex)); + +#ifdef ENABLE_HOST_AND_DEVICE +#include +static_assert(sizeof(fftwf_complex) == sizeof(Kokkos::complex)); +static_assert(alignof(fftwf_complex) <= alignof(Kokkos::complex)); + +static_assert(sizeof(fftw_complex) == sizeof(Kokkos::complex)); +static_assert(alignof(fftw_complex) <= alignof(Kokkos::complex)); +#endif + +namespace KokkosFFT { +namespace Impl { +enum class Direction { + Forward, + Backward, +}; + +using FFTDirectionType = int; +constexpr FFTDirectionType MKL_FFT_FORWARD = 1; +constexpr FFTDirectionType MKL_FFT_BACKWARD = -1; + +enum class FFTWTransformType { R2C, D2Z, C2R, Z2D, C2C, Z2Z }; + +template +using TransformType = FFTWTransformType; + +// Define fft transform types +template +struct transform_type { + static_assert(std::is_same_v, + "Real to real transform is unavailable"); +}; + +template +struct transform_type> { + static_assert(std::is_same_v, + "T1 and T2 should have the same precision"); + using _TransformType = TransformType; + + static constexpr _TransformType m_type = std::is_same_v + ? FFTWTransformType::R2C + : FFTWTransformType::D2Z; + static constexpr _TransformType type() { return m_type; }; +}; + +template +struct transform_type, T2> { + static_assert(std::is_same_v, + "T1 and T2 should have the same precision"); + using _TransformType = TransformType; + + static constexpr _TransformType m_type = std::is_same_v + ? FFTWTransformType::C2R + : FFTWTransformType::Z2D; + static constexpr _TransformType type() { return m_type; }; +}; + +template +struct transform_type, + Kokkos::complex> { + static_assert(std::is_same_v, + "T1 and T2 should have the same precision"); + using _TransformType = TransformType; + + static constexpr _TransformType m_type = std::is_same_v + ? FFTWTransformType::C2C + : FFTWTransformType::Z2Z; + static constexpr _TransformType type() { return m_type; }; +}; + +#ifdef ENABLE_HOST_AND_DEVICE + +template +struct FFTDataType { + using float32 = float; + using float64 = double; + + using complex64 = std::conditional_t< + std::is_same_v, + std::complex, fftwf_complex>; + using complex128 = std::conditional_t< + std::is_same_v, + std::complex, fftw_complex>; +}; + +template +struct FFTPlanType { + using fftwHandle = + std::conditional_t, float>, + fftwf_plan, fftw_plan>; + using precision = + std::conditional_t, float>, + oneapi::mkl::dft::precision::SINGLE, + oneapi::mkl::dft::precision::DOUBLE>; + using type = std::conditional_t, + cufftHandle, fftwHandle>; +}; + +template +struct FFTPlanType { + static_assert(std::is_same_v, + "Real to real transform is unavailable"); +}; + +template +struct FFTPlanType> { + using float_type = T1; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::REAL; + + using fftwHandle = std::conditional_t< + std::is_same_v, float>, + fftwf_plan, fftw_plan>; + + using onemklHandle = oneapi::mkl::dft::descriptor; + using type = std::conditional_t< + std::is_same_v, onemklHandle, + fftwHandle>; +}; + +template +struct FFTPlanType, T2> { + using float_type = T2; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::REAL; + + using fftwHandle = std::conditional_t< + std::is_same_v, float>, + fftwf_plan, fftw_plan>; + + using onemklHandle = oneapi::mkl::dft::descriptor; + using type = std::conditional_t< + std::is_same_v, onemklHandle, + fftwHandle>; +}; + +template +struct FFTPlanType, Kokkos::complex> { + using float_type = KokkosFFT::Impl::real_type_t; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::COMPLEX; + + using fftwHandle = std::conditional_t< + std::is_same_v, float>, + fftwf_plan, fftw_plan>; + + using onemklHandle = oneapi::mkl::dft::descriptor; + using type = std::conditional_t< + std::is_same_v, onemklHandle, + fftwHandle>; +}; + +template +auto direction_type(Direction direction) { + static constexpr FFTDirectionType _FORWARD = + std::is_same_v + ? MKL_FFT_FORWARD + : FFTW_FORWARD; + static constexpr FFTDirectionType _BACKWARD = + std::is_same_v + ? MKL_FFT_BACKWARD + : FFTW_BACKWARD; + return direction == Direction::Forward ? _FORWARD : _BACKWARD; +} +#else +template +struct FFTDataType { + using float32 = float; + using float64 = double; + using complex64 = std::complex; + using complex128 = std::complex; +}; + +template +struct FFTPlanType { + static_assert(std::is_same_v, + "Real to real transform is unavailable"); +}; + +template +struct FFTPlanType> { + using float_type = T1; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::REAL; + + using type = oneapi::mkl::dft::descriptor; +}; + +template +struct FFTPlanType, T2> { + using float_type = T2; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::REAL; + + using type = oneapi::mkl::dft::descriptor; +}; + +template +struct FFTPlanType, Kokkos::complex> { + using float_type = KokkosFFT::Impl::real_type_t; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::COMPLEX; + + using type = oneapi::mkl::dft::descriptor; +}; + +template +auto direction_type(Direction direction) { + return direction == Direction::Forward ? MKL_FFT_FORWARD : MKL_FFT_BACKWARD; +} +#endif +} // namespace Impl +} // namespace KokkosFFT + +#endif \ No newline at end of file diff --git a/fft/src/KokkosFFT_Plans.hpp b/fft/src/KokkosFFT_Plans.hpp index e1c03fd4..2186ea36 100644 --- a/fft/src/KokkosFFT_Plans.hpp +++ b/fft/src/KokkosFFT_Plans.hpp @@ -18,6 +18,11 @@ using default_device = Kokkos::HIP; #ifdef ENABLE_HOST_AND_DEVICE #include "KokkosFFT_OpenMP_plans.hpp" #endif +#elif defined(KOKKOS_ENABLE_SYCL) +#include "KokkosFFT_SYCL_plans.hpp" +#ifdef ENABLE_HOST_AND_DEVICE +#include "KokkosFFT_OpenMP_plans.hpp" +#endif #elif defined(KOKKOS_ENABLE_OPENMP) using default_device = Kokkos::OpenMP; #include "KokkosFFT_OpenMP_plans.hpp" @@ -39,13 +44,14 @@ class Plan { using out_value_type = typename OutViewType::non_const_value_type; using float_type = KokkosFFT::Impl::real_type_t; using fft_plan_type = - typename KokkosFFT::Impl::FFTPlanType::type; + typename KokkosFFT::Impl::FFTPlanType::type; using fft_size_type = std::size_t; using map_type = axis_type; using nonConstInViewType = std::remove_cv_t; using nonConstOutViewType = std::remove_cv_t; - fft_plan_type m_plan; + std::unique_ptr m_plan; fft_size_type m_fft_size; map_type m_map, m_map_inv; bool m_is_transpose_needed; @@ -91,7 +97,7 @@ class Plan { KokkosFFT::Impl::_create(exec_space, m_plan, in, out, direction, axes); } - ~Plan() { _destroy(m_plan); } + ~Plan() { _destroy(m_plan); } template @@ -124,7 +130,7 @@ class Plan { // [TO DO] Check view extents } - fft_plan_type plan() const { return m_plan; } + fft_plan_type& plan() const { return *m_plan; } fft_size_type fft_size() const { return m_fft_size; } bool is_transpose_needed() const { return m_is_transpose_needed; } map_type map() const { return m_map; } diff --git a/fft/src/KokkosFFT_SYCL_plans.hpp b/fft/src/KokkosFFT_SYCL_plans.hpp new file mode 100644 index 00000000..ce3109af --- /dev/null +++ b/fft/src/KokkosFFT_SYCL_plans.hpp @@ -0,0 +1,179 @@ +#ifndef KOKKOSFFT_SYCL_PLANS_HPP +#define KOKKOSFFT_SYCL_PLANS_HPP + +#include +#include +#include "KokkosFFT_SYCL_types.hpp" +#include "KokkosFFT_layouts.hpp" + +namespace KokkosFFT { +namespace Impl { +// Helper to convert the integer type of vectors +template +auto convert_int_type(std::vector& in) -> std::vector { + std::vector out(in.size()); + std::transform( + in.begin(), in.end(), out.begin(), + [](const InType v) -> OutType { return static_cast(v); }); + + return out; +} + +// Helper to compute strides from extents +// (n0, n1) -> (0, n1, 1) +// (n0) -> (0, 1) +template +auto compute_strides(std::vector& extents) -> std::vector { + std::vector out; + + OutType stride = 1; + for (auto it = extents.rbegin(); it != extents.rend(); ++it) { + out.push_back(stride); + stride *= static_cast(*it); + } + out.push_back(0); + std::reverse(out.begin(), out.end()); + + return out; +} + +// ND transform +template < + typename ExecutionSpace, typename PlanType, typename InViewType, + typename OutViewType, + std::enable_if_t, + std::nullptr_t> = nullptr> +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, + const InViewType& in, const OutViewType& out, + [[maybe_unused]] Direction direction) { + static_assert(Kokkos::is_view::value, + "KokkosFFT::_create: InViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosFFT::_create: OutViewType is not a Kokkos::View."); + using in_value_type = typename InViewType::non_const_value_type; + using out_value_type = typename OutViewType::non_const_value_type; + + const int rank = InViewType::rank(); + const int axis = -1; + const int howmany = 1; + constexpr auto type = + KokkosFFT::Impl::transform_type::type(); + auto [in_extents, out_extents, fft_extents] = + KokkosFFT::Impl::get_extents(in, out, axis); + int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, + std::multiplies<>()); + int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, + std::multiplies<>()); + int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, + std::multiplies<>()); + + auto* idata = reinterpret_cast::type*>(in.data()); + auto* odata = reinterpret_cast::type*>(out.data()); + auto sign = KokkosFFT::Impl::direction_type(direction); + + // Create plan + auto in_strides = compute_strides(in_extents); + auto out_strides = compute_strides(out_extents); + auto _fft_extents = convert_int_type(fft_extents); + + plan = std::make_unique(_fft_extents); + plan->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, + in_strides.data()); + plan->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, + out_strides.data()); + plan->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_NOT_INPLACE); + plan->set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE, + DFTI_COMPLEX_COMPLEX); + + sycl::queue q = exec_space.sycl_queue(); + plan->commit(q); + + return fft_size; +} + +// batched transform, over ND Views +template < + typename ExecutionSpace, typename PlanType, typename InViewType, + typename OutViewType, std::size_t fft_rank = 1, + std::enable_if_t, + std::nullptr_t> = nullptr> +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, + const InViewType& in, const OutViewType& out, + [[maybe_unused]] Direction direction, axis_type axes) { + static_assert(Kokkos::is_view::value, + "KokkosFFT::_create: InViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosFFT::_create: OutViewType is not a Kokkos::View."); + using in_value_type = typename InViewType::non_const_value_type; + using out_value_type = typename OutViewType::non_const_value_type; + + static_assert( + InViewType::rank() >= fft_rank, + "KokkosFFT::_create: Rank of View must be larger than Rank of FFT."); + const int rank = fft_rank; + + constexpr auto type = + KokkosFFT::Impl::transform_type::type(); + auto [in_extents, out_extents, fft_extents, howmany] = + KokkosFFT::Impl::get_extents_batched(in, out, axes); + int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, + std::multiplies<>()); + int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, + std::multiplies<>()); + int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, + std::multiplies<>()); + + auto* idata = reinterpret_cast::type*>(in.data()); + auto* odata = reinterpret_cast::type*>(out.data()); + + // For the moment, considering the contiguous layout only + auto sign = KokkosFFT::Impl::direction_type(direction); + + // Create plan + auto in_strides = compute_strides(in_extents); + auto out_strides = compute_strides(out_extents); + auto _fft_extents = convert_int_type(fft_extents); + + // In oneMKL, the distance is always defined based on R2C transform + std::int64_t _idist = static_cast(std::max(idist, odist)); + std::int64_t _odist = static_cast(std::min(idist, odist)); + + plan = std::make_unique(_fft_extents); + plan->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, + in_strides.data()); + plan->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, + out_strides.data()); + + // Configuration for batched plan + plan->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, _idist); + plan->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, _odist); + plan->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, + static_cast(howmany)); + + // Data layout in conjugate-even domain + plan->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_NOT_INPLACE); + plan->set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE, + DFTI_COMPLEX_COMPLEX); + + sycl::queue q = exec_space.sycl_queue(); + plan->commit(q); + + return fft_size; +} + +// In oneMKL, plans are destroybed by default destructor +template < + typename ExecutionSpace, typename PlanType, + std::enable_if_t, + std::nullptr_t> = nullptr> +void _destroy(std::unique_ptr& plan) {} +} // namespace Impl +} // namespace KokkosFFT + +#endif \ No newline at end of file diff --git a/fft/src/KokkosFFT_SYCL_transform.hpp b/fft/src/KokkosFFT_SYCL_transform.hpp new file mode 100644 index 00000000..5340a429 --- /dev/null +++ b/fft/src/KokkosFFT_SYCL_transform.hpp @@ -0,0 +1,67 @@ +#ifndef KOKKOSFFT_SYCL_TRANSFORM_HPP +#define KOKKOSFFT_SYCL_TRANSFORM_HPP + +#include +#include + +namespace KokkosFFT { +namespace Impl { +template +void _exec(PlanType& plan, float* idata, std::complex* odata, + [[maybe_unused]] int direction) { + auto r2c = oneapi::mkl::dft::compute_forward(plan, idata, + reinterpret_cast(odata)); + r2c.wait(); +} + +template +void _exec(PlanType& plan, double* idata, std::complex* odata, + [[maybe_unused]] int direction) { + auto d2z = oneapi::mkl::dft::compute_forward( + plan, idata, reinterpret_cast(odata)); + d2z.wait(); +} + +template +void _exec(PlanType& plan, std::complex* idata, float* odata, + [[maybe_unused]] int direction) { + auto c2r = oneapi::mkl::dft::compute_backward( + plan, reinterpret_cast(idata), odata); + c2r.wait(); +} + +template +void _exec(PlanType& plan, std::complex* idata, double* odata, + [[maybe_unused]] int direction) { + auto z2d = oneapi::mkl::dft::compute_backward( + plan, reinterpret_cast(idata), odata); + z2d.wait(); +} + +template +void _exec(PlanType& plan, std::complex* idata, + std::complex* odata, [[maybe_unused]] int direction) { + if (direction == 1) { + auto c2c = oneapi::mkl::dft::compute_forward(plan, idata, odata); + c2c.wait(); + } else { + auto c2c = oneapi::mkl::dft::compute_backward(plan, idata, odata); + c2c.wait(); + } +} + +template +void _exec(PlanType& plan, std::complex* idata, + std::complex* odata, [[maybe_unused]] int direction) { + if (direction == 1) { + auto z2z = oneapi::mkl::dft::compute_forward(plan, idata, odata); + z2z.wait(); + } else { + auto z2z = oneapi::mkl::dft::compute_backward(plan, idata, odata); + z2z.wait(); + } +} +} // namespace Impl +} // namespace KokkosFFT + +#endif \ No newline at end of file diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index 3c9500e6..aad16e21 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -21,6 +21,11 @@ using default_device = Kokkos::HIP; #ifdef ENABLE_HOST_AND_DEVICE #include "KokkosFFT_OpenMP_transform.hpp" #endif +#elif defined(KOKKOS_ENABLE_SYCL) +#include "KokkosFFT_SYCL_transform.hpp" +#ifdef ENABLE_HOST_AND_DEVICE +#include "KokkosFFT_OpenMP_transform.hpp" +#endif #elif defined(KOKKOS_ENABLE_OPENMP) using default_device = Kokkos::OpenMP; #include "KokkosFFT_OpenMP_transform.hpp" @@ -55,6 +60,7 @@ void _fft(const ExecutionSpace& exec_space, PlanType& plan, auto forward = direction_type(KokkosFFT::Impl::Direction::Forward); + KokkosFFT::Impl::_exec(plan.plan(), idata, odata, forward); KokkosFFT::Impl::normalize(exec_space, out, KokkosFFT::Impl::Direction::Forward, norm, @@ -81,6 +87,7 @@ void _ifft(const ExecutionSpace& exec_space, PlanType& plan, auto backward = direction_type(KokkosFFT::Impl::Direction::Backward); + KokkosFFT::Impl::_exec(plan.plan(), idata, odata, backward); KokkosFFT::Impl::normalize(exec_space, out, KokkosFFT::Impl::Direction::Backward, norm, From 90997e743487845a0891fedf323551d1d5d3c8e6 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 18 Jan 2024 03:53:33 +0900 Subject: [PATCH 2/6] Fix OpenMP backend --- common/src/KokkosFFT_OpenMP_types.hpp | 8 ++--- common/src/KokkosFFT_default_types.hpp | 2 ++ fft/src/KokkosFFT_OpenMP_plans.hpp | 41 +++++++++++++------------- fft/src/KokkosFFT_OpenMP_transform.hpp | 10 +++---- 4 files changed, 32 insertions(+), 29 deletions(-) diff --git a/common/src/KokkosFFT_OpenMP_types.hpp b/common/src/KokkosFFT_OpenMP_types.hpp index e97edf79..b7f79721 100644 --- a/common/src/KokkosFFT_OpenMP_types.hpp +++ b/common/src/KokkosFFT_OpenMP_types.hpp @@ -28,11 +28,11 @@ struct FFTDataType { using complex128 = fftw_complex; }; -template +template struct FFTPlanType { - using type = - std::conditional_t, float>, - fftwf_plan, fftw_plan>; + using type = std::conditional_t< + std::is_same_v, float>, fftwf_plan, + fftw_plan>; }; template diff --git a/common/src/KokkosFFT_default_types.hpp b/common/src/KokkosFFT_default_types.hpp index 0cba34e1..0eb0c796 100644 --- a/common/src/KokkosFFT_default_types.hpp +++ b/common/src/KokkosFFT_default_types.hpp @@ -9,6 +9,8 @@ using default_device = Kokkos::Cuda; #elif defined(KOKKOS_ENABLE_HIP) using default_device = Kokkos::HIP; #include "KokkosFFT_HIP_types.hpp" +#elif defined(KOKKOS_ENABLE_SYCL) +#include "KokkosFFT_SYCL_types.hpp" #elif defined(KOKKOS_ENABLE_OPENMP) using default_device = Kokkos::OpenMP; #include "KokkosFFT_OpenMP_types.hpp" diff --git a/fft/src/KokkosFFT_OpenMP_plans.hpp b/fft/src/KokkosFFT_OpenMP_plans.hpp index 4f62aa80..b00d76f3 100644 --- a/fft/src/KokkosFFT_OpenMP_plans.hpp +++ b/fft/src/KokkosFFT_OpenMP_plans.hpp @@ -28,7 +28,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction) { static_assert(Kokkos::is_view::value, @@ -63,28 +63,29 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int istride = 1, ostride = 1; auto sign = KokkosFFT::Impl::direction_type(direction); + plan = std::make_unique(); if constexpr (type == KokkosFFT::Impl::FFTWTransformType::R2C) { - plan = fftwf_plan_many_dft_r2c( + *plan = fftwf_plan_many_dft_r2c( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::D2Z) { - plan = fftw_plan_many_dft_r2c( + *plan = fftw_plan_many_dft_r2c( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2R) { - plan = fftwf_plan_many_dft_c2r( + *plan = fftwf_plan_many_dft_c2r( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2D) { - plan = fftw_plan_many_dft_c2r( + *plan = fftw_plan_many_dft_c2r( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2C) { - plan = fftwf_plan_many_dft( + *plan = fftwf_plan_many_dft( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2Z) { - plan = fftw_plan_many_dft( + *plan = fftw_plan_many_dft( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); } @@ -98,7 +99,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction, axis_type axes) { static_assert(Kokkos::is_view::value, @@ -137,28 +138,29 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int istride = 1, ostride = 1; auto sign = KokkosFFT::Impl::direction_type(direction); + plan = std::make_unique(); if constexpr (type == KokkosFFT::Impl::FFTWTransformType::R2C) { - plan = fftwf_plan_many_dft_r2c( + *plan = fftwf_plan_many_dft_r2c( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::D2Z) { - plan = fftw_plan_many_dft_r2c( + *plan = fftw_plan_many_dft_r2c( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2R) { - plan = fftwf_plan_many_dft_c2r( + *plan = fftwf_plan_many_dft_c2r( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2D) { - plan = fftw_plan_many_dft_c2r( + *plan = fftw_plan_many_dft_c2r( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2C) { - plan = fftwf_plan_many_dft( + *plan = fftwf_plan_many_dft( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2Z) { - plan = fftw_plan_many_dft( + *plan = fftw_plan_many_dft( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); } @@ -166,16 +168,15 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, return fft_size; } -template , std::nullptr_t> = nullptr> -void _destroy( - typename KokkosFFT::Impl::FFTPlanType::type& plan) { - if constexpr (std::is_same_v) { - fftwf_destroy_plan(plan); +void _destroy(std::unique_ptr& plan) { + if constexpr (std::is_same_v) { + fftwf_destroy_plan(*plan); } else { - fftw_destroy_plan(plan); + fftw_destroy_plan(*plan); } } } // namespace Impl diff --git a/fft/src/KokkosFFT_OpenMP_transform.hpp b/fft/src/KokkosFFT_OpenMP_transform.hpp index c16d1b7f..f34438f0 100644 --- a/fft/src/KokkosFFT_OpenMP_transform.hpp +++ b/fft/src/KokkosFFT_OpenMP_transform.hpp @@ -6,31 +6,31 @@ namespace KokkosFFT { namespace Impl { template -void _exec(PlanType plan, float* idata, fftwf_complex* odata, +void _exec(PlanType& plan, float* idata, fftwf_complex* odata, [[maybe_unused]] int direction) { fftwf_execute_dft_r2c(plan, idata, odata); } template -void _exec(PlanType plan, double* idata, fftw_complex* odata, +void _exec(PlanType& plan, double* idata, fftw_complex* odata, [[maybe_unused]] int direction) { fftw_execute_dft_r2c(plan, idata, odata); } template -void _exec(PlanType plan, fftwf_complex* idata, float* odata, +void _exec(PlanType& plan, fftwf_complex* idata, float* odata, [[maybe_unused]] int direction) { fftwf_execute_dft_c2r(plan, idata, odata); } template -void _exec(PlanType plan, fftw_complex* idata, double* odata, +void _exec(PlanType& plan, fftw_complex* idata, double* odata, [[maybe_unused]] int direction) { fftw_execute_dft_c2r(plan, idata, odata); } template -void _exec(PlanType plan, fftwf_complex* idata, fftwf_complex* odata, +void _exec(PlanType& plan, fftwf_complex* idata, fftwf_complex* odata, [[maybe_unused]] int direction) { fftwf_execute_dft(plan, idata, odata); } From f867556c847221325e1b2d66f7cb79daedbf929c Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 18 Jan 2024 03:54:16 +0900 Subject: [PATCH 3/6] Fix CUDA backend --- common/src/KokkosFFT_Cuda_types.hpp | 10 +++--- fft/src/KokkosFFT_Cuda_plans.hpp | 48 +++++++++++++++------------- fft/src/KokkosFFT_Cuda_transform.hpp | 12 +++---- 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/common/src/KokkosFFT_Cuda_types.hpp b/common/src/KokkosFFT_Cuda_types.hpp index 7234a791..c6e577e6 100644 --- a/common/src/KokkosFFT_Cuda_types.hpp +++ b/common/src/KokkosFFT_Cuda_types.hpp @@ -48,11 +48,11 @@ struct FFTDataType { cufftDoubleComplex, fftw_complex>; }; -template +template struct FFTPlanType { - using fftwHandle = - std::conditional_t, float>, - fftwf_plan, fftw_plan>; + using fftwHandle = std::conditional_t< + std::is_same_v, float>, fftwf_plan, + fftw_plan>; using type = std::conditional_t, cufftHandle, fftwHandle>; }; @@ -151,7 +151,7 @@ struct FFTDataType { using complex128 = cufftDoubleComplex; }; -template +template struct FFTPlanType { using type = cufftHandle; }; diff --git a/fft/src/KokkosFFT_Cuda_plans.hpp b/fft/src/KokkosFFT_Cuda_plans.hpp index e78dd43c..8d4bc7bd 100644 --- a/fft/src/KokkosFFT_Cuda_plans.hpp +++ b/fft/src/KokkosFFT_Cuda_plans.hpp @@ -13,7 +13,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction) { static_assert(Kokkos::is_view::value, @@ -23,7 +23,8 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - cufftResult cufft_rt = cufftCreate(&plan); + plan = std::make_unique(); + cufftResult cufft_rt = cufftCreate(&(*plan)); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftCreate failed"); const int batch = 1; @@ -37,7 +38,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - cufft_rt = cufftPlan1d(&plan, nx, type, batch); + cufft_rt = cufftPlan1d(&(*plan), nx, type, batch); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlan1d failed"); return fft_size; } @@ -48,7 +49,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction) { static_assert(Kokkos::is_view::value, @@ -58,7 +59,8 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - cufftResult cufft_rt = cufftCreate(&plan); + plan = std::make_unique(); + cufftResult cufft_rt = cufftCreate(&(*plan)); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftCreate failed"); const int axis = 0; @@ -70,7 +72,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - cufft_rt = cufftPlan2d(&plan, nx, ny, type); + cufft_rt = cufftPlan2d(&(*plan), nx, ny, type); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlan2d failed"); return fft_size; } @@ -81,7 +83,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction) { static_assert(Kokkos::is_view::value, @@ -91,7 +93,8 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - cufftResult cufft_rt = cufftCreate(&plan); + plan = std::make_unique(); + cufftResult cufft_rt = cufftCreate(&(*plan)); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftCreate failed"); const int axis = 0; @@ -106,7 +109,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - cufft_rt = cufftPlan3d(&plan, nx, ny, nz, type); + cufft_rt = cufftPlan3d(&(*plan), nx, ny, nz, type); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlan3d failed"); return fft_size; } @@ -117,7 +120,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction) { static_assert(Kokkos::is_view::value, @@ -127,7 +130,8 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - cufftResult cufft_rt = cufftCreate(&plan); + plan = std::make_unique(); + cufftResult cufft_rt = cufftCreate(&(*plan)); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftCreate failed"); const int rank = InViewType::rank(); @@ -144,8 +148,8 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - cufft_rt = cufftPlanMany(&plan, rank, fft_extents.data(), nullptr, 1, idist, - nullptr, 1, odist, type, batch); + cufft_rt = cufftPlanMany(&(*plan), rank, fft_extents.data(), nullptr, 1, + idist, nullptr, 1, odist, type, batch); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlanMany failed"); return fft_size; @@ -156,7 +160,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction, axis_type axes) { static_assert(Kokkos::is_view::value, @@ -185,24 +189,24 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, // For the moment, considering the contiguous layout only int istride = 1, ostride = 1; - cufftResult cufft_rt = cufftCreate(&plan); + plan = std::make_unique(); + cufftResult cufft_rt = cufftCreate(&(*plan)); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftCreate failed"); - cufft_rt = - cufftPlanMany(&plan, rank, fft_extents.data(), in_extents.data(), istride, - idist, out_extents.data(), ostride, odist, type, howmany); + cufft_rt = cufftPlanMany(&(*plan), rank, fft_extents.data(), + in_extents.data(), istride, idist, + out_extents.data(), ostride, odist, type, howmany); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlanMany failed"); return fft_size; } -template , std::nullptr_t> = nullptr> -void _destroy( - typename KokkosFFT::Impl::FFTPlanType::type& plan) { - cufftDestroy(plan); +void _destroy(std::unique_ptr& plan) { + cufftDestroy(*plan); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_Cuda_transform.hpp b/fft/src/KokkosFFT_Cuda_transform.hpp index 478e16a0..4ba45964 100644 --- a/fft/src/KokkosFFT_Cuda_transform.hpp +++ b/fft/src/KokkosFFT_Cuda_transform.hpp @@ -5,42 +5,42 @@ namespace KokkosFFT { namespace Impl { -void _exec(cufftHandle plan, cufftReal* idata, cufftComplex* odata, +void _exec(cufftHandle& plan, cufftReal* idata, cufftComplex* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecR2C(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecR2C failed"); } -void _exec(cufftHandle plan, cufftDoubleReal* idata, cufftDoubleComplex* odata, +void _exec(cufftHandle& plan, cufftDoubleReal* idata, cufftDoubleComplex* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecD2Z(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecD2Z failed"); } -void _exec(cufftHandle plan, cufftComplex* idata, cufftReal* odata, +void _exec(cufftHandle& plan, cufftComplex* idata, cufftReal* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecC2R(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecC2R failed"); } -void _exec(cufftHandle plan, cufftDoubleComplex* idata, cufftDoubleReal* odata, +void _exec(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleReal* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecZ2D(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecZ2D failed"); } -void _exec(cufftHandle plan, cufftComplex* idata, cufftComplex* odata, +void _exec(cufftHandle& plan, cufftComplex* idata, cufftComplex* odata, int direction) { cufftResult cufft_rt = cufftExecC2C(plan, idata, odata, direction); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecC2C failed"); } -void _exec(cufftHandle plan, cufftDoubleComplex* idata, +void _exec(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleComplex* odata, int direction) { cufftResult cufft_rt = cufftExecZ2Z(plan, idata, odata, direction); if (cufft_rt != CUFFT_SUCCESS) From 92c0ad31b9e9e4d1375b147dbe41535375b873e3 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 18 Jan 2024 03:55:07 +0900 Subject: [PATCH 4/6] Fix HIP backend --- common/src/KokkosFFT_HIP_types.hpp | 10 +++--- fft/src/KokkosFFT_HIP_plans.hpp | 48 ++++++++++++++++------------- fft/src/KokkosFFT_HIP_transform.hpp | 12 ++++---- 3 files changed, 37 insertions(+), 33 deletions(-) diff --git a/common/src/KokkosFFT_HIP_types.hpp b/common/src/KokkosFFT_HIP_types.hpp index 8729f1e7..fe1fa574 100644 --- a/common/src/KokkosFFT_HIP_types.hpp +++ b/common/src/KokkosFFT_HIP_types.hpp @@ -48,11 +48,11 @@ struct FFTDataType { hipfftDoubleComplex, fftw_complex>; }; -template +template struct FFTPlanType { - using fftwHandle = - std::conditional_t, float>, - fftwf_plan, fftw_plan>; + using fftwHandle = std::conditional_t< + std::is_same_v, float>, fftwf_plan, + fftw_plan>; using type = std::conditional_t, hipfftHandle, fftwHandle>; }; @@ -151,7 +151,7 @@ struct FFTDataType { using complex128 = hipfftDoubleComplex; }; -template +template struct FFTPlanType { using type = hipfftHandle; }; diff --git a/fft/src/KokkosFFT_HIP_plans.hpp b/fft/src/KokkosFFT_HIP_plans.hpp index 7d494d32..fc3d67e6 100644 --- a/fft/src/KokkosFFT_HIP_plans.hpp +++ b/fft/src/KokkosFFT_HIP_plans.hpp @@ -13,7 +13,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] FFTDirectionType direction) { static_assert(Kokkos::is_view::value, @@ -23,7 +23,8 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - hipfftResult hipfft_rt = hipfftCreate(&plan); + plan = std::make_unique(); + hipfftResult hipfft_rt = hipfftCreate(&(*plan)); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); @@ -38,7 +39,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - hipfft_rt = hipfftPlan1d(&plan, nx, type, batch); + hipfft_rt = hipfftPlan1d(&(*plan), nx, type, batch); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan1d failed"); return fft_size; @@ -50,7 +51,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] FFTDirectionType direction) { static_assert(Kokkos::is_view::value, @@ -60,7 +61,8 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - hipfftResult hipfft_rt = hipfftCreate(&plan); + plan = std::make_unique(); + hipfftResult hipfft_rt = hipfftCreate(&(*plan)); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); @@ -73,7 +75,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - hipfft_rt = hipfftPlan2d(&plan, nx, ny, type); + hipfft_rt = hipfftPlan2d(&(*plan), nx, ny, type); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan2d failed"); return fft_size; @@ -85,7 +87,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] FFTDirectionType direction) { static_assert(Kokkos::is_view::value, @@ -95,7 +97,8 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - hipfftResult hipfft_rt = hipfftCreate(&plan); + plan = std::make_unique(); + hipfftResult hipfft_rt = hipfftCreate(&(*plan)); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); @@ -112,7 +115,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - hipfft_rt = hipfftPlan3d(&plan, nx, ny, nz, type); + hipfft_rt = hipfftPlan3d(&(*plan), nx, ny, nz, type); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan3d failed"); return fft_size; @@ -124,7 +127,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] FFTDirectionType direction) { static_assert(Kokkos::is_view::value, @@ -134,7 +137,8 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - hipfftResult hipfft_rt = hipfftCreate(&plan); + plan = std::make_unique(); + hipfftResult hipfft_rt = hipfftCreate(&(*plan)); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); @@ -152,8 +156,8 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - hipfft_rt = hipfftPlanMany(&plan, rank, fft_extents.data(), nullptr, 1, idist, - nullptr, 1, odist, type, batch); + hipfft_rt = hipfftPlanMany(&(*plan), rank, fft_extents.data(), nullptr, 1, + idist, nullptr, 1, odist, type, batch); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlanMany failed"); return fft_size; @@ -165,7 +169,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] FFTDirectionType direction, axis_type axes) { @@ -195,25 +199,25 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, // For the moment, considering the contiguous layout only int istride = 1, ostride = 1; - hipfftResult hipfft_rt = hipfftCreate(&plan); + plan = std::make_unique(); + hipfftResult hipfft_rt = hipfftCreate(&(*plan)); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); - hipfft_rt = hipfftPlanMany(&plan, rank, fft_extents.data(), in_extents.data(), - istride, idist, out_extents.data(), ostride, odist, - type, howmany); + hipfft_rt = hipfftPlanMany(&(*plan), rank, fft_extents.data(), + in_extents.data(), istride, idist, + out_extents.data(), ostride, odist, type, howmany); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan failed"); return fft_size; } -template , std::nullptr_t> = nullptr> -void _destroy( - typename KokkosFFT::Impl::FFTPlanType::type& plan) { - hipfftDestroy(plan); +void _destroy(std::unique_ptr& plan) { + hipfftDestroy(*plan); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_HIP_transform.hpp b/fft/src/KokkosFFT_HIP_transform.hpp index dd63fbbb..30c1785a 100644 --- a/fft/src/KokkosFFT_HIP_transform.hpp +++ b/fft/src/KokkosFFT_HIP_transform.hpp @@ -5,42 +5,42 @@ namespace KokkosFFT { namespace Impl { -void _exec(hipfftHandle plan, hipfftReal* idata, hipfftComplex* odata, +void _exec(hipfftHandle& plan, hipfftReal* idata, hipfftComplex* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecR2C(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecR2C failed"); } -void _exec(hipfftHandle plan, hipfftDoubleReal* idata, +void _exec(hipfftHandle& plan, hipfftDoubleReal* idata, hipfftDoubleComplex* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecD2Z(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecD2Z failed"); } -void _exec(hipfftHandle plan, hipfftComplex* idata, hipfftReal* odata, +void _exec(hipfftHandle& plan, hipfftComplex* idata, hipfftReal* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecC2R(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecC2R failed"); } -void _exec(hipfftHandle plan, hipfftDoubleComplex* idata, +void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, hipfftDoubleReal* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecZ2D(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecZ2D failed"); } -void _exec(hipfftHandle plan, hipfftComplex* idata, hipfftComplex* odata, +void _exec(hipfftHandle& plan, hipfftComplex* idata, hipfftComplex* odata, int direction) { hipfftResult hipfft_rt = hipfftExecC2C(plan, idata, odata, direction); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecC2C failed"); } -void _exec(hipfftHandle plan, hipfftDoubleComplex* idata, +void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, hipfftDoubleComplex* odata, int direction) { hipfftResult hipfft_rt = hipfftExecZ2Z(plan, idata, odata, direction); if (hipfft_rt != HIPFFT_SUCCESS) From 9e1290a25c0215141a1e0bd8c3645a639d12c5e7 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 18 Jan 2024 03:57:22 +0900 Subject: [PATCH 5/6] Add CI for Intel (fails because oneMKL missing) --- .github/workflows/cmake.yml | 45 +++++++++++++++++++++++++++ docker/intel/Dockerfile | 62 +++++++++++++++++++++++++++++++++++++ 2 files changed, 107 insertions(+) create mode 100644 docker/intel/Dockerfile diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 4fc7f865..aa2cd9f7 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -125,6 +125,51 @@ jobs: -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \ -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_HIP=ON -DKokkos_ARCH_${{env.architecture}}=ON -DBUILD_TESTING=ON ${{matrix.backend.option}} + - name: Build + # Build your program with the given configuration + run: | + docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake --build build_${{matrix.backend.name}} --config ${{env.BUILD_TYPE}} -j 2 + + build_intel: + # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. + # You can convert this to a matrix build if you need cross-platform coverage. + # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix + runs-on: ubuntu-latest + + env: + #backends: HIP HIP_HOST_DEVICE + architecture: INTEL_PVC + CMAKE_CXX_COMPILER: icpx + container: intel_env + + strategy: + matrix: + backend: [ {name: INTEL, option: ""}, {name: INTEL_HOST_DEVICE, option: "-DKokkosFFT_ENABLE_HOST_AND_DEVICE=ON"} ] + + steps: + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@v1.2.0 + with: { tool-cache: true, large-packages: false } + + - name: Checkout built branch + uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Update submodules + run: git submodule update --remote --recursive + + - name: Build docker + run: docker build -t ${{ env.container }} docker/intel + + - name: Configure CMake for SYCL backend + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. + # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: | + docker run -v ${{github.workspace}}:/work ${{ env.container }} cmake -B build_${{matrix.backend.name}} \ + -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}} -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \ + -DCMAKE_CXX_STANDARD=17 -DKokkos_ENABLE_SYCL=ON -DKokkos_ARCH_${{env.architecture}}=ON -DBUILD_TESTING=ON ${{matrix.backend.option}} + - name: Build # Build your program with the given configuration run: | diff --git a/docker/intel/Dockerfile b/docker/intel/Dockerfile new file mode 100644 index 00000000..ff683aaa --- /dev/null +++ b/docker/intel/Dockerfile @@ -0,0 +1,62 @@ +ARG BASE=nvidia/cuda:11.7.1-devel-ubuntu22.04 +FROM $BASE + +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub + +RUN apt-get update && apt-get install -y \ + bc \ + wget \ + ccache \ + ninja-build \ + python3 \ + git \ + libfftw3-dev \ + libomp-dev \ + && \ + apt-get clean && \ + git config --global --add safe.directory '*' && \ + rm -rf /var/lib/apt/lists/* + +RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ + KEYDUMP_FILE=keydump && \ + wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ + wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ + gpg --import ${KEYDUMP_FILE} && \ + gpg --verify ${KEYDUMP_FILE}.sig ${KEYDUMP_FILE} && \ + rm ${KEYDUMP_FILE}* + +ARG CMAKE_VERSION=3.23.2 +ARG build_dir=/tmp/build + +ENV CMAKE_DIR=/opt/cmake +RUN CMAKE_SCRIPT=cmake-${CMAKE_VERSION}-linux-x86_64.sh && \ + mkdir -p ${build_dir} && mkdir -p ${CMAKE_DIR} \ + cd ${build_dir} && \ + wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_SCRIPT} && \ + sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \ + cd ${HOME} && rm -rf ${build_dir} +ENV PATH=${CMAKE_DIR}/bin:$PATH + +RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB && \ + apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB && \ + echo "deb https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \ + apt-get update -o Dir::Etc::sourcelist="sources.list.d/oneAPI.list" -o APT::Get::List-Cleanup="0" && \ + apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-2023.0.0 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN wget https://cloud.cees.ornl.gov/download/oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \ + chmod +x oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \ + ./oneapi-for-nvidia-gpus-2023.0.0-linux.sh -y && \ + rm oneapi-for-nvidia-gpus-2023.0.0-linux.sh + +RUN wget https://registrationcenter-download.intel.com/akdlm/irc_nas/19133/l_oneDPL_p_2022.0.0.25335.sh &&\ + chmod +x ./l_oneDPL_p_2022.0.0.25335.sh && \ + ./l_oneDPL_p_2022.0.0.25335.sh -a -s --eula accept && \ + rm l_oneDPL_p_2022.0.0.25335.sh + +WORKDIR /work +ENV FFTWDIR "/usr" +RUN . /opt/intel/oneapi/setvars.sh --include-intel-llvm +ENV PATH=${CMAKE_DIR}/bin:/opt/intel/oneapi/compiler/latest/linux/bin:$PATH +CMD ["bash"] \ No newline at end of file From de19e63fc4a88d44c76c68ca45453e243bc8c7e8 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Thu, 18 Jan 2024 16:34:43 +0900 Subject: [PATCH 6/6] Install oneAPI for oneMKL --- docker/intel/Dockerfile | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docker/intel/Dockerfile b/docker/intel/Dockerfile index ff683aaa..2dcd423e 100644 --- a/docker/intel/Dockerfile +++ b/docker/intel/Dockerfile @@ -50,13 +50,16 @@ RUN wget https://cloud.cees.ornl.gov/download/oneapi-for-nvidia-gpus-2023.0.0-li ./oneapi-for-nvidia-gpus-2023.0.0-linux.sh -y && \ rm oneapi-for-nvidia-gpus-2023.0.0-linux.sh -RUN wget https://registrationcenter-download.intel.com/akdlm/irc_nas/19133/l_oneDPL_p_2022.0.0.25335.sh &&\ - chmod +x ./l_oneDPL_p_2022.0.0.25335.sh && \ - ./l_oneDPL_p_2022.0.0.25335.sh -a -s --eula accept && \ - rm l_oneDPL_p_2022.0.0.25335.sh +RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46.sh &&\ + chmod +x ./l_BaseKit_p_2024.0.1.46.sh && \ + ./l_BaseKit_p_2024.0.1.46.sh -a -s --eula accept && \ + rm l_BaseKit_p_2024.0.1.46.sh WORKDIR /work ENV FFTWDIR "/usr" RUN . /opt/intel/oneapi/setvars.sh --include-intel-llvm -ENV PATH=${CMAKE_DIR}/bin:/opt/intel/oneapi/compiler/latest/linux/bin:$PATH +ENV PATH=${CMAKE_DIR}/bin:/opt/intel/oneapi/compiler/latest/bin:$PATH +ENV CMAKE_PREFIX_PATH "$CMAKE_PREFIX_PATH;/opt/intel/oneapi/dpl/latest/lib/cmake/oneDPL/;/opt/intel/oneapi/mkl/latest/lib/cmake/mkl/" +ENV oneDPL_DIR=/opt/intel/oneapi/dpl/latest/lib/cmake/oneDPL +ENV MKL_DIR=/opt/intel/oneapi/mkl/latest/lib/cmake/mkl CMD ["bash"] \ No newline at end of file