From 93cb07bcedbb8d54d94b635ce3293074577f5870 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Mon, 29 Jan 2024 17:52:34 +0900 Subject: [PATCH 1/6] Add SYCL backend with oneMKL for Intel GPUs --- common/src/CMakeLists.txt | 8 + common/src/KokkosFFT_Cuda_types.hpp | 10 +- common/src/KokkosFFT_HIP_types.hpp | 10 +- common/src/KokkosFFT_OpenMP_types.hpp | 8 +- common/src/KokkosFFT_SYCL_types.hpp | 256 +++++++++++++++++++++++++ common/src/KokkosFFT_default_types.hpp | 2 + fft/src/KokkosFFT_Cuda_transform.hpp | 12 +- fft/src/KokkosFFT_HIP_transform.hpp | 12 +- fft/src/KokkosFFT_OpenMP_transform.hpp | 10 +- fft/src/KokkosFFT_Plans.hpp | 14 +- fft/src/KokkosFFT_SYCL_plans.hpp | 179 +++++++++++++++++ fft/src/KokkosFFT_SYCL_transform.hpp | 67 +++++++ fft/src/KokkosFFT_Transform.hpp | 5 + 13 files changed, 558 insertions(+), 35 deletions(-) create mode 100644 common/src/KokkosFFT_SYCL_types.hpp create mode 100644 fft/src/KokkosFFT_SYCL_plans.hpp create mode 100644 fft/src/KokkosFFT_SYCL_transform.hpp diff --git a/common/src/CMakeLists.txt b/common/src/CMakeLists.txt index d5682f0d..fa7d5acd 100644 --- a/common/src/CMakeLists.txt +++ b/common/src/CMakeLists.txt @@ -21,6 +21,14 @@ elseif(Kokkos_ENABLE_HIP) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE) endif() +elseif(Kokkos_ENABLE_SYCL) + find_package(MKL REQUIRED COMPONENTS SYCL) + target_link_libraries(common INTERFACE MKL::MKL_SYCL) + if(KokkosFFT_ENABLE_HOST_AND_DEVICE) + find_package(FFTW MODULE REQUIRED) + target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) + target_compile_definitions(common INTERFACE ENABLE_HOST_AND_DEVICE) + endif() elseif(Kokkos_ENABLE_OPENMP) find_package(FFTW MODULE REQUIRED) target_link_libraries(common INTERFACE FFTW::Float FFTW::Double FFTW::FloatOpenMP FFTW::DoubleOpenMP) diff --git a/common/src/KokkosFFT_Cuda_types.hpp b/common/src/KokkosFFT_Cuda_types.hpp index 7234a791..c6e577e6 100644 --- a/common/src/KokkosFFT_Cuda_types.hpp +++ b/common/src/KokkosFFT_Cuda_types.hpp @@ -48,11 +48,11 @@ struct FFTDataType { cufftDoubleComplex, fftw_complex>; }; -template +template struct FFTPlanType { - using fftwHandle = - std::conditional_t, float>, - fftwf_plan, fftw_plan>; + using fftwHandle = std::conditional_t< + std::is_same_v, float>, fftwf_plan, + fftw_plan>; using type = std::conditional_t, cufftHandle, fftwHandle>; }; @@ -151,7 +151,7 @@ struct FFTDataType { using complex128 = cufftDoubleComplex; }; -template +template struct FFTPlanType { using type = cufftHandle; }; diff --git a/common/src/KokkosFFT_HIP_types.hpp b/common/src/KokkosFFT_HIP_types.hpp index 8729f1e7..fe1fa574 100644 --- a/common/src/KokkosFFT_HIP_types.hpp +++ b/common/src/KokkosFFT_HIP_types.hpp @@ -48,11 +48,11 @@ struct FFTDataType { hipfftDoubleComplex, fftw_complex>; }; -template +template struct FFTPlanType { - using fftwHandle = - std::conditional_t, float>, - fftwf_plan, fftw_plan>; + using fftwHandle = std::conditional_t< + std::is_same_v, float>, fftwf_plan, + fftw_plan>; using type = std::conditional_t, hipfftHandle, fftwHandle>; }; @@ -151,7 +151,7 @@ struct FFTDataType { using complex128 = hipfftDoubleComplex; }; -template +template struct FFTPlanType { using type = hipfftHandle; }; diff --git a/common/src/KokkosFFT_OpenMP_types.hpp b/common/src/KokkosFFT_OpenMP_types.hpp index e97edf79..b7f79721 100644 --- a/common/src/KokkosFFT_OpenMP_types.hpp +++ b/common/src/KokkosFFT_OpenMP_types.hpp @@ -28,11 +28,11 @@ struct FFTDataType { using complex128 = fftw_complex; }; -template +template struct FFTPlanType { - using type = - std::conditional_t, float>, - fftwf_plan, fftw_plan>; + using type = std::conditional_t< + std::is_same_v, float>, fftwf_plan, + fftw_plan>; }; template diff --git a/common/src/KokkosFFT_SYCL_types.hpp b/common/src/KokkosFFT_SYCL_types.hpp new file mode 100644 index 00000000..bdcead04 --- /dev/null +++ b/common/src/KokkosFFT_SYCL_types.hpp @@ -0,0 +1,256 @@ +#ifndef KOKKOSFFT_SYCL_TYPES_HPP +#define KOKKOSFFT_SYCL_TYPES_HPP + +#include +#include +#include +#include +#include "KokkosFFT_utils.hpp" + +// Check the size of complex type +// [TO DO] I guess this kind of test is already made by Kokkos itself +static_assert(sizeof(std::complex) == sizeof(Kokkos::complex)); +static_assert(alignof(std::complex) <= alignof(Kokkos::complex)); + +static_assert(sizeof(std::complex) == sizeof(Kokkos::complex)); +static_assert(alignof(std::complex) <= + alignof(Kokkos::complex)); + +#ifdef ENABLE_HOST_AND_DEVICE +#include +static_assert(sizeof(fftwf_complex) == sizeof(Kokkos::complex)); +static_assert(alignof(fftwf_complex) <= alignof(Kokkos::complex)); + +static_assert(sizeof(fftw_complex) == sizeof(Kokkos::complex)); +static_assert(alignof(fftw_complex) <= alignof(Kokkos::complex)); +#endif + +namespace KokkosFFT { +namespace Impl { +enum class Direction { + Forward, + Backward, +}; + +using FFTDirectionType = int; +constexpr FFTDirectionType MKL_FFT_FORWARD = 1; +constexpr FFTDirectionType MKL_FFT_BACKWARD = -1; + +enum class FFTWTransformType { R2C, D2Z, C2R, Z2D, C2C, Z2Z }; + +template +using TransformType = FFTWTransformType; + +// Define fft transform types +template +struct transform_type { + static_assert(std::is_same_v, + "Real to real transform is unavailable"); +}; + +template +struct transform_type> { + static_assert(std::is_same_v, + "T1 and T2 should have the same precision"); + using _TransformType = TransformType; + + static constexpr _TransformType m_type = std::is_same_v + ? FFTWTransformType::R2C + : FFTWTransformType::D2Z; + static constexpr _TransformType type() { return m_type; }; +}; + +template +struct transform_type, T2> { + static_assert(std::is_same_v, + "T1 and T2 should have the same precision"); + using _TransformType = TransformType; + + static constexpr _TransformType m_type = std::is_same_v + ? FFTWTransformType::C2R + : FFTWTransformType::Z2D; + static constexpr _TransformType type() { return m_type; }; +}; + +template +struct transform_type, + Kokkos::complex> { + static_assert(std::is_same_v, + "T1 and T2 should have the same precision"); + using _TransformType = TransformType; + + static constexpr _TransformType m_type = std::is_same_v + ? FFTWTransformType::C2C + : FFTWTransformType::Z2Z; + static constexpr _TransformType type() { return m_type; }; +}; + +#ifdef ENABLE_HOST_AND_DEVICE + +template +struct FFTDataType { + using float32 = float; + using float64 = double; + + using complex64 = std::conditional_t< + std::is_same_v, + std::complex, fftwf_complex>; + using complex128 = std::conditional_t< + std::is_same_v, + std::complex, fftw_complex>; +}; + +template +struct FFTPlanType { + using fftwHandle = + std::conditional_t, float>, + fftwf_plan, fftw_plan>; + using precision = + std::conditional_t, float>, + oneapi::mkl::dft::precision::SINGLE, + oneapi::mkl::dft::precision::DOUBLE>; + using type = std::conditional_t, + cufftHandle, fftwHandle>; +}; + +template +struct FFTPlanType { + static_assert(std::is_same_v, + "Real to real transform is unavailable"); +}; + +template +struct FFTPlanType> { + using float_type = T1; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::REAL; + + using fftwHandle = std::conditional_t< + std::is_same_v, float>, + fftwf_plan, fftw_plan>; + + using onemklHandle = oneapi::mkl::dft::descriptor; + using type = std::conditional_t< + std::is_same_v, onemklHandle, + fftwHandle>; +}; + +template +struct FFTPlanType, T2> { + using float_type = T2; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::REAL; + + using fftwHandle = std::conditional_t< + std::is_same_v, float>, + fftwf_plan, fftw_plan>; + + using onemklHandle = oneapi::mkl::dft::descriptor; + using type = std::conditional_t< + std::is_same_v, onemklHandle, + fftwHandle>; +}; + +template +struct FFTPlanType, Kokkos::complex> { + using float_type = KokkosFFT::Impl::real_type_t; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::COMPLEX; + + using fftwHandle = std::conditional_t< + std::is_same_v, float>, + fftwf_plan, fftw_plan>; + + using onemklHandle = oneapi::mkl::dft::descriptor; + using type = std::conditional_t< + std::is_same_v, onemklHandle, + fftwHandle>; +}; + +template +auto direction_type(Direction direction) { + static constexpr FFTDirectionType _FORWARD = + std::is_same_v + ? MKL_FFT_FORWARD + : FFTW_FORWARD; + static constexpr FFTDirectionType _BACKWARD = + std::is_same_v + ? MKL_FFT_BACKWARD + : FFTW_BACKWARD; + return direction == Direction::Forward ? _FORWARD : _BACKWARD; +} +#else +template +struct FFTDataType { + using float32 = float; + using float64 = double; + using complex64 = std::complex; + using complex128 = std::complex; +}; + +template +struct FFTPlanType { + static_assert(std::is_same_v, + "Real to real transform is unavailable"); +}; + +template +struct FFTPlanType> { + using float_type = T1; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::REAL; + + using type = oneapi::mkl::dft::descriptor; +}; + +template +struct FFTPlanType, T2> { + using float_type = T2; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::REAL; + + using type = oneapi::mkl::dft::descriptor; +}; + +template +struct FFTPlanType, Kokkos::complex> { + using float_type = KokkosFFT::Impl::real_type_t; + static constexpr oneapi::mkl::dft::precision prec = + std::is_same_v, float> + ? oneapi::mkl::dft::precision::SINGLE + : oneapi::mkl::dft::precision::DOUBLE; + static constexpr oneapi::mkl::dft::domain dom = + oneapi::mkl::dft::domain::COMPLEX; + + using type = oneapi::mkl::dft::descriptor; +}; + +template +auto direction_type(Direction direction) { + return direction == Direction::Forward ? MKL_FFT_FORWARD : MKL_FFT_BACKWARD; +} +#endif +} // namespace Impl +} // namespace KokkosFFT + +#endif \ No newline at end of file diff --git a/common/src/KokkosFFT_default_types.hpp b/common/src/KokkosFFT_default_types.hpp index 0cba34e1..0eb0c796 100644 --- a/common/src/KokkosFFT_default_types.hpp +++ b/common/src/KokkosFFT_default_types.hpp @@ -9,6 +9,8 @@ using default_device = Kokkos::Cuda; #elif defined(KOKKOS_ENABLE_HIP) using default_device = Kokkos::HIP; #include "KokkosFFT_HIP_types.hpp" +#elif defined(KOKKOS_ENABLE_SYCL) +#include "KokkosFFT_SYCL_types.hpp" #elif defined(KOKKOS_ENABLE_OPENMP) using default_device = Kokkos::OpenMP; #include "KokkosFFT_OpenMP_types.hpp" diff --git a/fft/src/KokkosFFT_Cuda_transform.hpp b/fft/src/KokkosFFT_Cuda_transform.hpp index 478e16a0..4ba45964 100644 --- a/fft/src/KokkosFFT_Cuda_transform.hpp +++ b/fft/src/KokkosFFT_Cuda_transform.hpp @@ -5,42 +5,42 @@ namespace KokkosFFT { namespace Impl { -void _exec(cufftHandle plan, cufftReal* idata, cufftComplex* odata, +void _exec(cufftHandle& plan, cufftReal* idata, cufftComplex* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecR2C(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecR2C failed"); } -void _exec(cufftHandle plan, cufftDoubleReal* idata, cufftDoubleComplex* odata, +void _exec(cufftHandle& plan, cufftDoubleReal* idata, cufftDoubleComplex* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecD2Z(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecD2Z failed"); } -void _exec(cufftHandle plan, cufftComplex* idata, cufftReal* odata, +void _exec(cufftHandle& plan, cufftComplex* idata, cufftReal* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecC2R(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecC2R failed"); } -void _exec(cufftHandle plan, cufftDoubleComplex* idata, cufftDoubleReal* odata, +void _exec(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleReal* odata, [[maybe_unused]] int direction) { cufftResult cufft_rt = cufftExecZ2D(plan, idata, odata); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecZ2D failed"); } -void _exec(cufftHandle plan, cufftComplex* idata, cufftComplex* odata, +void _exec(cufftHandle& plan, cufftComplex* idata, cufftComplex* odata, int direction) { cufftResult cufft_rt = cufftExecC2C(plan, idata, odata, direction); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftExecC2C failed"); } -void _exec(cufftHandle plan, cufftDoubleComplex* idata, +void _exec(cufftHandle& plan, cufftDoubleComplex* idata, cufftDoubleComplex* odata, int direction) { cufftResult cufft_rt = cufftExecZ2Z(plan, idata, odata, direction); if (cufft_rt != CUFFT_SUCCESS) diff --git a/fft/src/KokkosFFT_HIP_transform.hpp b/fft/src/KokkosFFT_HIP_transform.hpp index dd63fbbb..30c1785a 100644 --- a/fft/src/KokkosFFT_HIP_transform.hpp +++ b/fft/src/KokkosFFT_HIP_transform.hpp @@ -5,42 +5,42 @@ namespace KokkosFFT { namespace Impl { -void _exec(hipfftHandle plan, hipfftReal* idata, hipfftComplex* odata, +void _exec(hipfftHandle& plan, hipfftReal* idata, hipfftComplex* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecR2C(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecR2C failed"); } -void _exec(hipfftHandle plan, hipfftDoubleReal* idata, +void _exec(hipfftHandle& plan, hipfftDoubleReal* idata, hipfftDoubleComplex* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecD2Z(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecD2Z failed"); } -void _exec(hipfftHandle plan, hipfftComplex* idata, hipfftReal* odata, +void _exec(hipfftHandle& plan, hipfftComplex* idata, hipfftReal* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecC2R(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecC2R failed"); } -void _exec(hipfftHandle plan, hipfftDoubleComplex* idata, +void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, hipfftDoubleReal* odata, [[maybe_unused]] int direction) { hipfftResult hipfft_rt = hipfftExecZ2D(plan, idata, odata); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecZ2D failed"); } -void _exec(hipfftHandle plan, hipfftComplex* idata, hipfftComplex* odata, +void _exec(hipfftHandle& plan, hipfftComplex* idata, hipfftComplex* odata, int direction) { hipfftResult hipfft_rt = hipfftExecC2C(plan, idata, odata, direction); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftExecC2C failed"); } -void _exec(hipfftHandle plan, hipfftDoubleComplex* idata, +void _exec(hipfftHandle& plan, hipfftDoubleComplex* idata, hipfftDoubleComplex* odata, int direction) { hipfftResult hipfft_rt = hipfftExecZ2Z(plan, idata, odata, direction); if (hipfft_rt != HIPFFT_SUCCESS) diff --git a/fft/src/KokkosFFT_OpenMP_transform.hpp b/fft/src/KokkosFFT_OpenMP_transform.hpp index c16d1b7f..f34438f0 100644 --- a/fft/src/KokkosFFT_OpenMP_transform.hpp +++ b/fft/src/KokkosFFT_OpenMP_transform.hpp @@ -6,31 +6,31 @@ namespace KokkosFFT { namespace Impl { template -void _exec(PlanType plan, float* idata, fftwf_complex* odata, +void _exec(PlanType& plan, float* idata, fftwf_complex* odata, [[maybe_unused]] int direction) { fftwf_execute_dft_r2c(plan, idata, odata); } template -void _exec(PlanType plan, double* idata, fftw_complex* odata, +void _exec(PlanType& plan, double* idata, fftw_complex* odata, [[maybe_unused]] int direction) { fftw_execute_dft_r2c(plan, idata, odata); } template -void _exec(PlanType plan, fftwf_complex* idata, float* odata, +void _exec(PlanType& plan, fftwf_complex* idata, float* odata, [[maybe_unused]] int direction) { fftwf_execute_dft_c2r(plan, idata, odata); } template -void _exec(PlanType plan, fftw_complex* idata, double* odata, +void _exec(PlanType& plan, fftw_complex* idata, double* odata, [[maybe_unused]] int direction) { fftw_execute_dft_c2r(plan, idata, odata); } template -void _exec(PlanType plan, fftwf_complex* idata, fftwf_complex* odata, +void _exec(PlanType& plan, fftwf_complex* idata, fftwf_complex* odata, [[maybe_unused]] int direction) { fftwf_execute_dft(plan, idata, odata); } diff --git a/fft/src/KokkosFFT_Plans.hpp b/fft/src/KokkosFFT_Plans.hpp index c106a2d4..e04e66ba 100644 --- a/fft/src/KokkosFFT_Plans.hpp +++ b/fft/src/KokkosFFT_Plans.hpp @@ -18,6 +18,11 @@ using default_device = Kokkos::HIP; #ifdef ENABLE_HOST_AND_DEVICE #include "KokkosFFT_OpenMP_plans.hpp" #endif +#elif defined(KOKKOS_ENABLE_SYCL) +#include "KokkosFFT_SYCL_plans.hpp" +#ifdef ENABLE_HOST_AND_DEVICE +#include "KokkosFFT_OpenMP_plans.hpp" +#endif #elif defined(KOKKOS_ENABLE_OPENMP) using default_device = Kokkos::OpenMP; #include "KokkosFFT_OpenMP_plans.hpp" @@ -39,14 +44,15 @@ class Plan { using out_value_type = typename OutViewType::non_const_value_type; using float_type = KokkosFFT::Impl::real_type_t; using fft_plan_type = - typename KokkosFFT::Impl::FFTPlanType::type; + typename KokkosFFT::Impl::FFTPlanType::type; using fft_size_type = std::size_t; using map_type = axis_type; using nonConstInViewType = std::remove_cv_t; using nonConstOutViewType = std::remove_cv_t; using extents_type = shape_type; - fft_plan_type m_plan; + std::unique_ptr m_plan; fft_size_type m_fft_size; map_type m_map, m_map_inv; bool m_is_transpose_needed; @@ -146,7 +152,7 @@ class Plan { KokkosFFT::Impl::_create(exec_space, m_plan, in, out, direction, axes); } - ~Plan() { _destroy(m_plan); } + ~Plan() { _destroy(m_plan); } template @@ -200,7 +206,7 @@ class Plan { } } - fft_plan_type plan() const { return m_plan; } + fft_plan_type& plan() const { return *m_plan; } fft_size_type fft_size() const { return m_fft_size; } bool is_transpose_needed() const { return m_is_transpose_needed; } map_type map() const { return m_map; } diff --git a/fft/src/KokkosFFT_SYCL_plans.hpp b/fft/src/KokkosFFT_SYCL_plans.hpp new file mode 100644 index 00000000..ce3109af --- /dev/null +++ b/fft/src/KokkosFFT_SYCL_plans.hpp @@ -0,0 +1,179 @@ +#ifndef KOKKOSFFT_SYCL_PLANS_HPP +#define KOKKOSFFT_SYCL_PLANS_HPP + +#include +#include +#include "KokkosFFT_SYCL_types.hpp" +#include "KokkosFFT_layouts.hpp" + +namespace KokkosFFT { +namespace Impl { +// Helper to convert the integer type of vectors +template +auto convert_int_type(std::vector& in) -> std::vector { + std::vector out(in.size()); + std::transform( + in.begin(), in.end(), out.begin(), + [](const InType v) -> OutType { return static_cast(v); }); + + return out; +} + +// Helper to compute strides from extents +// (n0, n1) -> (0, n1, 1) +// (n0) -> (0, 1) +template +auto compute_strides(std::vector& extents) -> std::vector { + std::vector out; + + OutType stride = 1; + for (auto it = extents.rbegin(); it != extents.rend(); ++it) { + out.push_back(stride); + stride *= static_cast(*it); + } + out.push_back(0); + std::reverse(out.begin(), out.end()); + + return out; +} + +// ND transform +template < + typename ExecutionSpace, typename PlanType, typename InViewType, + typename OutViewType, + std::enable_if_t, + std::nullptr_t> = nullptr> +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, + const InViewType& in, const OutViewType& out, + [[maybe_unused]] Direction direction) { + static_assert(Kokkos::is_view::value, + "KokkosFFT::_create: InViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosFFT::_create: OutViewType is not a Kokkos::View."); + using in_value_type = typename InViewType::non_const_value_type; + using out_value_type = typename OutViewType::non_const_value_type; + + const int rank = InViewType::rank(); + const int axis = -1; + const int howmany = 1; + constexpr auto type = + KokkosFFT::Impl::transform_type::type(); + auto [in_extents, out_extents, fft_extents] = + KokkosFFT::Impl::get_extents(in, out, axis); + int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, + std::multiplies<>()); + int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, + std::multiplies<>()); + int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, + std::multiplies<>()); + + auto* idata = reinterpret_cast::type*>(in.data()); + auto* odata = reinterpret_cast::type*>(out.data()); + auto sign = KokkosFFT::Impl::direction_type(direction); + + // Create plan + auto in_strides = compute_strides(in_extents); + auto out_strides = compute_strides(out_extents); + auto _fft_extents = convert_int_type(fft_extents); + + plan = std::make_unique(_fft_extents); + plan->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, + in_strides.data()); + plan->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, + out_strides.data()); + plan->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_NOT_INPLACE); + plan->set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE, + DFTI_COMPLEX_COMPLEX); + + sycl::queue q = exec_space.sycl_queue(); + plan->commit(q); + + return fft_size; +} + +// batched transform, over ND Views +template < + typename ExecutionSpace, typename PlanType, typename InViewType, + typename OutViewType, std::size_t fft_rank = 1, + std::enable_if_t, + std::nullptr_t> = nullptr> +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, + const InViewType& in, const OutViewType& out, + [[maybe_unused]] Direction direction, axis_type axes) { + static_assert(Kokkos::is_view::value, + "KokkosFFT::_create: InViewType is not a Kokkos::View."); + static_assert(Kokkos::is_view::value, + "KokkosFFT::_create: OutViewType is not a Kokkos::View."); + using in_value_type = typename InViewType::non_const_value_type; + using out_value_type = typename OutViewType::non_const_value_type; + + static_assert( + InViewType::rank() >= fft_rank, + "KokkosFFT::_create: Rank of View must be larger than Rank of FFT."); + const int rank = fft_rank; + + constexpr auto type = + KokkosFFT::Impl::transform_type::type(); + auto [in_extents, out_extents, fft_extents, howmany] = + KokkosFFT::Impl::get_extents_batched(in, out, axes); + int idist = std::accumulate(in_extents.begin(), in_extents.end(), 1, + std::multiplies<>()); + int odist = std::accumulate(out_extents.begin(), out_extents.end(), 1, + std::multiplies<>()); + int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, + std::multiplies<>()); + + auto* idata = reinterpret_cast::type*>(in.data()); + auto* odata = reinterpret_cast::type*>(out.data()); + + // For the moment, considering the contiguous layout only + auto sign = KokkosFFT::Impl::direction_type(direction); + + // Create plan + auto in_strides = compute_strides(in_extents); + auto out_strides = compute_strides(out_extents); + auto _fft_extents = convert_int_type(fft_extents); + + // In oneMKL, the distance is always defined based on R2C transform + std::int64_t _idist = static_cast(std::max(idist, odist)); + std::int64_t _odist = static_cast(std::min(idist, odist)); + + plan = std::make_unique(_fft_extents); + plan->set_value(oneapi::mkl::dft::config_param::INPUT_STRIDES, + in_strides.data()); + plan->set_value(oneapi::mkl::dft::config_param::OUTPUT_STRIDES, + out_strides.data()); + + // Configuration for batched plan + plan->set_value(oneapi::mkl::dft::config_param::FWD_DISTANCE, _idist); + plan->set_value(oneapi::mkl::dft::config_param::BWD_DISTANCE, _odist); + plan->set_value(oneapi::mkl::dft::config_param::NUMBER_OF_TRANSFORMS, + static_cast(howmany)); + + // Data layout in conjugate-even domain + plan->set_value(oneapi::mkl::dft::config_param::PLACEMENT, DFTI_NOT_INPLACE); + plan->set_value(oneapi::mkl::dft::config_param::CONJUGATE_EVEN_STORAGE, + DFTI_COMPLEX_COMPLEX); + + sycl::queue q = exec_space.sycl_queue(); + plan->commit(q); + + return fft_size; +} + +// In oneMKL, plans are destroybed by default destructor +template < + typename ExecutionSpace, typename PlanType, + std::enable_if_t, + std::nullptr_t> = nullptr> +void _destroy(std::unique_ptr& plan) {} +} // namespace Impl +} // namespace KokkosFFT + +#endif \ No newline at end of file diff --git a/fft/src/KokkosFFT_SYCL_transform.hpp b/fft/src/KokkosFFT_SYCL_transform.hpp new file mode 100644 index 00000000..5340a429 --- /dev/null +++ b/fft/src/KokkosFFT_SYCL_transform.hpp @@ -0,0 +1,67 @@ +#ifndef KOKKOSFFT_SYCL_TRANSFORM_HPP +#define KOKKOSFFT_SYCL_TRANSFORM_HPP + +#include +#include + +namespace KokkosFFT { +namespace Impl { +template +void _exec(PlanType& plan, float* idata, std::complex* odata, + [[maybe_unused]] int direction) { + auto r2c = oneapi::mkl::dft::compute_forward(plan, idata, + reinterpret_cast(odata)); + r2c.wait(); +} + +template +void _exec(PlanType& plan, double* idata, std::complex* odata, + [[maybe_unused]] int direction) { + auto d2z = oneapi::mkl::dft::compute_forward( + plan, idata, reinterpret_cast(odata)); + d2z.wait(); +} + +template +void _exec(PlanType& plan, std::complex* idata, float* odata, + [[maybe_unused]] int direction) { + auto c2r = oneapi::mkl::dft::compute_backward( + plan, reinterpret_cast(idata), odata); + c2r.wait(); +} + +template +void _exec(PlanType& plan, std::complex* idata, double* odata, + [[maybe_unused]] int direction) { + auto z2d = oneapi::mkl::dft::compute_backward( + plan, reinterpret_cast(idata), odata); + z2d.wait(); +} + +template +void _exec(PlanType& plan, std::complex* idata, + std::complex* odata, [[maybe_unused]] int direction) { + if (direction == 1) { + auto c2c = oneapi::mkl::dft::compute_forward(plan, idata, odata); + c2c.wait(); + } else { + auto c2c = oneapi::mkl::dft::compute_backward(plan, idata, odata); + c2c.wait(); + } +} + +template +void _exec(PlanType& plan, std::complex* idata, + std::complex* odata, [[maybe_unused]] int direction) { + if (direction == 1) { + auto z2z = oneapi::mkl::dft::compute_forward(plan, idata, odata); + z2z.wait(); + } else { + auto z2z = oneapi::mkl::dft::compute_backward(plan, idata, odata); + z2z.wait(); + } +} +} // namespace Impl +} // namespace KokkosFFT + +#endif \ No newline at end of file diff --git a/fft/src/KokkosFFT_Transform.hpp b/fft/src/KokkosFFT_Transform.hpp index 45fc7ef3..7c62977f 100644 --- a/fft/src/KokkosFFT_Transform.hpp +++ b/fft/src/KokkosFFT_Transform.hpp @@ -21,6 +21,11 @@ using default_device = Kokkos::HIP; #ifdef ENABLE_HOST_AND_DEVICE #include "KokkosFFT_OpenMP_transform.hpp" #endif +#elif defined(KOKKOS_ENABLE_SYCL) +#include "KokkosFFT_SYCL_transform.hpp" +#ifdef ENABLE_HOST_AND_DEVICE +#include "KokkosFFT_OpenMP_transform.hpp" +#endif #elif defined(KOKKOS_ENABLE_OPENMP) using default_device = Kokkos::OpenMP; #include "KokkosFFT_OpenMP_transform.hpp" From 98c8d8266e0992aa00d4791dd1625de5835cfbff Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Mon, 29 Jan 2024 17:53:09 +0900 Subject: [PATCH 2/6] Update other backends --- fft/src/KokkosFFT_Cuda_plans.hpp | 52 ++++++++++++++++-------------- fft/src/KokkosFFT_HIP_plans.hpp | 52 ++++++++++++++++-------------- fft/src/KokkosFFT_OpenMP_plans.hpp | 41 +++++++++++------------ 3 files changed, 77 insertions(+), 68 deletions(-) diff --git a/fft/src/KokkosFFT_Cuda_plans.hpp b/fft/src/KokkosFFT_Cuda_plans.hpp index 53709683..6c3d85fa 100644 --- a/fft/src/KokkosFFT_Cuda_plans.hpp +++ b/fft/src/KokkosFFT_Cuda_plans.hpp @@ -13,7 +13,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction) { static_assert(Kokkos::is_view::value, @@ -23,11 +23,12 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - cufftResult cufft_rt = cufftCreate(&plan); + plan = std::make_unique(); + cufftResult cufft_rt = cufftCreate(&(*plan)); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftCreate failed"); cudaStream_t stream = exec_space.cuda_stream(); - cufftSetStream(plan, stream); + cufftSetStream((*plan), stream); const int batch = 1; const int axis = 0; @@ -40,7 +41,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - cufft_rt = cufftPlan1d(&plan, nx, type, batch); + cufft_rt = cufftPlan1d(&(*plan), nx, type, batch); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlan1d failed"); return fft_size; } @@ -51,7 +52,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction) { static_assert(Kokkos::is_view::value, @@ -61,11 +62,12 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - cufftResult cufft_rt = cufftCreate(&plan); + plan = std::make_unique(); + cufftResult cufft_rt = cufftCreate(&(*plan)); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftCreate failed"); cudaStream_t stream = exec_space.cuda_stream(); - cufftSetStream(plan, stream); + cufftSetStream((*plan), stream); const int axis = 0; auto type = KokkosFFT::Impl::transform_type()); - cufft_rt = cufftPlan2d(&plan, nx, ny, type); + cufft_rt = cufftPlan2d(&(*plan), nx, ny, type); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlan2d failed"); return fft_size; } @@ -87,7 +89,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction) { static_assert(Kokkos::is_view::value, @@ -97,11 +99,12 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - cufftResult cufft_rt = cufftCreate(&plan); + plan = std::make_unique(); + cufftResult cufft_rt = cufftCreate(&(*plan)); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftCreate failed"); cudaStream_t stream = exec_space.cuda_stream(); - cufftSetStream(plan, stream); + cufftSetStream((*plan), stream); const int axis = 0; @@ -115,7 +118,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - cufft_rt = cufftPlan3d(&plan, nx, ny, nz, type); + cufft_rt = cufftPlan3d(&(*plan), nx, ny, nz, type); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlan3d failed"); return fft_size; } @@ -126,7 +129,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction) { static_assert(Kokkos::is_view::value, @@ -136,11 +139,12 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - cufftResult cufft_rt = cufftCreate(&plan); + plan = std::make_unique(); + cufftResult cufft_rt = cufftCreate(&(*plan)); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftCreate failed"); cudaStream_t stream = exec_space.cuda_stream(); - cufftSetStream(plan, stream); + cufftSetStream((*plan), stream); const int rank = InViewType::rank(); const int batch = 1; @@ -156,7 +160,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - cufft_rt = cufftPlanMany(&plan, rank, fft_extents.data(), nullptr, 1, idist, + cufft_rt = cufftPlanMany(&(*plan), rank, fft_extents.data(), nullptr, 1, idist, nullptr, 1, odist, type, batch); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlanMany failed"); @@ -168,7 +172,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction, axis_type axes) { static_assert(Kokkos::is_view::value, @@ -197,14 +201,15 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, // For the moment, considering the contiguous layout only int istride = 1, ostride = 1; - cufftResult cufft_rt = cufftCreate(&plan); + plan = std::make_unique(); + cufftResult cufft_rt = cufftCreate(&(*plan)); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftCreate failed"); cudaStream_t stream = exec_space.cuda_stream(); - cufftSetStream(plan, stream); + cufftSetStream((*plan), stream); cufft_rt = - cufftPlanMany(&plan, rank, fft_extents.data(), in_extents.data(), istride, + cufftPlanMany(&(*plan), rank, fft_extents.data(), in_extents.data(), istride, idist, out_extents.data(), ostride, odist, type, howmany); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlanMany failed"); @@ -212,12 +217,11 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, return fft_size; } -template , std::nullptr_t> = nullptr> -void _destroy( - typename KokkosFFT::Impl::FFTPlanType::type& plan) { - cufftDestroy(plan); +void _destroy(std::unique_ptr& plan) { + cufftDestroy(*plan); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_HIP_plans.hpp b/fft/src/KokkosFFT_HIP_plans.hpp index f92c015b..8ed7d112 100644 --- a/fft/src/KokkosFFT_HIP_plans.hpp +++ b/fft/src/KokkosFFT_HIP_plans.hpp @@ -13,7 +13,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] FFTDirectionType direction) { static_assert(Kokkos::is_view::value, @@ -23,12 +23,13 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - hipfftResult hipfft_rt = hipfftCreate(&plan); + plan = std::make_unique(); + hipfftResult hipfft_rt = hipfftCreate(&(*plan)); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); hipStream_t stream = exec_space.hip_stream(); - hipfftSetStream(plan, stream); + hipfftSetStream((*plan), stream); const int batch = 1; const int axis = 0; @@ -41,7 +42,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - hipfft_rt = hipfftPlan1d(&plan, nx, type, batch); + hipfft_rt = hipfftPlan1d(&(*plan), nx, type, batch); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan1d failed"); return fft_size; @@ -53,7 +54,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] FFTDirectionType direction) { static_assert(Kokkos::is_view::value, @@ -63,12 +64,13 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - hipfftResult hipfft_rt = hipfftCreate(&plan); + plan = std::make_unique(); + hipfftResult hipfft_rt = hipfftCreate(&(*plan)); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); hipStream_t stream = exec_space.hip_stream(); - hipfftSetStream(plan, stream); + hipfftSetStream((*plan), stream); const int axis = 0; auto type = KokkosFFT::Impl::transform_type()); - hipfft_rt = hipfftPlan2d(&plan, nx, ny, type); + hipfft_rt = hipfftPlan2d(&(*plan), nx, ny, type); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan2d failed"); return fft_size; @@ -91,7 +93,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] FFTDirectionType direction) { static_assert(Kokkos::is_view::value, @@ -101,12 +103,13 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - hipfftResult hipfft_rt = hipfftCreate(&plan); + plan = std::make_unique(); + hipfftResult hipfft_rt = hipfftCreate(&(*plan)); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); hipStream_t stream = exec_space.hip_stream(); - hipfftSetStream(plan, stream); + hipfftSetStream((*plan), stream); const int batch = 1; const int axis = 0; @@ -121,7 +124,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - hipfft_rt = hipfftPlan3d(&plan, nx, ny, nz, type); + hipfft_rt = hipfftPlan3d(&(*plan), nx, ny, nz, type); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan3d failed"); return fft_size; @@ -133,7 +136,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] FFTDirectionType direction) { static_assert(Kokkos::is_view::value, @@ -143,12 +146,13 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, using in_value_type = typename InViewType::non_const_value_type; using out_value_type = typename OutViewType::non_const_value_type; - hipfftResult hipfft_rt = hipfftCreate(&plan); + plan = std::make_unique(); + hipfftResult hipfft_rt = hipfftCreate(&(*plan)); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); hipStream_t stream = exec_space.hip_stream(); - hipfftSetStream(plan, stream); + hipfftSetStream((*plan), stream); const int rank = InViewType::rank(); const int batch = 1; @@ -164,7 +168,7 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - hipfft_rt = hipfftPlanMany(&plan, rank, fft_extents.data(), nullptr, 1, idist, + hipfft_rt = hipfftPlanMany(&(*plan), rank, fft_extents.data(), nullptr, 1, idist, nullptr, 1, odist, type, batch); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlanMany failed"); @@ -177,7 +181,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] FFTDirectionType direction, axis_type axes) { @@ -207,14 +211,15 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, // For the moment, considering the contiguous layout only int istride = 1, ostride = 1; - hipfftResult hipfft_rt = hipfftCreate(&plan); + plan = std::make_unique(); + hipfftResult hipfft_rt = hipfftCreate(&(*plan)); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftCreate failed"); hipStream_t stream = exec_space.hip_stream(); - hipfftSetStream(plan, stream); + hipfftSetStream((*plan), stream); - hipfft_rt = hipfftPlanMany(&plan, rank, fft_extents.data(), in_extents.data(), + hipfft_rt = hipfftPlanMany(&(*plan), rank, fft_extents.data(), in_extents.data(), istride, idist, out_extents.data(), ostride, odist, type, howmany); @@ -223,12 +228,11 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, return fft_size; } -template , std::nullptr_t> = nullptr> -void _destroy( - typename KokkosFFT::Impl::FFTPlanType::type& plan) { - hipfftDestroy(plan); +void _destroy(std::unique_ptr& plan) { + hipfftDestroy(*plan); } } // namespace Impl } // namespace KokkosFFT diff --git a/fft/src/KokkosFFT_OpenMP_plans.hpp b/fft/src/KokkosFFT_OpenMP_plans.hpp index 4f62aa80..b00d76f3 100644 --- a/fft/src/KokkosFFT_OpenMP_plans.hpp +++ b/fft/src/KokkosFFT_OpenMP_plans.hpp @@ -28,7 +28,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction) { static_assert(Kokkos::is_view::value, @@ -63,28 +63,29 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int istride = 1, ostride = 1; auto sign = KokkosFFT::Impl::direction_type(direction); + plan = std::make_unique(); if constexpr (type == KokkosFFT::Impl::FFTWTransformType::R2C) { - plan = fftwf_plan_many_dft_r2c( + *plan = fftwf_plan_many_dft_r2c( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::D2Z) { - plan = fftw_plan_many_dft_r2c( + *plan = fftw_plan_many_dft_r2c( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2R) { - plan = fftwf_plan_many_dft_c2r( + *plan = fftwf_plan_many_dft_c2r( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2D) { - plan = fftw_plan_many_dft_c2r( + *plan = fftw_plan_many_dft_c2r( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2C) { - plan = fftwf_plan_many_dft( + *plan = fftwf_plan_many_dft( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2Z) { - plan = fftw_plan_many_dft( + *plan = fftw_plan_many_dft( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); } @@ -98,7 +99,7 @@ template , std::nullptr_t> = nullptr> -auto _create(const ExecutionSpace& exec_space, PlanType& plan, +auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, const InViewType& in, const OutViewType& out, [[maybe_unused]] Direction direction, axis_type axes) { static_assert(Kokkos::is_view::value, @@ -137,28 +138,29 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, int istride = 1, ostride = 1; auto sign = KokkosFFT::Impl::direction_type(direction); + plan = std::make_unique(); if constexpr (type == KokkosFFT::Impl::FFTWTransformType::R2C) { - plan = fftwf_plan_many_dft_r2c( + *plan = fftwf_plan_many_dft_r2c( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::D2Z) { - plan = fftw_plan_many_dft_r2c( + *plan = fftw_plan_many_dft_r2c( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2R) { - plan = fftwf_plan_many_dft_c2r( + *plan = fftwf_plan_many_dft_c2r( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2D) { - plan = fftw_plan_many_dft_c2r( + *plan = fftw_plan_many_dft_c2r( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::C2C) { - plan = fftwf_plan_many_dft( + *plan = fftwf_plan_many_dft( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); } else if constexpr (type == KokkosFFT::Impl::FFTWTransformType::Z2Z) { - plan = fftw_plan_many_dft( + *plan = fftw_plan_many_dft( rank, fft_extents.data(), howmany, idata, in_extents.data(), istride, idist, odata, out_extents.data(), ostride, odist, sign, FFTW_ESTIMATE); } @@ -166,16 +168,15 @@ auto _create(const ExecutionSpace& exec_space, PlanType& plan, return fft_size; } -template , std::nullptr_t> = nullptr> -void _destroy( - typename KokkosFFT::Impl::FFTPlanType::type& plan) { - if constexpr (std::is_same_v) { - fftwf_destroy_plan(plan); +void _destroy(std::unique_ptr& plan) { + if constexpr (std::is_same_v) { + fftwf_destroy_plan(*plan); } else { - fftw_destroy_plan(plan); + fftw_destroy_plan(*plan); } } } // namespace Impl From 48f623421d4f9586a6a655c643cb651bfd49f7f0 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Mon, 29 Jan 2024 17:56:55 +0900 Subject: [PATCH 3/6] CI for SYCL backend --- .github/workflows/cmake.yml | 60 ++++++++++++++++++++++++++++++++++- docker/intel/Dockerfile | 63 +++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 docker/intel/Dockerfile diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 2755d1ce..d5618157 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -144,4 +144,62 @@ jobs: - name: Install test for HIP backend run: | - docker run -v ${{github.workspace}}:/work ${{ env.container }} ./install_test/bin/install_hip.sh /tmp ${{matrix.backend.name}} \ No newline at end of file + docker run -v ${{github.workspace}}:/work ${{ env.container }} ./install_test/bin/install_hip.sh /tmp ${{matrix.backend.name}} + + build_intel: + # The CMake configure and build commands are platform agnostic and should work equally well on Windows or Mac. + # You can convert this to a matrix build if you need cross-platform coverage. + # See: https://docs.github.com/en/free-pro-team@latest/actions/learn-github-actions/managing-complex-workflows#using-a-build-matrix + runs-on: ubuntu-latest + + env: + #architecture: INTEL_PVC + architecture: AMPERE80 + #CMAKE_CXX_COMPILER: icpx + CMAKE_CXX_COMPILER: /opt/intel/oneapi/compiler/latest/bin/compiler/clang++ + container: intel_env + + strategy: + matrix: + backend: [ {name: INTEL, option: ""}, {name: INTEL_HOST_DEVICE, option: "-DKokkosFFT_ENABLE_HOST_AND_DEVICE=ON"} ] + + steps: + - name: Free Disk Space (Ubuntu) + uses: jlumbroso/free-disk-space@v1.2.0 + with: { tool-cache: true, large-packages: false } + + - name: Checkout built branch + uses: actions/checkout@v3 + with: + submodules: recursive + + - name: Update submodules + run: git submodule update --remote --recursive + + - name: Build docker + run: docker build -t ${{ env.container }} docker/intel + + - name: Configure CMake for SYCL backend + # Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make. + # See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type + run: | + docker run -v ${{github.workspace}}:/work ${{ env.container }} /bin/bash -c \ + ". /opt/intel/oneapi/setvars.sh --include-intel-llvm && cmake -B build_${{matrix.backend.name}} \ + -DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \ + -DCMAKE_CXX_STANDARD=17 \ + -DKOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED=0 \ + -DKokkos_ARCH_NATIVE=ON \ + -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Wno-deprecated-declarations -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-unknown-cuda-version -Wno-sycl-target" \ + -DKokkos_ENABLE_SYCL=ON \ + -DKokkos_ENABLE_COMPILER_WARNINGS=ON \ + -DKokkos_ENABLE_DEPRECATED_CODE_4=OFF \ + -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ + -DKokkos_ENABLE_UNSUPPORTED_ARCHS=ON \ + -DKokkos_ARCH_${{env.architecture}}=ON \ + -DBUILD_TESTING=ON ${{matrix.backend.option}}" + + - name: Build + # Build your program with the given configuration + run: | + docker run -v ${{github.workspace}}:/work ${{ env.container }} /bin/bash -c ". /opt/intel/oneapi/setvars.sh --include-intel-llvm && cmake --build build_${{matrix.backend.name}} --config ${{env.BUILD_TYPE}} -j 2" \ No newline at end of file diff --git a/docker/intel/Dockerfile b/docker/intel/Dockerfile new file mode 100644 index 00000000..33bca3f1 --- /dev/null +++ b/docker/intel/Dockerfile @@ -0,0 +1,63 @@ +ARG BASE=nvidia/cuda:11.7.1-devel-ubuntu22.04 +FROM $BASE + +RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub + +RUN apt-get update && apt-get install -y \ + bc \ + wget \ + ccache \ + ninja-build \ + python3 \ + git \ + libfftw3-dev \ + libomp-dev \ + && \ + apt-get clean && \ + git config --global --add safe.directory '*' && \ + rm -rf /var/lib/apt/lists/* + +RUN KEYDUMP_URL=https://cloud.cees.ornl.gov/download && \ + KEYDUMP_FILE=keydump && \ + wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE} && \ + wget --quiet ${KEYDUMP_URL}/${KEYDUMP_FILE}.sig && \ + gpg --import ${KEYDUMP_FILE} && \ + gpg --verify ${KEYDUMP_FILE}.sig ${KEYDUMP_FILE} && \ + rm ${KEYDUMP_FILE}* + +ARG CMAKE_VERSION=3.26.0 +ARG build_dir=/tmp/build + +ENV CMAKE_DIR=/opt/cmake +RUN CMAKE_SCRIPT=cmake-${CMAKE_VERSION}-linux-x86_64.sh && \ + mkdir -p ${build_dir} && mkdir -p ${CMAKE_DIR} \ + cd ${build_dir} && \ + wget https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_SCRIPT} && \ + sh ${CMAKE_SCRIPT} --skip-license --prefix=${CMAKE_DIR} && \ + cd ${HOME} && rm -rf ${build_dir} +ENV PATH=${CMAKE_DIR}/bin:$PATH + +RUN wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB && \ + apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS-2023.PUB && \ + echo "deb https://apt.repos.intel.com/oneapi all main" | tee /etc/apt/sources.list.d/oneAPI.list && \ + apt-get update -o Dir::Etc::sourcelist="sources.list.d/oneAPI.list" -o APT::Get::List-Cleanup="0" && \ + apt-get install -y intel-oneapi-compiler-dpcpp-cpp-and-cpp-classic-2023.0.0 && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +RUN wget https://cloud.cees.ornl.gov/download/oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \ + chmod +x oneapi-for-nvidia-gpus-2023.0.0-linux.sh && \ + ./oneapi-for-nvidia-gpus-2023.0.0-linux.sh -y && \ + rm oneapi-for-nvidia-gpus-2023.0.0-linux.sh + +RUN wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/163da6e4-56eb-4948-aba3-debcec61c064/l_BaseKit_p_2024.0.1.46.sh &&\ + chmod +x ./l_BaseKit_p_2024.0.1.46.sh && \ + ./l_BaseKit_p_2024.0.1.46.sh -a -s --eula accept && \ + rm l_BaseKit_p_2024.0.1.46.sh + +WORKDIR /work +ENV FFTWDIR "/usr" + +ENV PATH=${CMAKE_DIR}/bin:/opt/intel/oneapi/compiler/latest/bin/compiler:$PATH + +CMD ["bash"] \ No newline at end of file From 5040130f3e867832a6889f1d4be6ca99eb0da017 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Mon, 29 Jan 2024 18:02:56 +0900 Subject: [PATCH 4/6] formatting --- fft/src/KokkosFFT_Cuda_plans.hpp | 10 +++++----- fft/src/KokkosFFT_HIP_plans.hpp | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/fft/src/KokkosFFT_Cuda_plans.hpp b/fft/src/KokkosFFT_Cuda_plans.hpp index 6c3d85fa..48fed42a 100644 --- a/fft/src/KokkosFFT_Cuda_plans.hpp +++ b/fft/src/KokkosFFT_Cuda_plans.hpp @@ -160,8 +160,8 @@ auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - cufft_rt = cufftPlanMany(&(*plan), rank, fft_extents.data(), nullptr, 1, idist, - nullptr, 1, odist, type, batch); + cufft_rt = cufftPlanMany(&(*plan), rank, fft_extents.data(), nullptr, 1, + idist, nullptr, 1, odist, type, batch); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlanMany failed"); return fft_size; @@ -208,9 +208,9 @@ auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, cudaStream_t stream = exec_space.cuda_stream(); cufftSetStream((*plan), stream); - cufft_rt = - cufftPlanMany(&(*plan), rank, fft_extents.data(), in_extents.data(), istride, - idist, out_extents.data(), ostride, odist, type, howmany); + cufft_rt = cufftPlanMany(&(*plan), rank, fft_extents.data(), + in_extents.data(), istride, idist, + out_extents.data(), ostride, odist, type, howmany); if (cufft_rt != CUFFT_SUCCESS) throw std::runtime_error("cufftPlanMany failed"); diff --git a/fft/src/KokkosFFT_HIP_plans.hpp b/fft/src/KokkosFFT_HIP_plans.hpp index 8ed7d112..f46ccb2f 100644 --- a/fft/src/KokkosFFT_HIP_plans.hpp +++ b/fft/src/KokkosFFT_HIP_plans.hpp @@ -168,8 +168,8 @@ auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, int fft_size = std::accumulate(fft_extents.begin(), fft_extents.end(), 1, std::multiplies<>()); - hipfft_rt = hipfftPlanMany(&(*plan), rank, fft_extents.data(), nullptr, 1, idist, - nullptr, 1, odist, type, batch); + hipfft_rt = hipfftPlanMany(&(*plan), rank, fft_extents.data(), nullptr, 1, + idist, nullptr, 1, odist, type, batch); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlanMany failed"); return fft_size; @@ -219,9 +219,9 @@ auto _create(const ExecutionSpace& exec_space, std::unique_ptr& plan, hipStream_t stream = exec_space.hip_stream(); hipfftSetStream((*plan), stream); - hipfft_rt = hipfftPlanMany(&(*plan), rank, fft_extents.data(), in_extents.data(), - istride, idist, out_extents.data(), ostride, odist, - type, howmany); + hipfft_rt = hipfftPlanMany(&(*plan), rank, fft_extents.data(), + in_extents.data(), istride, idist, + out_extents.data(), ostride, odist, type, howmany); if (hipfft_rt != HIPFFT_SUCCESS) throw std::runtime_error("hipfftPlan failed"); From bca894ba6039e7c1fe8ea7da64985b5461b0a5ef Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Mon, 29 Jan 2024 18:15:07 +0900 Subject: [PATCH 5/6] use internal kokkos in CI --- .github/workflows/cmake.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index d5618157..22bc67f0 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -197,6 +197,7 @@ jobs: -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_UNSUPPORTED_ARCHS=ON \ -DKokkos_ARCH_${{env.architecture}}=ON \ + -DKokkosFFT_INTERNAL_Kokkos=ON \ -DBUILD_TESTING=ON ${{matrix.backend.option}}" - name: Build From 0a22d3d4b1280030dc655cc5c79f7b3aa7055770 Mon Sep 17 00:00:00 2001 From: Yuuichi Asahi Date: Mon, 29 Jan 2024 18:37:11 +0900 Subject: [PATCH 6/6] Find Kokkos issue in intel build --- .github/workflows/cmake.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 22bc67f0..928343d6 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -188,6 +188,7 @@ jobs: -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_CXX_COMPILER=${{env.CMAKE_CXX_COMPILER}} \ -DCMAKE_CXX_STANDARD=17 \ + -DKokkosFFT_INTERNAL_Kokkos=ON \ -DKOKKOS_IMPL_SYCL_DEVICE_GLOBAL_SUPPORTED=0 \ -DKokkos_ARCH_NATIVE=ON \ -DCMAKE_CXX_FLAGS="-fsycl-device-code-split=per_kernel -Wno-deprecated-declarations -Werror -Wno-gnu-zero-variadic-macro-arguments -Wno-unknown-cuda-version -Wno-sycl-target" \ @@ -197,7 +198,6 @@ jobs: -DKokkos_ENABLE_DEPRECATION_WARNINGS=OFF \ -DKokkos_ENABLE_UNSUPPORTED_ARCHS=ON \ -DKokkos_ARCH_${{env.architecture}}=ON \ - -DKokkosFFT_INTERNAL_Kokkos=ON \ -DBUILD_TESTING=ON ${{matrix.backend.option}}" - name: Build