Merge pull request #18 from MarcelKoch/develop

Testing another merge
MarcelKoch · Dec 9, 2024 · aeafaa6 · aeafaa6
2 parents 97c825c + 059823f
commit aeafaa6
Show file tree

Hide file tree

Showing 368 changed files with 7,147 additions and 3,181 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,4 +1,8 @@
 repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+    -  id: end-of-file-fixer
 - repo: https://github.com/pre-commit/mirrors-clang-format
   rev: 'v14.0.0'  # The default in Ubuntu 22.04, which is used in our CI
   hooks:

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -32,6 +32,12 @@ option(GINKGO_BUILD_DOC "Generate documentation" OFF)
 option(GINKGO_FAST_TESTS "Reduces the input size for a few tests known to be time-intensive" OFF)
 option(GINKGO_TEST_NONDEFAULT_STREAM "Uses non-default streams in CUDA and HIP tests" OFF)
 option(GINKGO_MIXED_PRECISION "Instantiate true mixed-precision kernels (otherwise they will be conversion-based using implicit temporary storage)" OFF)
+option(GINKGO_ENABLE_HALF "Enable the use of half precision" ON)
+# We do not support half precision in MSVC.
+if(MSVC)
+    message(STATUS "We do not support half precision in MSVC.")
+    set(GINKGO_ENABLE_HALF OFF CACHE BOOL "Enable the use of half precision" FORCE)
+endif()
 option(GINKGO_SKIP_DEPENDENCY_UPDATE
     "Do not update dependencies each time the project is rebuilt" ON)
 option(GINKGO_WITH_CLANG_TIDY "Make Ginkgo call `clang-tidy` to find programming issues." OFF)
@@ -298,9 +304,11 @@ endif()
 
 if(GINKGO_BUILD_SYCL)
     ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_MAJOR_VERSION __LIBSYCL_MAJOR_VERSION)
+    ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_MINOR_VERSION __LIBSYCL_MINOR_VERSION)
     ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_VERSION __SYCL_COMPILER_VERSION)
 else()
     set(GINKGO_DPCPP_MAJOR_VERSION "0")
+    set(GINKGO_DPCPP_MINOR_VERSION "0")
 endif()
 configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in
     ${Ginkgo_BINARY_DIR}/include/ginkgo/config.hpp @ONLY)

diff --git a/INSTALL.md b/INSTALL.md
@@ -25,6 +25,9 @@ Ginkgo adds the following additional switches to control what is being built:
     instead of converting data on the fly, default is `OFF`.
     Enabling this flag increases the library size, but improves performance of
     mixed-precision kernels.
+*   `-DGINKGO_ENABLE_HALF={ON, OFF}` enables half precision support in Ginkgo, default is `ON`.
+    It is forced to `OFF` when the compiler is MSVC. When the CUDA backend is compiled with a CUDA version before 12.2,
+    half precision is only supported for compute capability 5.3 or higher. CUDA 12.2+ compilers waive the compute capability limitation.
 *   `-DGINKGO_BUILD_TESTS={ON, OFF}` builds Ginkgo's tests
     (will download googletest), default is `ON`.
 *   `-DGINKGO_FAST_TESTS={ON, OFF}` reduces the input sizes for a few slow tests

diff --git a/accessor/cuda_helper.hpp b/accessor/cuda_helper.hpp
@@ -17,7 +17,15 @@
 #include "utils.hpp"
 
 
+struct __half;
+
+
 namespace gko {
+
+
+class half;
+
+
 namespace acc {
 namespace detail {
 
@@ -27,6 +35,11 @@ struct cuda_type {
     using type = T;
 };
 
+template <>
+struct cuda_type<gko::half> {
+    using type = __half;
+};
+
 // Unpack cv and reference / pointer qualifiers
 template <typename T>
 struct cuda_type<const T> {
@@ -57,7 +70,7 @@ struct cuda_type<T&&> {
 // Transform std::complex to thrust::complex
 template <typename T>
 struct cuda_type<std::complex<T>> {
-    using type = thrust::complex<T>;
+    using type = thrust::complex<typename cuda_type<T>::type>;
 };
 
 

diff --git a/accessor/hip_helper.hpp b/accessor/hip_helper.hpp
@@ -17,7 +17,15 @@
 #include "utils.hpp"
 
 
+struct __half;
+
+
 namespace gko {
+
+
+class half;
+
+
 namespace acc {
 namespace detail {
 
@@ -53,11 +61,15 @@ struct hip_type<T&&> {
     using type = typename hip_type<T>::type&&;
 };
 
+template <>
+struct hip_type<gko::half> {
+    using type = __half;
+};
 
 // Transform std::complex to thrust::complex
 template <typename T>
 struct hip_type<std::complex<T>> {
-    using type = thrust::complex<T>;
+    using type = thrust::complex<typename hip_type<T>::type>;
 };
 
 

diff --git a/accessor/reference_helper.hpp b/accessor/reference_helper.hpp
@@ -12,18 +12,16 @@
 #include "utils.hpp"
 
 
-// CUDA TOOLKIT < 11 does not support constexpr in combination with
-// thrust::complex, which is why constexpr is only present in later versions
-#if defined(__CUDA_ARCH__) && defined(__CUDACC_VER_MAJOR__) && \
-    (__CUDACC_VER_MAJOR__ < 11)
+// NVC++ does not allow a constexpr function to have a non-literal return type such as half
+#if defined(__NVCOMPILER) && GINKGO_ENABLE_HALF
 
 #define GKO_ACC_ENABLE_REFERENCE_CONSTEXPR
 
 #else
 
 #define GKO_ACC_ENABLE_REFERENCE_CONSTEXPR constexpr
 
-#endif  // __CUDA_ARCH__ && __CUDACC_VER_MAJOR__ && __CUDACC_VER_MAJOR__ < 11
+#endif
 
 
 namespace gko {

diff --git a/accessor/sycl_helper.hpp b/accessor/sycl_helper.hpp
@@ -0,0 +1,202 @@
+// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef GKO_ACCESSOR_SYCL_HELPER_HPP_
+#define GKO_ACCESSOR_SYCL_HELPER_HPP_
+
+
+#include <complex>
+#include <type_traits>
+
+#include "block_col_major.hpp"
+#include "reduced_row_major.hpp"
+#include "row_major.hpp"
+#include "scaled_reduced_row_major.hpp"
+#include "utils.hpp"
+
+
+// namespace sycl {
+// inline namespace _V1 {
+
+
+// class half;
+
+
+// }
+// }  // namespace sycl
+
+
+namespace gko {
+
+
+class half;
+
+
+template <typename V>
+class complex;
+
+
+namespace acc {
+namespace detail {
+
+
+template <typename T>  // fallback: any type not matched by a specialization maps to itself
+struct sycl_type {
+    using type = T;
+};
+
+template <>  // gko::half is translated to sycl::half
+struct sycl_type<gko::half> {
+    using type = sycl::half;
+};
+
+// Unpack cv and reference / pointer qualifiers: translate the underlying type recursively, then re-apply the qualifier
+template <typename T>
+struct sycl_type<const T> {
+    using type = const typename sycl_type<T>::type;
+};
+
+template <typename T>
+struct sycl_type<volatile T> {
+    using type = volatile typename sycl_type<T>::type;
+};
+
+template <typename T>
+struct sycl_type<T*> {
+    using type = typename sycl_type<T>::type*;
+};
+
+template <typename T>
+struct sycl_type<T&> {
+    using type = typename sycl_type<T>::type&;
+};
+
+template <typename T>
+struct sycl_type<T&&> {
+    using type = typename sycl_type<T>::type&&;
+};
+
+
+// Transform the underlying type of std::complex; the result stays a std::complex over the translated type
+template <typename T>
+struct sycl_type<std::complex<T>> {
+    using type = std::complex<typename sycl_type<T>::type>;
+};
+
+
+template <>  // complex half is the exception: it maps to gko::complex over the translated half type
+struct sycl_type<std::complex<gko::half>> {
+    using type = gko::complex<typename sycl_type<gko::half>::type>;
+};
+
+
+}  // namespace detail
+
+
+/**
+ * This is an alias for SYCL's equivalent of `T`, as computed by
+ * `detail::sycl_type`.
+ *
+ * @tparam T  the type to translate; cv-qualifiers, pointers and references
+ *            are kept around the translated underlying type
+ */
+template <typename T>
+using sycl_type_t = typename detail::sycl_type<T>::type;
+
+
+/**
+ * Reinterprets the passed in pointer or reference as a SYCL type.
+ *
+ * This overload only participates when `T` is a pointer or a reference type.
+ *
+ * @param val  the pointer or reference to reinterpret
+ *
+ * @return `val` converted to `sycl_type_t<T>` with a `reinterpret_cast`
+ */
+template <typename T>
+std::enable_if_t<std::is_pointer<T>::value || std::is_reference<T>::value,
+                 sycl_type_t<T>>
+as_sycl_type(T val)
+{
+    return reinterpret_cast<sycl_type_t<T>>(val);
+}
+
+
+/**
+ * Reinterprets the passed in value as a SYCL type.
+ *
+ * This overload only participates when `T` is neither a pointer nor a
+ * reference type.
+ *
+ * @param val  the value to reinterpret; it is taken by value
+ *
+ * @return the local copy `val` read through a pointer to `sycl_type_t<T>`
+ */
+template <typename T>
+std::enable_if_t<!std::is_pointer<T>::value && !std::is_reference<T>::value,
+                 sycl_type_t<T>>
+as_sycl_type(T val)
+{
+    return *reinterpret_cast<sycl_type_t<T>*>(&val);
+}
+
+
+/**
+ * Changes the types and reinterprets the passed in range pointers as SYCL
+ * types.
+ *
+ * The size and stride of the accessor are passed on unchanged; only the
+ * stored data goes through as_sycl_type().
+ *
+ * @param r  the range whose pointers need to be reinterpreted
+ *
+ * @return `r` with appropriate types and reinterpreted to SYCL pointers
+ */
+template <std::size_t dim, typename Type1, typename Type2>
+GKO_ACC_INLINE auto as_sycl_range(
+    const range<reduced_row_major<dim, Type1, Type2>>& r)
+{
+    return range<
+        reduced_row_major<dim, sycl_type_t<Type1>, sycl_type_t<Type2>>>(
+        r.get_accessor().get_size(),
+        as_sycl_type(r.get_accessor().get_stored_data()),
+        r.get_accessor().get_stride());
+}
+
+/**
+ * @copydoc as_sycl_range()
+ *
+ * @note Both the stored data and the scalar go through as_sycl_type(); the
+ *       size, the storage stride and the scalar stride are passed on
+ *       unchanged, and `mask` is kept as is.
+ */
+template <std::size_t dim, typename Type1, typename Type2, std::uint64_t mask>
+GKO_ACC_INLINE auto as_sycl_range(
+    const range<scaled_reduced_row_major<dim, Type1, Type2, mask>>& r)
+{
+    return range<scaled_reduced_row_major<dim, sycl_type_t<Type1>,
+                                          sycl_type_t<Type2>, mask>>(
+        r.get_accessor().get_size(),
+        as_sycl_type(r.get_accessor().get_stored_data()),
+        r.get_accessor().get_storage_stride(),
+        as_sycl_type(r.get_accessor().get_scalar()),
+        r.get_accessor().get_scalar_stride());
+}
+
+/**
+ * @copydoc as_sycl_range()
+ *
+ * @note The `lengths` and `stride` members of the accessor are passed on
+ *       unchanged; only `data` goes through as_sycl_type().
+ */
+template <typename T, size_type dim>
+GKO_ACC_INLINE auto as_sycl_range(const range<block_col_major<T, dim>>& r)
+{
+    return range<block_col_major<sycl_type_t<T>, dim>>(
+        r.get_accessor().lengths, as_sycl_type(r.get_accessor().data),
+        r.get_accessor().stride);
+}
+
+/**
+ * @copydoc as_sycl_range()
+ *
+ * @note The result keeps the row_major accessor of the input; only the value
+ *       type is replaced by its SYCL equivalent. The `lengths` and `stride`
+ *       members are passed on unchanged and `data` goes through
+ *       as_sycl_type().
+ */
+template <typename T, size_type dim>
+GKO_ACC_INLINE auto as_sycl_range(const range<row_major<T, dim>>& r)
+{
+    return range<row_major<sycl_type_t<T>, dim>>(
+        r.get_accessor().lengths, as_sycl_type(r.get_accessor().data),
+        r.get_accessor().stride);
+}
+
+template <typename AccType>  // backend-neutral name: forwards to the matching as_sycl_range() overload
+GKO_ACC_INLINE auto as_device_range(AccType&& acc)
+{
+    return as_sycl_range(std::forward<AccType>(acc));
+}
+
+
+}  // namespace acc
+}  // namespace gko
+
+
+#endif  // GKO_ACCESSOR_SYCL_HELPER_HPP_
diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt
@@ -18,6 +18,9 @@ function(ginkgo_benchmark_cusparse_linops type def)
             PRIVATE
             $<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
     endif()
+    if(GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE)
+        target_compile_definitions(cusparse_linops_${type} PRIVATE THRUST_CUB_WRAPPED_NAMESPACE=gko)
+    endif()
     # make the dependency public to catch issues
     target_compile_definitions(cusparse_linops_${type} PUBLIC ${def})
     target_compile_definitions(cusparse_linops_${type} PRIVATE GKO_COMPILING_CUDA)
@@ -28,6 +31,9 @@ endfunction()
 function(ginkgo_benchmark_hipsparse_linops type def)
     add_library(hipsparse_linops_${type} utils/hip_linops.hip.cpp)
     set_source_files_properties(utils/hip_linops.hip.cpp PROPERTIES LANGUAGE HIP)
+    if(GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE)
+        target_compile_definitions(hipsparse_linops_${type} PRIVATE THRUST_CUB_WRAPPED_NAMESPACE=gko)
+    endif()
     target_compile_definitions(hipsparse_linops_${type} PUBLIC ${def})
     target_compile_definitions(hipsparse_linops_${type} PRIVATE GKO_COMPILING_HIP)
     target_include_directories(hipsparse_linops_${type} SYSTEM PRIVATE ${HIPBLAS_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS})

diff --git a/benchmark/utils/dpcpp_timer.dp.cpp b/benchmark/utils/dpcpp_timer.dp.cpp
@@ -4,7 +4,7 @@
 
 #include <iostream>
 
-#include <CL/sycl.hpp>
+#include <sycl/sycl.hpp>
 
 #include "benchmark/utils/timer_impl.hpp"
 

diff --git a/benchmark/utils/formats.hpp b/benchmark/utils/formats.hpp
@@ -129,7 +129,7 @@ using hybrid = gko::matrix::Hybrid<etype, itype>;
 using csr = gko::matrix::Csr<etype, itype>;
 using coo = gko::matrix::Coo<etype, itype>;
 using ell = gko::matrix::Ell<etype, itype>;
-using ell_mixed = gko::matrix::Ell<gko::next_precision<etype>, itype>;
+using ell_mixed = gko::matrix::Ell<gko::next_precision_base<etype>, itype>;
 
 
 /**
@@ -274,7 +274,7 @@ std::unique_ptr<gko::LinOp> matrix_factory(
         check_ell_admissibility(data);
     }
     if (format == "ell_mixed") {
-        gko::matrix_data<gko::next_precision<etype>, itype> conv_data;
+        gko::matrix_data<gko::next_precision_base<etype>, itype> conv_data;
         conv_data.size = data.size;
         conv_data.nonzeros.resize(data.nonzeros.size());
         auto it = conv_data.nonzeros.begin();
@@ -284,8 +284,8 @@ std::unique_ptr<gko::LinOp> matrix_factory(
             it->value = el.value;
             ++it;
         }
-        gko::as<gko::ReadableFromMatrixData<gko::next_precision<etype>, itype>>(
-            mat.get())
+        gko::as<gko::ReadableFromMatrixData<gko::next_precision_base<etype>,
+                                            itype>>(mat.get())
             ->read(conv_data);
     } else {
         gko::as<gko::ReadableFromMatrixData<etype, itype>>(mat.get())->read(