Skip to content

Commit

Permalink
Merge pull request #18 from MarcelKoch/develop
Browse files Browse the repository at this point in the history
Testing another merge
  • Loading branch information
MarcelKoch authored Dec 9, 2024
2 parents 97c825c + 059823f commit aeafaa6
Show file tree
Hide file tree
Showing 368 changed files with 7,147 additions and 3,181 deletions.
4 changes: 4 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: end-of-file-fixer
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: 'v14.0.0' # The default in Ubuntu 22.04, which is used in our CI
hooks:
Expand Down
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ option(GINKGO_BUILD_DOC "Generate documentation" OFF)
option(GINKGO_FAST_TESTS "Reduces the input size for a few tests known to be time-intensive" OFF)
option(GINKGO_TEST_NONDEFAULT_STREAM "Uses non-default streams in CUDA and HIP tests" OFF)
option(GINKGO_MIXED_PRECISION "Instantiate true mixed-precision kernels (otherwise they will be conversion-based using implicit temporary storage)" OFF)
option(GINKGO_ENABLE_HALF "Enable the use of half precision" ON)
# We do not support half precision in MSVC.
# The FORCE keyword overwrites the cache entry created by the option() call
# above, so half precision is switched off for MSVC even if the user
# explicitly requested -DGINKGO_ENABLE_HALF=ON.
if(MSVC)
message(STATUS "We do not support half precision in MSVC.")
set(GINKGO_ENABLE_HALF OFF CACHE BOOL "Enable the use of half precision" FORCE)
endif()
option(GINKGO_SKIP_DEPENDENCY_UPDATE
"Do not update dependencies each time the project is rebuilt" ON)
option(GINKGO_WITH_CLANG_TIDY "Make Ginkgo call `clang-tidy` to find programming issues." OFF)
Expand Down Expand Up @@ -298,9 +304,11 @@ endif()

# When the SYCL backend is built, fill the GINKGO_DPCPP_* variables through
# ginkgo_extract_dpcpp_version (helper not visible here; presumably it reads
# the named macro from the given compiler). Without SYCL, the major and minor
# version fall back to "0".
# NOTE(review): GINKGO_DPCPP_VERSION gets no fallback value in the else()
# branch - confirm that the configured config.hpp does not rely on it.
if(GINKGO_BUILD_SYCL)
ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_MAJOR_VERSION __LIBSYCL_MAJOR_VERSION)
ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_MINOR_VERSION __LIBSYCL_MINOR_VERSION)
ginkgo_extract_dpcpp_version(${CMAKE_CXX_COMPILER} GINKGO_DPCPP_VERSION __SYCL_COMPILER_VERSION)
else()
set(GINKGO_DPCPP_MAJOR_VERSION "0")
set(GINKGO_DPCPP_MINOR_VERSION "0")
endif()
configure_file(${Ginkgo_SOURCE_DIR}/include/ginkgo/config.hpp.in
${Ginkgo_BINARY_DIR}/include/ginkgo/config.hpp @ONLY)
Expand Down
3 changes: 3 additions & 0 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ Ginkgo adds the following additional switches to control what is being built:
instead of converting data on the fly, default is `OFF`.
Enabling this flag increases the library size, but improves performance of
mixed-precision kernels.
* `-DGINKGO_ENABLE_HALF={ON, OFF}` enables half precision support in Ginkgo, default is `ON`.
It is forced to `OFF` when the compiler is MSVC. When the CUDA backend is compiled with a CUDA version older than 12.2,
half precision is only supported for compute capability 5.3 and above. CUDA 12.2+ compilers waive the compute capability limitation.
* `-DGINKGO_BUILD_TESTS={ON, OFF}` builds Ginkgo's tests
(will download googletest), default is `ON`.
* `-DGINKGO_FAST_TESTS={ON, OFF}` reduces the input sizes for a few slow tests
Expand Down
15 changes: 14 additions & 1 deletion accessor/cuda_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,15 @@
#include "utils.hpp"


struct __half;


namespace gko {


class half;


namespace acc {
namespace detail {

Expand All @@ -27,6 +35,11 @@ struct cuda_type {
using type = T;
};

// Map Ginkgo's half precision type to `__half`. Both `gko::half` and
// `__half` are only forward-declared in this header, so this specialization
// does not require their definitions.
template <>
struct cuda_type<gko::half> {
using type = __half;
};

// Unpack cv and reference / pointer qualifiers
template <typename T>
struct cuda_type<const T> {
Expand Down Expand Up @@ -57,7 +70,7 @@ struct cuda_type<T&&> {
// Transform std::complex to thrust::complex
template <typename T>
struct cuda_type<std::complex<T>> {
using type = thrust::complex<T>;
using type = thrust::complex<typename cuda_type<T>::type>;
};


Expand Down
14 changes: 13 additions & 1 deletion accessor/hip_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,15 @@
#include "utils.hpp"


struct __half;


namespace gko {


class half;


namespace acc {
namespace detail {

Expand Down Expand Up @@ -53,11 +61,15 @@ struct hip_type<T&&> {
using type = typename hip_type<T>::type&&;
};

// Map Ginkgo's half precision type to `__half`. Both `gko::half` and
// `__half` are only forward-declared in this header, so this specialization
// does not require their definitions.
template <>
struct hip_type<gko::half> {
using type = __half;
};

// Transform std::complex to thrust::complex
template <typename T>
struct hip_type<std::complex<T>> {
using type = thrust::complex<T>;
using type = thrust::complex<typename hip_type<T>::type>;
};


Expand Down
8 changes: 3 additions & 5 deletions accessor/reference_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,18 +12,16 @@
#include "utils.hpp"


// CUDA TOOLKIT < 11 does not support constexpr in combination with
// thrust::complex, which is why constexpr is only present in later versions
#if defined(__CUDA_ARCH__) && defined(__CUDACC_VER_MAJOR__) && \
(__CUDACC_VER_MAJOR__ < 11)
// NVC++ does not allow a constexpr function to have a non-literal return
// type such as half
#if defined(__NVCOMPILER) && GINKGO_ENABLE_HALF

#define GKO_ACC_ENABLE_REFERENCE_CONSTEXPR

#else

#define GKO_ACC_ENABLE_REFERENCE_CONSTEXPR constexpr

#endif // __CUDA_ARCH__ && __CUDACC_VER_MAJOR__ && __CUDACC_VER_MAJOR__ < 11
#endif


namespace gko {
Expand Down
202 changes: 202 additions & 0 deletions accessor/sycl_helper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_ACCESSOR_SYCL_HELPER_HPP_
#define GKO_ACCESSOR_SYCL_HELPER_HPP_


#include <complex>
#include <type_traits>

#include "block_col_major.hpp"
#include "reduced_row_major.hpp"
#include "row_major.hpp"
#include "scaled_reduced_row_major.hpp"
#include "utils.hpp"


// namespace sycl {
// inline namespace _V1 {


// class half;


// }
// } // namespace sycl


namespace gko {


class half;


template <typename V>
class complex;


namespace acc {
namespace detail {


/**
 * Type-mapping trait used to obtain the SYCL counterpart of a type.
 *
 * The primary template maps every type to itself; the specializations of
 * this trait replace selected types.
 *
 * @tparam T  the type to map
 */
template <typename T>
struct sycl_type {
using type = T;
};

// Map Ginkgo's half precision type to `sycl::half`.
// NOTE(review): this header neither includes a SYCL header nor declares
// `sycl::half` itself (the forward declaration near the top of the file is
// commented out) - presumably every includer provides <sycl/sycl.hpp>
// beforehand; confirm.
template <>
struct sycl_type<gko::half> {
using type = sycl::half;
};

// Unpack cv and reference / pointer qualifiers: each specialization below
// strips one qualifier, maps the remaining type through sycl_type again and
// re-applies the qualifier to the mapped type.
// const T  ->  const sycl_type<T>::type
template <typename T>
struct sycl_type<const T> {
using type = const typename sycl_type<T>::type;
};

// volatile T  ->  volatile sycl_type<T>::type
template <typename T>
struct sycl_type<volatile T> {
using type = volatile typename sycl_type<T>::type;
};

// T*  ->  sycl_type<T>::type*
template <typename T>
struct sycl_type<T*> {
using type = typename sycl_type<T>::type*;
};

// T&  ->  sycl_type<T>::type&
template <typename T>
struct sycl_type<T&> {
using type = typename sycl_type<T>::type&;
};

// T&&  ->  sycl_type<T>::type&&
template <typename T>
struct sycl_type<T&&> {
using type = typename sycl_type<T>::type&&;
};


// Transform the underlying type of std::complex: the result is still a
// std::complex, but its value type is mapped through sycl_type.
template <typename T>
struct sycl_type<std::complex<T>> {
using type = std::complex<typename sycl_type<T>::type>;
};


// Complex half is the exception to the std::complex rule above: it is mapped
// to gko::complex (only forward-declared in this header) instantiated with
// the mapped half type, instead of to a std::complex.
template <>
struct sycl_type<std::complex<gko::half>> {
using type = gko::complex<typename sycl_type<gko::half>::type>;
};


} // namespace detail


/**
 * This is an alias for SYCL's equivalent of `T`, i.e. a shorthand for
 * `typename detail::sycl_type<T>::type`.
 *
 * @tparam T  a type
 */
template <typename T>
using sycl_type_t = typename detail::sycl_type<T>::type;


/**
 * Reinterprets the passed in pointer (or reference) as its SYCL counterpart.
 *
 * This overload only participates in overload resolution if `T` is a pointer
 * or a reference type.
 *
 * @tparam T  the pointer or reference type of the passed in value
 *
 * @param val  the value to reinterpret
 *
 * @return `val` reinterpreted to the SYCL type `sycl_type_t<T>`
 */
template <typename T>
std::enable_if_t<std::is_pointer<T>::value || std::is_reference<T>::value,
                 sycl_type_t<T>>
as_sycl_type(T val)
{
    using target_type = sycl_type_t<T>;
    return reinterpret_cast<target_type>(val);
}


/**
 * Reinterprets the passed in value as its SYCL counterpart.
 *
 * This overload only participates in overload resolution if `T` is neither a
 * pointer nor a reference type. The by-value parameter is reinterpreted in
 * place and the result is returned by value.
 *
 * @tparam T  the (non-pointer, non-reference) type of the passed in value
 *
 * @param val  the value to reinterpret
 *
 * @return `val` reinterpreted to the SYCL type `sycl_type_t<T>`
 */
template <typename T>
std::enable_if_t<!std::is_pointer<T>::value && !std::is_reference<T>::value,
                 sycl_type_t<T>>
as_sycl_type(T val)
{
    using target_type = sycl_type_t<T>;
    auto* const reinterpreted = reinterpret_cast<target_type*>(&val);
    return *reinterpreted;
}


/**
 * Changes the types of the passed in range to their SYCL counterparts and
 * reinterprets its pointers as SYCL pointers.
 *
 * @param r  the range whose pointers need to be reinterpreted
 *
 * @return `r` with appropriate types and reinterpreted to SYCL pointers
 */
template <std::size_t dim, typename Type1, typename Type2>
GKO_ACC_INLINE auto as_sycl_range(
    const range<reduced_row_major<dim, Type1, Type2>>& r)
{
    using sycl_accessor =
        reduced_row_major<dim, sycl_type_t<Type1>, sycl_type_t<Type2>>;
    const auto& accessor = r.get_accessor();
    return range<sycl_accessor>(accessor.get_size(),
                                as_sycl_type(accessor.get_stored_data()),
                                accessor.get_stride());
}

/**
 * Overload for ranges using the `scaled_reduced_row_major` accessor: both
 * the stored data and the scalar pointers are reinterpreted as SYCL
 * pointers, while size, strides and the mask are passed on unchanged.
 *
 * @param r  the range whose pointers need to be reinterpreted
 *
 * @return `r` with appropriate types and reinterpreted to SYCL pointers
 */
template <std::size_t dim, typename Type1, typename Type2, std::uint64_t mask>
GKO_ACC_INLINE auto as_sycl_range(
    const range<scaled_reduced_row_major<dim, Type1, Type2, mask>>& r)
{
    using sycl_accessor =
        scaled_reduced_row_major<dim, sycl_type_t<Type1>, sycl_type_t<Type2>,
                                 mask>;
    const auto& accessor = r.get_accessor();
    return range<sycl_accessor>(accessor.get_size(),
                                as_sycl_type(accessor.get_stored_data()),
                                accessor.get_storage_stride(),
                                as_sycl_type(accessor.get_scalar()),
                                accessor.get_scalar_stride());
}

/**
 * Overload for ranges using the `block_col_major` accessor: the `data`
 * member is reinterpreted to its SYCL type, while `lengths` and `stride` are
 * passed on unchanged.
 *
 * @param r  the range whose data needs to be reinterpreted
 *
 * @return `r` with appropriate types and reinterpreted to SYCL pointers
 */
template <typename T, size_type dim>
GKO_ACC_INLINE auto as_sycl_range(const range<block_col_major<T, dim>>& r)
{
    using sycl_accessor = block_col_major<sycl_type_t<T>, dim>;
    const auto& accessor = r.get_accessor();
    return range<sycl_accessor>(accessor.lengths, as_sycl_type(accessor.data),
                                accessor.stride);
}

/**
 * Overload of as_sycl_range() for ranges using the `row_major` accessor.
 *
 * NOTE(review): the returned range is built on `block_col_major`, not on
 * `row_major`, and the function body is identical to the one of the
 * `block_col_major` overload. This looks like a copy-paste slip; confirm
 * against the constructors in row_major.hpp before changing the return type.
 *
 * @param r  the range whose data needs to be reinterpreted
 *
 * @return a `range<block_col_major<sycl_type_t<T>, dim>>` constructed from
 *         the accessor's `lengths`, its reinterpreted `data` member and its
 *         `stride`
 */
template <typename T, size_type dim>
GKO_ACC_INLINE auto as_sycl_range(const range<row_major<T, dim>>& r)
{
return range<block_col_major<sycl_type_t<T>, dim>>(
r.get_accessor().lengths, as_sycl_type(r.get_accessor().data),
r.get_accessor().stride);
}

/**
 * Backend-neutral entry point: forwards the passed in range to the matching
 * as_sycl_range() overload and returns its result.
 *
 * @param acc  the range to convert; perfectly forwarded to as_sycl_range()
 *
 * @return the result of `as_sycl_range(acc)`
 */
template <typename AccType>
GKO_ACC_INLINE auto as_device_range(AccType&& acc)
{
return as_sycl_range(std::forward<AccType>(acc));
}


} // namespace acc
} // namespace gko


#endif // GKO_ACCESSOR_SYCL_HELPER_HPP_
6 changes: 6 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@ function(ginkgo_benchmark_cusparse_linops type def)
PRIVATE
$<$<COMPILE_LANGUAGE:CUDA>:--expt-relaxed-constexpr>)
endif()
if(GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE)
target_compile_definitions(cusparse_linops_${type} PRIVATE THRUST_CUB_WRAPPED_NAMESPACE=gko)
endif()
# make the dependency public to catch issues
target_compile_definitions(cusparse_linops_${type} PUBLIC ${def})
target_compile_definitions(cusparse_linops_${type} PRIVATE GKO_COMPILING_CUDA)
Expand All @@ -28,6 +31,9 @@ endfunction()
function(ginkgo_benchmark_hipsparse_linops type def)
add_library(hipsparse_linops_${type} utils/hip_linops.hip.cpp)
set_source_files_properties(utils/hip_linops.hip.cpp PROPERTIES LANGUAGE HIP)
if(GINKGO_CUDA_CUSTOM_THRUST_NAMESPACE)
target_compile_definitions(hipsparse_linops_${type} PRIVATE THRUST_CUB_WRAPPED_NAMESPACE=gko)
endif()
target_compile_definitions(hipsparse_linops_${type} PUBLIC ${def})
target_compile_definitions(hipsparse_linops_${type} PRIVATE GKO_COMPILING_HIP)
target_include_directories(hipsparse_linops_${type} SYSTEM PRIVATE ${HIPBLAS_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS})
Expand Down
2 changes: 1 addition & 1 deletion benchmark/utils/dpcpp_timer.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

#include <iostream>

#include <CL/sycl.hpp>
#include <sycl/sycl.hpp>

#include "benchmark/utils/timer_impl.hpp"

Expand Down
8 changes: 4 additions & 4 deletions benchmark/utils/formats.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ using hybrid = gko::matrix::Hybrid<etype, itype>;
using csr = gko::matrix::Csr<etype, itype>;
using coo = gko::matrix::Coo<etype, itype>;
using ell = gko::matrix::Ell<etype, itype>;
using ell_mixed = gko::matrix::Ell<gko::next_precision<etype>, itype>;
using ell_mixed = gko::matrix::Ell<gko::next_precision_base<etype>, itype>;


/**
Expand Down Expand Up @@ -274,7 +274,7 @@ std::unique_ptr<gko::LinOp> matrix_factory(
check_ell_admissibility(data);
}
if (format == "ell_mixed") {
gko::matrix_data<gko::next_precision<etype>, itype> conv_data;
gko::matrix_data<gko::next_precision_base<etype>, itype> conv_data;
conv_data.size = data.size;
conv_data.nonzeros.resize(data.nonzeros.size());
auto it = conv_data.nonzeros.begin();
Expand All @@ -284,8 +284,8 @@ std::unique_ptr<gko::LinOp> matrix_factory(
it->value = el.value;
++it;
}
gko::as<gko::ReadableFromMatrixData<gko::next_precision<etype>, itype>>(
mat.get())
gko::as<gko::ReadableFromMatrixData<gko::next_precision_base<etype>,
itype>>(mat.get())
->read(conv_data);
} else {
gko::as<gko::ReadableFromMatrixData<etype, itype>>(mat.get())->read(
Expand Down
Loading

0 comments on commit aeafaa6

Please sign in to comment.