[wip] compile tests with device kernels

ginkgo-project · Feb 3, 2025 · 7285f08 · 7285f08
1 parent 0a7c301
commit 7285f08
Show file tree

Hide file tree

Showing 3 changed files with 124 additions and 16 deletions.
diff --git a/examples/batched-matrix-free-templated/tensor.hpp b/examples/batched-matrix-free-templated/tensor.hpp
@@ -195,6 +195,48 @@ constexpr void simple_apply(
     advanced_apply(1.0, a, b, 0.0, x, tag);
 }
 
+#if defined(GINKGO_BUILD_CUDA) || defined(GINKGO_BUILD_HIP)
+
+
+__device__ void advanced_apply(
+    double alpha, tensor_left_item a,
+    gko::batch::multi_vector::batch_item<const double> b, double beta,
+    gko::batch::multi_vector::batch_item<double> x,
+    [[maybe_unused]] gko::cuda_hip_kernel)
+{
+    auto row =
+        static_cast<gko::size_type>(blockIdx.x * blockDim.x + threadIdx.x);
+    auto n = a.size_1d;
+    auto num_rows = n * n * n;
+
+    if (row >= num_rows) {
+        return;
+    }
+
+    auto k = row / (n * n);
+    auto j = (row - k * n * n) / n;
+    auto i = (row - k * n * n) % n;
+    auto vector_start = k * n * n + i;
+
+    ValueType acc = 0;
+    for (gko::size_type q = 0; q < n; q++) {
+        auto vector_index = vector_start + q * n;
+        acc = a.data[j * n + q] * b.values[vector_index] + acc;
+    }
+    x.values[row] = alpha * acc + beta * x.values[row];
+}
+
+__device__ void simple_apply(
+    const tensor_left_item& a,
+    const gko::batch::multi_vector::batch_item<const double>& b,
+    const gko::batch::multi_vector::batch_item<double>& x,
+    gko::cuda_hip_kernel tag)
+{
+    advanced_apply(1.0, a, b, 0.0, x, tag);
+}
+
+#endif
+
 
 std::unique_ptr<gko::batch::matrix::Dense<ValueType>> convert(
     gko::ptr_param<const TensorLeft> tensor)

diff --git a/examples/batched-matrix-free-templated/test/CMakeLists.txt b/examples/batched-matrix-free-templated/test/CMakeLists.txt
@@ -1,2 +1,14 @@
-ginkgo_create_test(tensor NO_GTEST_MAIN ADDITIONAL_LIBRARIES Kokkos::kokkos ginkgo_gtest_main_kokkos)
-target_link_libraries(test_tensor PRIVATE Kokkos::kokkos)
+ginkgo_create_common_and_reference_test(tensor NO_GTEST_MAIN ADDITIONAL_LIBRARIES Kokkos::kokkos ginkgo_gtest_main_kokkos)
+
+if(GINKGO_BUILD_HIP)
+    target_link_libraries(test_tensor_hip PRIVATE Kokkos::kokkos)
+endif ()
+if(GINKGO_BUILD_CUDA)
+    target_link_libraries(test_tensor_cuda PRIVATE Kokkos::kokkos)
+endif ()
+if(GINKGO_BUILD_OMP)
+    target_link_libraries(test_tensor_omp PRIVATE Kokkos::kokkos)
+endif ()
+if(GINKGO_BUILD_REFERENCE)
+    target_link_libraries(test_tensor_reference PRIVATE Kokkos::kokkos)
+endif ()
diff --git a/examples/batched-matrix-free-templated/test/tensor.cpp b/examples/batched-matrix-free-templated/test/tensor.cpp
@@ -8,28 +8,48 @@
 
 #include <gtest/gtest.h>
 
+#include <test/utils/common_fixture.hpp>
+
 #include "core/matrix/batch_struct.hpp"
 #include "core/test/utils.hpp"
 
-auto exec = gko::ReferenceExecutor::create();
 
-TEST(Tensor, CanCreateEmpty)
+constexpr auto get_kernel_tag()
+{
+#if GKO_COMPILING_REFERENCE
+    return gko::reference_kernel{};
+#elif GKO_COMPILING_OMP
+    return gko::omp_kernel{};
+#elif GKO_COMPILING_CUDA
+    return gko::cuda_kernel{};
+#elif GKO_COMPILING_HIP
+    return gko::hip_kernel{};
+#elif GKO_COMPILING_DPCPP
+    return gko::sycl_kernel{};
+#endif
+}
+
+
+class TensorCreate : public CommonTestFixture {};
+
+
+TEST_F(TensorCreate, CanCreateEmpty)
 {
     auto tensor = std::make_unique<tensor::TensorLeft>(exec);
 
     ASSERT_EQ(tensor->get_size(), gko::batch_dim<2>{});
     ASSERT_EQ(tensor->get_executor(), exec);
 }
 
-TEST(Tensor, CanCreateWithSize)
+TEST_F(TensorCreate, CanCreateWithSize)
 {
     auto tensor = std::make_unique<tensor::TensorLeft>(exec, 3, 4);
 
     auto expected_size = gko::batch_dim<2>{3, gko::dim<2>{64, 64}};
     ASSERT_EQ(tensor->get_size(), expected_size);
 }
 
-TEST(Tensor, CanCreateFromData)
+TEST_F(TensorCreate, CanCreateFromData)
 {
     auto data = gko::batch::matrix::Dense<tensor::ValueType>::create(
         exec, gko::batch_dim<2>{3, gko::dim<2>{4, 4}});
@@ -51,7 +71,10 @@ TEST(Tensor, CanCreateFromData)
 }
 
 
-TEST(TensorConvert, CanConvertDenseId)
+class TensorConvert : public CommonTestFixture {};
+
+
+TEST_F(TensorConvert, CanConvertDenseId)
 {
     auto A = gko::initialize<gko::matrix::Dense<tensor::ValueType>>(
         {{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, exec);
@@ -72,7 +95,7 @@ TEST(TensorConvert, CanConvertDenseId)
     GKO_ASSERT_MTX_NEAR(result, expected, 0.0);
 }
 
-TEST(TensorConvert, CanConvertIdDense)
+TEST_F(TensorConvert, CanConvertIdDense)
 {
     auto A = gko::initialize<gko::matrix::Dense<tensor::ValueType>>(
         {{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, exec);
@@ -94,9 +117,9 @@ TEST(TensorConvert, CanConvertIdDense)
 }
 
 
-class Tensor2 : public testing::Test {
+class TensorApply : public CommonTestFixture {
 public:
-    Tensor2()
+    TensorApply()
     {
         auto data = gko::batch::matrix::Dense<tensor::ValueType>::create(
             exec, gko::batch_dim<2>{3, gko::dim<2>{4, 4}});
@@ -133,25 +156,56 @@ class Tensor2 : public testing::Test {
     std::unique_ptr<gko::batch::MultiVector<tensor::ValueType>> b;
 };
 
-TEST_F(Tensor2, CanConvert)
+TEST_F(TensorApply, CanConvert)
 {
     auto mat = convert(tensor);
 
     ASSERT_EQ(mat->get_size(), tensor->get_size());
     gko::write(std::ofstream("batch.mtx"), mat->create_view_for_item(1));
 }
 
-TEST_F(Tensor2, CanApplySingleBatch)
+#if defined(GKO_COMPILING_HIP) || defined(GKO_COMPILING_CUDA)
+
+__global__ void call_simple_apply_kernel(
+    const tensor::tensor_left_item a,
+    const gko::batch::multi_vector::batch_item<const double> b,
+    const gko::batch::multi_vector::batch_item<double> x)
+{
+    tensor::simple_apply(a, b, x, get_kernel_tag());
+}
+
+void call_simple_apply(
+    const tensor::tensor_left_item a,
+    const gko::batch::multi_vector::batch_item<const double> b,
+    const gko::batch::multi_vector::batch_item<double> x)
+{
+    call_simple_apply_kernel<<<1, 512>>>(a, b, x);
+}
+
+#else
+
+void call_simple_apply(
+    const tensor::tensor_left_item a,
+    const gko::batch::multi_vector::batch_item<const double> b,
+    const gko::batch::multi_vector::batch_item<double> x)
+{
+    tensor::simple_apply(a, b, x, get_kernel_tag());
+}
+
+#endif
+
+
+TEST_F(TensorApply, CanApplySingleBatch)
 {
     gko::size_type batch_id = 1;
     auto view = tensor->create_view();
     auto item = tensor::extract_batch_item(view, batch_id);
     auto x_view = gko::batch::to_const(x->create_view());
     auto b_view = b->create_view();
 
-    tensor::simple_apply(item, gko::batch::extract_batch_item(x_view, batch_id),
-                         gko::batch::extract_batch_item(b_view, batch_id),
-                         gko::reference_kernel{});
+    call_simple_apply(item, gko::batch::extract_batch_item(x_view, batch_id),
+                      gko::batch::extract_batch_item(b_view, batch_id));
+    exec->synchronize();
 
     auto dense = convert(tensor);
     auto expected_b = gko::clone(b);
@@ -161,7 +215,7 @@ TEST_F(Tensor2, CanApplySingleBatch)
                         r<tensor::ValueType>::value);
 }
 
-TEST_F(Tensor2, CanApply)
+TEST_F(TensorApply, CanApply)
 {
     tensor->apply(x, b);