Skip to content

Commit

Permalink
[wip] compile tests with device kernels
Browse files Browse the repository at this point in the history
  • Loading branch information
MarcelKoch committed Feb 3, 2025
1 parent 0a7c301 commit 7285f08
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 16 deletions.
42 changes: 42 additions & 0 deletions examples/batched-matrix-free-templated/tensor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,48 @@ constexpr void simple_apply(
advanced_apply(1.0, a, b, 0.0, x, tag);
}

#if defined(GINKGO_BUILD_CUDA) || defined(GINKGO_BUILD_HIP)


__device__ void advanced_apply(
double alpha, tensor_left_item a,
gko::batch::multi_vector::batch_item<const double> b, double beta,
gko::batch::multi_vector::batch_item<double> x,
[[maybe_unused]] gko::cuda_hip_kernel)
{
auto row =
static_cast<gko::size_type>(blockIdx.x * blockDim.x + threadIdx.x);
auto n = a.size_1d;
auto num_rows = n * n * n;

if (row >= num_rows) {
return;
}

auto k = row / (n * n);
auto j = (row - k * n * n) / n;
auto i = (row - k * n * n) % n;
auto vector_start = k * n * n + i;

ValueType acc = 0;
for (gko::size_type q = 0; q < n; q++) {
auto vector_index = vector_start + q * n;
acc = a.data[j * n + q] * b.values[vector_index] + acc;
}
x.values[row] = alpha * acc + beta * x.values[row];
}

__device__ void simple_apply(
const tensor_left_item& a,
const gko::batch::multi_vector::batch_item<const double>& b,
const gko::batch::multi_vector::batch_item<double>& x,
gko::cuda_hip_kernel tag)
{
advanced_apply(1.0, a, b, 0.0, x, tag);
}

#endif


std::unique_ptr<gko::batch::matrix::Dense<ValueType>> convert(
gko::ptr_param<const TensorLeft> tensor)
Expand Down
16 changes: 14 additions & 2 deletions examples/batched-matrix-free-templated/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,14 @@
ginkgo_create_test(tensor NO_GTEST_MAIN ADDITIONAL_LIBRARIES Kokkos::kokkos ginkgo_gtest_main_kokkos)
target_link_libraries(test_tensor PRIVATE Kokkos::kokkos)
ginkgo_create_common_and_reference_test(tensor NO_GTEST_MAIN ADDITIONAL_LIBRARIES Kokkos::kokkos ginkgo_gtest_main_kokkos)

if(GINKGO_BUILD_HIP)
target_link_libraries(test_tensor_hip PRIVATE Kokkos::kokkos)
endif ()
if(GINKGO_BUILD_CUDA)
target_link_libraries(test_tensor_cuda PRIVATE Kokkos::kokkos)
endif ()
if(GINKGO_BUILD_OMP)
target_link_libraries(test_tensor_omp PRIVATE Kokkos::kokkos)
endif ()
if(GINKGO_BUILD_REFERENCE)
target_link_libraries(test_tensor_reference PRIVATE Kokkos::kokkos)
endif ()
82 changes: 68 additions & 14 deletions examples/batched-matrix-free-templated/test/tensor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,28 +8,48 @@

#include <gtest/gtest.h>

#include <test/utils/common_fixture.hpp>

#include "core/matrix/batch_struct.hpp"
#include "core/test/utils.hpp"

auto exec = gko::ReferenceExecutor::create();

TEST(Tensor, CanCreateEmpty)
constexpr auto get_kernel_tag()
{
#if GKO_COMPILING_REFERENCE
return gko::reference_kernel{};
#elif GKO_COMPILING_OMP
return gko::omp_kernel{};
#elif GKO_COMPILING_CUDA
return gko::cuda_kernel{};
#elif GKO_COMPILING_HIP
return gko::hip_kernel{};
#elif GKO_COMPILING_DPCPP
return gko::sycl_kernel{};
#endif
}


class TensorCreate : public CommonTestFixture {};


TEST_F(TensorCreate, CanCreateEmpty)
{
auto tensor = std::make_unique<tensor::TensorLeft>(exec);

ASSERT_EQ(tensor->get_size(), gko::batch_dim<2>{});
ASSERT_EQ(tensor->get_executor(), exec);
}

TEST(Tensor, CanCreateWithSize)
TEST_F(TensorCreate, CanCreateWithSize)
{
auto tensor = std::make_unique<tensor::TensorLeft>(exec, 3, 4);

auto expected_size = gko::batch_dim<2>{3, gko::dim<2>{64, 64}};
ASSERT_EQ(tensor->get_size(), expected_size);
}

TEST(Tensor, CanCreateFromData)
TEST_F(TensorCreate, CanCreateFromData)
{
auto data = gko::batch::matrix::Dense<tensor::ValueType>::create(
exec, gko::batch_dim<2>{3, gko::dim<2>{4, 4}});
Expand All @@ -51,7 +71,10 @@ TEST(Tensor, CanCreateFromData)
}


TEST(TensorConvert, CanConvertDenseId)
class TensorConvert : public CommonTestFixture {};


TEST_F(TensorConvert, CanConvertDenseId)
{
auto A = gko::initialize<gko::matrix::Dense<tensor::ValueType>>(
{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, exec);
Expand All @@ -72,7 +95,7 @@ TEST(TensorConvert, CanConvertDenseId)
GKO_ASSERT_MTX_NEAR(result, expected, 0.0);
}

TEST(TensorConvert, CanConvertIdDense)
TEST_F(TensorConvert, CanConvertIdDense)
{
auto A = gko::initialize<gko::matrix::Dense<tensor::ValueType>>(
{{1.0, 2.0}, {3.0, 4.0}, {5.0, 6.0}}, exec);
Expand All @@ -94,9 +117,9 @@ TEST(TensorConvert, CanConvertIdDense)
}


class Tensor2 : public testing::Test {
class TensorApply : public CommonTestFixture {
public:
Tensor2()
TensorApply()
{
auto data = gko::batch::matrix::Dense<tensor::ValueType>::create(
exec, gko::batch_dim<2>{3, gko::dim<2>{4, 4}});
Expand Down Expand Up @@ -133,25 +156,56 @@ class Tensor2 : public testing::Test {
std::unique_ptr<gko::batch::MultiVector<tensor::ValueType>> b;
};

TEST_F(Tensor2, CanConvert)
TEST_F(TensorApply, CanConvert)
{
auto mat = convert(tensor);

ASSERT_EQ(mat->get_size(), tensor->get_size());
gko::write(std::ofstream("batch.mtx"), mat->create_view_for_item(1));
}

TEST_F(Tensor2, CanApplySingleBatch)
#if defined(GKO_COMPILING_HIP) || defined(GKO_COMPILING_CUDA)

__global__ void call_simple_apply_kernel(
const tensor::tensor_left_item a,
const gko::batch::multi_vector::batch_item<const double> b,
const gko::batch::multi_vector::batch_item<double> x)
{
tensor::simple_apply(a, b, x, get_kernel_tag());
}

void call_simple_apply(
const tensor::tensor_left_item a,
const gko::batch::multi_vector::batch_item<const double> b,
const gko::batch::multi_vector::batch_item<double> x)
{
call_simple_apply_kernel<<<1, 512>>>(a, b, x);
}

#else

void call_simple_apply(
const tensor::tensor_left_item a,
const gko::batch::multi_vector::batch_item<const double> b,
const gko::batch::multi_vector::batch_item<double> x)
{
tensor::simple_apply(a, b, x, get_kernel_tag());
}

#endif


TEST_F(TensorApply, CanApplySingleBatch)
{
gko::size_type batch_id = 1;
auto view = tensor->create_view();
auto item = tensor::extract_batch_item(view, batch_id);
auto x_view = gko::batch::to_const(x->create_view());
auto b_view = b->create_view();

tensor::simple_apply(item, gko::batch::extract_batch_item(x_view, batch_id),
gko::batch::extract_batch_item(b_view, batch_id),
gko::reference_kernel{});
call_simple_apply(item, gko::batch::extract_batch_item(x_view, batch_id),
gko::batch::extract_batch_item(b_view, batch_id));
exec->synchronize();

auto dense = convert(tensor);
auto expected_b = gko::clone(b);
Expand All @@ -161,7 +215,7 @@ TEST_F(Tensor2, CanApplySingleBatch)
r<tensor::ValueType>::value);
}

TEST_F(Tensor2, CanApply)
TEST_F(TensorApply, CanApply)
{
tensor->apply(x, b);

Expand Down

0 comments on commit 7285f08

Please sign in to comment.