[wip] add benchmark

ginkgo-project · Feb 4, 2025 · a9b37ae · a9b37ae
1 parent 4b7cd1b
commit a9b37ae
Show file tree

Hide file tree

Showing 6 changed files with 181 additions and 5 deletions.
diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
+// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
 //
 // SPDX-License-Identifier: BSD-3-Clause
 
@@ -338,7 +338,7 @@ const std::map<std::string, std::function<std::shared_ptr<gko::Executor>(bool)>>
         {"hip",
          [](bool) {
              return gko::HipExecutor::create(FLAGS_device_id,
-                                             gko::OmpExecutor::create(),
+                                             gko::ReferenceExecutor::create(),
                                              create_hip_allocator());
          }},
         {"dpcpp", [](bool use_gpu_timer) {

diff --git a/benchmark/utils/general_matrix.hpp b/benchmark/utils/general_matrix.hpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
+// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
 //
 // SPDX-License-Identifier: BSD-3-Clause
 
@@ -11,7 +11,6 @@
 #include <ginkgo/ginkgo.hpp>
 
 #include "benchmark/utils/general.hpp"
-#include "benchmark/utils/generator.hpp"
 
 
 std::string reordering_algorithm_desc =

diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp
@@ -1,4 +1,4 @@
-// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
+// SPDX-FileCopyrightText: 2017 - 2025 The Ginkgo authors
 //
 // SPDX-License-Identifier: BSD-3-Clause
 
@@ -80,6 +80,26 @@ std::unique_ptr<MatrixType> generate_random_batch_matrix(
 }
 
 
+/**
+ * Generates a batch object of the given type filled with random values.
+ *
+ * One random array with num_batch_items * num_rows * num_cols entries is
+ * requested from generate_random_array and handed, together with the batch
+ * dimensions, to MatrixType::create.
+ *
+ * @tparam MatrixType  the batch type to create; it has to provide a
+ *                     value_type member and a create overload accepting
+ *                     (exec, batch_dim<2>, random array)
+ * @tparam ValueDistribution  type of the value distribution
+ * @tparam Engine  type of the random engine
+ *
+ * @param num_batch_items  number of items in the batch
+ * @param num_rows  number of rows of each batch item
+ * @param num_cols  number of columns of each batch item
+ * @param value_dist  distribution forwarded to generate_random_array
+ * @param engine  random engine forwarded to generate_random_array
+ * @param exec  executor passed to generate_random_array and to
+ *              MatrixType::create
+ *
+ * @return the object returned by MatrixType::create
+ */
+template <typename MatrixType, typename ValueDistribution, typename Engine>
+std::unique_ptr<MatrixType> generate_random_batch_dense_matrix(
+    const size_type num_batch_items, const size_type num_rows,
+    const size_type num_cols, ValueDistribution&& value_dist, Engine&& engine,
+    std::shared_ptr<const Executor> exec)
+{
+    using value_type = typename MatrixType::value_type;
+    auto random_array = generate_random_array<value_type>(
+        num_batch_items * num_rows * num_cols,
+        std::forward<ValueDistribution>(value_dist),
+        std::forward<Engine>(engine), exec);
+    return MatrixType::create(
+        exec, batch_dim<2>(num_batch_items, dim<2>(num_rows, num_cols)),
+        std::move(random_array));
+}
+
+
 /**
  * Generate a batch of 1D Poisson (3pt stencil, {-1, 5, -1}) matrices in the
  * given input matrix format.

diff --git a/examples/batched-matrix-free-templated/CMakeLists.txt b/examples/batched-matrix-free-templated/CMakeLists.txt
@@ -54,3 +54,4 @@ target_link_libraries(batched-matrix-free-templated Ginkgo::ginkgo cxxopts::cxxo
 if(GINKGO_BUILD_TESTS)
     add_subdirectory(test)
 endif ()
+# The benchmark subdirectory calls ginkgo_add_single_benchmark_executable,
+# which belongs to Ginkgo's benchmark infrastructure, so only add it when
+# benchmarks are enabled.
+if(GINKGO_BUILD_BENCHMARKS)
+    add_subdirectory(benchmark)
+endif ()
diff --git a/examples/batched-matrix-free-templated/benchmark/CMakeLists.txt b/examples/batched-matrix-free-templated/benchmark/CMakeLists.txt
@@ -0,0 +1,10 @@
+set(definitions)  # stays empty unless a CUDA or HIP build is selected below
+if (GINKGO_BUILD_CUDA)  # checked first: CUDA wins if both backends are enabled
+    set(definitions GKO_COMPILING_CUDA=1 GKO_DEVICE_NAMESPACE=cuda)
+elseif (GINKGO_BUILD_HIP)
+    set(definitions GKO_COMPILING_HIP=1 GKO_DEVICE_NAMESPACE=hip)
+endif ()
+
+ginkgo_add_single_benchmark_executable(bench_tensor "NO" "GKO_BENCHMARK_USE_DOUBLE_PRECISION" "d" tensor.cpp)  # NOTE(review): argument meaning is defined by the project helper - confirm there
+target_link_libraries(bench_tensor GTest::GTest Kokkos::kokkos)  # NOTE(review): GTest presumably needed by the core/test/utils headers - confirm
+target_compile_definitions(bench_tensor PRIVATE ${definitions})  # backend macros apply to bench_tensor only
diff --git a/examples/batched-matrix-free-templated/benchmark/tensor.cpp b/examples/batched-matrix-free-templated/benchmark/tensor.cpp
@@ -0,0 +1,146 @@
+// SPDX-FileCopyrightText: 2025 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include <benchmark/utils/general.hpp>
+#include <benchmark/utils/general_matrix.hpp>
+#include <benchmark/utils/iteration_control.hpp>
+#include <benchmark/utils/runner.hpp>
+#include <core/test/utils/array_generator.hpp>
+#include <core/test/utils/batch_helpers.hpp>
+#include <examples/batched-matrix-free-templated/tensor.hpp>
+
+DEFINE_string(apply, "matrix-free",
+              "The apply implementation: either >matrix-free<, or "
+              ">matrix-based<, or a >,< separated list.");
+
+using vtype = tensor::ValueType;  // value type of all batch objects below
+
+
+std::string get_example_config()  // one sample test case, dumped as JSON text
+{
+    return json::parse(R"([{"size_1d": 4, "num_batches": 10}])").dump(4);
+}
+
+struct TensorState {  // objects created by TensorBenchmark::setup
+    std::unique_ptr<gko::batch::matrix::Dense<vtype>> data_1d;  // random
+    std::unique_ptr<gko::batch::MultiVector<vtype>> x;  // random; apply output
+    std::unique_ptr<gko::batch::MultiVector<vtype>> b;  // zeros; apply input
+};
+
+/**
+ * Benchmark that times the apply of a tensor::TensorLeft operator, either
+ * directly ("matrix-free") or after tensor::convert ("matrix-based").
+ *
+ * The operations to run are taken from the --apply flag.
+ */
+struct TensorBenchmark : public Benchmark<TensorState> {
+    std::vector<std::string> operations = split(FLAGS_apply);
+    std::string name = "Tensor";
+
+    const std::string& get_name() const override { return name; }
+    const std::vector<std::string>& get_operations() const override
+    {
+        return operations;
+    }
+    bool should_print() const override { return true; }
+    std::string get_example_config() const override
+    {
+        return ::get_example_config();
+    }
+    /**
+     * Checks that a test case provides "size_1d" and "num_batches" as
+     * positive integers.
+     *
+     * Zero or negative values are rejected here because setup() converts
+     * them to unsigned sizes and cubes size_1d.
+     */
+    bool validate_config(const json& value) const override
+    {
+        const auto is_positive_integer = [&](const char* key) {
+            return value.contains(key) && value[key].is_number_integer() &&
+                   value[key].get<gko::int64>() > 0;
+        };
+        return is_positive_integer("size_1d") &&
+               is_positive_integer("num_batches");
+    }
+    std::string describe_config(const json& test_case) const override
+    {
+        std::stringstream ss;
+        ss << "tensor(" << test_case["size_1d"].get<gko::int64>() << ") x "
+           << test_case["num_batches"].get<gko::int64>();
+        return ss.str();
+    }
+    /**
+     * Creates the benchmark data for one test case.
+     *
+     * data_1d holds num_batches random size_1d x size_1d matrices, x is a
+     * random batch of vectors with size_1d^3 rows and b has the same size
+     * and is filled with zeros. The vector length is also written to the
+     * "rows" and "cols" entries of the test case.
+     */
+    TensorState setup(std::shared_ptr<gko::Executor> exec,
+                      json& test_case) const override
+    {
+        auto size_1d = test_case["size_1d"].get<gko::int64>();
+        auto num_batches = test_case["num_batches"].get<gko::int64>();
+        auto vec_size = gko::batch_dim<2>(
+            num_batches, gko::dim<2>(size_1d * size_1d * size_1d, 1));
+        // fixed seed, so every run generates the same data
+        auto engine = std::default_random_engine{42};
+        TensorState state{
+            gko::test::generate_random_batch_dense_matrix<
+                gko::batch::matrix::Dense<vtype>>(
+                num_batches, size_1d, size_1d,
+                std::uniform_real_distribution<>(), engine, exec),
+            gko::test::generate_random_batch_dense_matrix<
+                gko::batch::MultiVector<vtype>>(
+                num_batches, vec_size.get_common_size()[0], 1,
+                std::uniform_real_distribution<>(), engine, exec),
+            gko::batch::MultiVector<vtype>::create(exec, vec_size)};
+        state.b->fill(gko::zero<vtype>());
+
+        // the operator is square, so the vector length is reported for both
+        // dimensions
+        std::clog << "Matrix is of size (" << state.x->get_common_size()[0]
+                  << ", " << state.x->get_common_size()[0] << ")" << std::endl;
+        test_case["rows"] = state.x->get_common_size()[0];
+        test_case["cols"] = state.x->get_common_size()[0];
+
+        return state;
+    }
+    /**
+     * Runs warmup and timed applies for one operation and stores "time" and
+     * "repetitions" in operation_case.
+     *
+     * @throws std::runtime_error  if operation is neither "matrix-free" nor
+     *                             "matrix-based"
+     */
+    void run(std::shared_ptr<gko::Executor> exec, std::shared_ptr<Timer> timer,
+             annotate_functor annotate, TensorState& state,
+             const std::string& operation, json& operation_case) const override
+    {
+        auto run_impl = [&](const auto& op) {
+            IterationControl ic{timer};
+            // warm run
+            {
+                auto range = annotate("warmup", FLAGS_warmup > 0);
+                for (auto _ : ic.warmup_run()) {
+                    auto x_clone = clone(state.x);
+                    exec->synchronize();
+                    op->apply(state.b, x_clone);
+                    exec->synchronize();
+                }
+            }
+
+            // timed run
+            auto x_clone = clone(state.x);
+            for (auto _ : ic.run()) {
+                auto range = annotate("repetition");
+                op->apply(state.b, x_clone);
+            }
+            operation_case["time"] = ic.compute_time(FLAGS_timer_method);
+            operation_case["repetitions"] = ic.get_num_repetitions();
+        };
+
+        // named differently from the tensor namespace it is built from
+        auto matrix_free_op =
+            std::make_shared<tensor::TensorLeft>(gko::clone(state.data_1d));
+        if (operation == "matrix-free") {
+            run_impl(matrix_free_op);
+        } else if (operation == "matrix-based") {
+            run_impl(tensor::convert(matrix_free_op));
+        } else {
+            throw std::runtime_error("Unsupported operation: " + operation);
+        }
+    }
+};
+
+
+/**
+ * Entry point of the tensor apply benchmark.
+ *
+ * Parses the command line, creates the executor selected by --executor,
+ * reads the JSON test cases from the input stream, runs TensorBenchmark on
+ * them and prints the annotated test cases to standard output.
+ */
+int main(int argc, char* argv[])
+{
+    // describes this benchmark; the text was previously copied from the
+    // spmv benchmark
+    std::string header =
+        "A benchmark for measuring performance of the batched tensor apply "
+        "(matrix-free and matrix-based).\n";
+    std::string format = get_example_config();
+    initialize_argument_parsing_matrix(&argc, &argv, header, format);
+
+    auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer);
+
+    std::string extra_information =
+        "The apply formats are " + FLAGS_apply + ".";
+    print_general_information(extra_information, exec);
+
+    auto test_cases = json::parse(get_input_stream());
+
+    auto benchmark = TensorBenchmark{};
+    run_test_cases(benchmark, exec, get_timer(exec, FLAGS_gpu_timer),
+                   test_cases);
+
+    std::cout << std::setw(4) << test_cases << std::endl;
+}