From c8fa79b00d029a1263d319a1f4f451a7a031654f Mon Sep 17 00:00:00 2001 From: Joana Niermann Date: Thu, 30 Mar 2023 14:40:47 +0200 Subject: [PATCH] This adds a new vector wrapper class that allows for an SoA layout, in particular, to enable us to use the Vc::Vector type efficiently. The new storage/vector keeps an array-like data structure that holds the vector elements (say, x, y, z). Both the array type and the value type of the elements are templated (storage/vector< algebraic vector dim, aos/soa value type (scalar vs e.g. SIMD vector), array-like storage for the algebraic vector elements >), so that combinations like these are possible: 3-dim AoS std::array based vector (not necessary, since this would essentially duplicate the array plugin, while being more complicated): storage::vector<3, scalar_t, std::array> 3-dim AoS vertically vectorized (not there yet, but would re-implement the current vc_vc plugin, while making the extra vc_array4 wrapper superfluous): storage::vector<3, scalar_t, Vc::SimdArray> 3-dim SoA of size N, std::array based: storage::vector<3, std::array, std::array> 3-dim vectorized SoA (in this PR): storage::vector<3, Vc::Vector, std::array> Also adds benchmarks to compare to the std::array and Eigen AoS plugins. The benchmarks contain a base class that holds the basic configuration (e.g. number of warmup samples) and a derived vector benchmark class that holds the samples of randomly initialized vectors and is templated on the kind of vector operation to be benchmarked (unary and binary are possible). The benchmarks for the getter namespace also make use of the vector benchmark class. The benchmarks for single and double precision are then instantiated per plugin, including a prescription on how to produce a random vector. 
--- .gitignore | 3 + CMakeLists.txt | 5 + benchmarks/CMakeLists.txt | 74 ++++++ benchmarks/array/array_getter.cpp | 93 +++++++ benchmarks/array/array_vector.cpp | 93 +++++++ .../benchmark/array/data_generator.hpp | 32 +++ .../benchmark/common/benchmark_base.hpp | 108 ++++++++ .../benchmark/common/benchmark_getter.hpp | 53 ++++ .../benchmark/common/benchmark_vector.hpp | 181 +++++++++++++ benchmarks/eigen/eigen_getter.cpp | 93 +++++++ benchmarks/eigen/eigen_vector.cpp | 93 +++++++ .../benchmark/eigen/data_generator.hpp | 26 ++ .../benchmark/vc_soa/data_generator.hpp | 48 ++++ benchmarks/vc_soa/vc_soa_getter.cpp | 102 ++++++++ benchmarks/vc_soa/vc_soa_vector.cpp | 99 +++++++ cmake/algebra-plugins-functions.cmake | 19 ++ frontend/CMakeLists.txt | 3 + frontend/vc_soa/CMakeLists.txt | 14 + frontend/vc_soa/include/algebra/vc_soa.hpp | 80 ++++++ frontend/vc_vc/include/algebra/vc_vc.hpp | 2 +- math/CMakeLists.txt | 5 +- math/cmath/CMakeLists.txt | 1 + .../algebra/math/impl/cmath_operators.hpp | 3 +- math/vc_soa/CMakeLists.txt | 15 ++ .../algebra/math/impl/vc_soa_getter.hpp | 122 +++++++++ .../algebra/math/impl/vc_soa_vector.hpp | 93 +++++++ math/vc_soa/include/algebra/math/vc_soa.hpp | 14 + storage/CMakeLists.txt | 4 + storage/common/CMakeLists.txt | 12 + .../algebra/storage/array_operators.hpp | 107 ++++++++ .../common/include/algebra/storage/vector.hpp | 243 ++++++++++++++++++ storage/vc/CMakeLists.txt | 2 +- .../algebra/storage/impl/vc_array4.hpp | 1 + storage/vc/include/algebra/storage/vc.hpp | 2 +- storage/vc_soa/CMakeLists.txt | 13 + .../vc_soa/include/algebra/storage/vc_soa.hpp | 62 +++++ tests/CMakeLists.txt | 9 +- tests/vc_soa/vc_soa.cpp | 139 ++++++++++ 38 files changed, 2062 insertions(+), 6 deletions(-) create mode 100644 benchmarks/CMakeLists.txt create mode 100644 benchmarks/array/array_getter.cpp create mode 100644 benchmarks/array/array_vector.cpp create mode 100644 benchmarks/array/include/benchmark/array/data_generator.hpp create mode 100644 
benchmarks/common/include/benchmark/common/benchmark_base.hpp create mode 100644 benchmarks/common/include/benchmark/common/benchmark_getter.hpp create mode 100644 benchmarks/common/include/benchmark/common/benchmark_vector.hpp create mode 100644 benchmarks/eigen/eigen_getter.cpp create mode 100644 benchmarks/eigen/eigen_vector.cpp create mode 100644 benchmarks/eigen/include/benchmark/eigen/data_generator.hpp create mode 100644 benchmarks/vc_soa/include/benchmark/vc_soa/data_generator.hpp create mode 100644 benchmarks/vc_soa/vc_soa_getter.cpp create mode 100644 benchmarks/vc_soa/vc_soa_vector.cpp create mode 100644 frontend/vc_soa/CMakeLists.txt create mode 100644 frontend/vc_soa/include/algebra/vc_soa.hpp create mode 100644 math/vc_soa/CMakeLists.txt create mode 100644 math/vc_soa/include/algebra/math/impl/vc_soa_getter.hpp create mode 100644 math/vc_soa/include/algebra/math/impl/vc_soa_vector.hpp create mode 100644 math/vc_soa/include/algebra/math/vc_soa.hpp create mode 100644 storage/common/CMakeLists.txt create mode 100644 storage/common/include/algebra/storage/array_operators.hpp create mode 100644 storage/common/include/algebra/storage/vector.hpp create mode 100644 storage/vc_soa/CMakeLists.txt create mode 100644 storage/vc_soa/include/algebra/storage/vc_soa.hpp create mode 100644 tests/vc_soa/vc_soa.cpp diff --git a/.gitignore b/.gitignore index 259148fa..352b242b 100644 --- a/.gitignore +++ b/.gitignore @@ -30,3 +30,6 @@ *.exe *.out *.app + +# Local folders +/build \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 02dad413..c394307c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -175,5 +175,10 @@ if( BUILD_TESTING AND ALGEBRA_PLUGINS_BUILD_TESTING ) add_subdirectory( tests ) endif() +# Set up the benchmarks. +if( ALGEBRA_PLUGINS_BUILD_BENCHMARKS ) + add_subdirectory( benchmarks ) +endif() + # Set up the packaging of the project. 
include( algebra-plugins-packaging ) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt new file mode 100644 index 00000000..90cf663a --- /dev/null +++ b/benchmarks/CMakeLists.txt @@ -0,0 +1,74 @@ +# Algebra plugins library, part of the ACTS project (R&D line) +# +# (c) 2023 CERN for the benefit of the ACTS project +# +# Mozilla Public License Version 2.0 + +# Set the default C++ compiler flags. +include( algebra-plugins-compiler-options-cpp ) + +# Set up an INTERFACE library for the common header(s). +add_library( algebra_bench_common INTERFACE ) +target_include_directories( algebra_bench_common INTERFACE + "${CMAKE_CURRENT_SOURCE_DIR}/common/include" ) +if( "${CMAKE_CXX_COMPILER_ID}" MATCHES "MSVC" ) + target_compile_definitions( algebra_bench_common INTERFACE + -D_USE_MATH_DEFINES ) +endif() +add_library( algebra::bench_common ALIAS algebra_bench_common ) + +# Set up all of the (available) "host" benchmarks. +add_library( algebra_bench_array INTERFACE ) +target_include_directories( algebra_bench_array INTERFACE + "$" ) +target_link_libraries(algebra_bench_array INTERFACE algebra::array_cmath + algebra::common_storage) +add_library( algebra::bench_array ALIAS algebra_bench_array ) + +algebra_add_benchmark( array_getter + "array/array_getter.cpp" + LINK_LIBRARIES benchmark::benchmark algebra::bench_common + algebra::bench_array algebra::array_cmath ) +algebra_add_benchmark( array_vector + "array/array_vector.cpp" + LINK_LIBRARIES benchmark::benchmark algebra::bench_common + algebra::bench_array algebra::array_cmath ) + +if( ALGEBRA_PLUGINS_INCLUDE_EIGEN ) + add_library( algebra_bench_eigen INTERFACE ) + target_include_directories( algebra_bench_eigen INTERFACE + "$" ) + target_link_libraries(algebra_bench_eigen INTERFACE algebra::eigen_eigen + algebra::common_storage) + add_library( algebra::bench_eigen ALIAS algebra_bench_eigen ) + + algebra_add_benchmark( eigen_getter + "eigen/eigen_getter.cpp" + LINK_LIBRARIES benchmark::benchmark 
algebra::bench_common + algebra::bench_eigen algebra::eigen_eigen ) + algebra_add_benchmark( eigen_vector + "eigen/eigen_vector.cpp" + LINK_LIBRARIES benchmark::benchmark algebra::bench_common + algebra::bench_eigen algebra::eigen_eigen ) +endif() + +if( ALGEBRA_PLUGINS_INCLUDE_VC ) + if( NOT "${CMAKE_CXX_COMPILER_ID}" MATCHES "AppleClang" ) + add_library( algebra_bench_vc_soa INTERFACE ) + target_include_directories( algebra_bench_vc_soa INTERFACE + "$" ) + target_link_libraries(algebra_bench_vc_soa INTERFACE + algebra::vc_soa + algebra::common_storage) + add_library( algebra::bench_vc_soa ALIAS algebra_bench_vc_soa ) + + algebra_add_benchmark( vc_soa_getter + "vc_soa/vc_soa_getter.cpp" + LINK_LIBRARIES benchmark::benchmark algebra::bench_common + algebra::bench_vc_soa algebra::vc_soa ) + algebra_add_benchmark( vc_soa_vector + "vc_soa/vc_soa_vector.cpp" + LINK_LIBRARIES benchmark::benchmark algebra::bench_common + algebra::bench_vc_soa algebra::vc_soa ) + endif() +endif() diff --git a/benchmarks/array/array_getter.cpp b/benchmarks/array/array_getter.cpp new file mode 100644 index 00000000..e53cb24a --- /dev/null +++ b/benchmarks/array/array_getter.cpp @@ -0,0 +1,93 @@ +/** Algebra plugins library, part of the ACTS project + * + * (c) 2023 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// Project include(s) +#include "algebra/array_cmath.hpp" +#include "benchmark/array/data_generator.hpp" +#include "benchmark/common/benchmark_getter.hpp" + +// Benchmark include +#include + +// System include(s) +#include + +using namespace algebra; + +/// Run vector benchmarks +int main(int argc, char** argv) { + + constexpr std::size_t n_samples{160000}; + constexpr std::size_t n_warmup{static_cast(0.1 * n_samples)}; + + // + // Prepare benchmarks + // + algebra::benchmark_base::configuration cfg{}; + cfg.n_samples(n_samples).n_warmup(n_warmup); + cfg.do_sleep(false); + + vector_unaryOP_bm v_phi_s{cfg}; + vector_unaryOP_bm 
v_theta_s{cfg}; + vector_unaryOP_bm v_perp_s{cfg}; + vector_unaryOP_bm v_norm_s{cfg}; + vector_unaryOP_bm v_eta_s{cfg}; + + vector_unaryOP_bm v_phi_d{cfg}; + vector_unaryOP_bm v_theta_d{cfg}; + vector_unaryOP_bm v_perp_d{cfg}; + vector_unaryOP_bm v_norm_d{cfg}; + vector_unaryOP_bm v_eta_d{cfg}; + + std::cout << "Algebra-Plugins 'getter' benchmark (std::array)\n" + << "-----------------------------------------------\n\n" + << cfg; + + // + // Register all benchmarks + // + ::benchmark::RegisterBenchmark((v_phi_s.name() + "_single").c_str(), v_phi_s) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_phi_d.name() + "_double").c_str(), v_phi_d) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_theta_s.name() + "_single").c_str(), + v_theta_s) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_theta_d.name() + "_double").c_str(), + v_theta_d) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_perp_s.name() + "_single").c_str(), + v_perp_s) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_perp_d.name() + "_double").c_str(), + v_perp_d) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_norm_s.name() + "_single").c_str(), + v_norm_s) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_norm_d.name() + "_double").c_str(), + v_norm_d) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_eta_s.name() + "_single").c_str(), v_eta_s) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_eta_d.name() + "_double").c_str(), v_eta_d) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); +} diff --git a/benchmarks/array/array_vector.cpp b/benchmarks/array/array_vector.cpp new file mode 100644 
index 00000000..20b7ae05 --- /dev/null +++ b/benchmarks/array/array_vector.cpp @@ -0,0 +1,93 @@ +/** Algebra plugins library, part of the ACTS project + * + * (c) 2023 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +// Project include(s) +#include "algebra/array_cmath.hpp" +#include "benchmark/array/data_generator.hpp" +#include "benchmark/common/benchmark_vector.hpp" + +// Benchmark include +#include + +// System include(s) +#include + +using namespace algebra; + +/// Run vector benchmarks +int main(int argc, char** argv) { + + constexpr std::size_t n_samples{160000}; + constexpr std::size_t n_warmup{static_cast(0.1 * n_samples)}; + + // + // Prepare benchmarks + // + algebra::benchmark_base::configuration cfg{}; + cfg.n_samples(n_samples).n_warmup(n_warmup); + cfg.do_sleep(false); + + vector_binaryOP_bm v_add_s{cfg}; + vector_binaryOP_bm v_sub_s{cfg}; + vector_binaryOP_bm v_dot_s{cfg}; + vector_binaryOP_bm v_cross_s{cfg}; + vector_unaryOP_bm v_normalize_s{ + cfg}; + + vector_binaryOP_bm v_add_d{cfg}; + vector_binaryOP_bm v_sub_d{cfg}; + vector_binaryOP_bm v_dot_d{cfg}; + vector_binaryOP_bm v_cross_d{cfg}; + vector_unaryOP_bm v_normalize_d{ + cfg}; + + std::cout << "Algebra-Plugins 'vector' benchmark (std::array)\n" + << "-----------------------------------------------\n\n" + << cfg; + + // + // Register all benchmarks + // + ::benchmark::RegisterBenchmark((v_add_s.name() + "_single").c_str(), v_add_s) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_add_d.name() + "_double").c_str(), v_add_d) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_sub_s.name() + "_single").c_str(), v_sub_s) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_sub_d.name() + "_double").c_str(), v_sub_d) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_dot_s.name() + "_single").c_str(), v_dot_s) + 
->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_dot_d.name() + "_double").c_str(), v_dot_d) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_cross_s.name() + "_single").c_str(), + v_cross_s) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_cross_d.name() + "_double").c_str(), + v_cross_d) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_normalize_s.name() + "_single").c_str(), + v_normalize_s) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + ::benchmark::RegisterBenchmark((v_normalize_d.name() + "_double").c_str(), + v_normalize_d) + ->MeasureProcessCPUTime() + ->ThreadPerCpu(); + + ::benchmark::Initialize(&argc, argv); + ::benchmark::RunSpecifiedBenchmarks(); + ::benchmark::Shutdown(); +} diff --git a/benchmarks/array/include/benchmark/array/data_generator.hpp b/benchmarks/array/include/benchmark/array/data_generator.hpp new file mode 100644 index 00000000..33708714 --- /dev/null +++ b/benchmarks/array/include/benchmark/array/data_generator.hpp @@ -0,0 +1,32 @@ +/** Algebra plugins library, part of the ACTS project + * + * (c) 2023 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// System include(s) +#include +#include +#include + +namespace algebra { + +/// Fill an @c std::array based vector with random values +template +inline void fill_random(std::vector &collection) { + + // Generate a vector of the right type with random values + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_real_distribution dist(0.f, 1.f); + + auto rand_obj = [&]() { return vector_t{dist(mt), dist(mt), dist(mt)}; }; + + collection.resize(collection.capacity()); + std::generate(collection.begin(), collection.end(), rand_obj); +} + +} // namespace algebra \ No newline at end of file diff --git a/benchmarks/common/include/benchmark/common/benchmark_base.hpp 
b/benchmarks/common/include/benchmark/common/benchmark_base.hpp new file mode 100644 index 00000000..b3ca9fdd --- /dev/null +++ b/benchmarks/common/include/benchmark/common/benchmark_base.hpp @@ -0,0 +1,108 @@ +/** Algebra plugins library, part of the ACTS project + * + * (c) 2023 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Benchmark include +#include + +// System include(s) +#include +#include + +namespace algebra { + +/// Base type for linear algebra benchmarks with google benchmark +struct benchmark_base { + /// Local configuration type + struct configuration { + /// Size of data sample to be used in benchmark + std::size_t m_samples{100u}; + /// Run a number of operations before the benchmark + bool m_warmup = true; + // Sleep after building data sample + bool m_sleep = false; + // Size of data in warm-up round + std::size_t m_n_warmup{static_cast(0.1f * m_samples)}; + // Size of data in warm-up round + std::size_t m_n_sleep{1u}; + + /// Setters + /// @{ + configuration& n_samples(std::size_t n) { + m_samples = n; + return *this; + } + configuration& do_warmup(bool b) { + m_warmup = b; + return *this; + } + configuration& n_warmup(std::size_t n) { + m_n_warmup = n; + m_warmup = true; + return *this; + } + configuration& do_sleep(bool b) { + m_sleep = b; + return *this; + } + configuration& n_sleep(std::size_t n) { + m_n_sleep = n; + m_sleep = true; + return *this; + } + /// @} + + /// Getters + /// @{ + std::size_t n_samples() const { return m_samples; } + constexpr bool do_warmup() const { return m_warmup; } + constexpr std::size_t n_warmup() const { return m_n_warmup; } + constexpr bool do_sleep() const { return m_sleep; } + constexpr std::size_t n_sleep() const { return m_n_sleep; } + /// @} + + /// Print configuration + friend std::ostream& operator<<(std::ostream& os, const configuration& c); + }; + + /// The benchmark configuration + configuration m_cfg{}; + + /// Default construction + 
benchmark_base() = default; + + /// Construct from an externally provided configuration @param cfg + benchmark_base(configuration cfg) : m_cfg{cfg} {} + + /// @returns the benchmark configuration + configuration& config() { return m_cfg; } + + /// Default destructor + virtual ~benchmark_base() = default; + + /// @returns the benchmark name + virtual std::string name() const = 0; + + /// Benchmark case + virtual void operator()(::benchmark::State&) = 0; +}; + +std::ostream& operator<<(std::ostream& os, + const benchmark_base::configuration& cfg) { + os << " -> running:\t " << cfg.n_samples() << " samples" << std::endl; + if (cfg.do_warmup()) { + os << " -> warmup: \t " << cfg.n_warmup() << " samples" << std::endl; + } + if (cfg.do_sleep()) { + os << " -> cool down:\t " << cfg.n_sleep() << "s" << std::endl; + } + os << std::endl; + return os; +} + +} // namespace algebra \ No newline at end of file diff --git a/benchmarks/common/include/benchmark/common/benchmark_getter.hpp b/benchmarks/common/include/benchmark/common/benchmark_getter.hpp new file mode 100644 index 00000000..631d9c05 --- /dev/null +++ b/benchmarks/common/include/benchmark/common/benchmark_getter.hpp @@ -0,0 +1,53 @@ +/** Algebra plugins library, part of the ACTS project + * + * (c) 2023 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Project include(s) +#include "benchmark_vector.hpp" + +namespace algebra::bench_op { + +// Functions to be benchmarked + +struct phi { + inline static const std::string name{"phi"}; + template + auto operator()(const vector_t &a) const { + return algebra::getter::phi(a); + } +}; +struct theta { + inline static const std::string name{"theta"}; + template + auto operator()(const vector_t &a) const { + return algebra::getter::theta(a); + } +}; +struct perp { + inline static const std::string name{"perp"}; + template + auto operator()(const vector_t &a) const { + return algebra::getter::perp(a); + } +}; +struct 
norm { + inline static const std::string name{"norm"}; + template + auto operator()(const vector_t &a) const { + return algebra::getter::norm(a); + } +}; +struct eta { + inline static const std::string name{"eta"}; + template + auto operator()(const vector_t &a) const { + return algebra::getter::eta(a); + } +}; + +} // namespace algebra::bench_op \ No newline at end of file diff --git a/benchmarks/common/include/benchmark/common/benchmark_vector.hpp b/benchmarks/common/include/benchmark/common/benchmark_vector.hpp new file mode 100644 index 00000000..81e2d43b --- /dev/null +++ b/benchmarks/common/include/benchmark/common/benchmark_vector.hpp @@ -0,0 +1,181 @@ +/** Algebra plugins library, part of the ACTS project + * + * (c) 2023 CERN for the benefit of the ACTS project + * + * Mozilla Public License Version 2.0 + */ + +#pragma once + +// Project include(s) +#include "benchmark_base.hpp" + +// System include(s) +#include +#include +#include +#include +#include + +namespace algebra { + +template +void fill_random(std::vector &); + +/// Benchmark for vector operations +template +struct vector_bm : public benchmark_base { + + /// Prefix for the benchmark name + inline static const std::string name{"vector"}; + + std::vector a, b, results; + + /// No default construction: Cannot prepare data + vector_bm() = delete; + + /// Construct from an externally provided configuration @param cfg + vector_bm(benchmark_base::configuration cfg) : benchmark_base{cfg} { + + const std::size_t n_data{this->m_cfg.n_samples() + this->m_cfg.n_warmup()}; + + a.reserve(n_data); + b.reserve(n_data); + + fill_random(a); + fill_random(b); + } + + /// Clear state + virtual ~vector_bm() { + a.clear(); + b.clear(); + } +}; + +/// Benchmark elementwise addition of vectors +template