From 75ed67f41a4d297351976e419c5fe5d3cc18fe38 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 14:39:38 +0200 Subject: [PATCH 01/18] Add batch::matrix::Ell class and core Co-authored-by: Aditya Kashi --- core/matrix/batch_ell.cpp | 235 ++++++++++++++ include/ginkgo/core/matrix/batch_ell.hpp | 390 +++++++++++++++++++++++ 2 files changed, 625 insertions(+) create mode 100644 core/matrix/batch_ell.cpp create mode 100644 include/ginkgo/core/matrix/batch_ell.hpp diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp new file mode 100644 index 00000000000..63d4f0dda8a --- /dev/null +++ b/core/matrix/batch_ell.cpp @@ -0,0 +1,235 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_ell_kernels.hpp" + + +namespace gko { +namespace batch { +namespace matrix { +namespace ell { +namespace { + + +GKO_REGISTER_OPERATION(simple_apply, batch_ell::simple_apply); +GKO_REGISTER_OPERATION(advanced_apply, batch_ell::advanced_apply); + + +} // namespace +} // namespace ell + + +namespace detail { + + +template +batch_dim<2> compute_batch_size( + const std::vector*>& matrices) +{ + auto common_size = matrices[0]->get_size(); + for (size_type i = 1; i < matrices.size(); ++i) { + GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); + } + return batch_dim<2>{matrices.size(), common_size}; +} + + +} // namespace detail + + +template +std::unique_ptr> +Ell::create_view_for_item(size_type item_id) +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create( + exec, this->get_common_size(), + make_array_view(exec, this->get_num_elements_per_item(), + this->get_values_for_item(item_id)), + make_array_view(exec, this->get_num_elements_per_item(), + this->get_col_idxs_for_item(item_id)), + this->get_num_stored_elements_per_row(), stride); + return mat; +} + + +template +std::unique_ptr> +Ell::create_const_view_for_item(size_type item_id) 
const +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto stride = this->get_common_size()[1]; + auto mat = unbatch_type::create_const( + exec, this->get_common_size(), + make_const_array_view(exec, this->get_num_elements_per_item(), + this->get_const_values_for_item(item_id)), + make_const_array_view(exec, this->get_num_elements_per_item(), + this->get_const_col_idxs_for_item(item_id)), + this->get_num_stored_elements_per_row(), stride); + return mat; +} + + +template +std::unique_ptr> +Ell::create_with_config_of( + ptr_param> other) +{ + // De-referencing `other` before calling the functions (instead of + // using operator `->`) is currently required to be compatible with + // CUDA 10.1. + // Otherwise, it results in a compile error. + return (*other).create_with_same_config(); +} + + +template +std::unique_ptr> +Ell::create_with_same_config() const +{ + return Ell::create( + this->get_executor(), this->get_size(), + this->get_num_stored_elements_per_row()); +} + + +template +std::unique_ptr> +Ell::create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + int num_elems_per_row, gko::detail::const_array_view&& values, + gko::detail::const_array_view&& col_idxs) +{ + // cast const-ness away, but return a const object afterwards, + // so we can ensure that no modifications take place. 
+ return std::unique_ptr( + new Ell{exec, sizes, num_elems_per_row, + gko::detail::array_const_cast(std::move(values)), + gko::detail::array_const_cast(std::move(col_idxs))}); +} + + +inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) +{ + return batch_dim<2>(sizes.get_num_batch_items(), + dim<2>(1, sizes.get_common_size()[1])); +} + + +template +Ell::Ell(std::shared_ptr exec, + const batch_dim<2>& size, int num_elems_per_row) + : EnableBatchLinOp>(exec, size), + num_elems_per_row_(num_elems_per_row), + values_(exec, compute_num_elems(size, num_elems_per_row)), + col_idxs_(exec, compute_num_elems(size, num_elems_per_row)) +{} + + +template +void Ell::apply_impl(const MultiVector* b, + MultiVector* x) const +{ + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + this->get_executor()->run(ell::make_simple_apply(this, b, x)); +} + + +template +void Ell::apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const +{ + GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); + GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); + + GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); + GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), gko::dim<2>(1, 1)); + GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); + this->get_executor()->run( + ell::make_advanced_apply(alpha, this, b, beta, x)); +} + + +template +void Ell::convert_to( + Ell>* result) const +{ + result->values_ = this->values_; + 
+ result->col_idxs_ = this->col_idxs_; + result->num_elems_per_row_ = this->num_elems_per_row_; + result->set_size(this->get_size()); +} + + +template +void Ell::move_to( + Ell>* result) +{ + this->convert_to(result); +} + + +#define GKO_DECLARE_BATCH_ELL_MATRIX(_vtype, _itype) class Ell<_vtype, _itype> +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_MATRIX); + + +} // namespace matrix +} // namespace batch +} // namespace gko diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp new file mode 100644 index 00000000000..374f1479664 --- /dev/null +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -0,0 +1,390 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_ELL_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_BATCH_ELL_HPP_ + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace batch { +namespace matrix { + + +/** + * Ell is a batch matrix format which explicitly stores all values of the + * matrix in each of the batches. + * + * The values in each of the batches are stored in row-major format (values + * belonging to the same row appear consecutive in the memory and the values of + * each batch item are also stored consecutively in memory). + * + * @note Though the storage layout is similar to the multi-vector object, the + * class semantics and the operations it aims to provide is different. Hence it + * is recommended to create multi-vector objects if the user means to view the + * data as a set of vectors. 
+ * + * @tparam ValueType precision of matrix elements + * + * @ingroup batch_ell + * @ingroup mat_formats + * @ingroup BatchLinOp + */ +template +class Ell final + : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo, IndexType>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class Ell, IndexType>; + friend class Ell, IndexType>; + +public: + using EnableBatchLinOp::convert_to; + using EnableBatchLinOp::move_to; + + using value_type = ValueType; + using index_type = IndexType; + using transposed_type = Ell; + using unbatch_type = gko::matrix::Ell; + using absolute_type = remove_complex; + using complex_type = to_complex; + + /** + * Creates an Ell matrix with the configuration of another Ell + * matrix. + * + * @param other The other matrix whose configuration needs to be copied. + */ + static std::unique_ptr create_with_config_of( + ptr_param other); + + void convert_to( + Ell, IndexType>* result) const override; + + void move_to(Ell, IndexType>* result) override; + + /** + * Creates a mutable view (of matrix::Ell type) of one item of the + * batch::matrix::Ell object. Does not perform any deep + * copies, but only returns a view of the data. + * + * @param item_id The index of the batch item + * + * @return a matrix::Ell object with the data from the batch item + * at the given index. + */ + std::unique_ptr create_view_for_item(size_type item_id); + + /** + * @copydoc create_view_for_item(size_type) + */ + std::unique_ptr create_const_view_for_item( + size_type item_id) const; + + /** + * Returns a pointer to the array of values of the matrix + * + * @return the pointer to the array of values + */ + value_type* get_values() noexcept { return values_.get_data(); } + + /** + * @copydoc get_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. 
+ */ + const value_type* get_const_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * Returns a pointer to the array of column indices of the matrix + * + * @return the pointer to the array of column indices + */ + index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); } + + /** + * @copydoc get_col_idxs() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type* get_const_col_idxs() const noexcept + { + return col_idxs_.get_const_data(); + } + + /** + * Returns the number of elements per row explicitly stored. + * + * @return the number of elements stored in each row of the ELL matrix. Same + * for each batch item + */ + int get_num_stored_elements_per_row() const noexcept + { + return num_elems_per_row_; + } + + /** + * Returns the number of elements explicitly stored in the batch matrix, + * cumulative across all the batch items. + * + * @return the number of elements explicitly stored in the vector, + * cumulative across all the batch items + */ + size_type get_num_stored_elements() const noexcept + { + return values_.get_num_elems(); + } + + /** + * Returns the number of stored elements in each batch item. + * + * @return the number of stored elements per batch item. + */ + size_type get_num_elements_per_item() const noexcept + { + return this->get_num_stored_elements() / this->get_num_batch_items(); + } + + /** + * Returns a pointer to the array of col_idxs of the matrix for a + * specific batch item. + * + * @param batch_id the id of the batch item. 
+ * + * @return the pointer to the array of col_idxs of that batch item + */ + index_type* get_col_idxs_for_item(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return col_idxs_.get_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * @copydoc get_col_idxs_for_item(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type* get_const_col_idxs_for_item( + size_type batch_id) const noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return col_idxs_.get_const_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * Returns a pointer to the array of values of the matrix for a + * specific batch item. + * + * @param batch_id the id of the batch item. + * + * @return the pointer to the array of values of that batch item + */ + value_type* get_values_for_item(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * @copydoc get_values_for_item(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + return values_.get_const_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * Creates a constant (immutable) batch ell matrix from a constant + * array. 
+ * + * @param exec the executor to create the matrix on + * @param sizes the dimensions of the batch matrix + * @param num_elems_per_row the number of elements to be stored in each row + * @param values the value array of the matrix + * @param col_idxs the col_idxs array of the matrix + * + * @return A smart pointer to the constant matrix wrapping the input + * array (if it resides on the same executor as the matrix) or a copy of the + * array on the correct executor. + */ + static std::unique_ptr> create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + const int num_elems_per_row, + gko::detail::const_array_view&& values, + gko::detail::const_array_view&& col_idxs); + + /** + * Apply the matrix to a multi-vector. Represents the matrix vector + * multiplication, x = A * b, where x and b are both multi-vectors. + * + * @param b the multi-vector to be applied to + * @param x the output multi-vector + */ + void apply(const MultiVector* b, + MultiVector* x) const + { + this->apply_impl(b, x); + } + + /** + * Apply the matrix to a multi-vector with a linear combination of the given + * input vector. Represents the matrix vector multiplication, x = alpha* A * + * b + beta * x, where x and b are both multi-vectors. + * + * @param alpha the scalar to scale the matrix-vector product with + * @param b the multi-vector to be applied to + * @param beta the scalar to scale the x vector with + * @param x the output multi-vector + */ + void apply(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const + { + this->apply_impl(alpha, b, beta, x); + } + +private: + size_type compute_num_elems(const batch_dim<2>& size, int num_elems_per_row) + { + return size.get_num_batch_items() * size.get_common_size()[0] * num_elems_per_row; + } + + +protected: + /** + * Creates an uninitialized Ell matrix of the specified size. 
+ * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param num_elems_per_row the number of elements to be stored in each row + */ + Ell(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}, + const int num_elems_per_row = 0); + + /** + * Creates an Ell matrix from an already allocated (and initialized) + * array. + * + * @tparam ValuesArray type of array of values + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param num_elems_per_row the number of elements to be stored in each row + * @param values array of matrix values + * @param col_idxs the col_idxs array of the matrix + * + * @note If `values` is not an rvalue, not an array of ValueType, or is on + * the wrong executor, an internal copy will be created, and the + * original array data will not be used in the matrix. + */ + template + Ell(std::shared_ptr exec, const batch_dim<2>& size, + const int num_elems_per_row, ValuesArray&& values, + IndicesArray&& col_idxs) + : EnableBatchLinOp(exec, size), + num_elems_per_row_{num_elems_per_row}, + values_{exec, std::forward(values)}, + col_idxs_{exec, std::forward(col_idxs)} + { + // Ensure that the value and col_idxs arrays have the correct size + auto num_elems = this->get_common_size()[0] * num_elems_per_row * + this->get_num_batch_items(); + GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); + GKO_ENSURE_IN_BOUNDS(num_elems, col_idxs_.get_num_elems() + 1); + } + + /** + * Creates an Ell matrix with the same configuration as the caller's + * matrix. + * + * @returns an Ell matrix with the same configuration as the caller. 
+ */ + std::unique_ptr create_with_same_config() const; + + void apply_impl(const MultiVector* b, + MultiVector* x) const; + + void apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const; + +private: + int num_elems_per_row_; + array values_; + array col_idxs_; +}; + + +} // namespace matrix +} // namespace batch +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_ELL_HPP_ From d3b8ad1e7933851be777937705d1e3dcff618f54 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 15:13:34 +0200 Subject: [PATCH 02/18] Add ref, omp kernels and scaffold Co-authored-by: Aditya Kashi --- core/CMakeLists.txt | 1 + core/device_hooks/common_kernels.inc.cpp | 10 + core/matrix/batch_ell_kernels.hpp | 84 ++++ core/test/matrix/CMakeLists.txt | 1 + core/test/matrix/batch_ell.cpp | 478 +++++++++++++++++++++ cuda/CMakeLists.txt | 1 + cuda/matrix/batch_ell_kernels.cu | 86 ++++ dpcpp/CMakeLists.txt | 1 + dpcpp/matrix/batch_ell_kernels.dp.cpp | 102 +++++ hip/CMakeLists.txt | 1 + hip/matrix/batch_ell_kernels.hip.cpp | 86 ++++ omp/CMakeLists.txt | 1 + omp/matrix/batch_ell_kernels.cpp | 117 +++++ reference/CMakeLists.txt | 1 + reference/matrix/batch_ell_kernels.cpp | 116 +++++ reference/matrix/batch_ell_kernels.hpp.inc | 78 ++++ 16 files changed, 1164 insertions(+) create mode 100644 core/matrix/batch_ell_kernels.hpp create mode 100644 core/test/matrix/batch_ell.cpp create mode 100644 cuda/matrix/batch_ell_kernels.cu create mode 100644 dpcpp/matrix/batch_ell_kernels.dp.cpp create mode 100644 hip/matrix/batch_ell_kernels.hip.cpp create mode 100644 omp/matrix/batch_ell_kernels.cpp create mode 100644 reference/matrix/batch_ell_kernels.cpp create mode 100644 reference/matrix/batch_ell_kernels.hpp.inc diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 46ea67abc65..ae8035bcbf9 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -40,6 +40,7 @@ target_sources(ginkgo log/record.cpp log/stream.cpp 
matrix/batch_dense.cpp + matrix/batch_ell.cpp matrix/coo.cpp matrix/csr.cpp matrix/dense.cpp diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 87cab3dcf0b..b685063da10 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -310,6 +310,16 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); } // namespace batch_dense +namespace batch_ell { + + +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); +GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_ell + + namespace dense { diff --git a/core/matrix/batch_ell_kernels.hpp b/core/matrix/batch_ell_kernels.hpp new file mode 100644 index 00000000000..1b1ef345ae0 --- /dev/null +++ b/core/matrix/batch_ell_kernels.hpp @@ -0,0 +1,84 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_CORE_MATRIX_BATCH_ELL_KERNELS_HPP_ +#define GKO_CORE_MATRIX_BATCH_ELL_KERNELS_HPP_ + + +#include + + +#include +#include +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL(_vtype, _itype) \ + void simple_apply(std::shared_ptr exec, \ + const batch::matrix::Ell<_vtype, _itype>* a, \ + const batch::MultiVector<_vtype, _itype>* b, \ + batch::MultiVector<_vtype, _itype>* c) + +#define GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL(_vtype, _itype) \ + void advanced_apply(std::shared_ptr exec, \ + const batch::MultiVector<_vtype, _itype>* alpha, \ + const batch::matrix::Ell<_vtype, _itype>* a, \ + const batch::MultiVector<_vtype, _itype>* b, \ + const batch::MultiVector<_vtype, _itype>* beta, \ + batch::MultiVector<_vtype, _itype>* c) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL(ValueType, IndexType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_ell, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_BATCH_ELL_KERNELS_HPP_ diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index cca4b8da1c0..ec7ef93e517 100644 --- 
a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_test(batch_dense) +ginkgo_create_test(batch_ell) ginkgo_create_test(coo) ginkgo_create_test(coo_builder) ginkgo_create_test(csr) diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp new file mode 100644 index 00000000000..931efb47d2e --- /dev/null +++ b/core/test/matrix/batch_ell.cpp @@ -0,0 +1,478 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/batch_utilities.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/batch_helpers.hpp" + + +template +class Ell : public ::testing::Test { +protected: + using value_type = T; + using EllMtx = gko::matrix::Ell; + using size_type = gko::size_type; + Ell() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::batch::initialize>( + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)), + mvec(gko::batch::initialize>( + {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, + exec)), + ell_mtx(gko::initialize>( + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)) + {} + + + static void assert_equal_to_original_mtx( + gko::batch::matrix::Ell* m) + { + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3)); + EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{-1.5}); + EXPECT_EQ(m->at(0, 1, 1), value_type{2.5}); + ASSERT_EQ(m->at(0, 1, 2), value_type{3.5}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.5}); + EXPECT_EQ(m->at(1, 0, 2), value_type{3.0}); + EXPECT_EQ(m->at(1, 1, 0), 
value_type{1.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{2.0}); + ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); + } + + static void assert_empty(gko::batch::matrix::Ell* m) + { + ASSERT_EQ(m->get_num_batch_items(), 0); + ASSERT_EQ(m->get_num_stored_elements(), 0); + } + + std::shared_ptr exec; + std::unique_ptr> mtx; + std::unique_ptr> mvec; + std::unique_ptr> ell_mtx; +}; + +TYPED_TEST_SUITE(Ell, gko::test::ValueTypes); + + +TYPED_TEST(Ell, KnowsItsSizeAndValues) +{ + this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(Ell, CanBeEmpty) +{ + auto empty = gko::batch::matrix::Ell::create(this->exec); + this->assert_empty(empty.get()); +} + + +TYPED_TEST(Ell, ReturnsNullValuesArrayWhenEmpty) +{ + auto empty = gko::batch::matrix::Ell::create(this->exec); + ASSERT_EQ(empty->get_const_values(), nullptr); +} + + +TYPED_TEST(Ell, CanGetValuesForEntry) +{ + using value_type = typename TestFixture::value_type; + + ASSERT_EQ(this->mtx->get_values_for_item(1)[0], value_type{1.0}); +} + + +TYPED_TEST(Ell, CanCreateEllItemView) +{ + GKO_ASSERT_MTX_NEAR(this->mtx->create_view_for_item(1), this->ell_mtx, 0.0); +} + + +TYPED_TEST(Ell, CanCreateMultiVectorView) +{ + GKO_ASSERT_BATCH_MTX_NEAR(this->mtx->create_multi_vector_view(), this->mvec, + 0.0); +} + + +TYPED_TEST(Ell, CanBeCopied) +{ + auto mtx_copy = gko::batch::matrix::Ell::create(this->exec); + + mtx_copy->copy_from(this->mtx.get()); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->at(0, 0, 0) = 7; + this->mtx->at(0, 1) = 7; + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(Ell, CanBeMoved) +{ + auto mtx_copy = gko::batch::matrix::Ell::create(this->exec); + + this->mtx->move_to(mtx_copy); + + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(Ell, CanBeCloned) +{ + auto mtx_clone = this->mtx->clone(); + + this->assert_equal_to_original_mtx( + dynamic_castmtx.get())>(mtx_clone.get())); +} + + +TYPED_TEST(Ell, CanBeCleared) +{ + 
this->mtx->clear(); + + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(Ell, CanBeConstructedWithSize) +{ + using size_type = gko::size_type; + + auto m = gko::batch::matrix::Ell::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3})); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 30); +} + + +TYPED_TEST(Ell, CanBeConstructedFromExistingData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + value_type data[] = { + 1.0, 2.0, + -1.0, 3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; + // clang-format on + + auto m = gko::batch::matrix::Ell::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::view(this->exec, 8, data)); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); + ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); +} + + +TYPED_TEST(Ell, CanBeConstructedFromExistingConstData) +{ + using value_type = typename TestFixture::value_type; + using size_type = gko::size_type; + // clang-format off + const value_type data[] = { + 1.0, 2.0, + -1.0, 3.0, + 4.0, -1.0, + 3.0, 5.0, + 1.0, 5.0, + 6.0, -3.0}; + // clang-format on + + auto m = gko::batch::matrix::Ell::create_const( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), + gko::array::const_view(this->exec, 8, data)); + + ASSERT_EQ(m->get_const_values(), data); + ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); + ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); + ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); + ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); + ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); + 
ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); + ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); + ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); +} + + +TYPED_TEST(Ell, CanBeConstructedFromEllMatrices) +{ + using value_type = typename TestFixture::value_type; + using EllMtx = typename TestFixture::EllMtx; + using size_type = gko::size_type; + + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + + auto m = gko::batch::create_from_item>( + this->exec, std::vector{mat1.get(), mat2.get()}); + + this->assert_equal_to_original_mtx(m.get()); +} + + +TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) +{ + using value_type = typename TestFixture::value_type; + using EllMtx = typename TestFixture::EllMtx; + using size_type = gko::size_type; + + auto mat1 = gko::initialize(4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + + auto bat_m = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat1.get(), mat1.get()}); + auto m = gko::batch::create_from_item>( + this->exec, 3, mat1.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); +} + + +TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) +{ + using value_type = typename TestFixture::value_type; + using EllMtx = typename TestFixture::EllMtx; + using size_type = gko::size_type; + + auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + + auto m = gko::batch::create_from_item>( + this->exec, std::vector{mat1.get(), mat2.get()}); + auto m_ref = + gko::batch::create_from_item>( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), + mat1.get(), mat2.get()}); + + auto m2 = gko::batch::duplicate>( + this->exec, 3, m.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), 
m_ref.get(), 1e-14); +} + + +TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices) +{ + using value_type = typename TestFixture::value_type; + using EllMtx = typename TestFixture::EllMtx; + using size_type = gko::size_type; + auto mat1 = gko::initialize(4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + + auto ell_mats = gko::batch::unbatch>( + this->mtx.get()); + + GKO_ASSERT_MTX_NEAR(ell_mats[0].get(), mat1.get(), 0.); + GKO_ASSERT_MTX_NEAR(ell_mats[1].get(), mat2.get(), 0.); +} + + +TYPED_TEST(Ell, CanBeListConstructed) +{ + using value_type = typename TestFixture::value_type; + auto m = gko::batch::initialize>( + {{1.0, 2.0}, {1.0, 3.0}}, this->exec); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); + EXPECT_EQ(m->at(0, 0), value_type{1}); + EXPECT_EQ(m->at(0, 1), value_type{2}); + EXPECT_EQ(m->at(1, 0), value_type{1}); + EXPECT_EQ(m->at(1, 1), value_type{3}); +} + + +TYPED_TEST(Ell, CanBeListConstructedByCopies) +{ + using value_type = typename TestFixture::value_type; + + auto m = gko::batch::initialize>( + 2, I({1.0, 2.0}), this->exec); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{2.0}); +} + + +TYPED_TEST(Ell, CanBeDoubleListConstructed) +{ + using value_type = typename TestFixture::value_type; + using T = value_type; + + auto m = gko::batch::initialize>( + {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, + {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, + this->exec); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 3)); + EXPECT_EQ(m->at(0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 1), value_type{1.0}); + EXPECT_EQ(m->at(0, 2), value_type{0.0}); + ASSERT_EQ(m->at(0, 3), 
value_type{2.0}); + EXPECT_EQ(m->at(0, 4), value_type{4.0}); + EXPECT_EQ(m->at(1, 0), value_type{1.0}); + EXPECT_EQ(m->at(1, 1), value_type{2.0}); + EXPECT_EQ(m->at(1, 2), value_type{-1.0}); + ASSERT_EQ(m->at(1, 3), value_type{3.0}); + EXPECT_EQ(m->at(1, 4), value_type{4.0}); +} + + +TYPED_TEST(Ell, CanBeReadFromMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = int; + + auto vec_data = std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 0, 0.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); + + auto m = gko::batch::read>(this->exec, + vec_data); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); +} + + +TYPED_TEST(Ell, CanBeReadFromSparseMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = int; + auto vec_data = std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 1, 5.0}})); + vec_data.emplace_back(gko::matrix_data( + {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); + + auto m = gko::batch::read>(this->exec, + vec_data); + + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); + EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); + EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); + EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); + EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); + EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); + EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); + EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); +} + + 
+TYPED_TEST(Ell, GeneratesCorrectMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = int; + using tpl = typename gko::matrix_data::nonzero_type; + + auto data = + gko::batch::write>(this->mtx.get()); + + ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[0].nonzeros.size(), 6); + EXPECT_EQ(data[0].nonzeros[0], tpl(0, 0, value_type{-1.0})); + EXPECT_EQ(data[0].nonzeros[1], tpl(0, 1, value_type{2.0})); + EXPECT_EQ(data[0].nonzeros[2], tpl(0, 2, value_type{3.0})); + EXPECT_EQ(data[0].nonzeros[3], tpl(1, 0, value_type{-1.5})); + EXPECT_EQ(data[0].nonzeros[4], tpl(1, 1, value_type{2.5})); + EXPECT_EQ(data[0].nonzeros[5], tpl(1, 2, value_type{3.5})); + ASSERT_EQ(data[1].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[1].nonzeros.size(), 6); + EXPECT_EQ(data[1].nonzeros[0], tpl(0, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[1], tpl(0, 1, value_type{2.5})); + EXPECT_EQ(data[1].nonzeros[2], tpl(0, 2, value_type{3.0})); + EXPECT_EQ(data[1].nonzeros[3], tpl(1, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0})); + EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0})); +} diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index dfa1b2177ee..f5b7932ed39 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -39,6 +39,7 @@ target_sources(ginkgo_cuda factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu matrix/batch_dense_kernels.cu + matrix/batch_ell_kernels.cu matrix/coo_kernels.cu ${CSR_INSTANTIATE} matrix/dense_kernels.cu diff --git a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu new file mode 100644 index 00000000000..c41b436daed --- /dev/null +++ b/cuda/matrix/batch_ell_kernels.cu @@ -0,0 +1,86 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include +#include + + +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/cublas_bindings.hpp" +#include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/base/thrust.cuh" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" +#include "cuda/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +constexpr auto default_block_size = 256; +constexpr int sm_oversubscription = 4; + +// clang-format off + +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES + +#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" + + +#include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" + +// clang-format on + + +} // namespace batch_ell +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 9990496c98f..9c2e799ede9 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -37,6 +37,7 @@ target_sources(ginkgo_dpcpp factorization/par_ilut_spgeam_kernel.dp.cpp factorization/par_ilut_sweep_kernel.dp.cpp matrix/batch_dense_kernels.dp.cpp + matrix/batch_ell_kernels.dp.cpp matrix/coo_kernels.dp.cpp matrix/csr_kernels.dp.cpp matrix/fbcsr_kernels.dp.cpp diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp new file mode 100644 index 00000000000..f886b7dd790 --- /dev/null +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -0,0 +1,102 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo 
authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/components/prefix_sum_kernels.hpp" +#include "core/matrix/batch_struct.hpp" +#include "dpcpp/base/batch_struct.hpp" +#include "dpcpp/base/config.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/intrinsics.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +// #include "dpcpp/matrix/batch_dense_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_ell +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 21b573b6cd0..ccc88769a4e 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -36,6 +36,7 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp matrix/batch_dense_kernels.hip.cpp + matrix/batch_ell_kernels.hip.cpp 
matrix/coo_kernels.hip.cpp ${CSR_INSTANTIATE} matrix/dense_kernels.hip.cpp diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp new file mode 100644 index 00000000000..c41b436daed --- /dev/null +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -0,0 +1,86 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include "core/matrix/batch_dense_kernels.hpp" + + +#include +#include + + +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/cublas_bindings.hpp" +#include "cuda/base/pointer_mode_guard.hpp" +#include "cuda/base/thrust.cuh" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" +#include "cuda/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +constexpr auto default_block_size = 256; +constexpr int sm_oversubscription = 4; + +// clang-format off + +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES + +#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" + + +#include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" + +// clang-format on + + +} // namespace batch_ell +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index d87399492f5..aa8e30cd590 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -24,6 +24,7 @@ target_sources(ginkgo_omp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp matrix/batch_dense_kernels.cpp + matrix/batch_ell_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/omp/matrix/batch_ell_kernels.cpp b/omp/matrix/batch_ell_kernels.cpp new file mode 100644 index 00000000000..282920c05f3 --- /dev/null +++ b/omp/matrix/batch_ell_kernels.cpp @@ -0,0 +1,117 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +/** + * @brief The Ell matrix format namespace. 
+ * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +#include "reference/matrix/batch_ell_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); + + +} // namespace 
batch_ell +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index 37498588ca7..21dfc0dfb5a 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -26,6 +26,7 @@ target_sources(ginkgo_reference factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp matrix/batch_dense_kernels.cpp + matrix/batch_ell_kernels.cpp matrix/coo_kernels.cpp matrix/csr_kernels.cpp matrix/dense_kernels.cpp diff --git a/reference/matrix/batch_ell_kernels.cpp b/reference/matrix/batch_ell_kernels.cpp new file mode 100644 index 00000000000..1fab322dc5f --- /dev/null +++ b/reference/matrix/batch_ell_kernels.cpp @@ -0,0 +1,116 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The Ell matrix format namespace. + * @ref Ell + * @ingroup batch_ell + */ +namespace batch_ell { + + +#include "reference/matrix/batch_ell_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + 
batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_ell +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc new file mode 100644 index 00000000000..1874d1db9f3 --- /dev/null +++ b/reference/matrix/batch_ell_kernels.hpp.inc @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +template +inline void simple_apply_kernel( + const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& c) +{ + for (int row = 0; row < a.num_rows; ++row) { + for (int j = 0; j < b.num_rhs; ++j) { + c.values[row * c.stride + j] = zero(); + } + for (auto k = 0; k < a.num_stored_elems_per_row; ++k) { + auto val = a.values[row + k * a.stride]; + auto col = a.col_idxs[row + k * a.stride]; + for (int j = 0; j < b.num_rhs; ++j) { + c.values[row * c.stride + j] += + val * b.values[col * b.stride + j]; + } + } + } +} + + +template +inline void advanced_apply_kernel( + const ValueType alpha, + const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::multi_vector::batch_item& b, + const ValueType beta, + const gko::batch::multi_vector::batch_item& c) +{ + for (int row = 0; row < a.num_rows; ++row) { + for (int j = 0; j < c.num_rhs; ++j) { + c.values[row * c.stride + 
j] *= beta; + } + for (auto k = 0; k < a.num_stored_elems_per_row; ++k) { + auto val = a.values[row + k * a.stride]; + auto col = a.col_idxs[row + k * a.stride]; + for (int j = 0; j < b.num_rhs; ++j) { + c.values[row * c.stride + j] += + alpha * val * b.values[col * b.stride + j]; + } + } + } +} From 658a5af3bf652feb9d66d2ff97e9ef3829a9b7bd Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Fri, 6 Oct 2023 17:19:37 +0200 Subject: [PATCH 03/18] Use only int32 --- .../matrix/batch_ell_kernel_launcher.hpp.inc | 53 +++++++++++ core/device_hooks/common_kernels.inc.cpp | 10 +- core/matrix/batch_ell.cpp | 34 ++----- core/matrix/batch_ell_kernels.hpp | 20 ++-- core/matrix/batch_struct.hpp | 95 +++++++++++++++++++ cuda/matrix/batch_ell_kernels.cu | 4 +- dpcpp/matrix/batch_ell_kernels.dp.cpp | 4 +- hip/matrix/batch_ell_kernels.hip.cpp | 4 +- include/ginkgo/core/base/types.hpp | 16 ++++ include/ginkgo/core/matrix/batch_ell.hpp | 31 +++--- omp/matrix/batch_ell_kernels.cpp | 4 +- reference/matrix/batch_ell_kernels.cpp | 4 +- reference/matrix/batch_ell_kernels.hpp.inc | 10 +- reference/matrix/batch_struct.hpp | 35 +++++++ 14 files changed, 256 insertions(+), 68 deletions(-) create mode 100644 common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc diff --git a/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc new file mode 100644 index 00000000000..263e911c31a --- /dev/null +++ b/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc @@ -0,0 +1,53 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Ell* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index b685063da10..462675c15db 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -58,6 +58,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/factorization/par_ilu_kernels.hpp" #include "core/factorization/par_ilut_kernels.hpp" #include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_ell_kernels.hpp" #include "core/matrix/coo_kernels.hpp" #include "core/matrix/csr_kernels.hpp" #include "core/matrix/dense_kernels.hpp" @@ -137,6 +138,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
_macro(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(_macro) +#define GKO_STUB_VALUE_AND_INT32_TYPE(_macro) \ + template \ + _macro(ValueType, IndexType) GKO_NOT_COMPILED(GKO_HOOK_MODULE); \ + GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(_macro) + #define GKO_STUB_MIXED_VALUE_AND_INDEX_TYPE(_macro) \ template \ @@ -313,8 +319,8 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_DENSE_ADVANCED_APPLY_KERNEL); namespace batch_ell { -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); -GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); +GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); +GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); } // namespace batch_ell diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 63d4f0dda8a..3aea6e1aae4 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -64,24 +64,6 @@ GKO_REGISTER_OPERATION(advanced_apply, batch_ell::advanced_apply); } // namespace ell -namespace detail { - - -template -batch_dim<2> compute_batch_size( - const std::vector*>& matrices) -{ - auto common_size = matrices[0]->get_size(); - for (size_type i = 1; i < matrices.size(); ++i) { - GKO_ASSERT_EQUAL_DIMENSIONS(common_size, matrices[i]->get_size()); - } - return batch_dim<2>{matrices.size(), common_size}; -} - - -} // namespace detail - - template std::unique_ptr> Ell::create_view_for_item(size_type item_id) @@ -145,7 +127,8 @@ template std::unique_ptr> Ell::create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - int num_elems_per_row, gko::detail::const_array_view&& values, + const IndexType num_elems_per_row, + gko::detail::const_array_view&& values, gko::detail::const_array_view&& col_idxs) { // cast const-ness away, but return a const object afterwards, @@ -166,7 +149,8 @@ inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) template 
Ell::Ell(std::shared_ptr exec, - const batch_dim<2>& size, int num_elems_per_row) + const batch_dim<2>& size, + IndexType num_elems_per_row) : EnableBatchLinOp>(exec, size), num_elems_per_row_(num_elems_per_row), values_(exec, compute_num_elems(size, num_elems_per_row)), @@ -209,7 +193,7 @@ void Ell::apply_impl(const MultiVector* alpha, template void Ell::convert_to( - Ell>* result) const + Ell, IndexType>* result) const { result->values_ = this->values_; result->col_idxs_ = this->col_idxs_; @@ -218,16 +202,16 @@ void Ell::convert_to( } -template +template void Ell::move_to( - Ell>* result) + Ell, IndexType>* result) { this->convert_to(result); } -#define GKO_DECLARE_BATCH_ELL_MATRIX(_type) class Ell<_vtype, _itype> -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_BATCH_ELL_MATRIX); +#define GKO_DECLARE_BATCH_ELL_MATRIX(ValueType) class Ell +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_ELL_MATRIX); } // namespace matrix diff --git a/core/matrix/batch_ell_kernels.hpp b/core/matrix/batch_ell_kernels.hpp index 1b1ef345ae0..d3acc582f9b 100644 --- a/core/matrix/batch_ell_kernels.hpp +++ b/core/matrix/batch_ell_kernels.hpp @@ -52,16 +52,16 @@ namespace kernels { #define GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL(_vtype, _itype) \ void simple_apply(std::shared_ptr exec, \ const batch::matrix::Ell<_vtype, _itype>* a, \ - const batch::MultiVector<_vtype, _itype>* b, \ - batch::MultiVector<_vtype, _itype>* c) - -#define GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL(_vtype, _itype) \ - void advanced_apply(std::shared_ptr exec, \ - const batch::MultiVector<_vtype, _itype>* alpha, \ - const batch::matrix::Ell<_vtype, _itype>* a, \ - const batch::MultiVector<_vtype, _itype>* b, \ - const batch::MultiVector<_vtype, _itype>* beta, \ - batch::MultiVector<_vtype, _itype>* c) + const batch::MultiVector<_vtype>* b, \ + batch::MultiVector<_vtype>* c) + +#define GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL(_vtype, _itype) \ + void advanced_apply(std::shared_ptr exec, \ + 
const batch::MultiVector<_vtype>* alpha, \ + const batch::matrix::Ell<_vtype, _itype>* a, \ + const batch::MultiVector<_vtype>* b, \ + const batch::MultiVector<_vtype>* beta, \ + batch::MultiVector<_vtype>* c) #define GKO_DECLARE_ALL_AS_TEMPLATES \ template \ diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index 0bbfde40cc9..272bb506df2 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include namespace gko { @@ -82,6 +83,53 @@ struct uniform_batch { } // namespace dense +namespace batch_ell { + + +/** + * Encapsulates one matrix from a batch of ell matrices. + */ +template +struct batch_item { + using value_type = ValueType; + using index_type = int32; + + ValueType* values; + const index_type* col_idxs; + index_type stride; + index_type num_rows; + index_type num_cols; + index_type num_stored_elems_per_row; +}; + + +/** + * A 'simple' structure to store a global uniform batch of ell matrices. 
+ */ +template +struct uniform_batch { + using value_type = ValueType; + using index_type = int; + using entry_type = batch_item; + + ValueType* values; + const index_type* col_idxs; + size_type num_batch_items; + index_type stride; + index_type num_rows; + index_type num_cols; + index_type num_stored_elems_per_row; + + size_type get_entry_storage() const + { + return num_rows * num_stored_elems_per_row * sizeof(value_type); + } +}; + + +} // namespace batch_ell + + template GKO_ATTRIBUTES GKO_INLINE dense::batch_item to_const( const dense::batch_item& b) @@ -116,6 +164,53 @@ GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( } +template +GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item to_const( + const batch_ell::batch_item& b) +{ + return {b.values, b.col_idxs, b.stride, + b.num_rows, b.num_cols, b.num_stored_elems_per_row}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE batch_ell::uniform_batch to_const( + const batch_ell::uniform_batch& ub) +{ + return {ub.values, ub.col_idxs, ub.num_batch_items, ub.stride, + ub.num_rows, ub.num_cols, ub.num_stored_elems_per_row}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( + const batch_ell::uniform_batch& batch, const size_type batch_idx) +{ + return {batch.values + + batch_idx * batch.num_stored_elems_per_row * batch.num_rows, + batch.col_idxs + + batch_idx * batch.num_stored_elems_per_row * batch.num_rows, + batch.stride, + batch.num_rows, + batch.num_cols, + batch.num_stored_elems_per_row}; +} + +template +GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( + ValueType* const batch_values, int* const batch_col_idxs, const int stride, + const int num_rows, const int num_cols, int num_elems_per_row, + const size_type batch_idx) +{ + return {batch_values + batch_idx * num_elems_per_row * num_rows, + batch_col_idxs + batch_idx * num_elems_per_row * num_rows, + stride, + num_rows, + num_cols, + num_elems_per_row}; +} + + } // namespace matrix } // namespace 
batch } // namespace gko diff --git a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu index c41b436daed..567d863d95c 100644 --- a/cuda/matrix/batch_ell_kernels.cu +++ b/cuda/matrix/batch_ell_kernels.cu @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_ell_kernels.hpp" #include @@ -72,7 +72,7 @@ constexpr int sm_oversubscription = 4; // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" +// #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index f886b7dd790..cdcd5abd024 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -80,7 +80,7 @@ void simple_apply(std::shared_ptr exec, const batch::MultiVector* b, batch::MultiVector* x) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); @@ -92,7 +92,7 @@ void advanced_apply(std::shared_ptr exec, const batch::MultiVector* beta, batch::MultiVector* x) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp index c41b436daed..567d863d95c 100644 --- a/hip/matrix/batch_ell_kernels.hip.cpp +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_ell_kernels.hpp" #include @@ -72,7 +72,7 @@ constexpr int sm_oversubscription = 4; // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" +// #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 68b5da6e3eb..f5a75c7448e 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -531,6 +531,22 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, template _macro(double, int64) #endif +#if GINKGO_DPCPP_SINGLE_MODE +#define GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(_macro) \ + template _macro(float, int32); \ + template <> \ + _macro(double, int32) GKO_NOT_IMPLEMENTED; \ + template _macro(std::complex, int32); \ + template <> \ + _macro(std::complex, int32) GKO_NOT_IMPLEMENTED +#else +#define GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(_macro) \ + template _macro(float, int32); \ + template _macro(double, int32); \ + template _macro(std::complex, int32); \ + template _macro(std::complex, int32) +#endif + /** * Instantiates a template for each value and index type compiled by Ginkgo. 
diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 374f1479664..af77fc1e390 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -88,7 +88,7 @@ class Ell final using EnableBatchLinOp::move_to; using value_type = ValueType; - using index_type = int32; + using index_type = IndexType; using transposed_type = Ell; using unbatch_type = gko::matrix::Ell; using absolute_type = remove_complex; @@ -170,7 +170,7 @@ class Ell final * @return the number of elements stored in each row of the ELL matrix. Same * for each batch item */ - int get_num_stored_elements_per_row() const noexcept + index_type get_num_stored_elements_per_row() const noexcept { return num_elems_per_row_; } @@ -205,7 +205,7 @@ class Ell final * * @return the pointer to the array of col_idxs */ - value_type* get_col_idxs_for_item(size_type batch_id) noexcept + index_type* get_col_idxs_for_item(size_type batch_id) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_data() + @@ -219,8 +219,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_col_idxs_for_item( - size_type batch_id) const noexcept + const index_type* get_const_col_idxs_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data() + @@ -249,8 +249,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + @@ -271,9 +271,9 @@ class Ell final * array (if it resides on the same executor as the matrix) or a copy of the * array on the correct executor. */ - static std::unique_ptr> create_const( + static std::unique_ptr create_const( std::shared_ptr exec, const batch_dim<2>& sizes, - const int num_elems_per_row, + const index_type num_elems_per_row, gko::detail::const_array_view&& values, gko::detail::const_array_view&& col_idxs); @@ -309,9 +309,10 @@ class Ell final } private: - size_type compute_num_elems(const batch_dim<2>& size, int num_elems_per_row) + size_type compute_num_elems(const batch_dim<2>& size, + IndexType num_elems_per_row) { - return size->get_common_size()[0] * num_elems_per_row; + return size.get_common_size()[0] * num_elems_per_row; } @@ -325,7 +326,7 @@ class Ell final */ Ell(std::shared_ptr exec, const batch_dim<2>& size = batch_dim<2>{}, - const int num_elems_per_row = 0); + const IndexType num_elems_per_row = 0); /** * Creates a Ell matrix from an already allocated (and initialized) @@ -345,7 +346,7 @@ class Ell final */ template Ell(std::shared_ptr exec, const batch_dim<2>& size, - const int num_elems_per_row, ValuesArray&& values, + const IndexType num_elems_per_row, ValuesArray&& values, IndicesArray&& col_idxs) : EnableBatchLinOp(exec, size), num_elems_per_row_{num_elems_per_row}, @@ -353,7 +354,7 @@ class Ell final col_idxs_{exec, std::forward(col_idxs)} { // Ensure that the value and col_idxs arrays have the correct size - auto num_elems = this->get_size()[0] * num_elems_per_row() * + auto num_elems = this->get_common_size()[0] * num_elems_per_row * this->get_num_batch_items(); GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); GKO_ENSURE_IN_BOUNDS(num_elems, 
col_idxs_.get_num_elems() + 1); @@ -376,7 +377,7 @@ class Ell final MultiVector* x) const; private: - int num_elems_per_row_; + index_type num_elems_per_row_; array values_; array col_idxs_; }; diff --git a/omp/matrix/batch_ell_kernels.cpp b/omp/matrix/batch_ell_kernels.cpp index 282920c05f3..20ea4614e7d 100644 --- a/omp/matrix/batch_ell_kernels.cpp +++ b/omp/matrix/batch_ell_kernels.cpp @@ -78,7 +78,7 @@ void simple_apply(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); @@ -107,7 +107,7 @@ void advanced_apply(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/reference/matrix/batch_ell_kernels.cpp b/reference/matrix/batch_ell_kernels.cpp index 1fab322dc5f..a3f69827c02 100644 --- a/reference/matrix/batch_ell_kernels.cpp +++ b/reference/matrix/batch_ell_kernels.cpp @@ -78,7 +78,7 @@ void simple_apply(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); @@ -106,7 +106,7 @@ void advanced_apply(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc index 1874d1db9f3..37370261d44 100644 --- a/reference/matrix/batch_ell_kernels.hpp.inc +++ b/reference/matrix/batch_ell_kernels.hpp.inc @@ -30,10 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -template +template inline void simple_apply_kernel( - const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::matrix::batch_ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& c) { @@ -53,11 +52,10 @@ inline void simple_apply_kernel( } -template +template inline void advanced_apply_kernel( const ValueType alpha, - const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::matrix::batch_ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const ValueType beta, const gko::batch::multi_vector::batch_item& c) diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 483d7717718..b5eacd80d18 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include "core/base/batch_struct.hpp" @@ -90,6 +91,40 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::batch_ell::uniform_batch +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {op->get_const_values(), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. 
+ */ +template +inline batch::matrix::batch_ell::uniform_batch get_batch_struct( + batch::matrix::Ell* const op) +{ + return {op->get_values(), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace host } // namespace kernels } // namespace gko From 1c3c5ff9a84cd3a5afee8bbbd219a1de3909a2b4 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sat, 7 Oct 2023 17:31:07 +0200 Subject: [PATCH 04/18] Generalize and rewrite batch utils --- core/base/batch_utilities.hpp | 273 ++++++++++++++- core/matrix/batch_ell.cpp | 15 +- core/test/matrix/batch_ell.cpp | 330 +++++++++--------- .../ginkgo/core/base/batch_multi_vector.hpp | 222 +----------- include/ginkgo/core/matrix/batch_ell.hpp | 18 +- 5 files changed, 449 insertions(+), 409 deletions(-) diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index 834e89c8358..c37c0cae721 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -39,7 +39,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include #include #include @@ -53,15 +52,18 @@ namespace gko { namespace batch { -template +template std::unique_ptr duplicate(std::shared_ptr exec, size_type num_duplications, - const OutputType* input) + const OutputType* input, + TArgs&&... 
create_args) { auto num_batch_items = input->get_num_batch_items(); - auto tmp = OutputType::create( - exec, batch_dim<2>(num_batch_items * num_duplications, - input->get_common_size())); + auto tmp = + OutputType::create(exec, + batch_dim<2>(num_batch_items * num_duplications, + input->get_common_size()), + std::forward(create_args)...); for (size_type i = 0; i < num_duplications; ++i) { for (size_type b = 0; b < num_batch_items; ++b) { @@ -74,14 +76,15 @@ std::unique_ptr duplicate(std::shared_ptr exec, } -template +template std::unique_ptr create_from_item( std::shared_ptr exec, const size_type num_duplications, - const typename OutputType::unbatch_type* input) + const typename OutputType::unbatch_type* input, TArgs&&... create_args) { auto num_batch_items = num_duplications; auto tmp = OutputType::create( - exec, batch_dim<2>(num_batch_items, input->get_size())); + exec, batch_dim<2>(num_batch_items, input->get_size()), + std::forward(create_args)...); for (size_type b = 0; b < num_batch_items; ++b) { tmp->create_view_for_item(b)->copy_from(input); @@ -91,14 +94,16 @@ std::unique_ptr create_from_item( } -template +template std::unique_ptr create_from_item( std::shared_ptr exec, - const std::vector& input) + const std::vector& input, + TArgs&&... create_args) { auto num_batch_items = input.size(); auto tmp = OutputType::create( - exec, batch_dim<2>(num_batch_items, input[0]->get_size())); + exec, batch_dim<2>(num_batch_items, input[0]->get_size()), + std::forward(create_args)...); for (size_type b = 0; b < num_batch_items; ++b) { tmp->create_view_for_item(b)->copy_from(input[b]); @@ -121,14 +126,17 @@ auto unbatch(const InputType* batch_object) } -template +template std::unique_ptr read( std::shared_ptr exec, - const std::vector>& data) + const std::vector>& data, + TArgs&&... 
create_args) { auto num_batch_items = data.size(); auto tmp = - OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size)); + OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size), + std::forward(create_args)...); for (size_type b = 0; b < num_batch_items; ++b) { tmp->create_view_for_item(b)->read(data[b]); @@ -154,6 +162,241 @@ std::vector> write( } +/** + * Creates and initializes a batch of single column-vectors. + * + * This function first creates a temporary MultiVector, fills it with + * passed in values, and then converts the vector to the requested type. + * + * @tparam Matrix matrix type to initialize + * (MultiVector has to implement the ConvertibleTo + * interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param vals values used to initialize the batch vector + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup MultiVector + * @ingroup mat_formats + */ +template +std::unique_ptr initialize( + std::initializer_list> + vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + using value_type = typename Matrix::value_type; + using index_type = typename Matrix::index_type; + using mat_data = gko::matrix_data; + size_type num_batch_items = vals.size(); + GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); + auto vals_begin = begin(vals); + size_type common_num_rows = vals_begin ? 
vals_begin->size() : 0; + auto common_size = dim<2>(common_num_rows, 1); + for (auto& val : vals) { + GKO_ASSERT_EQ(common_num_rows, val.size()); + } + auto b_size = batch_dim<2>(num_batch_items, common_size); + size_type batch = 0; + std::vector input_mat_data(num_batch_items, common_size); + for (const auto& b : vals) { + input_mat_data[batch].nonzeros.reserve(b.size()); + size_type idx = 0; + for (const auto& elem : b) { + if (elem != zero()) { + input_mat_data[batch].nonzeros.emplace_back(idx, 0, elem); + } + ++idx; + } + ++batch; + } + return read( + exec, input_mat_data, std::forward(create_args)...); +} + + +/** + * Creates and initializes a batch of multi-vectors. + * + * This function first creates a temporary MultiVector, fills it with + * passed in values, and then converts the vector to the requested type. + * + * @tparam Matrix matrix type to initialize + * (Dense has to implement the ConvertibleTo interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param vals values used to initialize the vector + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup MultiVector + * @ingroup mat_formats + */ +template +std::unique_ptr initialize( + std::initializer_list>> + vals, + std::shared_ptr exec, TArgs&&... create_args) +{ + using value_type = typename Matrix::value_type; + using index_type = typename Matrix::index_type; + using mat_data = gko::matrix_data; + size_type num_batch_items = vals.size(); + GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); + auto vals_begin = begin(vals); + size_type common_num_rows = vals_begin ? vals_begin->size() : 0; + size_type common_num_cols = + vals_begin->begin() ? 
vals_begin->begin()->size() : 0; + auto common_size = dim<2>(common_num_rows, common_num_cols); + for (const auto& b : vals) { + auto num_rows = b.size(); + auto num_cols = begin(b)->size(); + auto b_size = dim<2>(num_rows, num_cols); + GKO_ASSERT_EQUAL_DIMENSIONS(b_size, common_size); + } + + auto b_size = batch_dim<2>(num_batch_items, common_size); + size_type batch = 0; + std::vector input_mat_data(num_batch_items, common_size); + for (const auto& b : vals) { + size_type ridx = 0; + for (const auto& row : b) { + size_type cidx = 0; + for (const auto& elem : row) { + if (elem != zero()) { + input_mat_data[batch].nonzeros.emplace_back(ridx, cidx, + elem); + } + ++cidx; + } + ++ridx; + } + ++batch; + } + return read( + exec, input_mat_data, std::forward(create_args)...); +} + + +/** + * Creates and initializes a batch single column-vector by making copies of the + * single input column vector. + * + * This function first creates a temporary batch multi-vector, fills it with + * passed in values, and then converts the vector to the requested type. + * + * @tparam Matrix matrix type to initialize + * (MultiVector has to implement the ConvertibleTo + * interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param num_vectors The number of times the input vector is to be duplicated + * @param vals values used to initialize each vector in the temp. batch + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup MultiVector + * @ingroup mat_formats + */ +template +std::unique_ptr initialize( + const size_type num_vectors, + std::initializer_list vals, + std::shared_ptr exec, TArgs&&... 
create_args) +{ + using value_type = typename Matrix::value_type; + using index_type = typename Matrix::index_type; + using mat_data = gko::matrix_data; + size_type num_batch_items = num_vectors; + GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, + "Input data is empty"); + auto num_rows = begin(vals) ? vals.size() : 0; + auto common_size = dim<2>(num_rows, 1); + auto b_size = batch_dim<2>(num_batch_items, common_size); + std::vector input_mat_data(num_batch_items, common_size); + for (size_type batch = 0; batch < num_vectors; batch++) { + input_mat_data[batch].nonzeros.reserve(num_rows); + size_type idx = 0; + for (const auto& elem : vals) { + if (elem != zero()) { + input_mat_data[batch].nonzeros.emplace_back(idx, 0, elem); + } + ++idx; + } + } + return read( + exec, input_mat_data, std::forward(create_args)...); +} + + +/** + * Creates and initializes a matrix from copies of a given matrix. + * + * This function first creates a temporary batch multi-vector, fills it with + * passed in values, and then converts the vector to the requested type. + * + * @tparam Matrix matrix type to initialize + * (MultiVector has to implement the ConvertibleTo + * interface) + * @tparam TArgs argument types for Matrix::create method + * (not including the implied Executor as the first argument) + * + * @param num_batch_items The number of times the input matrix is duplicated + * @param vals values used to initialize each vector in the temp. batch + * @param exec Executor associated to the vector + * @param create_args additional arguments passed to Matrix::create, not + * including the Executor, which is passed as the first + * argument + * + * @ingroup LinOp + * @ingroup mat_formats + */ +template +std::unique_ptr initialize( + const size_type num_batch_items, + std::initializer_list> + vals, + std::shared_ptr exec, TArgs&&... 
create_args) +{ + using value_type = typename Matrix::value_type; + using index_type = typename Matrix::index_type; + using mat_data = gko::matrix_data; + GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, + "Input data is empty"); + auto common_size = dim<2>(begin(vals) ? vals.size() : 0, + begin(vals) ? begin(vals)->size() : 0); + batch_dim<2> b_size(num_batch_items, common_size); + std::vector input_mat_data(num_batch_items, common_size); + for (size_type batch = 0; batch < num_batch_items; batch++) { + size_type ridx = 0; + for (const auto& row : vals) { + size_type cidx = 0; + for (const auto& elem : row) { + if (elem != zero()) { + input_mat_data[batch].nonzeros.emplace_back(ridx, cidx, + elem); + } + ++cidx; + } + ++ridx; + } + } + return read( + exec, input_mat_data, std::forward(create_args)...); +} + + } // namespace batch } // namespace gko diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 3aea6e1aae4..0d903b10968 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -70,13 +70,13 @@ Ell::create_view_for_item(size_type item_id) { auto exec = this->get_executor(); auto num_rows = this->get_common_size()[0]; - auto stride = this->get_common_size()[1]; + auto stride = this->get_common_size()[0]; auto mat = unbatch_type::create( exec, this->get_common_size(), make_array_view(exec, this->get_num_elements_per_item(), this->get_values_for_item(item_id)), make_array_view(exec, this->get_num_elements_per_item(), - this->get_col_idxs_for_item(item_id)), + this->get_col_idxs()), this->get_num_stored_elements_per_row(), stride); return mat; } @@ -88,13 +88,13 @@ Ell::create_const_view_for_item(size_type item_id) const { auto exec = this->get_executor(); auto num_rows = this->get_common_size()[0]; - auto stride = this->get_common_size()[1]; + auto stride = this->get_common_size()[0]; auto mat = unbatch_type::create_const( exec, this->get_common_size(), make_const_array_view(exec, this->get_num_elements_per_item(), 
this->get_const_values_for_item(item_id)), make_const_array_view(exec, this->get_num_elements_per_item(), - this->get_const_col_idxs_for_item(item_id)), + this->get_const_col_idxs()), this->get_num_stored_elements_per_row(), stride); return mat; } @@ -152,9 +152,10 @@ Ell::Ell(std::shared_ptr exec, const batch_dim<2>& size, IndexType num_elems_per_row) : EnableBatchLinOp>(exec, size), - num_elems_per_row_(num_elems_per_row), - values_(exec, compute_num_elems(size, num_elems_per_row)), - col_idxs_(exec, compute_num_elems(size, num_elems_per_row)) + num_elems_per_row_(num_elems_per_row == 0 ? size.get_common_size()[1] + : num_elems_per_row), + values_(exec, compute_num_elems(size, num_elems_per_row_)), + col_idxs_(exec, this->get_common_size()[0] * num_elems_per_row_) {} diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp index 931efb47d2e..2830705bf5f 100644 --- a/core/test/matrix/batch_ell.cpp +++ b/core/test/matrix/batch_ell.cpp @@ -51,6 +51,7 @@ template class Ell : public ::testing::Test { protected: using value_type = T; + using index_type = gko::int32; using EllMtx = gko::matrix::Ell; using size_type = gko::size_type; Ell() @@ -58,46 +59,71 @@ class Ell : public ::testing::Test { mtx(gko::batch::initialize>( {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, - exec)), - mvec(gko::batch::initialize>( - {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, - {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, - exec)), + exec, 3)), + sp_mtx(gko::batch::initialize>( + {{{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, + {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}}, + exec, 2)), ell_mtx(gko::initialize>( - {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec)) + {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec, gko::dim<2>(2, 3), 3)), + sp_ell_mtx(gko::initialize>( + {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, exec, gko::dim<2>(2, 3), 2)) {} + static void assert_equal_to_original_sparse_mtx( + const gko::batch::matrix::Ell* m) + { + ASSERT_EQ(m->get_num_batch_items(), 2); + 
ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 2)); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 2); + EXPECT_EQ(m->get_const_values()[0], value_type{-1.0}); + EXPECT_EQ(m->get_const_values()[1], value_type{2.5}); + EXPECT_EQ(m->get_const_values()[2], value_type{0.0}); + EXPECT_EQ(m->get_const_values()[3], value_type{3.5}); + EXPECT_EQ(m->get_const_values()[4], value_type{1.0}); + EXPECT_EQ(m->get_const_values()[5], value_type{2.0}); + EXPECT_EQ(m->get_const_values()[6], value_type{0.0}); + EXPECT_EQ(m->get_const_values()[7], value_type{3.0}); + EXPECT_EQ(m->get_const_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_const_col_idxs()[1], index_type{1}); + EXPECT_EQ(m->get_const_col_idxs()[2], index_type{-1}); + ASSERT_EQ(m->get_const_col_idxs()[3], index_type{2}); + } static void assert_equal_to_original_mtx( - gko::batch::matrix::Ell* m) + const gko::batch::matrix::Ell* m) { ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3)); - EXPECT_EQ(m->at(0, 0, 0), value_type{-1.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); - EXPECT_EQ(m->at(0, 0, 2), value_type{3.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{-1.5}); - EXPECT_EQ(m->at(0, 1, 1), value_type{2.5}); - ASSERT_EQ(m->at(0, 1, 2), value_type{3.5}); - EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 0, 1), value_type{2.5}); - EXPECT_EQ(m->at(1, 0, 2), value_type{3.0}); - EXPECT_EQ(m->at(1, 1, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 1, 1), value_type{2.0}); - ASSERT_EQ(m->at(1, 1, 2), value_type{3.0}); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 3); + EXPECT_EQ(m->get_const_values()[0], value_type{-1.0}); + EXPECT_EQ(m->get_const_values()[1], value_type{-1.5}); + EXPECT_EQ(m->get_const_values()[2], value_type{2.0}); + EXPECT_EQ(m->get_const_values()[3], value_type{2.5}); + EXPECT_EQ(m->get_const_values()[4], value_type{3.0}); + 
EXPECT_EQ(m->get_const_values()[5], value_type{3.5}); + EXPECT_EQ(m->get_const_values()[6], value_type{1.0}); + EXPECT_EQ(m->get_const_values()[7], value_type{1.0}); + EXPECT_EQ(m->get_const_values()[8], value_type{2.5}); + EXPECT_EQ(m->get_const_values()[9], value_type{2.0}); + EXPECT_EQ(m->get_const_values()[10], value_type{3.0}); + ASSERT_EQ(m->get_const_values()[11], value_type{3.0}); } static void assert_empty(gko::batch::matrix::Ell* m) { ASSERT_EQ(m->get_num_batch_items(), 0); ASSERT_EQ(m->get_num_stored_elements(), 0); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 0); } std::shared_ptr exec; std::unique_ptr> mtx; - std::unique_ptr> mvec; + std::unique_ptr> sp_mtx; std::unique_ptr> ell_mtx; + std::unique_ptr> sp_ell_mtx; }; TYPED_TEST_SUITE(Ell, gko::test::ValueTypes); @@ -109,6 +135,12 @@ TYPED_TEST(Ell, KnowsItsSizeAndValues) } +TYPED_TEST(Ell, SparseMtxKnowsItsSizeAndValues) +{ + this->assert_equal_to_original_sparse_mtx(this->sp_mtx.get()); +} + + TYPED_TEST(Ell, CanBeEmpty) { auto empty = gko::batch::matrix::Ell::create(this->exec); @@ -137,10 +169,10 @@ TYPED_TEST(Ell, CanCreateEllItemView) } -TYPED_TEST(Ell, CanCreateMultiVectorView) +TYPED_TEST(Ell, CanCreateSpEllItemView) { - GKO_ASSERT_BATCH_MTX_NEAR(this->mtx->create_multi_vector_view(), this->mvec, - 0.0); + GKO_ASSERT_MTX_NEAR(this->sp_mtx->create_view_for_item(1), this->sp_ell_mtx, + 0.0); } @@ -151,8 +183,7 @@ TYPED_TEST(Ell, CanBeCopied) mtx_copy->copy_from(this->mtx.get()); this->assert_equal_to_original_mtx(this->mtx.get()); - this->mtx->at(0, 0, 0) = 7; - this->mtx->at(0, 1) = 7; + this->mtx->get_values()[0] = 7; this->assert_equal_to_original_mtx(mtx_copy.get()); } @@ -189,71 +220,62 @@ TYPED_TEST(Ell, CanBeConstructedWithSize) using size_type = gko::size_type; auto m = gko::batch::matrix::Ell::create( - this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3})); + this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3}), 2); ASSERT_EQ(m->get_num_batch_items(), 2); 
ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); - ASSERT_EQ(m->get_num_stored_elements(), 30); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 2); + ASSERT_EQ(m->get_num_stored_elements(), 20); } TYPED_TEST(Ell, CanBeConstructedFromExistingData) { using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; using size_type = gko::size_type; // clang-format off - value_type data[] = { + value_type values[] = { + -1.0, 2.5, + 0.0, 3.5, 1.0, 2.0, - -1.0, 3.0, - 4.0, -1.0, - 3.0, 5.0, - 1.0, 5.0, - 6.0, -3.0}; + 0.0, 3.0}; + index_type col_idxs[] = { + 0, 1, + -1, 2}; // clang-format on auto m = gko::batch::matrix::Ell::create( - this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), - gko::array::view(this->exec, 8, data)); - - ASSERT_EQ(m->get_const_values(), data); - ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); - ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); - ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); - ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); - ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); - ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); - ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2, + gko::array::view(this->exec, 8, values), + gko::array::view(this->exec, 4, col_idxs)); + + this->assert_equal_to_original_sparse_mtx(m.get()); } TYPED_TEST(Ell, CanBeConstructedFromExistingConstData) { using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; using size_type = gko::size_type; // clang-format off - const value_type data[] = { + value_type values[] = { + -1.0, 2.5, + 0.0, 3.5, 1.0, 2.0, - -1.0, 3.0, - 4.0, -1.0, - 3.0, 5.0, - 1.0, 5.0, - 6.0, -3.0}; + 0.0, 3.0}; + index_type col_idxs[] = { + 0, 1, + -1, 2}; // clang-format on auto m = gko::batch::matrix::Ell::create_const( - this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 2)), - gko::array::const_view(this->exec, 8, 
data)); - - ASSERT_EQ(m->get_const_values(), data); - ASSERT_EQ(m->at(0, 0, 0), value_type{1.0}); - ASSERT_EQ(m->at(0, 0, 1), value_type{2.0}); - ASSERT_EQ(m->at(0, 1, 0), value_type{-1.0}); - ASSERT_EQ(m->at(0, 1, 1), value_type{3.0}); - ASSERT_EQ(m->at(1, 0, 0), value_type{4.0}); - ASSERT_EQ(m->at(1, 0, 1), value_type{-1.0}); - ASSERT_EQ(m->at(1, 1, 0), value_type{3.0}); - ASSERT_EQ(m->at(1, 1, 1), value_type{5.0}); + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2, + gko::array::const_view(this->exec, 8, values), + gko::array::const_view(this->exec, 4, col_idxs)); + + this->assert_equal_to_original_sparse_mtx(m.get()); } @@ -263,35 +285,36 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatrices) using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, this->exec); auto mat2 = - gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, this->exec); auto m = gko::batch::create_from_item>( - this->exec, std::vector{mat1.get(), mat2.get()}); + this->exec, std::vector{mat1.get(), mat2.get()}, + mat1->get_num_stored_elements_per_row()); - this->assert_equal_to_original_mtx(m.get()); + this->assert_equal_to_original_sparse_mtx(m.get()); } TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) { using value_type = typename TestFixture::value_type; + using index_type = int; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize(4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, - this->exec); - auto mat2 = - gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + auto mat1 = + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); auto bat_m = gko::batch::create_from_item>( this->exec, - std::vector{mat1.get(), mat1.get(), mat1.get()}); + std::vector{mat1.get(), mat1.get(), 
mat1.get()}, + mat1->get_num_stored_elements_per_row()); auto m = gko::batch::create_from_item>( - this->exec, 3, mat1.get()); + this->exec, 3, mat1.get(), mat1->get_num_stored_elements_per_row()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); } @@ -300,24 +323,27 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) { using value_type = typename TestFixture::value_type; + using index_type = int; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 0.0}}, this->exec); auto mat2 = - gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); auto m = gko::batch::create_from_item>( - this->exec, std::vector{mat1.get(), mat2.get()}); + this->exec, std::vector{mat1.get(), mat2.get()}, + mat1->get_num_stored_elements_per_row()); auto m_ref = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), - mat1.get(), mat2.get()}); + mat1.get(), mat2.get()}, + mat1->get_num_stored_elements_per_row()); auto m2 = gko::batch::duplicate>( - this->exec, 3, m.get()); + this->exec, 3, m.get(), mat1->get_num_stored_elements_per_row()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); } @@ -326,15 +352,16 @@ TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices) { using value_type = typename TestFixture::value_type; + using index_type = int; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize(4, {{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, + auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, this->exec); auto mat2 = - gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, this->exec); + gko::initialize({{1.0, 0.0, 0.0}, 
{0.0, 2.0, 3.0}}, this->exec); auto ell_mats = gko::batch::unbatch>( - this->mtx.get()); + this->sp_mtx.get()); GKO_ASSERT_MTX_NEAR(ell_mats[0].get(), mat1.get(), 0.); GKO_ASSERT_MTX_NEAR(ell_mats[1].get(), mat2.get(), 0.); @@ -344,55 +371,83 @@ TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices) TYPED_TEST(Ell, CanBeListConstructed) { using value_type = typename TestFixture::value_type; + using index_type = int; auto m = gko::batch::initialize>( - {{1.0, 2.0}, {1.0, 3.0}}, this->exec); + {{0.0, -1.0}, {1.0, 0.0}}, this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); - EXPECT_EQ(m->at(0, 0), value_type{1}); - EXPECT_EQ(m->at(0, 1), value_type{2}); - EXPECT_EQ(m->at(1, 0), value_type{1}); - EXPECT_EQ(m->at(1, 1), value_type{3}); + ASSERT_EQ(m->get_num_stored_elements(), 4); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 1); + EXPECT_EQ(m->get_values()[0], value_type{0.0}); + EXPECT_EQ(m->get_values()[1], value_type{-1.0}); + EXPECT_EQ(m->get_values()[2], value_type{1.0}); + EXPECT_EQ(m->get_values()[3], value_type{0.0}); + EXPECT_EQ(m->get_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_col_idxs()[1], index_type{-1}); } TYPED_TEST(Ell, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; + using index_type = int; auto m = gko::batch::initialize>( - 2, I({1.0, 2.0}), this->exec); + 2, I({0.0, -1.0}), this->exec, 1); ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{2.0}); - EXPECT_EQ(m->at(1, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 0, 1), value_type{2.0}); + ASSERT_EQ(m->get_num_stored_elements(), 4); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 1); + EXPECT_EQ(m->get_values()[0], value_type{0.0}); + EXPECT_EQ(m->get_values()[1], value_type{-1.0}); + EXPECT_EQ(m->get_values()[2], value_type{0.0}); + EXPECT_EQ(m->get_values()[3], value_type{-1.0}); + 
EXPECT_EQ(m->get_col_idxs()[0], index_type{-1}); + EXPECT_EQ(m->get_col_idxs()[1], index_type{0}); } TYPED_TEST(Ell, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; + using index_type = int; using T = value_type; auto m = gko::batch::initialize>( - {{I{1.0, 1.0, 0.0}, I{2.0, 4.0, 3.0}, I{3.0, 6.0, 1.0}}, - {I{1.0, 2.0, -1.0}, I{3.0, 4.0, -2.0}, I{5.0, 6.0, -3.0}}}, - this->exec); + // clang-format off + {{I{1.0, 0.0, 0.0}, + I{2.0, 0.0, 3.0}, + I{3.0, 6.0, 0.0}}, + {I{1.0, 0.0, 0.0}, + I{3.0, 0.0, -2.0}, + I{5.0, 8.0, 0.0}}}, + // clang-format on + this->exec, 2); + ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 3)); - EXPECT_EQ(m->at(0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 1), value_type{1.0}); - EXPECT_EQ(m->at(0, 2), value_type{0.0}); - ASSERT_EQ(m->at(0, 3), value_type{2.0}); - EXPECT_EQ(m->at(0, 4), value_type{4.0}); - EXPECT_EQ(m->at(1, 0), value_type{1.0}); - EXPECT_EQ(m->at(1, 1), value_type{2.0}); - EXPECT_EQ(m->at(1, 2), value_type{-1.0}); - ASSERT_EQ(m->at(1, 3), value_type{3.0}); - EXPECT_EQ(m->at(1, 4), value_type{4.0}); + ASSERT_EQ(m->get_num_stored_elements(), 2 * (2 * 3)); + ASSERT_EQ(m->get_num_stored_elements_per_row(), 2); + EXPECT_EQ(m->get_values()[0], value_type{1.0}); + EXPECT_EQ(m->get_values()[1], value_type{2.0}); + EXPECT_EQ(m->get_values()[2], value_type{3.0}); + EXPECT_EQ(m->get_values()[3], value_type{0.0}); + EXPECT_EQ(m->get_values()[4], value_type{3.0}); + EXPECT_EQ(m->get_values()[5], value_type{6.0}); + EXPECT_EQ(m->get_values()[6], value_type{1.0}); + EXPECT_EQ(m->get_values()[7], value_type{3.0}); + EXPECT_EQ(m->get_values()[8], value_type{5.0}); + EXPECT_EQ(m->get_values()[9], value_type{0.0}); + EXPECT_EQ(m->get_values()[10], value_type{-2.0}); + EXPECT_EQ(m->get_values()[11], value_type{8.0}); + EXPECT_EQ(m->get_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_col_idxs()[1], index_type{0}); + EXPECT_EQ(m->get_col_idxs()[2], index_type{0}); + 
EXPECT_EQ(m->get_col_idxs()[3], index_type{-1}); + EXPECT_EQ(m->get_col_idxs()[4], index_type{2}); + EXPECT_EQ(m->get_col_idxs()[5], index_type{1}); } @@ -400,52 +455,17 @@ TYPED_TEST(Ell, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; using index_type = int; - auto vec_data = std::vector>{}; vec_data.emplace_back(gko::matrix_data( - {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 0, 0.0}, {1, 1, 5.0}})); + {2, 3}, {{0, 0, -1.0}, {1, 1, 2.5}, {1, 2, 3.5}})); vec_data.emplace_back(gko::matrix_data( - {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 0, 0.0}, {1, 1, 9.0}})); + {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}})); auto m = gko::batch::read>(this->exec, - vec_data); - - ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); - EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); - EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); -} - - -TYPED_TEST(Ell, CanBeReadFromSparseMatrixData) -{ - using value_type = typename TestFixture::value_type; - using index_type = int; - auto vec_data = std::vector>{}; - vec_data.emplace_back(gko::matrix_data( - {2, 2}, {{0, 0, 1.0}, {0, 1, 3.0}, {1, 1, 5.0}})); - vec_data.emplace_back(gko::matrix_data( - {2, 2}, {{0, 0, -1.0}, {0, 1, 0.5}, {1, 1, 9.0}})); + vec_data, 2); - auto m = gko::batch::read>(this->exec, - vec_data); - - ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 2)); - EXPECT_EQ(m->at(0, 0, 0), value_type{1.0}); - EXPECT_EQ(m->at(0, 0, 1), value_type{3.0}); - EXPECT_EQ(m->at(0, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(0, 1, 1), value_type{5.0}); - EXPECT_EQ(m->at(1, 0, 0), value_type{-1.0}); - EXPECT_EQ(m->at(1, 0, 1), value_type{0.5}); - EXPECT_EQ(m->at(1, 1, 0), value_type{0.0}); - EXPECT_EQ(m->at(1, 1, 1), value_type{9.0}); + 
this->assert_equal_to_original_sparse_mtx(m.get()); } @@ -455,24 +475,18 @@ TYPED_TEST(Ell, GeneratesCorrectMatrixData) using index_type = int; using tpl = typename gko::matrix_data::nonzero_type; - auto data = - gko::batch::write>(this->mtx.get()); + auto data = gko::batch::write>( + this->sp_mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); - ASSERT_EQ(data[0].nonzeros.size(), 6); + ASSERT_EQ(data[0].nonzeros.size(), 3); EXPECT_EQ(data[0].nonzeros[0], tpl(0, 0, value_type{-1.0})); - EXPECT_EQ(data[0].nonzeros[1], tpl(0, 1, value_type{2.0})); - EXPECT_EQ(data[0].nonzeros[2], tpl(0, 2, value_type{3.0})); - EXPECT_EQ(data[0].nonzeros[3], tpl(1, 0, value_type{-1.5})); - EXPECT_EQ(data[0].nonzeros[4], tpl(1, 1, value_type{2.5})); - EXPECT_EQ(data[0].nonzeros[5], tpl(1, 2, value_type{3.5})); + EXPECT_EQ(data[0].nonzeros[1], tpl(1, 1, value_type{2.5})); + EXPECT_EQ(data[0].nonzeros[2], tpl(1, 2, value_type{3.5})); ASSERT_EQ(data[1].size, gko::dim<2>(2, 3)); - ASSERT_EQ(data[1].nonzeros.size(), 6); + ASSERT_EQ(data[1].nonzeros.size(), 3); EXPECT_EQ(data[1].nonzeros[0], tpl(0, 0, value_type{1.0})); - EXPECT_EQ(data[1].nonzeros[1], tpl(0, 1, value_type{2.5})); - EXPECT_EQ(data[1].nonzeros[2], tpl(0, 2, value_type{3.0})); - EXPECT_EQ(data[1].nonzeros[3], tpl(1, 0, value_type{1.0})); - EXPECT_EQ(data[1].nonzeros[4], tpl(1, 1, value_type{2.0})); - EXPECT_EQ(data[1].nonzeros[5], tpl(1, 2, value_type{3.0})); + EXPECT_EQ(data[1].nonzeros[1], tpl(1, 1, value_type{2.0})); + EXPECT_EQ(data[1].nonzeros[2], tpl(1, 2, value_type{3.0})); } diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 61dffba3193..45ba0686468 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -212,8 +212,8 @@ class MultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); @@ -462,224 +462,6 @@ class MultiVector }; -/** - * Creates and initializes a batch of single column-vectors. - * - * This function first creates a temporary MultiVector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param vals values used to initialize the batch vector - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup MultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - std::initializer_list> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - using batch_multi_vector = MultiVector; - size_type num_batch_items = vals.size(); - GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); - auto vals_begin = begin(vals); - size_type common_num_rows = vals_begin ? 
vals_begin->size() : 0; - auto common_size = dim<2>(common_num_rows, 1); - for (auto& val : vals) { - GKO_ASSERT_EQ(common_num_rows, val.size()); - } - auto b_size = batch_dim<2>(num_batch_items, common_size); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - size_type batch = 0; - for (const auto& b : vals) { - size_type idx = 0; - for (const auto& elem : b) { - tmp->at(batch, idx) = elem; - ++idx; - } - ++batch; - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - -/** - * Creates and initializes a batch of multi-vectors. - * - * This function first creates a temporary MultiVector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param vals values used to initialize the vector - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup MultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - std::initializer_list>> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - using batch_multi_vector = MultiVector; - size_type num_batch_items = vals.size(); - GKO_THROW_IF_INVALID(num_batch_items > 0, "Input data is empty"); - auto vals_begin = begin(vals); - size_type common_num_rows = vals_begin ? vals_begin->size() : 0; - size_type common_num_cols = - vals_begin->begin() ? 
vals_begin->begin()->size() : 0; - auto common_size = dim<2>(common_num_rows, common_num_cols); - for (const auto& b : vals) { - auto num_rows = b.size(); - auto num_cols = begin(b)->size(); - auto b_size = dim<2>(num_rows, num_cols); - GKO_ASSERT_EQUAL_DIMENSIONS(b_size, common_size); - } - - auto b_size = batch_dim<2>(num_batch_items, common_size); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - size_type batch = 0; - for (const auto& b : vals) { - size_type ridx = 0; - for (const auto& row : b) { - size_type cidx = 0; - for (const auto& elem : row) { - tmp->at(batch, ridx, cidx) = elem; - ++cidx; - } - ++ridx; - } - ++batch; - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - -/** - * Creates and initializes a batch single column-vector by making copies of the - * single input column vector. - * - * This function first creates a temporary batch multi-vector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param num_vectors The number of times the input vector is to be duplicated - * @param vals values used to initialize each vector in the temp. batch - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup MultiVector - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - const size_type num_vectors, - std::initializer_list vals, - std::shared_ptr exec, TArgs&&... 
create_args) -{ - using batch_multi_vector = MultiVector; - size_type num_batch_items = num_vectors; - GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, - "Input data is empty"); - auto b_size = - batch_dim<2>(num_batch_items, dim<2>(begin(vals) ? vals.size() : 0, 1)); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - for (size_type batch = 0; batch < num_vectors; batch++) { - size_type idx = 0; - for (const auto& elem : vals) { - tmp->at(batch, idx) = elem; - ++idx; - } - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - -/** - * Creates and initializes a matrix from copies of a given matrix. - * - * This function first creates a temporary batch multi-vector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) - * @tparam TArgs argument types for Matrix::create method - * (not including the implied Executor as the first argument) - * - * @param num_batch_items The number of times the input matrix is duplicated - * @param vals values used to initialize each vector in the temp. batch - * @param exec Executor associated to the vector - * @param create_args additional arguments passed to Matrix::create, not - * including the Executor, which is passed as the first - * argument - * - * @ingroup LinOp - * @ingroup mat_formats - */ -template -std::unique_ptr initialize( - const size_type num_batch_items, - std::initializer_list> - vals, - std::shared_ptr exec, TArgs&&... create_args) -{ - using batch_multi_vector = MultiVector; - GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, - "Input data is empty"); - auto common_size = dim<2>(begin(vals) ? vals.size() : 0, - begin(vals) ? 
begin(vals)->size() : 0); - batch_dim<2> b_size(num_batch_items, common_size); - auto tmp = batch_multi_vector::create(exec->get_master(), b_size); - for (size_type batch = 0; batch < num_batch_items; batch++) { - size_type ridx = 0; - for (const auto& row : vals) { - size_type cidx = 0; - for (const auto& elem : row) { - tmp->at(batch, ridx, cidx) = elem; - ++cidx; - } - ++ridx; - } - } - auto mtx = Matrix::create(exec, std::forward(create_args)...); - tmp->move_to(mtx); - return mtx; -} - - } // namespace batch } // namespace gko diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index af77fc1e390..490f7a7d4b0 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -198,8 +198,8 @@ class Ell final } /** - * Returns a pointer to the array of col_idxs of the matrix for a - * specific batch item. + * Returns a pointer to the array of col_idxs of the matrix. This is shared + * across all batch items. * * @param batch_id the id of the batch item. 
* @@ -208,8 +208,7 @@ class Ell final index_type* get_col_idxs_for_item(size_type batch_id) noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return col_idxs_.get_data() + - batch_id * this->get_num_elements_per_item(); + return col_idxs_.get_data(); } /** @@ -223,8 +222,7 @@ class Ell final noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); - return col_idxs_.get_const_data() + - batch_id * this->get_num_elements_per_item(); + return col_idxs_.get_const_data(); } /** @@ -312,7 +310,8 @@ class Ell final size_type compute_num_elems(const batch_dim<2>& size, IndexType num_elems_per_row) { - return size.get_common_size()[0] * num_elems_per_row; + return size.get_num_batch_items() * size.get_common_size()[0] * + num_elems_per_row; } @@ -356,8 +355,9 @@ class Ell final // Ensure that the value and col_idxs arrays have the correct size auto num_elems = this->get_common_size()[0] * num_elems_per_row * this->get_num_batch_items(); - GKO_ENSURE_IN_BOUNDS(num_elems, values_.get_num_elems() + 1); - GKO_ENSURE_IN_BOUNDS(num_elems, col_idxs_.get_num_elems() + 1); + GKO_ASSERT_EQ(num_elems, values_.get_num_elems()); + GKO_ASSERT_EQ(this->get_num_elements_per_item(), + col_idxs_.get_num_elems()); } /** From c5a14c0dae8a4961ebcb70fab82c52754af39548 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 8 Oct 2023 10:26:50 +0200 Subject: [PATCH 05/18] Add OMP, CUDA, HIP kernels and tests Co-authored-by: Aditya Kashi --- .../matrix/batch_ell_kernel_launcher.hpp.inc | 29 +- .../cuda_hip/matrix/batch_ell_kernels.hpp.inc | 155 +++++++++++ core/matrix/batch_struct.hpp | 5 +- cuda/matrix/batch_ell_kernels.cu | 2 +- cuda/matrix/batch_struct.hpp | 34 +++ hip/matrix/batch_dense_kernels.hip.cpp | 1 - hip/matrix/batch_ell_kernels.hip.cpp | 27 +- hip/matrix/batch_struct.hip.hpp | 34 +++ reference/matrix/batch_ell_kernels.hpp.inc | 6 +- reference/matrix/batch_struct.hpp | 4 +- reference/test/matrix/CMakeLists.txt | 1 + reference/test/matrix/batch_ell_kernels.cpp | 248 
++++++++++++++++++ test/matrix/CMakeLists.txt | 1 + test/matrix/batch_ell_kernels.cpp | 128 +++++++++ 14 files changed, 650 insertions(+), 25 deletions(-) create mode 100644 common/cuda_hip/matrix/batch_ell_kernels.hpp.inc create mode 100644 reference/test/matrix/batch_ell_kernels.cpp create mode 100644 test/matrix/batch_ell_kernels.cpp diff --git a/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc index 263e911c31a..f8da432aa4d 100644 --- a/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc +++ b/common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc @@ -34,7 +34,18 @@ template void simple_apply(std::shared_ptr exec, const batch::matrix::Ell* mat, const batch::MultiVector* b, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + simple_apply_kernel<<get_stream()>>>(mat_ub, b_ub, x_ub); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( @@ -47,7 +58,21 @@ void advanced_apply(std::shared_ptr exec, const batch::matrix::Ell* mat, const batch::MultiVector* b, const batch::MultiVector* beta, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + advanced_apply_kernel<<get_stream()>>>(alpha_ub, mat_ub, b_ub, + beta_ub, x_ub); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc 
b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc new file mode 100644 index 00000000000..e55e7a60471 --- /dev/null +++ b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc @@ -0,0 +1,155 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + + +template +__device__ __forceinline__ void simple_apply( + const gko::batch::matrix::batch_ell::batch_item& mat, + const ValueType* const __restrict__ b, ValueType* const __restrict__ x) +{ + const auto num_rows = mat.num_rows; + const auto num_stored_elements_per_row = mat.num_stored_elems_per_row; + const auto stride = mat.stride; + const auto val = mat.values; + const auto col = mat.col_idxs; + for (int tidx = threadIdx.x; tidx < num_rows; tidx += blockDim.x) { + auto temp = zero(); + for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { + const auto ind = tidx + idx * stride; + const auto col_idx = col[ind]; + if (col_idx < idx) { + break; + } else { + temp += val[ind] * b[col_idx]; + } + } + x[tidx] = temp; + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: + batch_ell::uniform_batch< + const ValueType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + simple_apply(mat_b, b_b.values, x_b.values); + } +} + + +template +__device__ __forceinline__ void advanced_apply( + const ValueType alpha, + const gko::batch::matrix::batch_ell::batch_item& mat, + const ValueType* const __restrict__ b, const ValueType beta, + ValueType* const __restrict__ x) +{ + const auto num_rows = mat.num_rows; + const auto num_stored_elements_per_row = mat.num_stored_elems_per_row; + const auto stride = mat.stride; + const auto val = mat.values; + const auto col = mat.col_idxs; + for (int tidx = 
threadIdx.x; tidx < num_rows; tidx += blockDim.x) { + auto temp = zero(); + for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { + const auto ind = tidx + idx * stride; + const auto col_idx = col[ind]; + if (col_idx < idx) { + break; + } else { + temp += alpha * val[ind] * b[col_idx]; + } + } + x[tidx] = temp + beta * x[tidx]; + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void advanced_apply_kernel(const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + alpha, + const gko::batch::matrix:: + batch_ell:: + uniform_batch< + const ValueType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + beta, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id); + const auto beta_b = gko::batch::extract_batch_item(beta, batch_id); + advanced_apply(alpha_b.values[0], mat_b, b_b.values, beta_b.values[0], + x_b.values); + } +} diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index 272bb506df2..2eed40882bc 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -188,8 +188,7 @@ GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( { return {batch.values + batch_idx * batch.num_stored_elems_per_row * batch.num_rows, - batch.col_idxs + - batch_idx * batch.num_stored_elems_per_row * batch.num_rows, + batch.col_idxs, batch.stride, batch.num_rows, batch.num_cols, @@ -203,7 +202,7 @@ GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item 
extract_batch_item( const size_type batch_idx) { return {batch_values + batch_idx * num_elems_per_row * num_rows, - batch_col_idxs + batch_idx * num_elems_per_row * num_rows, + batch_col_idxs, stride, num_rows, num_cols, diff --git a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu index 567d863d95c..ee6a99f04ca 100644 --- a/cuda/matrix/batch_ell_kernels.cu +++ b/cuda/matrix/batch_ell_kernels.cu @@ -72,7 +72,7 @@ constexpr int sm_oversubscription = 4; // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -// #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" +#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 73712a7b81b..7a6a4ac7f00 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -87,6 +87,40 @@ get_batch_struct(batch::matrix::Dense* const op) } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::batch_ell::uniform_batch> +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {as_cuda_type(op->get_const_values()), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. 
+ */ +template +inline batch::matrix::batch_ell::uniform_batch> +get_batch_struct(batch::matrix::Ell* const op) +{ + return {as_cuda_type(op->get_values()), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace cuda } // namespace kernels } // namespace gko diff --git a/hip/matrix/batch_dense_kernels.hip.cpp b/hip/matrix/batch_dense_kernels.hip.cpp index eb3da83760a..3361feeb8b8 100644 --- a/hip/matrix/batch_dense_kernels.hip.cpp +++ b/hip/matrix/batch_dense_kernels.hip.cpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include #include "core/base/batch_struct.hpp" diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp index 567d863d95c..fdd52c38f57 100644 --- a/hip/matrix/batch_ell_kernels.hip.cpp +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -33,6 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "core/matrix/batch_ell_kernels.hpp" +#include #include #include @@ -42,21 +43,21 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" -#include "cuda/base/batch_struct.hpp" -#include "cuda/base/config.hpp" -#include "cuda/base/cublas_bindings.hpp" -#include "cuda/base/pointer_mode_guard.hpp" -#include "cuda/base/thrust.cuh" -#include "cuda/components/cooperative_groups.cuh" -#include "cuda/components/reduction.cuh" -#include "cuda/components/thread_ids.cuh" -#include "cuda/components/uninitialized_array.hpp" -#include "cuda/matrix/batch_struct.hpp" +#include "hip/base/batch_struct.hip.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/hipblas_bindings.hip.hpp" +#include "hip/base/pointer_mode_guard.hip.hpp" +#include "hip/base/thrust.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" +#include "hip/components/uninitialized_array.hip.hpp" +#include "hip/matrix/batch_struct.hip.hpp" namespace gko { namespace kernels { -namespace cuda { +namespace hip { /** * @brief The Ell matrix format namespace. * @ref Ell @@ -72,7 +73,7 @@ constexpr int sm_oversubscription = 4; // NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES -// #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" +#include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_ell_kernel_launcher.hpp.inc" @@ -81,6 +82,6 @@ constexpr int sm_oversubscription = 4; } // namespace batch_ell -} // namespace cuda +} // namespace hip } // namespace kernels } // namespace gko diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index 4670cf0988b..a43d7d058b0 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -87,6 +87,40 @@ get_batch_struct(batch::matrix::Dense* const op) } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. 
+ */ +template +inline batch::matrix::batch_ell::uniform_batch> +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {as_hip_type(op->get_const_values()), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::batch_ell::uniform_batch> +get_batch_struct(batch::matrix::Ell* const op) +{ + return {as_hip_type(op->get_values()), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace hip } // namespace kernels } // namespace gko diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc index 37370261d44..41d0a00ddcd 100644 --- a/reference/matrix/batch_ell_kernels.hpp.inc +++ b/reference/matrix/batch_ell_kernels.hpp.inc @@ -36,14 +36,14 @@ inline void simple_apply_kernel( const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& c) { - for (int row = 0; row < a.num_rows; ++row) { - for (int j = 0; j < b.num_rhs; ++j) { + for (int row = 0; row < c.num_rows; ++row) { + for (int j = 0; j < c.num_rhs; ++j) { c.values[row * c.stride + j] = zero(); } for (auto k = 0; k < a.num_stored_elems_per_row; ++k) { auto val = a.values[row + k * a.stride]; auto col = a.col_idxs[row + k * a.stride]; - for (int j = 0; j < b.num_rhs; ++j) { + for (int j = 0; j < c.num_rhs; ++j) { c.values[row * c.stride + j] += val * b.values[col * b.stride + j]; } diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index b5eacd80d18..3b562450ee0 100644 --- a/reference/matrix/batch_struct.hpp +++ 
b/reference/matrix/batch_struct.hpp @@ -101,7 +101,7 @@ get_batch_struct(const batch::matrix::Ell* const op) return {op->get_const_values(), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1]), static_cast(op->get_num_stored_elements_per_row())}; @@ -118,7 +118,7 @@ inline batch::matrix::batch_ell::uniform_batch get_batch_struct( return {op->get_values(), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[1]), + static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[0]), static_cast(op->get_common_size()[1]), static_cast(op->get_num_stored_elements_per_row())}; diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 18634de662d..05498cbadc4 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_test(batch_dense_kernels) +ginkgo_create_test(batch_ell_kernels) ginkgo_create_test(coo_kernels) ginkgo_create_test(csr_kernels) ginkgo_create_test(dense_kernels) diff --git a/reference/test/matrix/batch_ell_kernels.cpp b/reference/test/matrix/batch_ell_kernels.cpp new file mode 100644 index 00000000000..76b681c69f7 --- /dev/null +++ b/reference/test/matrix/batch_ell_kernels.cpp @@ -0,0 +1,248 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_ell_kernels.hpp" +#include "core/test/utils.hpp" + + +template +class Ell : public ::testing::Test { +protected: + using value_type = T; + using size_type = gko::size_type; + using Mtx = gko::batch::matrix::Ell; + using MVec = gko::batch::MultiVector; + using EllMtx = gko::matrix::Ell; + using DenseMtx = gko::matrix::Dense; + using ComplexMtx = gko::to_complex; + using RealMtx = gko::remove_complex; + Ell() + : exec(gko::ReferenceExecutor::create()), + mtx_0(gko::batch::initialize( + {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, + {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, + exec)), + mtx_00(gko::initialize( + {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), + mtx_01(gko::initialize( + {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), + b_0(gko::batch::initialize( + {{I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}}, + exec)), + b_00(gko::initialize( + {I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + b_01(gko::initialize( + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + x_0(gko::batch::initialize( + {{I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}}, + exec)), + x_00(gko::initialize( + {I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, exec)), + x_01(gko::initialize( + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}, exec)) + {} + + std::shared_ptr exec; + std::unique_ptr mtx_0; + std::unique_ptr mtx_00; + std::unique_ptr mtx_01; + std::unique_ptr b_0; + std::unique_ptr b_00; + std::unique_ptr b_01; + std::unique_ptr x_0; + std::unique_ptr x_00; + std::unique_ptr x_01; + + std::ranlux48 rand_engine; +}; + + +TYPED_TEST_SUITE(Ell, gko::test::ValueTypes); + + +TYPED_TEST(Ell, 
AppliesToBatchMultiVector) +{ + using T = typename TestFixture::value_type; + + this->mtx_0->apply(this->b_0.get(), this->x_0.get()); + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); + this->mtx_01->apply(this->b_01.get(), this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(Ell, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) +{ + using Mtx = typename TestFixture::Mtx; + using MVec = typename TestFixture::MVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize(2, {1.5}, this->exec); + auto beta = gko::batch::initialize(2, {-4.0}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({1.5}, this->exec); + auto beta0 = gko::initialize({-4.0}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), + this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) +{ + using Mtx = typename TestFixture::Mtx; + using MVec = typename TestFixture::MVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({-1.0}, this->exec); + auto beta0 = gko::initialize({2.5}, this->exec); 
+ auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), + this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + + auto res = gko::batch::unbatch>(this->x_0.get()); + + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); +} + + +TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultCols) +{ + using MVec = typename TestFixture::MVec; + auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultRows) +{ + using MVec = typename TestFixture::MVec; + auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Ell, ApplyFailsOnWrongInnerDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + + ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Ell, AdvancedApplyFailsOnWrongInnerDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + auto alpha = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + auto beta = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Ell, AdvancedApplyFailsOnWrongAlphaDimension) +{ + using MVec = typename TestFixture::MVec; + auto res = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); + auto 
alpha = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); + auto beta = + MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index 9f3b17cd858..f1c91e615e7 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,4 +1,5 @@ ginkgo_create_common_test(batch_dense_kernels) +ginkgo_create_common_test(batch_ell_kernels DISABLE_EXECUTORS dpcpp) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) diff --git a/test/matrix/batch_ell_kernels.cpp b/test/matrix/batch_ell_kernels.cpp new file mode 100644 index 00000000000..9629a2263ff --- /dev/null +++ b/test/matrix/batch_ell_kernels.cpp @@ -0,0 +1,128 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "core/matrix/batch_ell_kernels.hpp" + + +#include +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/batch_utilities.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/assertions.hpp" +#include "core/test/utils/batch_helpers.hpp" +#include "test/utils/executor.hpp" + + +class Ell : public CommonTestFixture { +protected: + using Mtx = gko::batch::matrix::Ell; + using MVec = gko::batch::MultiVector; + + Ell() : rand_engine(15) {} + + template + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) + { + return gko::test::generate_random_batch_matrix( + num_batch_items, num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(gko::size_type num_vecs = 1) + { + const int num_rows = 252; + const int num_cols = 32; + x = gen_mtx(batch_size, num_rows, num_cols); + y = gen_mtx(batch_size, num_cols, num_vecs); + alpha = gen_mtx(batch_size, 1, 1); + beta = gen_mtx(batch_size, 1, 1); + dx = gko::clone(exec, x); + dy = gko::clone(exec, y); + dalpha = gko::clone(exec, alpha); + dbeta = gko::clone(exec, beta); + expected = MVec::create( + ref, + gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); + expected->fill(gko::one()); + dresult = gko::clone(exec, 
expected); + } + + std::ranlux48 rand_engine; + + const size_t batch_size = 11; + std::unique_ptr x; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr dresult; + std::unique_ptr dx; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(Ell, SingleVectorApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + x->apply(y.get(), expected.get()); + dx->apply(dy.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Ell, SingleVectorAdvancedApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + x->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); +} From 174c3fdc726ce1a896939dfb4f6a335b3b6ed25f Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 8 Oct 2023 12:03:36 +0200 Subject: [PATCH 06/18] Add DPCPP kernels and tests Co-authored-by: Phuong Nguyen --- core/test/utils/batch_helpers.hpp | 2 - dpcpp/matrix/batch_ell_kernels.dp.cpp | 84 ++++++++++++++++++++++++-- dpcpp/matrix/batch_ell_kernels.hpp.inc | 79 ++++++++++++++++++++++++ dpcpp/matrix/batch_struct.hpp | 34 +++++++++++ test/matrix/CMakeLists.txt | 2 +- test/matrix/batch_ell_kernels.cpp | 26 ++++++-- 6 files changed, 213 insertions(+), 14 deletions(-) create mode 100644 dpcpp/matrix/batch_ell_kernels.hpp.inc diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp index 4cf9d4973e2..b040691999e 100644 --- a/core/test/utils/batch_helpers.hpp +++ b/core/test/utils/batch_helpers.hpp @@ -83,8 +83,6 @@ std::unique_ptr generate_random_batch_matrix( exec, batch_dim<2>(num_batch_items, dim<2>(num_rows, num_cols)), std::forward(args)...); - // TODO: Need to preserve sparsity pattern across batch items for batched - // sparse matrix formats for (size_type b = 0; b < num_batch_items; b++) { auto rand_mat = 
generate_random_matrix( diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index cdcd5abd024..1ed83d79630 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -30,7 +30,7 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include "core/matrix/batch_dense_kernels.hpp" +#include "core/matrix/batch_ell_kernels.hpp" #include @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include -#include +#include #include "core/base/batch_struct.hpp" @@ -71,14 +71,48 @@ namespace dpcpp { namespace batch_ell { -// #include "dpcpp/matrix/batch_dense_kernels.hpp.inc" +#include "dpcpp/matrix/batch_ell_kernels.hpp.inc" template void simple_apply(std::shared_ptr exec, const batch::matrix::Ell* mat, const batch::MultiVector* b, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const size_type num_rows = mat->get_common_size()[0]; + const size_type num_cols = mat->get_common_size()[1]; + + const auto num_batch_items = mat->get_num_batch_items(); + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + const auto x_ub = get_batch_struct(x); + const auto b_ub = get_batch_struct(b); + const auto mat_ub = get_batch_struct(mat); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + // Launch a kernel that has nbatches blocks, each block has max group size + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + 
batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); + }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_SIMPLE_APPLY_KERNEL); @@ -90,7 +124,47 @@ void advanced_apply(std::shared_ptr exec, const batch::matrix::Ell* mat, const batch::MultiVector* b, const batch::MultiVector* beta, - batch::MultiVector* x) GKO_NOT_IMPLEMENTED; + batch::MultiVector* x) +{ + const auto mat_ub = get_batch_struct(mat); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + const auto num_batch_items = mat_ub.num_batch_items; + auto device = exec->get_queue()->get_device(); + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + + // Launch a kernel that has nbatches blocks, each block has max group size + (exec->get_queue())->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); + }); +} GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( GKO_DECLARE_BATCH_ELL_ADVANCED_APPLY_KERNEL); diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc 
b/dpcpp/matrix/batch_ell_kernels.hpp.inc new file mode 100644 index 00000000000..1048f2f8ff8 --- /dev/null +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -0,0 +1,79 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +template +__dpct_inline__ void simple_apply_kernel( + const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) +{ + for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; + tidx += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { + const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; + if (col_idx < idx) + break; + else + temp += mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; + } + x.values[tidx * x.stride] = temp; + } +} + + +template +__dpct_inline__ void advanced_apply_kernel( + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& beta, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) +{ + for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; + tidx += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { + const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; + if (col_idx < idx) + break; + else + temp += alpha.values[0] * mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; + } + x.values[tidx * x.stride] = + temp + beta.values[0] * x.values[tidx * x.stride]; + } +} diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index b0393daf55d..35ff1148dd5 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -86,6 +86,40 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( } +/** + * Generates an immutable uniform batch struct from a batch of ell matrices. 
+ */ +template +inline batch::matrix::batch_ell::uniform_batch +get_batch_struct(const batch::matrix::Ell* const op) +{ + return {op->get_const_values(), + op->get_const_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + +/** + * Generates a uniform batch struct from a batch of ell matrices. + */ +template +inline batch::matrix::batch_ell::uniform_batch get_batch_struct( + batch::matrix::Ell* const op) +{ + return {op->get_values(), + op->get_col_idxs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; +} + + } // namespace dpcpp } // namespace kernels } // namespace gko diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index f1c91e615e7..a03a0a0bb4e 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,5 +1,5 @@ ginkgo_create_common_test(batch_dense_kernels) -ginkgo_create_common_test(batch_ell_kernels DISABLE_EXECUTORS dpcpp) +ginkgo_create_common_test(batch_ell_kernels) ginkgo_create_common_device_test(csr_kernels) ginkgo_create_common_test(csr_kernels2) ginkgo_create_common_test(coo_kernels) diff --git a/test/matrix/batch_ell_kernels.cpp b/test/matrix/batch_ell_kernels.cpp index 9629a2263ff..bc1e0c7fb42 100644 --- a/test/matrix/batch_ell_kernels.cpp +++ b/test/matrix/batch_ell_kernels.cpp @@ -63,22 +63,36 @@ class Ell : public CommonTestFixture { template std::unique_ptr gen_mtx(const gko::size_type num_batch_items, gko::size_type num_rows, - gko::size_type num_cols) + gko::size_type num_cols, + int num_elems_per_row) { return gko::test::generate_random_batch_matrix( + num_batch_items, num_rows, num_cols, + std::uniform_int_distribution<>(num_elems_per_row, + num_elems_per_row), + 
std::normal_distribution<>(-1.0, 1.0), rand_engine, ref, + num_elems_per_row); + } + + std::unique_ptr gen_mvec(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) + { + return gko::test::generate_random_batch_matrix( num_batch_items, num_rows, num_cols, std::uniform_int_distribution<>(num_cols, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); } - void set_up_apply_data(gko::size_type num_vecs = 1) + void set_up_apply_data(gko::size_type num_vecs = 1, + int num_elems_per_row = 5) { const int num_rows = 252; const int num_cols = 32; - x = gen_mtx(batch_size, num_rows, num_cols); - y = gen_mtx(batch_size, num_cols, num_vecs); - alpha = gen_mtx(batch_size, 1, 1); - beta = gen_mtx(batch_size, 1, 1); + x = gen_mtx(batch_size, num_rows, num_cols, num_elems_per_row); + y = gen_mvec(batch_size, num_cols, num_vecs); + alpha = gen_mvec(batch_size, 1, 1); + beta = gen_mvec(batch_size, 1, 1); dx = gko::clone(exec, x); dy = gko::clone(exec, y); dalpha = gko::clone(exec, alpha); From f4168f95277eb6d6e30c8e43e4ab7e31ce63da16 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Sun, 8 Oct 2023 12:15:00 +0200 Subject: [PATCH 07/18] Update docs --- include/ginkgo/core/matrix/batch_ell.hpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 490f7a7d4b0..48a3a6d9831 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -55,19 +55,15 @@ namespace matrix { /** - * Ell is a batch matrix format which explicitly stores all values of the - * matrix in each of the batches. + * Ell is a sparse matrix format that stores the same number of nonzeros in each + * row, enabling coalesced accesses. It is suitable for sparsity patterns that + * have a similar number of nonzeros in every row. 
The values are stored in a + * column-major fashion similar to the monolithic gko::matrix::Ell class. It is + * also assumed that the sparsity pattern of all the items in the batch is the + * same and therefore only a single copy of the sparsity pattern is stored. * - * The values in each of the batches are stored in row-major format (values - * belonging to the same row appear consecutive in the memory and the values of - * each batch item are also stored consecutively in memory). - * - * @note Though the storage layout is similar to the multi-vector object, the - * class semantics and the operations it aims to provide is different. Hence it - * is recommended to create multi-vector objects if the user means to view the - * data as a set of vectors. - * - * @tparam ValueType precision of matrix elements + * @tparam ValueType value precision of matrix elements + * @tparam IndexType index precision of matrix elements * * @ingroup batch_ell * @ingroup mat_formats From 7cb4c51c2135404ff67cbbc0322e3401530379eb Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Sun, 8 Oct 2023 10:49:57 +0000 Subject: [PATCH 08/18] Format files Co-authored-by: Pratik Nayak --- dpcpp/matrix/batch_ell_kernels.dp.cpp | 54 +++++++++---------- .../ginkgo/core/base/batch_multi_vector.hpp | 4 +- include/ginkgo/core/matrix/batch_ell.hpp | 8 +-- include/ginkgo/ginkgo.hpp | 1 + 4 files changed, 34 insertions(+), 33 deletions(-) diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index 1ed83d79630..1d1210cc270 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -100,17 +100,17 @@ void simple_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - 
auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -147,22 +147,22 @@ void advanced_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size (exec->get_queue())->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + 
const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 45ba0686468..9a4b8d5cf1d 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -212,8 +212,8 @@ class MultiVector * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 48a3a6d9831..5cb5f73dec5 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -214,8 +214,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type* get_const_col_idxs_for_item(size_type batch_id) const - noexcept + const index_type* get_const_col_idxs_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data(); @@ -243,8 +243,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 8bb29242e88..ad90e264189 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -109,6 +109,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include #include From 88b6e3aee599f26e339d1b7357c0509bd28afdd1 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 10 Oct 2023 17:15:48 +0200 Subject: [PATCH 09/18] Some general fixes. --- .../cuda_hip/matrix/batch_ell_kernels.hpp.inc | 13 ++- core/matrix/batch_ell.cpp | 32 +------ core/matrix/batch_struct.hpp | 20 ++--- core/test/matrix/batch_ell.cpp | 8 +- cuda/matrix/batch_dense_kernels.cu | 3 +- cuda/matrix/batch_ell_kernels.cu | 6 +- cuda/matrix/batch_struct.hpp | 23 ++--- dpcpp/matrix/batch_ell_kernels.dp.cpp | 62 +++++++------ dpcpp/matrix/batch_ell_kernels.hpp.inc | 4 +- dpcpp/matrix/batch_struct.hpp | 23 ++--- hip/matrix/batch_ell_kernels.hip.cpp | 6 +- hip/matrix/batch_struct.hip.hpp | 23 ++--- include/ginkgo/core/matrix/batch_ell.hpp | 8 -- omp/matrix/batch_dense_kernels.cpp | 4 +- omp/matrix/batch_ell_kernels.cpp | 4 +- reference/matrix/batch_dense_kernels.cpp | 5 +- reference/matrix/batch_ell_kernels.cpp | 5 +- reference/matrix/batch_ell_kernels.hpp.inc | 4 +- reference/matrix/batch_struct.hpp | 22 ++--- reference/test/matrix/batch_ell_kernels.cpp | 87 ++++++------------- test/matrix/batch_ell_kernels.cpp | 59 ++++++------- test/test_install/test_install.cpp | 9 +- 22 files changed, 184 insertions(+), 246 deletions(-) diff --git a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc index e55e7a60471..5c00358c5a0 100644 --- 
a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc @@ -33,7 +33,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template __device__ __forceinline__ void simple_apply( - const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const ValueType* const __restrict__ b, ValueType* const __restrict__ x) { const auto num_rows = mat.num_rows; @@ -60,7 +60,7 @@ template __global__ __launch_bounds__( default_block_size, sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: - batch_ell::uniform_batch< + ell::uniform_batch< const ValueType> mat, const gko::batch:: @@ -88,7 +88,7 @@ __global__ __launch_bounds__( template __device__ __forceinline__ void advanced_apply( const ValueType alpha, - const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const ValueType* const __restrict__ b, const ValueType beta, ValueType* const __restrict__ x) { @@ -121,10 +121,9 @@ __global__ __launch_bounds__( const ValueType> alpha, const gko::batch::matrix:: - batch_ell:: - uniform_batch< - const ValueType> - mat, + ell::uniform_batch< + const ValueType> + mat, const gko::batch:: multi_vector:: uniform_batch< diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 0d903b10968..f421fdf2b49 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -104,22 +104,10 @@ template std::unique_ptr> Ell::create_with_config_of( ptr_param> other) -{ - // De-referencing `other` before calling the functions (instead of - // using operator `->`) is currently required to be compatible with - // CUDA 10.1. - // Otherwise, it results in a compile error. 
- return (*other).create_with_same_config(); -} - - -template -std::unique_ptr> -Ell::create_with_same_config() const { return Ell::create( - this->get_executor(), this->get_size(), - this->get_num_stored_elements_per_row()); + other->get_executor(), other->get_size(), + other->get_num_stored_elements_per_row()); } @@ -163,12 +151,7 @@ template void Ell::apply_impl(const MultiVector* b, MultiVector* x) const { - GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); - GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); + this->validate_application_parameters(b, x); this->get_executor()->run(ell::make_simple_apply(this, b, x)); } @@ -179,14 +162,7 @@ void Ell::apply_impl(const MultiVector* alpha, const MultiVector* beta, MultiVector* x) const { - GKO_ASSERT_EQ(b->get_num_batch_items(), this->get_num_batch_items()); - GKO_ASSERT_EQ(this->get_num_batch_items(), x->get_num_batch_items()); - - GKO_ASSERT_CONFORMANT(this->get_common_size(), b->get_common_size()); - GKO_ASSERT_EQUAL_ROWS(this->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_COLS(b->get_common_size(), x->get_common_size()); - GKO_ASSERT_EQUAL_DIMENSIONS(alpha->get_common_size(), gko::dim<2>(1, 1)); - GKO_ASSERT_EQUAL_DIMENSIONS(beta->get_common_size(), gko::dim<2>(1, 1)); + this->validate_application_parameters(alpha, b, beta, x); this->get_executor()->run( ell::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index 2eed40882bc..eeeeebd53d6 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -83,7 +83,7 @@ struct uniform_batch { } // namespace dense -namespace batch_ell { +namespace ell { /** @@ -109,7 +109,7 @@ struct batch_item { template struct 
uniform_batch { using value_type = ValueType; - using index_type = int; + using index_type = int32; using entry_type = batch_item; ValueType* values; @@ -127,7 +127,7 @@ struct uniform_batch { }; -} // namespace batch_ell +} // namespace ell template @@ -165,8 +165,8 @@ GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( template -GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item to_const( - const batch_ell::batch_item& b) +GKO_ATTRIBUTES GKO_INLINE ell::batch_item to_const( + const ell::batch_item& b) { return {b.values, b.col_idxs, b.stride, b.num_rows, b.num_cols, b.num_stored_elems_per_row}; @@ -174,8 +174,8 @@ GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item to_const( template -GKO_ATTRIBUTES GKO_INLINE batch_ell::uniform_batch to_const( - const batch_ell::uniform_batch& ub) +GKO_ATTRIBUTES GKO_INLINE ell::uniform_batch to_const( + const ell::uniform_batch& ub) { return {ub.values, ub.col_idxs, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_cols, ub.num_stored_elems_per_row}; @@ -183,8 +183,8 @@ GKO_ATTRIBUTES GKO_INLINE batch_ell::uniform_batch to_const( template -GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( - const batch_ell::uniform_batch& batch, const size_type batch_idx) +GKO_ATTRIBUTES GKO_INLINE ell::batch_item extract_batch_item( + const ell::uniform_batch& batch, const size_type batch_idx) { return {batch.values + batch_idx * batch.num_stored_elems_per_row * batch.num_rows, @@ -196,7 +196,7 @@ GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( } template -GKO_ATTRIBUTES GKO_INLINE batch_ell::batch_item extract_batch_item( +GKO_ATTRIBUTES GKO_INLINE ell::batch_item extract_batch_item( ValueType* const batch_values, int* const batch_col_idxs, const int stride, const int num_rows, const int num_cols, int num_elems_per_row, const size_type batch_idx) diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp index 2830705bf5f..e4dcab23917 100644 --- a/core/test/matrix/batch_ell.cpp +++ 
b/core/test/matrix/batch_ell.cpp @@ -144,6 +144,7 @@ TYPED_TEST(Ell, SparseMtxKnowsItsSizeAndValues) TYPED_TEST(Ell, CanBeEmpty) { auto empty = gko::batch::matrix::Ell::create(this->exec); + this->assert_empty(empty.get()); } @@ -151,6 +152,7 @@ TYPED_TEST(Ell, CanBeEmpty) TYPED_TEST(Ell, ReturnsNullValuesArrayWhenEmpty) { auto empty = gko::batch::matrix::Ell::create(this->exec); + ASSERT_EQ(empty->get_const_values(), nullptr); } @@ -284,7 +286,6 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatrices) using value_type = typename TestFixture::value_type; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, this->exec); auto mat2 = @@ -304,15 +305,14 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) using index_type = int; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); - auto bat_m = gko::batch::create_from_item>( this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}, mat1->get_num_stored_elements_per_row()); + auto m = gko::batch::create_from_item>( this->exec, 3, mat1.get(), mat1->get_num_stored_elements_per_row()); @@ -326,7 +326,6 @@ TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) using index_type = int; using EllMtx = typename TestFixture::EllMtx; using size_type = gko::size_type; - auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 0.0}}, this->exec); auto mat2 = @@ -372,6 +371,7 @@ TYPED_TEST(Ell, CanBeListConstructed) { using value_type = typename TestFixture::value_type; using index_type = int; + auto m = gko::batch::initialize>( {{0.0, -1.0}, {1.0, 0.0}}, this->exec); diff --git a/cuda/matrix/batch_dense_kernels.cu b/cuda/matrix/batch_dense_kernels.cu index dd82e15b8cc..c693a3ae861 100644 --- a/cuda/matrix/batch_dense_kernels.cu +++ b/cuda/matrix/batch_dense_kernels.cu @@ -36,7 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF 
THE POSSIBILITY OF SUCH DAMAGE. #include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu index ee6a99f04ca..6dd268a2d8e 100644 --- a/cuda/matrix/batch_ell_kernels.cu +++ b/cuda/matrix/batch_ell_kernels.cu @@ -34,18 +34,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" #include "cuda/base/batch_struct.hpp" #include "cuda/base/config.hpp" -#include "cuda/base/cublas_bindings.hpp" -#include "cuda/base/pointer_mode_guard.hpp" #include "cuda/base/thrust.cuh" #include "cuda/components/cooperative_groups.cuh" #include "cuda/components/reduction.cuh" diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index 7a6a4ac7f00..e2db1ea6e97 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "core/base/batch_struct.hpp" @@ -91,16 +92,16 @@ get_batch_struct(batch::matrix::Dense* const op) * Generates an immutable uniform batch struct from a batch of ell matrices. 
*/ template -inline batch::matrix::batch_ell::uniform_batch> +inline batch::matrix::ell::uniform_batch> get_batch_struct(const batch::matrix::Ell* const op) { return {as_cuda_type(op->get_const_values()), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } @@ -108,16 +109,16 @@ get_batch_struct(const batch::matrix::Ell* const op) * Generates a uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch> -get_batch_struct(batch::matrix::Ell* const op) +inline batch::matrix::ell::uniform_batch> get_batch_struct( + batch::matrix::Ell* const op) { return {as_cuda_type(op->get_values()), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index 1d1210cc270..fca265eceb0 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -39,17 +39,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include #include -#include #include #include "core/base/batch_struct.hpp" -#include "core/components/prefix_sum_kernels.hpp" #include "core/matrix/batch_struct.hpp" #include "dpcpp/base/batch_struct.hpp" -#include "dpcpp/base/config.hpp" #include "dpcpp/base/dim3.dp.hpp" #include "dpcpp/base/dpct.hpp" #include "dpcpp/base/helper.hpp" @@ -98,19 +94,19 @@ void simple_apply(std::shared_ptr exec, } // Launch a kernel that has nbatches blocks, each block has max group size - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -145,24 +141,24 @@ void advanced_apply(std::shared_ptr exec, const dim3 grid(num_batch_items); // Launch a kernel that has nbatches blocks, each block has max group size - (exec->get_queue())->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(config::warp_size)]] { - auto group = item_ct1.get_group(); - auto 
group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), [= + ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc index 1048f2f8ff8..7500ae9e060 100644 --- a/dpcpp/matrix/batch_ell_kernels.hpp.inc +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
template __dpct_inline__ void simple_apply_kernel( - const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& x, sycl::nd_item<3>& item_ct1) @@ -56,7 +56,7 @@ __dpct_inline__ void simple_apply_kernel( template __dpct_inline__ void advanced_apply_kernel( const gko::batch::multi_vector::batch_item& alpha, - const gko::batch::matrix::batch_ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& beta, const gko::batch::multi_vector::batch_item& x, diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index 35ff1148dd5..f857653e05e 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "core/base/batch_struct.hpp" @@ -90,16 +91,16 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( * Generates an immutable uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch -get_batch_struct(const batch::matrix::Ell* const op) +inline batch::matrix::ell::uniform_batch get_batch_struct( + const batch::matrix::Ell* const op) { return {op->get_const_values(), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } @@ -107,16 +108,16 @@ get_batch_struct(const batch::matrix::Ell* const op) * Generates a uniform batch struct from a batch of ell matrices. 
*/ template -inline batch::matrix::batch_ell::uniform_batch get_batch_struct( +inline batch::matrix::ell::uniform_batch get_batch_struct( batch::matrix::Ell* const op) { return {op->get_values(), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp index fdd52c38f57..5c6d5179a21 100644 --- a/hip/matrix/batch_ell_kernels.hip.cpp +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -35,18 +35,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" #include "core/matrix/batch_struct.hpp" #include "hip/base/batch_struct.hip.hpp" #include "hip/base/config.hip.hpp" -#include "hip/base/hipblas_bindings.hip.hpp" -#include "hip/base/pointer_mode_guard.hip.hpp" #include "hip/base/thrust.hip.hpp" #include "hip/components/cooperative_groups.hip.hpp" #include "hip/components/reduction.hip.hpp" diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index a43d7d058b0..6f15b2d966a 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "core/base/batch_struct.hpp" @@ -91,16 +92,16 @@ get_batch_struct(batch::matrix::Dense* const op) * Generates an immutable uniform batch struct from a batch of ell matrices. 
*/ template -inline batch::matrix::batch_ell::uniform_batch> +inline batch::matrix::ell::uniform_batch> get_batch_struct(const batch::matrix::Ell* const op) { return {as_hip_type(op->get_const_values()), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } @@ -108,16 +109,16 @@ get_batch_struct(const batch::matrix::Ell* const op) * Generates a uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch> -get_batch_struct(batch::matrix::Ell* const op) +inline batch::matrix::ell::uniform_batch> get_batch_struct( + batch::matrix::Ell* const op) { return {as_hip_type(op->get_values()), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 5cb5f73dec5..6f3db1bb96b 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -356,14 +356,6 @@ class Ell final col_idxs_.get_num_elems()); } - /** - * Creates a Ell matrix with the same configuration as the callers - * matrix. - * - * @returns a Ell matrix with the same configuration as the caller. 
- */ - std::unique_ptr create_with_same_config() const; - void apply_impl(const MultiVector* b, MultiVector* x) const; diff --git a/omp/matrix/batch_dense_kernels.cpp b/omp/matrix/batch_dense_kernels.cpp index 2d0b7ed4d40..b91a4133dba 100644 --- a/omp/matrix/batch_dense_kernels.cpp +++ b/omp/matrix/batch_dense_kernels.cpp @@ -36,8 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/omp/matrix/batch_ell_kernels.cpp b/omp/matrix/batch_ell_kernels.cpp index 20ea4614e7d..17710a97366 100644 --- a/omp/matrix/batch_ell_kernels.cpp +++ b/omp/matrix/batch_ell_kernels.cpp @@ -36,8 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/reference/matrix/batch_dense_kernels.cpp b/reference/matrix/batch_dense_kernels.cpp index 3d7ef03a3bd..87d73bb8e34 100644 --- a/reference/matrix/batch_dense_kernels.cpp +++ b/reference/matrix/batch_dense_kernels.cpp @@ -36,9 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/reference/matrix/batch_ell_kernels.cpp b/reference/matrix/batch_ell_kernels.cpp index a3f69827c02..1d3a0e1ef94 100644 --- a/reference/matrix/batch_ell_kernels.cpp +++ b/reference/matrix/batch_ell_kernels.cpp @@ -36,9 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include -#include -#include -#include +#include +#include #include "core/base/batch_struct.hpp" diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc index 41d0a00ddcd..44de2a57af9 100644 --- a/reference/matrix/batch_ell_kernels.hpp.inc +++ b/reference/matrix/batch_ell_kernels.hpp.inc @@ -32,7 +32,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. template inline void simple_apply_kernel( - const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::matrix::ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& c) { @@ -55,7 +55,7 @@ inline void simple_apply_kernel( template inline void advanced_apply_kernel( const ValueType alpha, - const gko::batch::matrix::batch_ell::batch_item& a, + const gko::batch::matrix::ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const ValueType beta, const gko::batch::multi_vector::batch_item& c) diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 3b562450ee0..fb0e08c16f5 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -95,16 +95,16 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( * Generates an immutable uniform batch struct from a batch of ell matrices. 
*/ template -inline batch::matrix::batch_ell::uniform_batch -get_batch_struct(const batch::matrix::Ell* const op) +inline batch::matrix::ell::uniform_batch get_batch_struct( + const batch::matrix::Ell* const op) { return {op->get_const_values(), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } @@ -112,16 +112,16 @@ get_batch_struct(const batch::matrix::Ell* const op) * Generates a uniform batch struct from a batch of ell matrices. */ template -inline batch::matrix::batch_ell::uniform_batch get_batch_struct( +inline batch::matrix::ell::uniform_batch get_batch_struct( batch::matrix::Ell* const op) { return {op->get_values(), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/reference/test/matrix/batch_ell_kernels.cpp b/reference/test/matrix/batch_ell_kernels.cpp index 76b681c69f7..8a5806a9513 100644 --- a/reference/test/matrix/batch_ell_kernels.cpp +++ b/reference/test/matrix/batch_ell_kernels.cpp @@ -58,15 +58,13 @@ class Ell : public ::testing::Test { protected: using value_type = T; using size_type = gko::size_type; - using Mtx = gko::batch::matrix::Ell; - using MVec = gko::batch::MultiVector; + using BMtx = gko::batch::matrix::Ell; + using BMVec = gko::batch::MultiVector; using EllMtx = gko::matrix::Ell; using DenseMtx = gko::matrix::Dense; - 
using ComplexMtx = gko::to_complex; - using RealMtx = gko::remove_complex; Ell() : exec(gko::ReferenceExecutor::create()), - mtx_0(gko::batch::initialize( + mtx_0(gko::batch::initialize( {{I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, {{1.0, -2.0, -0.5}, {1.0, -2.5, 4.0}}}, exec)), @@ -74,7 +72,7 @@ class Ell : public ::testing::Test { {I({1.0, -1.0, 1.5}), I({-2.0, 2.0, 3.0})}, exec)), mtx_01(gko::initialize( {I({1.0, -2.0, -0.5}), I({1.0, -2.5, 4.0})}, exec)), - b_0(gko::batch::initialize( + b_0(gko::batch::initialize( {{I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), I({1.0, 0.0, 2.0})}, {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), @@ -88,7 +86,7 @@ class Ell : public ::testing::Test { {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), I({1.0, 0.0, 2.0})}, exec)), - x_0(gko::batch::initialize( + x_0(gko::batch::initialize( {{I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}}, exec)), @@ -99,13 +97,13 @@ class Ell : public ::testing::Test { {} std::shared_ptr exec; - std::unique_ptr mtx_0; + std::unique_ptr mtx_0; std::unique_ptr mtx_00; std::unique_ptr mtx_01; - std::unique_ptr b_0; + std::unique_ptr b_0; std::unique_ptr b_00; std::unique_ptr b_01; - std::unique_ptr x_0; + std::unique_ptr x_0; std::unique_ptr x_00; std::unique_ptr x_01; @@ -121,38 +119,10 @@ TYPED_TEST(Ell, AppliesToBatchMultiVector) using T = typename TestFixture::value_type; this->mtx_0->apply(this->b_0.get(), this->x_0.get()); + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); this->mtx_01->apply(this->b_01.get(), this->x_01.get()); - - auto res = gko::batch::unbatch>(this->x_0.get()); - - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); -} - - -TYPED_TEST(Ell, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) -{ - using Mtx = typename TestFixture::Mtx; - using MVec = typename TestFixture::MVec; - using DenseMtx = typename TestFixture::DenseMtx; - using T = typename TestFixture::value_type; - auto 
alpha = gko::batch::initialize(2, {1.5}, this->exec); - auto beta = gko::batch::initialize(2, {-4.0}, this->exec); - auto alpha0 = gko::initialize({1.5}, this->exec); - auto alpha1 = gko::initialize({1.5}, this->exec); - auto beta0 = gko::initialize({-4.0}, this->exec); - auto beta1 = gko::initialize({-4.0}, this->exec); - - this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), - this->x_0.get()); - this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), - this->x_00.get()); - this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), - this->x_01.get()); - auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); } @@ -160,12 +130,12 @@ TYPED_TEST(Ell, AppliesLinearCombinationWithSameAlphaToBatchMultiVector) TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) { - using Mtx = typename TestFixture::Mtx; - using MVec = typename TestFixture::MVec; + using BMtx = typename TestFixture::BMtx; + using BMVec = typename TestFixture::BMVec; using DenseMtx = typename TestFixture::DenseMtx; using T = typename TestFixture::value_type; - auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); - auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); auto alpha0 = gko::initialize({1.5}, this->exec); auto alpha1 = gko::initialize({-1.0}, this->exec); auto beta0 = gko::initialize({2.5}, this->exec); @@ -173,13 +143,12 @@ TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), this->x_0.get()); + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), this->x_00.get()); this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), this->x_01.get()); - auto res = gko::batch::unbatch>(this->x_0.get()); - 
GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); } @@ -187,8 +156,8 @@ TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultCols) { - using MVec = typename TestFixture::MVec; - auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); + using BMVec = typename TestFixture::BMVec; + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), gko::DimensionMismatch); @@ -197,8 +166,8 @@ TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultCols) TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultRows) { - using MVec = typename TestFixture::MVec; - auto res = MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); + using BMVec = typename TestFixture::BMVec; + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), gko::DimensionMismatch); @@ -207,9 +176,9 @@ TYPED_TEST(Ell, ApplyFailsOnWrongNumberOfResultRows) TYPED_TEST(Ell, ApplyFailsOnWrongInnerDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()), gko::DimensionMismatch); @@ -218,13 +187,13 @@ TYPED_TEST(Ell, ApplyFailsOnWrongInnerDimension) TYPED_TEST(Ell, AdvancedApplyFailsOnWrongInnerDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); auto alpha = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 
1}}); auto beta = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); ASSERT_THROW( this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), @@ -234,13 +203,13 @@ TYPED_TEST(Ell, AdvancedApplyFailsOnWrongInnerDimension) TYPED_TEST(Ell, AdvancedApplyFailsOnWrongAlphaDimension) { - using MVec = typename TestFixture::MVec; + using BMVec = typename TestFixture::BMVec; auto res = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); auto alpha = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); auto beta = - MVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); ASSERT_THROW( this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), diff --git a/test/matrix/batch_ell_kernels.cpp b/test/matrix/batch_ell_kernels.cpp index bc1e0c7fb42..083af0a0938 100644 --- a/test/matrix/batch_ell_kernels.cpp +++ b/test/matrix/batch_ell_kernels.cpp @@ -55,18 +55,18 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
class Ell : public CommonTestFixture { protected: - using Mtx = gko::batch::matrix::Ell; - using MVec = gko::batch::MultiVector; + using BMtx = gko::batch::matrix::Ell; + using BMVec = gko::batch::MultiVector; Ell() : rand_engine(15) {} - template - std::unique_ptr gen_mtx(const gko::size_type num_batch_items, - gko::size_type num_rows, - gko::size_type num_cols, - int num_elems_per_row) + template + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols, + int num_elems_per_row) { - return gko::test::generate_random_batch_matrix( + return gko::test::generate_random_batch_matrix( num_batch_items, num_rows, num_cols, std::uniform_int_distribution<>(num_elems_per_row, num_elems_per_row), @@ -74,11 +74,11 @@ class Ell : public CommonTestFixture { num_elems_per_row); } - std::unique_ptr gen_mvec(const gko::size_type num_batch_items, - gko::size_type num_rows, - gko::size_type num_cols) + std::unique_ptr gen_mvec(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) { - return gko::test::generate_random_batch_matrix( + return gko::test::generate_random_batch_matrix( num_batch_items, num_rows, num_cols, std::uniform_int_distribution<>(num_cols, num_cols), std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); @@ -89,15 +89,16 @@ class Ell : public CommonTestFixture { { const int num_rows = 252; const int num_cols = 32; - x = gen_mtx(batch_size, num_rows, num_cols, num_elems_per_row); + GKO_ASSERT(num_elems_per_row <= num_cols); + mat = gen_mtx(batch_size, num_rows, num_cols, num_elems_per_row); y = gen_mvec(batch_size, num_cols, num_vecs); alpha = gen_mvec(batch_size, 1, 1); beta = gen_mvec(batch_size, 1, 1); - dx = gko::clone(exec, x); + dmat = gko::clone(exec, mat); dy = gko::clone(exec, y); dalpha = gko::clone(exec, alpha); dbeta = gko::clone(exec, beta); - expected = MVec::create( + expected = BMVec::create( ref, gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, 
num_vecs})); expected->fill(gko::one()); @@ -107,16 +108,16 @@ class Ell : public CommonTestFixture { std::ranlux48 rand_engine; const size_t batch_size = 11; - std::unique_ptr x; - std::unique_ptr y; - std::unique_ptr alpha; - std::unique_ptr beta; - std::unique_ptr expected; - std::unique_ptr dresult; - std::unique_ptr dx; - std::unique_ptr dy; - std::unique_ptr dalpha; - std::unique_ptr dbeta; + std::unique_ptr mat; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr dresult; + std::unique_ptr dmat; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; }; @@ -124,8 +125,8 @@ TEST_F(Ell, SingleVectorApplyIsEquivalentToRef) { set_up_apply_data(1); - x->apply(y.get(), expected.get()); - dx->apply(dy.get(), dresult.get()); + mat->apply(y.get(), expected.get()); + dmat->apply(dy.get(), dresult.get()); GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } @@ -135,8 +136,8 @@ TEST_F(Ell, SingleVectorAdvancedApplyIsEquivalentToRef) { set_up_apply_data(1); - x->apply(alpha.get(), y.get(), beta.get(), expected.get()); - dx->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + mat->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmat->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); } diff --git a/test/test_install/test_install.cpp b/test/test_install/test_install.cpp index 7e53ea8f165..c00bb594ecd 100644 --- a/test/test_install/test_install.cpp +++ b/test/test_install/test_install.cpp @@ -219,13 +219,20 @@ int main() auto test = batch_multi_vector_type::create(exec); } - // core/base/batch_dense.hpp + // core/matrix/batch_dense.hpp { using type1 = float; using batch_dense_type = gko::batch::matrix::Dense; auto test = batch_dense_type::create(exec); } + // core/matrix/batch_ell.hpp + { + using type1 = float; + using batch_ell_type = gko::batch::matrix::Ell; + auto test = batch_ell_type::create(exec); + } 
+ // core/base/combination.hpp { using type1 = int; From b78c0cc422aaff4850e1b35a1ef1e18ae405f04b Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 11 Oct 2023 14:07:13 +0200 Subject: [PATCH 10/18] Kernel updates and batch_random_matrix gen --- .../cuda_hip/matrix/batch_ell_kernels.hpp.inc | 4 +- core/matrix/batch_ell.cpp | 7 -- core/test/utils/batch_helpers.hpp | 17 +++- core/test/utils/matrix_generator.hpp | 90 +++++++++++++++++++ cuda/matrix/batch_ell_kernels.cu | 1 + dpcpp/matrix/batch_ell_kernels.hpp.inc | 57 ++++++------ hip/matrix/batch_ell_kernels.hip.cpp | 1 + include/ginkgo/core/matrix/batch_ell.hpp | 19 ++-- test/matrix/batch_ell_kernels.cpp | 2 +- 9 files changed, 149 insertions(+), 49 deletions(-) diff --git a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc index 5c00358c5a0..19c29f14aa8 100644 --- a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc @@ -46,7 +46,7 @@ __device__ __forceinline__ void simple_apply( for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; - if (col_idx < idx) { + if (col_idx == invalid_index()) { break; } else { temp += val[ind] * b[col_idx]; @@ -102,7 +102,7 @@ __device__ __forceinline__ void advanced_apply( for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; - if (col_idx < idx) { + if (col_idx == invalid_index()) { break; } else { temp += alpha * val[ind] * b[col_idx]; diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index f421fdf2b49..c9dbe6d51c9 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -128,13 +128,6 @@ Ell::create_const( } -inline const batch_dim<2> get_col_sizes(const batch_dim<2>& sizes) -{ - return batch_dim<2>(sizes.get_num_batch_items(), - dim<2>(1, sizes.get_common_size()[1])); -} - - 
template Ell::Ell(std::shared_ptr exec, const batch_dim<2>& size, diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp index b040691999e..0b6197b5062 100644 --- a/core/test/utils/batch_helpers.hpp +++ b/core/test/utils/batch_helpers.hpp @@ -82,11 +82,22 @@ std::unique_ptr generate_random_batch_matrix( auto result = MatrixType::create( exec, batch_dim<2>(num_batch_items, dim<2>(num_rows, num_cols)), std::forward(args)...); + auto sp_mat = generate_random_device_matrix_data( + num_rows, num_cols, nonzero_dist, value_dist, engine, + exec->get_master()); + auto row_idxs = gko::array::const_view( + exec->get_master(), sp_mat.get_num_elems(), + sp_mat.get_const_row_idxs()) + .copy_to_array(); + auto col_idxs = gko::array::const_view( + exec->get_master(), sp_mat.get_num_elems(), + sp_mat.get_const_col_idxs()) + .copy_to_array(); for (size_type b = 0; b < num_batch_items; b++) { - auto rand_mat = - generate_random_matrix( - num_rows, num_cols, nonzero_dist, value_dist, engine, exec); + auto rand_mat = fill_random_matrix_with_sparsity_pattern< + typename MatrixType::unbatch_type, index_type>( + num_rows, num_cols, row_idxs, col_idxs, value_dist, engine, exec); result->create_view_for_item(b)->copy_from(rand_mat.get()); } diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index 6928c5424a5..8a82ae744e7 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -42,6 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include +#include #include #include #include @@ -54,6 +55,49 @@ namespace gko { namespace test { +/** + * Fills matrix data for a random matrix given a sparsity pattern + * + * @tparam ValueType the type for matrix values + * @tparam IndexType the type for row and column indices + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine + * + * @param num_rows number of rows + * @param num_cols number of columns + * @param row_idxs the row indices of the matrix + * @param col_idxs the column indices of the matrix + * @param value_dist distribution of matrix values + * @param engine a random engine + * + * @return the generated matrix_data with entries according to the given + * dimensions and nonzero count and value distributions. + */ +template +matrix_data fill_random_matrix_data( + size_type num_rows, size_type num_cols, + const gko::array& row_indices, + const gko::array& col_indices, ValueDistribution&& value_dist, + Engine&& engine) +{ + matrix_data data{gko::dim<2>{num_rows, num_cols}, {}}; + auto host_exec = row_indices.get_executor()->get_master(); + auto host_row_indices = make_temporary_clone(host_exec, &row_indices); + auto host_col_indices = make_temporary_clone(host_exec, &col_indices); + + for (int nnz = 0; nnz < row_indices.get_num_elems(); ++nnz) { + data.nonzeros.emplace_back( + host_row_indices->get_const_data()[nnz], + host_col_indices->get_const_data()[nnz], + detail::get_rand_value(value_dist, engine)); + } + + data.ensure_row_major_order(); + return data; +} + + /** * Generates matrix data for a random matrix. * @@ -156,6 +200,48 @@ generate_random_device_matrix_data(gko::size_type num_rows, } +/** + * Fills a random matrix with given sparsity pattern. 
+ * + * @tparam MatrixType type of matrix to generate (must implement + * the interface `ReadableFromMatrixData<>` and provide + * matching `value_type` and `index_type` type aliases) + * + * @param num_rows number of rows + * @param num_cols number of columns + * @param value_dist distribution of matrix values + * @param row_idxs the row indices of the matrix + * @param col_idxs the column indices of the matrix + * @param exec executor where the matrix should be allocated + * @param args additional arguments for the matrix constructor + * + * The other (template) parameters match generate_random_matrix_data. + * + * @return the unique pointer of MatrixType + */ +template , + typename IndexType = typename MatrixType::index_type, + typename ValueDistribution, typename Engine, typename... MatrixArgs> +std::unique_ptr fill_random_matrix_with_sparsity_pattern( + size_type num_rows, size_type num_cols, + const gko::array& row_idxs, + const gko::array& col_idxs, ValueDistribution&& value_dist, + Engine&& engine, std::shared_ptr exec, MatrixArgs&&... args) +{ + using value_type = typename MatrixType::value_type; + using index_type = IndexType; + + GKO_ASSERT(row_idxs.get_num_elems() == col_idxs.get_num_elems()); + GKO_ASSERT(row_idxs.get_num_elems() < (num_rows * num_cols)); + auto result = MatrixType::create(exec, std::forward(args)...); + result->read(fill_random_matrix_data( + num_rows, num_cols, row_idxs, col_idxs, + std::forward(value_dist), + std::forward(engine))); + return result; +} + + /** * Generates a random matrix. 
* @@ -163,6 +249,10 @@ generate_random_device_matrix_data(gko::size_type num_rows, * the interface `ReadableFromMatrixData<>` and provide * matching `value_type` and `index_type` type aliases) * + * @param num_rows number of rows + * @param num_cols number of columns + * @param nonzero_dist distribution of nonzeros per row + * @param value_dist distribution of matrix values * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor * diff --git a/cuda/matrix/batch_ell_kernels.cu b/cuda/matrix/batch_ell_kernels.cu index 6dd268a2d8e..5cadd7755a2 100644 --- a/cuda/matrix/batch_ell_kernels.cu +++ b/cuda/matrix/batch_ell_kernels.cu @@ -37,6 +37,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc index 7500ae9e060..e6501bafaba 100644 --- a/dpcpp/matrix/batch_ell_kernels.hpp.inc +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -42,38 +42,37 @@ __dpct_inline__ void simple_apply_kernel( auto temp = zero(); for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx < idx) + if (col_idx == invalid_index()) { break; - else - temp += mat.values[tidx + idx * mat.stride] * - b.values[col_idx * b.stride]; + else temp += mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; + } + x.values[tidx * x.stride] = temp; } - x.values[tidx * x.stride] = temp; } -} -template -__dpct_inline__ void advanced_apply_kernel( - const gko::batch::multi_vector::batch_item& alpha, - const gko::batch::matrix::ell::batch_item& mat, - const gko::batch::multi_vector::batch_item& b, - const gko::batch::multi_vector::batch_item& beta, - const gko::batch::multi_vector::batch_item& x, - sycl::nd_item<3>& item_ct1) -{ - for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; - tidx += 
item_ct1.get_local_range().size()) { - auto temp = zero(); - for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { - const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx < idx) - break; - else - temp += alpha.values[0] * mat.values[tidx + idx * mat.stride] * - b.values[col_idx * b.stride]; + template + __dpct_inline__ void advanced_apply_kernel( + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& beta, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) + { + for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; + tidx += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { + const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; + if (col_idx == invalid_index()) { + break; + else temp += alpha.values[0] * + mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; + } + x.values[tidx * x.stride] = + temp + beta.values[0] * x.values[tidx * x.stride]; + } } - x.values[tidx * x.stride] = - temp + beta.values[0] * x.values[tidx * x.stride]; - } -} diff --git a/hip/matrix/batch_ell_kernels.hip.cpp b/hip/matrix/batch_ell_kernels.hip.cpp index 5c6d5179a21..96e7cdb298e 100644 --- a/hip/matrix/batch_ell_kernels.hip.cpp +++ b/hip/matrix/batch_ell_kernels.hip.cpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 6f3db1bb96b..be49e2cff41 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -58,9 +58,14 @@ namespace matrix { * Ell is a sparse matrix format that stores the same number of nonzeros in each * row, enabling coalesced accesses. 
It is suitable for sparsity patterns that * have a similar number of nonzeros in every row. The values are stored in a - * column-major fashion similar to the monolithic gko::matrix::Ell class. It is - * also assumed that the sparsity pattern of all the items in the batch is the - * same and therefore only a single copy of the sparsity pattern is stored. + * column-major fashion similar to the monolithic gko::matrix::Ell class. + * + * Similar to the monolithic gko::matrix::Ell class, invalid_index is + * used as the column index for padded zero entries. + * + * @note It is also assumed that the sparsity pattern of all the items in the + * batch is the same and therefore only a single copy of the sparsity pattern is + * stored. * * @tparam ValueType value precision of matrix elements * @tparam IndexType index precision of matrix elements @@ -253,13 +258,13 @@ class Ell final /** * Creates a constant (immutable) batch ell matrix from a constant - * array. + * array. The column indices array needs to be the same for all batch items. * * @param exec the executor to create the matrix on * @param size the dimensions of the matrix * @param num_elems_per_row the number of elements to be stored in each row * @param values the value array of the matrix - * @param col_idxs the col_idxs array of the matrix + * @param col_idxs the col_idxs array of a single batch item of the matrix. * * @return A smart pointer to the constant matrix wrapping the input * array (if it resides on the same executor as the matrix) or a copy of the @@ -325,7 +330,7 @@ class Ell final /** * Creates a Ell matrix from an already allocated (and initialized) - * array. + * array. The column indices array needs to be the same for all batch items. 
* * @tparam ValuesArray type of array of values * @@ -333,7 +338,7 @@ class Ell final * @param size size of the matrix * @param num_elems_per_row the number of elements to be stored in each row * @param values array of matrix values - * @param col_idxs the col_idxs array of the matrix + * @param col_idxs the col_idxs array of a single batch item of the matrix. * * @note If `values` is not an rvalue, not an array of ValueType, or is on * the wrong executor, an internal copy will be created, and the diff --git a/test/matrix/batch_ell_kernels.cpp b/test/matrix/batch_ell_kernels.cpp index 083af0a0938..572f47ba47d 100644 --- a/test/matrix/batch_ell_kernels.cpp +++ b/test/matrix/batch_ell_kernels.cpp @@ -55,7 +55,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. class Ell : public CommonTestFixture { protected: - using BMtx = gko::batch::matrix::Ell; + using BMtx = gko::batch::matrix::Ell; using BMVec = gko::batch::MultiVector; Ell() : rand_engine(15) {} From 4179654e60313541032435ae52557785787a8001 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Wed, 11 Oct 2023 15:41:55 +0200 Subject: [PATCH 11/18] Review updates Co-authored-by: Marcel Koch Co-authored-by: Yu-Hsiang Tsai --- .../cuda_hip/matrix/batch_ell_kernels.hpp.inc | 22 +-- core/base/batch_multi_vector.cpp | 21 --- core/base/batch_utilities.hpp | 47 ++--- core/matrix/batch_struct.hpp | 40 ++--- core/test/matrix/batch_ell.cpp | 160 ++++++++---------- core/test/utils/matrix_generator.hpp | 2 +- cuda/matrix/batch_struct.hpp | 28 +-- dpcpp/matrix/batch_ell_kernels.dp.cpp | 2 + dpcpp/matrix/batch_ell_kernels.hpp.inc | 61 +++---- dpcpp/matrix/batch_struct.hpp | 28 +-- hip/matrix/batch_struct.hip.hpp | 28 +-- .../ginkgo/core/base/batch_multi_vector.hpp | 18 +- include/ginkgo/core/matrix/batch_dense.hpp | 2 - include/ginkgo/core/matrix/batch_ell.hpp | 7 +- reference/matrix/batch_ell_kernels.hpp.inc | 24 +-- reference/matrix/batch_struct.hpp | 28 +-- reference/test/matrix/batch_ell_kernels.cpp | 8 
+- 17 files changed, 235 insertions(+), 291 deletions(-) diff --git a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc index 19c29f14aa8..de6ca879890 100644 --- a/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc +++ b/common/cuda_hip/matrix/batch_ell_kernels.hpp.inc @@ -31,9 +31,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -template +template __device__ __forceinline__ void simple_apply( - const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const ValueType* const __restrict__ b, ValueType* const __restrict__ x) { const auto num_rows = mat.num_rows; @@ -46,7 +46,7 @@ __device__ __forceinline__ void simple_apply( for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; - if (col_idx == invalid_index()) { + if (col_idx == invalid_index()) { break; } else { temp += val[ind] * b[col_idx]; @@ -56,12 +56,13 @@ __device__ __forceinline__ void simple_apply( } } -template +template __global__ __launch_bounds__( default_block_size, sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: ell::uniform_batch< - const ValueType> + const ValueType, + IndexType> mat, const gko::batch:: multi_vector:: @@ -85,10 +86,10 @@ __global__ __launch_bounds__( } -template +template __device__ __forceinline__ void advanced_apply( const ValueType alpha, - const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const ValueType* const __restrict__ b, const ValueType beta, ValueType* const __restrict__ x) { @@ -102,7 +103,7 @@ __device__ __forceinline__ void advanced_apply( for (size_type idx = 0; idx < num_stored_elements_per_row; idx++) { const auto ind = tidx + idx * stride; const auto col_idx = col[ind]; - if (col_idx == invalid_index()) { + if (col_idx == 
invalid_index()) { break; } else { temp += alpha * val[ind] * b[col_idx]; @@ -112,7 +113,7 @@ __device__ __forceinline__ void advanced_apply( } } -template +template __global__ __launch_bounds__( default_block_size, sm_oversubscription) void advanced_apply_kernel(const gko::batch:: @@ -122,7 +123,8 @@ __global__ __launch_bounds__( alpha, const gko::batch::matrix:: ell::uniform_batch< - const ValueType> + const ValueType, + IndexType> mat, const gko::batch:: multi_vector:: diff --git a/core/base/batch_multi_vector.cpp b/core/base/batch_multi_vector.cpp index 6a14919bf2f..6dcf8dd90b5 100644 --- a/core/base/batch_multi_vector.cpp +++ b/core/base/batch_multi_vector.cpp @@ -291,27 +291,6 @@ void MultiVector::move_to( } -template -void MultiVector::convert_to(matrix::Dense* result) const -{ - auto exec = result->get_executor() == nullptr ? this->get_executor() - : result->get_executor(); - auto tmp = gko::batch::matrix::Dense::create_const( - exec, this->get_size(), - make_const_array_view(this->get_executor(), - this->get_num_stored_elements(), - this->get_const_values())); - result->copy_from(tmp); -} - - -template -void MultiVector::move_to(matrix::Dense* result) -{ - this->convert_to(result); -} - - #define GKO_DECLARE_BATCH_MULTI_VECTOR(_type) class MultiVector<_type> GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR); diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index c37c0cae721..7204c78a552 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -165,12 +165,8 @@ std::vector> write( /** * Creates and initializes a batch of single column-vectors. * - * This function first creates a temporary MultiVector, fills it with - * passed in values, and then converts the vector to the requested type. 
- * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) + * @tparam Matrix matrix type to initialize (It has to implement the + * read function) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * @@ -180,7 +176,6 @@ std::vector> write( * including the Executor, which is passed as the first * argument * - * @ingroup MultiVector * @ingroup mat_formats */ template @@ -220,23 +215,19 @@ std::unique_ptr initialize( /** - * Creates and initializes a batch of multi-vectors. - * - * This function first creates a temporary MultiVector, fills it with - * passed in values, and then converts the vector to the requested type. + * Creates and initializes a batch of matrices. * - * @tparam Matrix matrix type to initialize - * (Dense has to implement the ConvertibleTo interface) + * @tparam Matrix matrix type to initialize (It has to implement the + * read function) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param vals values used to initialize the vector - * @param exec Executor associated to the vector + * @param vals values used to initialize the matrix + * @param exec Executor associated with the matrix * @param create_args additional arguments passed to Matrix::create, not * including the Executor, which is passed as the first * argument * - * @ingroup MultiVector * @ingroup mat_formats */ template @@ -290,23 +281,18 @@ std::unique_ptr initialize( * Creates and initializes a batch single column-vector by making copies of the * single input column vector. * - * This function first creates a temporary batch multi-vector, fills it with - * passed in values, and then converts the vector to the requested type. 
- * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) + * @tparam Matrix matrix type to initialize (It has to implement the + * read function) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * * @param num_vectors The number of times the input vector is to be duplicated * @param vals values used to initialize each vector in the temp. batch - * @param exec Executor associated to the vector + * @param exec Executor associated with the matrix * @param create_args additional arguments passed to Matrix::create, not * including the Executor, which is passed as the first * argument * - * @ingroup MultiVector * @ingroup mat_formats */ template @@ -343,23 +329,18 @@ std::unique_ptr initialize( /** * Creates and initializes a matrix from copies of a given matrix. * - * This function first creates a temporary batch multi-vector, fills it with - * passed in values, and then converts the vector to the requested type. - * - * @tparam Matrix matrix type to initialize - * (MultiVector has to implement the ConvertibleTo - * interface) + * @tparam Matrix matrix type to initialize (It has to implement the + * read function) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * * @param num_batch_items The number of times the input matrix is duplicated - * @param vals values used to initialize each vector in the temp. batch - * @param exec Executor associated to the vector + * @param vals values used to initialize each matrix in the temp. 
batch + * @param exec Executor associated to the matrix * @param create_args additional arguments passed to Matrix::create, not * including the Executor, which is passed as the first * argument * - * @ingroup LinOp * @ingroup mat_formats */ template diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index eeeeebd53d6..f208f5ff078 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -89,10 +89,10 @@ namespace ell { /** * Encapsulates one matrix from a batch of ell matrices. */ -template +template struct batch_item { using value_type = ValueType; - using index_type = int32; + using index_type = IndexType; ValueType* values; const index_type* col_idxs; @@ -106,11 +106,11 @@ struct batch_item { /** * A 'simple' structure to store a global uniform batch of ell matrices. */ -template +template struct uniform_batch { using value_type = ValueType; - using index_type = int32; - using entry_type = batch_item; + using index_type = IndexType; + using entry_type = batch_item; ValueType* values; const index_type* col_idxs; @@ -164,27 +164,28 @@ GKO_ATTRIBUTES GKO_INLINE dense::batch_item extract_batch_item( } -template -GKO_ATTRIBUTES GKO_INLINE ell::batch_item to_const( - const ell::batch_item& b) +template +GKO_ATTRIBUTES GKO_INLINE ell::batch_item to_const( + const ell::batch_item& b) { return {b.values, b.col_idxs, b.stride, b.num_rows, b.num_cols, b.num_stored_elems_per_row}; } -template -GKO_ATTRIBUTES GKO_INLINE ell::uniform_batch to_const( - const ell::uniform_batch& ub) +template +GKO_ATTRIBUTES GKO_INLINE ell::uniform_batch +to_const(const ell::uniform_batch& ub) { return {ub.values, ub.col_idxs, ub.num_batch_items, ub.stride, ub.num_rows, ub.num_cols, ub.num_stored_elems_per_row}; } -template -GKO_ATTRIBUTES GKO_INLINE ell::batch_item extract_batch_item( - const ell::uniform_batch& batch, const size_type batch_idx) +template +GKO_ATTRIBUTES GKO_INLINE ell::batch_item +extract_batch_item(const ell::uniform_batch& batch, 
+ const size_type batch_idx) { return {batch.values + batch_idx * batch.num_stored_elems_per_row * batch.num_rows, @@ -195,11 +196,12 @@ GKO_ATTRIBUTES GKO_INLINE ell::batch_item extract_batch_item( batch.num_stored_elems_per_row}; } -template -GKO_ATTRIBUTES GKO_INLINE ell::batch_item extract_batch_item( - ValueType* const batch_values, int* const batch_col_idxs, const int stride, - const int num_rows, const int num_cols, int num_elems_per_row, - const size_type batch_idx) +template +GKO_ATTRIBUTES GKO_INLINE ell::batch_item +extract_batch_item(ValueType* const batch_values, + IndexType* const batch_col_idxs, const int stride, + const int num_rows, const int num_cols, + int num_elems_per_row, const size_type batch_idx) { return {batch_values + batch_idx * num_elems_per_row * num_rows, batch_col_idxs, diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp index e4dcab23917..c36a877ac14 100644 --- a/core/test/matrix/batch_ell.cpp +++ b/core/test/matrix/batch_ell.cpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include -#include #include @@ -52,26 +51,26 @@ class Ell : public ::testing::Test { protected: using value_type = T; using index_type = gko::int32; - using EllMtx = gko::matrix::Ell; + using BatchEllMtx = gko::batch::matrix::Ell; + using EllMtx = gko::matrix::Ell; using size_type = gko::size_type; Ell() : exec(gko::ReferenceExecutor::create()), - mtx(gko::batch::initialize>( + mtx(gko::batch::initialize( {{{-1.0, 2.0, 3.0}, {-1.5, 2.5, 3.5}}, {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}}, exec, 3)), - sp_mtx(gko::batch::initialize>( + sp_mtx(gko::batch::initialize( {{{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}}, exec, 2)), - ell_mtx(gko::initialize>( - {{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, exec, gko::dim<2>(2, 3), 3)), - sp_ell_mtx(gko::initialize>( - {{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, exec, gko::dim<2>(2, 3), 2)) + ell_mtx(gko::initialize({{1.0, 2.5, 3.0}, {1.0, 2.0, 3.0}}, + exec, gko::dim<2>(2, 3), 3)), + sp_ell_mtx(gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, + exec, gko::dim<2>(2, 3), 2)) {} - static void assert_equal_to_original_sparse_mtx( - const gko::batch::matrix::Ell* m) + static void assert_equal_to_original_sparse_mtx(const BatchEllMtx* m) { ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); @@ -91,8 +90,7 @@ class Ell : public ::testing::Test { ASSERT_EQ(m->get_const_col_idxs()[3], index_type{2}); } - static void assert_equal_to_original_mtx( - const gko::batch::matrix::Ell* m) + static void assert_equal_to_original_mtx(const BatchEllMtx* m) { ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); @@ -112,7 +110,7 @@ class Ell : public ::testing::Test { ASSERT_EQ(m->get_const_values()[11], value_type{3.0}); } - static void assert_empty(gko::batch::matrix::Ell* m) + static void assert_empty(BatchEllMtx* m) { ASSERT_EQ(m->get_num_batch_items(), 0); ASSERT_EQ(m->get_num_stored_elements(), 0); @@ -120,10 +118,10 @@ class Ell : public 
::testing::Test { } std::shared_ptr exec; - std::unique_ptr> mtx; - std::unique_ptr> sp_mtx; - std::unique_ptr> ell_mtx; - std::unique_ptr> sp_ell_mtx; + std::unique_ptr mtx; + std::unique_ptr sp_mtx; + std::unique_ptr ell_mtx; + std::unique_ptr sp_ell_mtx; }; TYPED_TEST_SUITE(Ell, gko::test::ValueTypes); @@ -143,16 +141,11 @@ TYPED_TEST(Ell, SparseMtxKnowsItsSizeAndValues) TYPED_TEST(Ell, CanBeEmpty) { - auto empty = gko::batch::matrix::Ell::create(this->exec); + using BatchEllMtx = typename TestFixture::BatchEllMtx; - this->assert_empty(empty.get()); -} - - -TYPED_TEST(Ell, ReturnsNullValuesArrayWhenEmpty) -{ - auto empty = gko::batch::matrix::Ell::create(this->exec); + auto empty = BatchEllMtx::create(this->exec); + this->assert_empty(empty.get()); ASSERT_EQ(empty->get_const_values(), nullptr); } @@ -180,7 +173,9 @@ TYPED_TEST(Ell, CanCreateSpEllItemView) TYPED_TEST(Ell, CanBeCopied) { - auto mtx_copy = gko::batch::matrix::Ell::create(this->exec); + using BatchEllMtx = typename TestFixture::BatchEllMtx; + + auto mtx_copy = BatchEllMtx::create(this->exec); mtx_copy->copy_from(this->mtx.get()); @@ -192,7 +187,9 @@ TYPED_TEST(Ell, CanBeCopied) TYPED_TEST(Ell, CanBeMoved) { - auto mtx_copy = gko::batch::matrix::Ell::create(this->exec); + using BatchEllMtx = typename TestFixture::BatchEllMtx; + + auto mtx_copy = BatchEllMtx::create(this->exec); this->mtx->move_to(mtx_copy); @@ -219,10 +216,10 @@ TYPED_TEST(Ell, CanBeCleared) TYPED_TEST(Ell, CanBeConstructedWithSize) { - using size_type = gko::size_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; - auto m = gko::batch::matrix::Ell::create( - this->exec, gko::batch_dim<2>(2, gko::dim<2>{5, 3}), 2); + auto m = BatchEllMtx::create(this->exec, + gko::batch_dim<2>(2, gko::dim<2>{5, 3}), 2); ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); @@ -235,19 +232,19 @@ TYPED_TEST(Ell, CanBeConstructedFromExistingData) { using value_type = typename TestFixture::value_type; 
using index_type = typename TestFixture::index_type; - using size_type = gko::size_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; // clang-format off value_type values[] = { -1.0, 2.5, - 0.0, 3.5, - 1.0, 2.0, - 0.0, 3.0}; + 0.0, 3.5, + 1.0, 2.0, + 0.0, 3.0}; index_type col_idxs[] = { - 0, 1, + 0, 1, -1, 2}; // clang-format on - auto m = gko::batch::matrix::Ell::create( + auto m = BatchEllMtx::create( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2, gko::array::view(this->exec, 8, values), gko::array::view(this->exec, 4, col_idxs)); @@ -260,19 +257,19 @@ TYPED_TEST(Ell, CanBeConstructedFromExistingConstData) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - using size_type = gko::size_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; // clang-format off value_type values[] = { -1.0, 2.5, - 0.0, 3.5, - 1.0, 2.0, - 0.0, 3.0}; + 0.0, 3.5, + 1.0, 2.0, + 0.0, 3.0}; index_type col_idxs[] = { - 0, 1, + 0, 1, -1, 2}; // clang-format on - auto m = gko::batch::matrix::Ell::create_const( + auto m = BatchEllMtx::create_const( this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), 2, gko::array::const_view(this->exec, 8, values), gko::array::const_view(this->exec, 4, col_idxs)); @@ -283,15 +280,14 @@ TYPED_TEST(Ell, CanBeConstructedFromExistingConstData) TYPED_TEST(Ell, CanBeConstructedFromEllMatrices) { - using value_type = typename TestFixture::value_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using EllMtx = typename TestFixture::EllMtx; - using size_type = gko::size_type; auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, this->exec); - auto m = gko::batch::create_from_item>( + auto m = gko::batch::create_from_item( this->exec, std::vector{mat1.get(), mat2.get()}, mat1->get_num_stored_elements_per_row()); @@ -301,19 +297,15 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatrices) 
TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) { - using value_type = typename TestFixture::value_type; - using index_type = int; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using EllMtx = typename TestFixture::EllMtx; - using size_type = gko::size_type; auto mat1 = gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); - auto bat_m = - gko::batch::create_from_item>( - this->exec, - std::vector{mat1.get(), mat1.get(), mat1.get()}, - mat1->get_num_stored_elements_per_row()); + auto bat_m = gko::batch::create_from_item( + this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}, + mat1->get_num_stored_elements_per_row()); - auto m = gko::batch::create_from_item>( + auto m = gko::batch::create_from_item( this->exec, 3, mat1.get(), mat1->get_num_stored_elements_per_row()); GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 1e-14); @@ -322,26 +314,23 @@ TYPED_TEST(Ell, CanBeConstructedFromEllMatricesByDuplication) TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) { - using value_type = typename TestFixture::value_type; - using index_type = int; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using EllMtx = typename TestFixture::EllMtx; - using size_type = gko::size_type; auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 0.0}}, this->exec); auto mat2 = gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); - auto m = gko::batch::create_from_item>( + auto m = gko::batch::create_from_item( this->exec, std::vector{mat1.get(), mat2.get()}, mat1->get_num_stored_elements_per_row()); - auto m_ref = - gko::batch::create_from_item>( - this->exec, - std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), - mat1.get(), mat2.get()}, - mat1->get_num_stored_elements_per_row()); - - auto m2 = gko::batch::duplicate>( + auto m_ref = gko::batch::create_from_item( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), + mat1.get(), mat2.get()}, + mat1->get_num_stored_elements_per_row()); + + 
auto m2 = gko::batch::duplicate( this->exec, 3, m.get(), mat1->get_num_stored_elements_per_row()); GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 1e-14); @@ -350,17 +339,14 @@ TYPED_TEST(Ell, CanBeConstructedByDuplicatingEllMatrices) TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices) { - using value_type = typename TestFixture::value_type; - using index_type = int; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using EllMtx = typename TestFixture::EllMtx; - using size_type = gko::size_type; auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, this->exec); auto mat2 = gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, this->exec); - auto ell_mats = gko::batch::unbatch>( - this->sp_mtx.get()); + auto ell_mats = gko::batch::unbatch(this->sp_mtx.get()); GKO_ASSERT_MTX_NEAR(ell_mats[0].get(), mat1.get(), 0.); GKO_ASSERT_MTX_NEAR(ell_mats[1].get(), mat2.get(), 0.); @@ -370,10 +356,12 @@ TYPED_TEST(Ell, CanBeUnbatchedIntoEllMatrices) TYPED_TEST(Ell, CanBeListConstructed) { using value_type = typename TestFixture::value_type; - using index_type = int; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; + using EllMtx = typename TestFixture::EllMtx; - auto m = gko::batch::initialize>( - {{0.0, -1.0}, {1.0, 0.0}}, this->exec); + auto m = gko::batch::initialize({{0.0, -1.0}, {1.0, 0.0}}, + this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); @@ -391,10 +379,11 @@ TYPED_TEST(Ell, CanBeListConstructed) TYPED_TEST(Ell, CanBeListConstructedByCopies) { using value_type = typename TestFixture::value_type; - using index_type = int; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; - auto m = gko::batch::initialize>( - 2, I({0.0, -1.0}), this->exec, 1); + auto m = gko::batch::initialize(2, I({0.0, -1.0}), + this->exec, 1); ASSERT_EQ(m->get_num_batch_items(), 2); 
ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); @@ -412,10 +401,11 @@ TYPED_TEST(Ell, CanBeListConstructedByCopies) TYPED_TEST(Ell, CanBeDoubleListConstructed) { using value_type = typename TestFixture::value_type; - using index_type = int; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using T = value_type; - auto m = gko::batch::initialize>( + auto m = gko::batch::initialize( // clang-format off {{I{1.0, 0.0, 0.0}, I{2.0, 0.0, 3.0}, @@ -454,15 +444,15 @@ TYPED_TEST(Ell, CanBeDoubleListConstructed) TYPED_TEST(Ell, CanBeReadFromMatrixData) { using value_type = typename TestFixture::value_type; - using index_type = int; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; auto vec_data = std::vector>{}; vec_data.emplace_back(gko::matrix_data( {2, 3}, {{0, 0, -1.0}, {1, 1, 2.5}, {1, 2, 3.5}})); vec_data.emplace_back(gko::matrix_data( {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}})); - auto m = gko::batch::read>(this->exec, + auto m = gko::batch::read(this->exec, vec_data, 2); this->assert_equal_to_original_sparse_mtx(m.get()); @@ -472,11 +462,11 @@ TYPED_TEST(Ell, CanBeReadFromMatrixData) TYPED_TEST(Ell, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; - using index_type = int; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; using tpl = typename gko::matrix_data::nonzero_type; - auto data = gko::batch::write>( + auto data = gko::batch::write( this->sp_mtx.get()); ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index 8a82ae744e7..7490a24bbe5 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -232,7 +232,7 @@ std::unique_ptr fill_random_matrix_with_sparsity_pattern( using index_type = IndexType; 
GKO_ASSERT(row_idxs.get_num_elems() == col_idxs.get_num_elems()); - GKO_ASSERT(row_idxs.get_num_elems() < (num_rows * num_cols)); + GKO_ASSERT(row_idxs.get_num_elems() <= (num_rows * num_cols)); auto result = MatrixType::create(exec, std::forward(args)...); result->read(fill_random_matrix_data( num_rows, num_cols, row_idxs, col_idxs, diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index e2db1ea6e97..4a2a1835961 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -91,34 +91,34 @@ get_batch_struct(batch::matrix::Dense* const op) /** * Generates an immutable uniform batch struct from a batch of ell matrices. */ -template -inline batch::matrix::ell::uniform_batch> -get_batch_struct(const batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(const batch::matrix::Ell* const op) { return {as_cuda_type(op->get_const_values()), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } /** * Generates a uniform batch struct from a batch of ell matrices. 
*/ -template -inline batch::matrix::ell::uniform_batch> get_batch_struct( - batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(batch::matrix::Ell* const op) { return {as_cuda_type(op->get_values()), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index fca265eceb0..e4d2421a42f 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -81,6 +81,7 @@ void simple_apply(std::shared_ptr exec, const auto num_batch_items = mat->get_num_batch_items(); auto device = exec->get_queue()->get_device(); + // TODO: use runtime selection of group size based on num_rows. auto group_size = device.get_info(); @@ -134,6 +135,7 @@ void advanced_apply(std::shared_ptr exec, const auto num_batch_items = mat_ub.num_batch_items; auto device = exec->get_queue()->get_device(); + // TODO: use runtime selection of group size based on num_rows. auto group_size = device.get_info(); diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc index e6501bafaba..553e0aa1f3c 100644 --- a/dpcpp/matrix/batch_ell_kernels.hpp.inc +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -30,9 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -template +template __dpct_inline__ void simple_apply_kernel( - const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::matrix::ell::batch_item& mat, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& x, sycl::nd_item<3>& item_ct1) @@ -42,37 +42,38 @@ __dpct_inline__ void simple_apply_kernel( auto temp = zero(); for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx == invalid_index()) { + if (col_idx == invalid_index()) { /* sentinel column index: stop accumulating this row */ break; - else temp += mat.values[tidx + idx * mat.stride] * - b.values[col_idx * b.stride]; - } - x.values[tidx * x.stride] = temp; + } else + temp += mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; } + x.values[tidx * x.stride] = temp; } +} - template - __dpct_inline__ void advanced_apply_kernel( - const gko::batch::multi_vector::batch_item& alpha, - const gko::batch::matrix::ell::batch_item& mat, - const gko::batch::multi_vector::batch_item& b, - const gko::batch::multi_vector::batch_item& beta, - const gko::batch::multi_vector::batch_item& x, - sycl::nd_item<3>& item_ct1) - { - for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; - tidx += item_ct1.get_local_range().size()) { - auto temp = zero(); - for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { - const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx == invalid_index()) { - break; - else temp += alpha.values[0] * - mat.values[tidx + idx * mat.stride] * - b.values[col_idx * b.stride]; - } - x.values[tidx * x.stride] = - temp + beta.values[0] * x.values[tidx * x.stride]; - } +template +__dpct_inline__ void advanced_apply_kernel( + const gko::batch::multi_vector::batch_item& alpha, + const gko::batch::matrix::ell::batch_item& mat, + const gko::batch::multi_vector::batch_item& b, + const 
gko::batch::multi_vector::batch_item& beta, + const gko::batch::multi_vector::batch_item& x, + sycl::nd_item<3>& item_ct1) +{ + for (int tidx = item_ct1.get_local_linear_id(); tidx < mat.num_rows; + tidx += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { + const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; + if (col_idx == invalid_index()) { /* sentinel column index: stop accumulating this row */ + break; + } else + temp += alpha.values[0] * mat.values[tidx + idx * mat.stride] * + b.values[col_idx * b.stride]; } + x.values[tidx * x.stride] = + temp + beta.values[0] * x.values[tidx * x.stride]; + } +} diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index f857653e05e..fe04407d82d 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -90,34 +90,34 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( /** * Generates an immutable uniform batch struct from a batch of ell matrices. */ -template -inline batch::matrix::ell::uniform_batch get_batch_struct( - const batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch +get_batch_struct(const batch::matrix::Ell* const op) { return {op->get_const_values(), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } /** * Generates a uniform batch struct from a batch of ell matrices. 
*/ -template -inline batch::matrix::ell::uniform_batch get_batch_struct( - batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch get_batch_struct( + batch::matrix::Ell* const op) { return {op->get_values(), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index 6f15b2d966a..e35f13f1249 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -91,34 +91,34 @@ get_batch_struct(batch::matrix::Dense* const op) /** * Generates an immutable uniform batch struct from a batch of ell matrices. */ -template -inline batch::matrix::ell::uniform_batch> -get_batch_struct(const batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(const batch::matrix::Ell* const op) { return {as_hip_type(op->get_const_values()), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } /** * Generates a uniform batch struct from a batch of ell matrices. 
*/ -template -inline batch::matrix::ell::uniform_batch> get_batch_struct( - batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch, IndexType> +get_batch_struct(batch::matrix::Ell* const op) { return {as_hip_type(op->get_values()), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/include/ginkgo/core/base/batch_multi_vector.hpp b/include/ginkgo/core/base/batch_multi_vector.hpp index 9a4b8d5cf1d..405603269ff 100644 --- a/include/ginkgo/core/base/batch_multi_vector.hpp +++ b/include/ginkgo/core/base/batch_multi_vector.hpp @@ -52,14 +52,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { namespace batch { -namespace matrix { - - -template -class Dense; - - -} /** @@ -90,21 +82,17 @@ class MultiVector : public EnablePolymorphicObject>, public EnablePolymorphicAssignment>, public EnableCreateMethod>, - public ConvertibleTo>>, - public ConvertibleTo> { + public ConvertibleTo>> { friend class EnableCreateMethod; friend class EnablePolymorphicObject; friend class MultiVector>; friend class MultiVector>; - friend class matrix::Dense; public: using EnablePolymorphicAssignment::convert_to; using EnablePolymorphicAssignment::move_to; using ConvertibleTo>>::convert_to; using ConvertibleTo>>::move_to; - using ConvertibleTo>::convert_to; - using ConvertibleTo>::move_to; using value_type = ValueType; using index_type = int32; @@ -126,10 +114,6 @@ class MultiVector void move_to(MultiVector>* result) override; - void convert_to(matrix::Dense* result) const override; - - void move_to(matrix::Dense* result) override; - /** * Creates a mutable view (of matrix::Dense type) of one item of the Batch * MultiVector object. Does not perform any deep copies, but only returns a diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 7f3ce5890e4..cbec04482a3 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -306,7 +306,6 @@ class Dense final : public EnableBatchLinOp>, size.get_common_size()[1]; } -protected: /** * Creates an uninitialized Dense matrix of the specified size. 
* @@ -362,7 +361,6 @@ class Dense final : public EnableBatchLinOp>, idx % this->get_common_size()[1]); } -private: array values_; }; diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index be49e2cff41..943f63bfdd7 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -67,6 +67,8 @@ namespace matrix { * batch is the same and therefore only a single copy of the sparsity pattern is * stored. * + * @note Currently only IndexType of int32 is supported. + * * @tparam ValueType value precision of matrix elements * @tparam IndexType index precision of matrix elements * @@ -83,6 +85,8 @@ class Ell final friend class EnablePolymorphicObject; friend class Ell, IndexType>; friend class Ell, IndexType>; + static_assert(std::is_same::value, + "IndexType must be a 32 bit integer"); public: using EnableBatchLinOp::convert_to; @@ -315,8 +319,6 @@ class Ell final num_elems_per_row; } - -protected: /** * Creates an uninitialized Ell matrix of the specified size. * @@ -369,7 +371,6 @@ class Ell final const MultiVector* beta, MultiVector* x) const; -private: index_type num_elems_per_row_; array values_; array col_idxs_; diff --git a/reference/matrix/batch_ell_kernels.hpp.inc b/reference/matrix/batch_ell_kernels.hpp.inc index 44de2a57af9..979df1a19bd 100644 --- a/reference/matrix/batch_ell_kernels.hpp.inc +++ b/reference/matrix/batch_ell_kernels.hpp.inc @@ -30,9 +30,9 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ -template +template inline void simple_apply_kernel( - const gko::batch::matrix::ell::batch_item& a, + const gko::batch::matrix::ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const gko::batch::multi_vector::batch_item& c) { @@ -43,19 +43,21 @@ inline void simple_apply_kernel( for (auto k = 0; k < a.num_stored_elems_per_row; ++k) { auto val = a.values[row + k * a.stride]; auto col = a.col_idxs[row + k * a.stride]; - for (int j = 0; j < c.num_rhs; ++j) { - c.values[row * c.stride + j] += - val * b.values[col * b.stride + j]; + if (col != invalid_index()) { + for (int j = 0; j < c.num_rhs; ++j) { + c.values[row * c.stride + j] += + val * b.values[col * b.stride + j]; + } } } } } -template +template inline void advanced_apply_kernel( const ValueType alpha, - const gko::batch::matrix::ell::batch_item& a, + const gko::batch::matrix::ell::batch_item& a, const gko::batch::multi_vector::batch_item& b, const ValueType beta, const gko::batch::multi_vector::batch_item& c) @@ -67,9 +69,11 @@ inline void advanced_apply_kernel( for (auto k = 0; k < a.num_stored_elems_per_row; ++k) { auto val = a.values[row + k * a.stride]; auto col = a.col_idxs[row + k * a.stride]; - for (int j = 0; j < b.num_rhs; ++j) { - c.values[row * c.stride + j] += - alpha * val * b.values[col * b.stride + j]; + if (col != invalid_index()) { + for (int j = 0; j < b.num_rhs; ++j) { + c.values[row * c.stride + j] += + alpha * val * b.values[col * b.stride + j]; + } } } } diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index fb0e08c16f5..bb7680d1493 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -94,34 +94,34 @@ inline batch::matrix::dense::uniform_batch get_batch_struct( /** * Generates an immutable uniform batch struct from a batch of ell matrices. 
*/ -template -inline batch::matrix::ell::uniform_batch get_batch_struct( - const batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch +get_batch_struct(const batch::matrix::Ell* const op) { return {op->get_const_values(), op->get_const_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } /** * Generates a uniform batch struct from a batch of ell matrices. */ -template -inline batch::matrix::ell::uniform_batch get_batch_struct( - batch::matrix::Ell* const op) +template +inline batch::matrix::ell::uniform_batch get_batch_struct( + batch::matrix::Ell* const op) { return {op->get_values(), op->get_col_idxs(), op->get_num_batch_items(), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[0]), - static_cast(op->get_common_size()[1]), - static_cast(op->get_num_stored_elements_per_row())}; + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_stored_elements_per_row())}; } diff --git a/reference/test/matrix/batch_ell_kernels.cpp b/reference/test/matrix/batch_ell_kernels.cpp index 8a5806a9513..81f189c3e02 100644 --- a/reference/test/matrix/batch_ell_kernels.cpp +++ b/reference/test/matrix/batch_ell_kernels.cpp @@ -123,8 +123,8 @@ TYPED_TEST(Ell, AppliesToBatchMultiVector) this->mtx_00->apply(this->b_00.get(), this->x_00.get()); this->mtx_01->apply(this->b_01.get(), this->x_01.get()); auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[0].get(), 
this->x_00.get(), r::value); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r::value); } @@ -149,8 +149,8 @@ TYPED_TEST(Ell, AppliesLinearCombinationToBatchMultiVector) this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), this->x_01.get()); auto res = gko::batch::unbatch>(this->x_0.get()); - GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), 0.); - GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), 0.); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r::value); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r::value); } From 6d16d3b0ca4dd1fc683409871021817931ca7bc3 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 12 Oct 2023 12:05:08 +0200 Subject: [PATCH 12/18] Add apply temp clone, review updates Co-authored-by: Tobias Ribizel --- core/matrix/batch_dense.cpp | 64 +++++++++++++++++++++- core/matrix/batch_ell.cpp | 62 +++++++++++++++++++++ dpcpp/matrix/batch_ell_kernels.hpp.inc | 4 +- include/ginkgo/core/matrix/batch_dense.hpp | 38 ++++++++----- include/ginkgo/core/matrix/batch_ell.hpp | 55 +++++++++++-------- 5 files changed, 182 insertions(+), 41 deletions(-) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 758635cea7f..8390d43fd7d 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -124,11 +124,72 @@ Dense::Dense(std::shared_ptr exec, {} +template +Dense* Dense::apply( + ptr_param> b, + ptr_param> x) +{ + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +const Dense* Dense::apply( + ptr_param> b, + ptr_param> x) const +{ + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +Dense* Dense::apply( + ptr_param> alpha, + ptr_param> b, + 
ptr_param> beta, + ptr_param> x) +{ + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +const Dense* Dense::apply( + ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const +{ + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + template void Dense::apply_impl(const MultiVector* b, MultiVector* x) const { - this->validate_application_parameters(b, x); this->get_executor()->run(dense::make_simple_apply(this, b, x)); } @@ -139,7 +200,6 @@ void Dense::apply_impl(const MultiVector* alpha, const MultiVector* beta, MultiVector* x) const { - this->validate_application_parameters(alpha, b, beta, x); this->get_executor()->run( dense::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index c9dbe6d51c9..a50b2f3e23a 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -140,6 +140,68 @@ Ell::Ell(std::shared_ptr exec, {} +template +Ell* Ell::apply( + ptr_param> b, + ptr_param> x) +{ + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +const Ell* Ell::apply( + ptr_param> b, + ptr_param> x) const +{ + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + 
make_temporary_clone(exec, x).get()); + return this; +} + + +template +Ell* Ell::apply( + ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) +{ + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +const Ell* Ell::apply( + ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const +{ + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + template void Ell::apply_impl(const MultiVector* b, MultiVector* x) const diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc index 553e0aa1f3c..8cdb8daa273 100644 --- a/dpcpp/matrix/batch_ell_kernels.hpp.inc +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -42,7 +42,7 @@ __dpct_inline__ void simple_apply_kernel( auto temp = zero(); for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx != invalid_index()) { + if (col_idx == invalid_index()) { break; } else temp += mat.values[tidx + idx * mat.stride] * @@ -67,7 +67,7 @@ __dpct_inline__ void advanced_apply_kernel( auto temp = zero(); for (size_type idx = 0; idx < mat.num_stored_elems_per_row; idx++) { const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; - if (col_idx != invalid_index()) { + if (col_idx == invalid_index()) { break; } else temp += alpha.values[0] * mat.values[tidx + idx * mat.stride] * diff --git a/include/ginkgo/core/matrix/batch_dense.hpp 
b/include/ginkgo/core/matrix/batch_dense.hpp index cbec04482a3..07b862ef484 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -233,8 +233,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); @@ -275,11 +275,8 @@ class Dense final : public EnableBatchLinOp>, * @param b the multi-vector to be applied to * @param x the output multi-vector */ - void apply(const MultiVector* b, - MultiVector* x) const - { - this->apply_impl(b, x); - } + Dense* apply(ptr_param> b, + ptr_param> x); /** * Apply the matrix to a multi-vector with a linear combination of the given @@ -291,13 +288,26 @@ class Dense final : public EnableBatchLinOp>, * @param beta the scalar to scale the x vector with * @param x the output multi-vector */ - void apply(const MultiVector* alpha, - const MultiVector* b, - const MultiVector* beta, - MultiVector* x) const - { - this->apply_impl(alpha, b, beta, x); - } + Dense* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x); + + /** + * @copydoc apply(const MultiVector*, MultiVector*) + */ + const Dense* apply(ptr_param> b, + ptr_param> x) const; + + /** + * @copydoc apply(const MultiVector*, const + * MultiVector*, const MultiVector*, + * MultiVector*) + */ + const Dense* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const; private: inline size_type compute_num_elems(const batch_dim<2>& size) diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 943f63bfdd7..5be94f1035e 100644 --- 
a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -85,7 +85,7 @@ class Ell final friend class EnablePolymorphicObject; friend class Ell, IndexType>; friend class Ell, IndexType>; - static_assert(std::is_same::value, + static_assert(std::is_same::value, "IndexType must be a 32 bit integer"); public: @@ -94,8 +94,7 @@ class Ell final using value_type = ValueType; using index_type = IndexType; - using transposed_type = Ell; - using unbatch_type = gko::matrix::Ell; + using unbatch_type = gko::matrix::Ell; using absolute_type = remove_complex; using complex_type = to_complex; @@ -223,8 +222,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type* get_const_col_idxs_for_item( - size_type batch_id) const noexcept + const index_type* get_const_col_idxs_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data(); @@ -252,8 +251,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + @@ -277,8 +276,8 @@ class Ell final static std::unique_ptr create_const( std::shared_ptr exec, const batch_dim<2>& sizes, const index_type num_elems_per_row, - gko::detail::const_array_view&& values, - gko::detail::const_array_view&& col_idxs); + gko::detail::const_array_view&& values, + gko::detail::const_array_view&& col_idxs); /** * Apply the matrix to a multi-vector. 
Represents the matrix vector @@ -287,29 +286,39 @@ class Ell final * @param b the multi-vector to be applied to * @param x the output multi-vector */ - void apply(const MultiVector* b, - MultiVector* x) const - { - this->apply_impl(b, x); - } + Ell* apply(ptr_param> b, + ptr_param> x); /** * Apply the matrix to a multi-vector with a linear combination of the given - * input vector. Represents the matrix vector multiplication, x = alpha* A * - * b + beta * x, where x and b are both multi-vectors. + * input vector. Represents the matrix vector multiplication, x = alpha * A + * * b + beta * x, where x and b are both multi-vectors. * * @param alpha the scalar to scale the matrix-vector product with * @param b the multi-vector to be applied to * @param beta the scalar to scale the x vector with * @param x the output multi-vector */ - void apply(const MultiVector* alpha, - const MultiVector* b, - const MultiVector* beta, - MultiVector* x) const - { - this->apply_impl(alpha, b, beta, x); - } + Ell* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x); + + /** + * @copydoc apply(const MultiVector*, MultiVector*) + */ + const Ell* apply(ptr_param> b, + ptr_param> x) const; + + /** + * @copydoc apply(const MultiVector*, const + * MultiVector*, const MultiVector*, + * MultiVector*) + */ + const Ell* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const; private: size_type compute_num_elems(const batch_dim<2>& size, From 40741cba653a680eec84915623b6b2e93a513775 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Thu, 12 Oct 2023 10:56:01 +0000 Subject: [PATCH 13/18] Format files Co-authored-by: Pratik Nayak --- dpcpp/matrix/batch_ell_kernels.dp.cpp | 54 +++++++++++----------- include/ginkgo/core/matrix/batch_dense.hpp | 4 +- include/ginkgo/core/matrix/batch_ell.hpp | 8 ++-- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/dpcpp/matrix/batch_ell_kernels.dp.cpp b/dpcpp/matrix/batch_ell_kernels.dp.cpp index 
e4d2421a42f..5a69bbd3d5d 100644 --- a/dpcpp/matrix/batch_ell_kernels.dp.cpp +++ b/dpcpp/matrix/batch_ell_kernels.dp.cpp @@ -97,17 +97,17 @@ void simple_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - simple_apply_kernel(mat_b, b_b, x_b, item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b, x_b, item_ct1); + }); }); } @@ -145,22 +145,22 @@ void advanced_apply(std::shared_ptr exec, // Launch a kernel that has nbatches blocks, each block has max group size exec->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - auto group = item_ct1.get_group(); - auto group_id = group.get_group_linear_id(); - const auto mat_b = - batch::matrix::extract_batch_item(mat_ub, group_id); - const auto b_b = batch::extract_batch_item(b_ub, group_id); - const auto x_b = batch::extract_batch_item(x_ub, group_id); - const auto alpha_b = - batch::extract_batch_item(alpha_ub, group_id); - const auto beta_b = - batch::extract_batch_item(beta_ub, group_id); - 
advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, - item_ct1); - }); + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b, mat_b, b_b, beta_b, x_b, + item_ct1); + }); }); } diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 07b862ef484..0b2bcc49166 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -233,8 +233,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index 5be94f1035e..a6381f90f10 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -222,8 +222,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const index_type* get_const_col_idxs_for_item(size_type batch_id) const - noexcept + const index_type* get_const_col_idxs_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data(); @@ -251,8 +251,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + From b15308fddaaf4d3604e530a51c0d310f66d72134 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Thu, 12 Oct 2023 16:06:05 +0200 Subject: [PATCH 14/18] Fix sparsity issues and review updates Co-authored-by: Marcel Koch Co-authored-by: Yu-Hsiang Tsai --- core/base/batch_utilities.hpp | 55 ++++++++++++++++++++++---- core/matrix/batch_ell.cpp | 2 - core/test/matrix/batch_ell.cpp | 32 ++++++++++++--- core/test/utils/batch_helpers.hpp | 7 ++-- core/test/utils/matrix_generator.hpp | 9 +++-- dpcpp/matrix/batch_ell_kernels.hpp.inc | 10 +++-- 6 files changed, 89 insertions(+), 26 deletions(-) diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index 7204c78a552..3117b35d0f4 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -39,6 +39,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include #include #include @@ -46,6 +47,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include #include #include +#include namespace gko { @@ -126,6 +128,36 @@ auto unbatch(const InputType* batch_object) } +namespace detail { + + +template +void assert_same_sparsity_in_batched_data( + const std::vector>& data) +{ + auto num_nnz = data[0].nonzeros.size(); + auto base_data = data[0]; + base_data.ensure_row_major_order(); + for (int b = 0; b < data.size(); ++b) { + if (data[b].nonzeros.size() != num_nnz) { + GKO_NOT_IMPLEMENTED; + } + auto temp_data = data[b]; + temp_data.ensure_row_major_order(); + for (int nnz = 0; nnz < num_nnz; ++nnz) { + if (temp_data.nonzeros[nnz].row != base_data.nonzeros[nnz].row || + temp_data.nonzeros[nnz].column != + base_data.nonzeros[nnz].column) { + GKO_NOT_IMPLEMENTED; + } + } + } +} + + +} // namespace detail + + template std::unique_ptr read( @@ -134,6 +166,12 @@ std::unique_ptr read( TArgs&&... create_args) { auto num_batch_items = data.size(); + // Throw if the batch items do not all have the same sparsity pattern. + if (!std::is_same>::value && + !std::is_same>::value) { + detail::assert_same_sparsity_in_batched_data(data); + } auto tmp = OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size), std::forward(create_args)...); @@ -163,7 +201,8 @@ std::vector> write( /** - * Creates and initializes a batch of single column-vectors. + * Creates and initializes a batch of the specified Matrix type with a single + * column-vector. * * @tparam Matrix matrix type to initialize (It has to implement the * read function) @@ -278,15 +317,16 @@ std::unique_ptr initialize( /** - * Creates and initializes a batch single column-vector by making copies of the - * single input column vector. + * Creates and initializes a batch of specified Matrix type with a single + * column-vector by making copies of the single input column vector.
* * @tparam Matrix matrix type to initialize (It has to implement the * read function) * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param num_vectors The number of times the input vector is to be duplicated + * @param num_batch_items The number of times the input vector is to be + * duplicated * @param vals values used to initialize each vector in the temp. batch * @param exec Executor associated with the matrix * @param create_args additional arguments passed to Matrix::create, not @@ -297,21 +337,20 @@ std::unique_ptr initialize( */ template std::unique_ptr initialize( - const size_type num_vectors, + const size_type num_batch_items, std::initializer_list vals, std::shared_ptr exec, TArgs&&... create_args) { using value_type = typename Matrix::value_type; using index_type = typename Matrix::index_type; using mat_data = gko::matrix_data; - size_type num_batch_items = num_vectors; GKO_THROW_IF_INVALID(num_batch_items > 0 && vals.size() > 0, "Input data is empty"); auto num_rows = begin(vals) ? vals.size() : 0; auto common_size = dim<2>(num_rows, 1); auto b_size = batch_dim<2>(num_batch_items, common_size); std::vector input_mat_data(num_batch_items, common_size); - for (size_type batch = 0; batch < num_vectors; batch++) { + for (size_type batch = 0; batch < num_batch_items; batch++) { input_mat_data[batch].nonzeros.reserve(num_rows); size_type idx = 0; for (const auto& elem : vals) { @@ -334,7 +373,7 @@ std::unique_ptr initialize( * @tparam TArgs argument types for Matrix::create method * (not including the implied Executor as the first argument) * - * @param num_batch_items The number of times the input matrix is duplicated + * @param num_batch_items The number of times the input matrix is duplicated * @param vals values used to initialize each matrix in the temp. 
batch * @param exec Executor associated to the matrix * @param create_args additional arguments passed to Matrix::create, not diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index a50b2f3e23a..5626860e7ee 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -206,7 +206,6 @@ template void Ell::apply_impl(const MultiVector* b, MultiVector* x) const { - this->validate_application_parameters(b, x); this->get_executor()->run(ell::make_simple_apply(this, b, x)); } @@ -217,7 +216,6 @@ void Ell::apply_impl(const MultiVector* alpha, const MultiVector* beta, MultiVector* x) const { - this->validate_application_parameters(alpha, b, beta, x); this->get_executor()->run( ell::make_advanced_apply(alpha, this, b, beta, x)); } diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp index c36a877ac14..e04ed96bf4c 100644 --- a/core/test/matrix/batch_ell.cpp +++ b/core/test/matrix/batch_ell.cpp @@ -360,7 +360,7 @@ TYPED_TEST(Ell, CanBeListConstructed) using BatchEllMtx = typename TestFixture::BatchEllMtx; using EllMtx = typename TestFixture::EllMtx; - auto m = gko::batch::initialize({{0.0, -1.0}, {1.0, 0.0}}, + auto m = gko::batch::initialize({{0.0, -1.0}, {0.0, -5.0}}, this->exec); ASSERT_EQ(m->get_num_batch_items(), 2); @@ -369,10 +369,10 @@ TYPED_TEST(Ell, CanBeListConstructed) ASSERT_EQ(m->get_num_stored_elements_per_row(), 1); EXPECT_EQ(m->get_values()[0], value_type{0.0}); EXPECT_EQ(m->get_values()[1], value_type{-1.0}); - EXPECT_EQ(m->get_values()[2], value_type{1.0}); - EXPECT_EQ(m->get_values()[3], value_type{0.0}); - EXPECT_EQ(m->get_col_idxs()[0], index_type{0}); - EXPECT_EQ(m->get_col_idxs()[1], index_type{-1}); + EXPECT_EQ(m->get_values()[2], value_type{0.0}); + EXPECT_EQ(m->get_values()[3], value_type{-5.0}); + EXPECT_EQ(m->get_col_idxs()[0], index_type{-1}); + EXPECT_EQ(m->get_col_idxs()[1], index_type{0}); } @@ -459,6 +459,28 @@ TYPED_TEST(Ell, CanBeReadFromMatrixData) } +TYPED_TEST(Ell, 
CanBeDetectDataWithDifferentSparsity) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; + auto vec_data = std::vector>{}; + vec_data.emplace_back( + gko::matrix_data({2, 3}, { + {0, 0, -1.0}, + {1, 1, 2.5}, + {1, 2, 0.5}, + {2, 2, -3.0}, + })); + vec_data.emplace_back(gko::matrix_data( + {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}})); + + EXPECT_THROW( + gko::batch::detail::assert_same_sparsity_in_batched_data(vec_data), + gko::NotImplemented); +} + + TYPED_TEST(Ell, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; diff --git a/core/test/utils/batch_helpers.hpp b/core/test/utils/batch_helpers.hpp index 0b6197b5062..5b1fa60ed36 100644 --- a/core/test/utils/batch_helpers.hpp +++ b/core/test/utils/batch_helpers.hpp @@ -95,9 +95,10 @@ std::unique_ptr generate_random_batch_matrix( .copy_to_array(); for (size_type b = 0; b < num_batch_items; b++) { - auto rand_mat = fill_random_matrix_with_sparsity_pattern< - typename MatrixType::unbatch_type, index_type>( - num_rows, num_cols, row_idxs, col_idxs, value_dist, engine, exec); + auto rand_mat = + fill_random_matrix( + num_rows, num_cols, row_idxs, col_idxs, value_dist, engine, + exec); result->create_view_for_item(b)->copy_from(rand_mat.get()); } diff --git a/core/test/utils/matrix_generator.hpp b/core/test/utils/matrix_generator.hpp index 7490a24bbe5..d5370c6ef6a 100644 --- a/core/test/utils/matrix_generator.hpp +++ b/core/test/utils/matrix_generator.hpp @@ -206,23 +206,24 @@ generate_random_device_matrix_data(gko::size_type num_rows, * @tparam MatrixType type of matrix to generate (must implement * the interface `ReadableFromMatrixData<>` and provide * matching `value_type` and `index_type` type aliases) + * @tparam IndexType the type for row and column indices + * @tparam ValueDistribution type of value distribution + * @tparam Engine type of random engine * * @param 
num_rows number of rows * @param num_cols number of columns - * @param value_dist distribution of matrix values * @param row_idxs the row indices of the matrix * @param col_idxs the column indices of the matrix + * @param value_dist distribution of matrix values * @param exec executor where the matrix should be allocated * @param args additional arguments for the matrix constructor * - * The other (template) parameters match generate_random_matrix_data. - * * @return the unique pointer of MatrixType */ template , typename IndexType = typename MatrixType::index_type, typename ValueDistribution, typename Engine, typename... MatrixArgs> -std::unique_ptr fill_random_matrix_with_sparsity_pattern( +std::unique_ptr fill_random_matrix( size_type num_rows, size_type num_cols, const gko::array& row_idxs, const gko::array& col_idxs, ValueDistribution&& value_dist, diff --git a/dpcpp/matrix/batch_ell_kernels.hpp.inc b/dpcpp/matrix/batch_ell_kernels.hpp.inc index 8cdb8daa273..64d71710dbb 100644 --- a/dpcpp/matrix/batch_ell_kernels.hpp.inc +++ b/dpcpp/matrix/batch_ell_kernels.hpp.inc @@ -44,9 +44,10 @@ __dpct_inline__ void simple_apply_kernel( const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; if (col_idx == invalid_index()) { break; - } else + } else { temp += mat.values[tidx + idx * mat.stride] * b.values[col_idx * b.stride]; + } } x.values[tidx * x.stride] = temp; } @@ -69,11 +70,12 @@ __dpct_inline__ void advanced_apply_kernel( const auto col_idx = mat.col_idxs[tidx + idx * mat.stride]; if (col_idx == invalid_index()) { break; - } else - temp += alpha.values[0] * mat.values[tidx + idx * mat.stride] * + } else { + temp += mat.values[tidx + idx * mat.stride] * b.values[col_idx * b.stride]; + } } x.values[tidx * x.stride] = - temp + beta.values[0] * x.values[tidx * x.stride]; + alpha.values[0] * temp + beta.values[0] * x.values[tidx * x.stride]; } } From 4b1fbc1cd5ecda697967e05bf17a32ffc18a3cb1 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 16 Oct 2023 16:28:44 
+0200 Subject: [PATCH 15/18] vector mat data with duplication --- core/base/batch_utilities.hpp | 39 ++++++++++++++++------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index 3117b35d0f4..e6a52250565 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -349,17 +349,16 @@ std::unique_ptr initialize( auto num_rows = begin(vals) ? vals.size() : 0; auto common_size = dim<2>(num_rows, 1); auto b_size = batch_dim<2>(num_batch_items, common_size); - std::vector input_mat_data(num_batch_items, common_size); - for (size_type batch = 0; batch < num_batch_items; batch++) { - input_mat_data[batch].nonzeros.reserve(num_rows); - size_type idx = 0; - for (const auto& elem : vals) { - if (elem != zero()) { - input_mat_data[batch].nonzeros.emplace_back(idx, 0, elem); - } - ++idx; + mat_data single_mat_data(common_size); + single_mat_data.nonzeros.reserve(num_rows); + size_type idx = 0; + for (const auto& elem : vals) { + if (elem != zero()) { + single_mat_data.nonzeros.emplace_back(idx, 0, elem); } + ++idx; } + std::vector input_mat_data(num_batch_items, single_mat_data); return read( exec, input_mat_data, std::forward(create_args)...); } @@ -397,21 +396,19 @@ std::unique_ptr initialize( auto common_size = dim<2>(begin(vals) ? vals.size() : 0, begin(vals) ? 
begin(vals)->size() : 0); batch_dim<2> b_size(num_batch_items, common_size); - std::vector input_mat_data(num_batch_items, common_size); - for (size_type batch = 0; batch < num_batch_items; batch++) { - size_type ridx = 0; - for (const auto& row : vals) { - size_type cidx = 0; - for (const auto& elem : row) { - if (elem != zero()) { - input_mat_data[batch].nonzeros.emplace_back(ridx, cidx, - elem); - } - ++cidx; + mat_data single_mat_data(common_size); + size_type ridx = 0; + for (const auto& row : vals) { + size_type cidx = 0; + for (const auto& elem : row) { + if (elem != zero()) { + single_mat_data.nonzeros.emplace_back(ridx, cidx, elem); } - ++ridx; + ++cidx; } + ++ridx; } + std::vector input_mat_data(num_batch_items, single_mat_data); return read( exec, input_mat_data, std::forward(create_args)...); } From d231ca60ed109326799dfe4da78860d628d26312 Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Mon, 16 Oct 2023 21:49:43 +0200 Subject: [PATCH 16/18] Review updates Co-authored-by: Yu-Hsiang Tsai Co-authored-by: Marcel Koch --- core/base/batch_utilities.hpp | 44 +++++++++++++++++----- core/matrix/batch_dense.cpp | 8 ---- core/matrix/batch_ell.cpp | 11 ------ core/test/matrix/batch_ell.cpp | 23 ++++++++++- include/ginkgo/core/matrix/batch_dense.hpp | 13 +------ include/ginkgo/core/matrix/batch_ell.hpp | 17 ++------- 6 files changed, 62 insertions(+), 54 deletions(-) diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index e6a52250565..febfd59b636 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -54,6 +54,9 @@ namespace gko { namespace batch { +/** + * Duplicate a given input batch object. + */ template std::unique_ptr duplicate(std::shared_ptr exec, size_type num_duplications, @@ -78,6 +81,9 @@ std::unique_ptr duplicate(std::shared_ptr exec, } +/** + * Duplicate a monolithic matrix and create a batch object. 
+ */ template std::unique_ptr create_from_item( std::shared_ptr exec, const size_type num_duplications, @@ -96,6 +102,13 @@ std::unique_ptr create_from_item( } +/** + * Create a batch object from a vector of monolithic object that share the same + * sparsity pattern. + * + * @note The sparsity of the elements in the input vector of matrices needs to + * be the same. TODO: Check for same sparsity among the different input items + */ template std::unique_ptr create_from_item( std::shared_ptr exec, @@ -115,6 +128,9 @@ std::unique_ptr create_from_item( } +/** + * Unbatch a batched object into a vector of items of its unbatch_type. + */ template auto unbatch(const InputType* batch_object) { @@ -135,19 +151,20 @@ template void assert_same_sparsity_in_batched_data( const std::vector>& data) { - auto num_nnz = data[0].nonzeros.size(); - auto base_data = data[0]; + auto num_nnz = data.at(0).nonzeros.size(); + auto base_data = data.at(0); base_data.ensure_row_major_order(); - for (int b = 0; b < data.size(); ++b) { + for (int b = 1; b < data.size(); ++b) { if (data[b].nonzeros.size() != num_nnz) { GKO_NOT_IMPLEMENTED; } - auto temp_data = data[b]; + auto temp_data = data.at(b); temp_data.ensure_row_major_order(); for (int nnz = 0; nnz < num_nnz; ++nnz) { - if (temp_data.nonzeros[nnz].row != base_data.nonzeros[nnz].row || - temp_data.nonzeros[nnz].column != - base_data.nonzeros[nnz].column) { + if (temp_data.nonzeros.at(nnz).row != + base_data.nonzeros.at(nnz).row || + temp_data.nonzeros.at(nnz).column != + base_data.nonzeros.at(nnz).column) { GKO_NOT_IMPLEMENTED; } } @@ -158,6 +175,10 @@ void assert_same_sparsity_in_batched_data( } // namespace detail +/** + * Create a batch object from a vector of gko::matrix_data objects. Each item of + * the vector needs to store the same sparsity pattern. 
+ */ template std::unique_ptr read( @@ -173,7 +194,7 @@ std::unique_ptr read( detail::assert_same_sparsity_in_batched_data(data); } auto tmp = - OutputType::create(exec, batch_dim<2>(num_batch_items, data[0].size), + OutputType::create(exec, batch_dim<2>(num_batch_items, data.at(0).size), std::forward(create_args)...); for (size_type b = 0; b < num_batch_items; ++b) { @@ -184,6 +205,9 @@ std::unique_ptr read( } +/** + * Write a vector of matrix data objects from an input batch object. + */ template std::vector> write( const OutputType* mvec) @@ -201,8 +225,8 @@ std::vector> write( /** - * Creates and initializes a batch of the specified Matrix type with a single - * column-vector. + * Creates and initializes a batch of the specified Matrix type from a series of + * single column-vectors. * * @tparam Matrix matrix type to initialize (It has to implement the * read function) diff --git a/core/matrix/batch_dense.cpp b/core/matrix/batch_dense.cpp index 8390d43fd7d..58c7fa25cea 100644 --- a/core/matrix/batch_dense.cpp +++ b/core/matrix/batch_dense.cpp @@ -96,14 +96,6 @@ Dense::create_const_view_for_item(size_type item_id) const } -template -std::unique_ptr> Dense::create_with_config_of( - ptr_param> other) -{ - return Dense::create(other->get_executor(), other->get_size()); -} - - template std::unique_ptr> Dense::create_const( std::shared_ptr exec, const batch_dim<2>& sizes, diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 5626860e7ee..88863a05dd4 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -100,17 +100,6 @@ Ell::create_const_view_for_item(size_type item_id) const } -template -std::unique_ptr> -Ell::create_with_config_of( - ptr_param> other) -{ - return Ell::create( - other->get_executor(), other->get_size(), - other->get_num_stored_elements_per_row()); -} - - template std::unique_ptr> Ell::create_const( diff --git a/core/test/matrix/batch_ell.cpp b/core/test/matrix/batch_ell.cpp index e04ed96bf4c..2c8166aa023 100644 
--- a/core/test/matrix/batch_ell.cpp +++ b/core/test/matrix/batch_ell.cpp @@ -459,7 +459,7 @@ TYPED_TEST(Ell, CanBeReadFromMatrixData) } -TYPED_TEST(Ell, CanBeDetectDataWithDifferentSparsity) +TYPED_TEST(Ell, ThrowsForDataWithDifferentNnz) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; @@ -481,6 +481,27 @@ TYPED_TEST(Ell, CanBeDetectDataWithDifferentSparsity) } +TYPED_TEST(Ell, ThrowsForDataWithDifferentSparsity) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using BatchEllMtx = typename TestFixture::BatchEllMtx; + auto vec_data = std::vector>{}; + vec_data.emplace_back( + gko::matrix_data({2, 3}, { + {0, 0, -1.0}, + {1, 1, 2.5}, + {2, 2, -3.0}, + })); + vec_data.emplace_back(gko::matrix_data( + {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}})); + + EXPECT_THROW( + gko::batch::detail::assert_same_sparsity_in_batched_data(vec_data), + gko::NotImplemented); +} + + TYPED_TEST(Ell, GeneratesCorrectMatrixData) { using value_type = typename TestFixture::value_type; diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 0b2bcc49166..5a1697afec4 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -93,15 +93,6 @@ class Dense final : public EnableBatchLinOp>, using absolute_type = remove_complex; using complex_type = to_complex; - /** - * Creates a Dense matrix with the configuration of another Dense - * matrix. - * - * @param other The other matrix whose configuration needs to copied. - */ - static std::unique_ptr create_with_config_of( - ptr_param other); - void convert_to(Dense>* result) const override; void move_to(Dense>* result) override; @@ -233,8 +224,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index a6381f90f10..a02d6c81fe8 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -98,15 +98,6 @@ class Ell final using absolute_type = remove_complex; using complex_type = to_complex; - /** - * Creates a Ell matrix with the configuration of another Ell - * matrix. - * - * @param other The other matrix whose configuration needs to copied. - */ - static std::unique_ptr create_with_config_of( - ptr_param other); - void convert_to( Ell, IndexType>* result) const override; @@ -222,8 +213,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const index_type* get_const_col_idxs_for_item( - size_type batch_id) const noexcept + const index_type* get_const_col_idxs_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data(); @@ -251,8 +242,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const value_type* get_const_values_for_item( - size_type batch_id) const noexcept + const value_type* get_const_values_for_item(size_type batch_id) const + noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + From 48e94bb6b45092f5eaf7bedcd83646a21467f414 Mon Sep 17 00:00:00 2001 From: ginkgo-bot Date: Mon, 16 Oct 2023 19:52:38 +0000 Subject: [PATCH 17/18] Format files Co-authored-by: Pratik Nayak --- include/ginkgo/core/matrix/batch_dense.hpp | 4 ++-- include/ginkgo/core/matrix/batch_ell.hpp | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/ginkgo/core/matrix/batch_dense.hpp b/include/ginkgo/core/matrix/batch_dense.hpp index 5a1697afec4..47230c24e32 100644 --- a/include/ginkgo/core/matrix/batch_dense.hpp +++ b/include/ginkgo/core/matrix/batch_dense.hpp @@ -224,8 +224,8 @@ class Dense final : public EnableBatchLinOp>, * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + this->get_cumulative_offset(batch_id); diff --git a/include/ginkgo/core/matrix/batch_ell.hpp b/include/ginkgo/core/matrix/batch_ell.hpp index a02d6c81fe8..fa00a0631fd 100644 --- a/include/ginkgo/core/matrix/batch_ell.hpp +++ b/include/ginkgo/core/matrix/batch_ell.hpp @@ -213,8 +213,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. 
*/ - const index_type* get_const_col_idxs_for_item(size_type batch_id) const - noexcept + const index_type* get_const_col_idxs_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return col_idxs_.get_const_data(); @@ -242,8 +242,8 @@ class Ell final * significantly more memory efficient than the non-constant version, * so always prefer this version. */ - const value_type* get_const_values_for_item(size_type batch_id) const - noexcept + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept { GKO_ASSERT(batch_id < this->get_num_batch_items()); return values_.get_const_data() + From 0949431736b4ebaeea11d877dca0cc076871273a Mon Sep 17 00:00:00 2001 From: Pratik Nayak Date: Tue, 17 Oct 2023 10:11:59 +0200 Subject: [PATCH 18/18] Review updates Co-authored-by: Marcel Koch --- core/base/batch_utilities.hpp | 3 +++ core/matrix/batch_ell.cpp | 13 ++----------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/core/base/batch_utilities.hpp b/core/base/batch_utilities.hpp index febfd59b636..b4e380a4162 100644 --- a/core/base/batch_utilities.hpp +++ b/core/base/batch_utilities.hpp @@ -151,6 +151,9 @@ template void assert_same_sparsity_in_batched_data( const std::vector>& data) { + if (data.empty()) { + return; + } auto num_nnz = data.at(0).nonzeros.size(); auto base_data = data.at(0); base_data.ensure_row_major_order(); diff --git a/core/matrix/batch_ell.cpp b/core/matrix/batch_ell.cpp index 88863a05dd4..b2987e741d9 100644 --- a/core/matrix/batch_ell.cpp +++ b/core/matrix/batch_ell.cpp @@ -147,10 +147,7 @@ const Ell* Ell::apply( ptr_param> b, ptr_param> x) const { - this->validate_application_parameters(b.get(), x.get()); - auto exec = this->get_executor(); - this->apply_impl(make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, x).get()); + this->apply(b, x); return this; } @@ -180,13 +177,7 @@ const Ell* Ell::apply( ptr_param> beta, ptr_param> x) const { - 
this->validate_application_parameters(alpha.get(), b.get(), beta.get(), - x.get()); - auto exec = this->get_executor(); - this->apply_impl(make_temporary_clone(exec, alpha).get(), - make_temporary_clone(exec, b).get(), - make_temporary_clone(exec, beta).get(), - make_temporary_clone(exec, x).get()); + this->apply(alpha, b, beta, x); return this; }