From b17e2b3f5277d35aee6ef87e11b074f5322026c6 Mon Sep 17 00:00:00 2001 From: Tetyushkin Ivan Date: Fri, 16 Aug 2024 11:07:26 +0300 Subject: [PATCH 1/8] [riscv] Add support for RISC-V vector extension in arch With this patch unit.arch tests are fully passed. --- cmake/toolchain/clang.rvv128.cmake | 19 + cmake/toolchain/run_rvv128.sh | 8 + include/eve/arch/abi_of.hpp | 1 + include/eve/arch/as_register.hpp | 9 +- include/eve/arch/riscv/as_register.hpp | 457 ++++++++++++++ include/eve/arch/riscv/predef.hpp | 17 + include/eve/arch/riscv/rvv_common_masks.hpp | 54 ++ include/eve/arch/riscv/spec.hpp | 39 ++ include/eve/arch/riscv/tags.hpp | 115 ++++ include/eve/arch/riscv/top_bits.hpp | 146 +++++ include/eve/arch/spec.hpp | 7 +- include/eve/arch/tags.hpp | 7 +- include/eve/arch/top_bits.hpp | 4 + include/eve/detail/function/bit_cast.hpp | 13 + include/eve/detail/function/bitmask.hpp | 4 + include/eve/detail/function/combine.hpp | 5 + include/eve/detail/function/compounds.hpp | 4 + include/eve/detail/function/friends.hpp | 4 + include/eve/detail/function/load.hpp | 4 + include/eve/detail/function/make.hpp | 4 + .../detail/function/simd/riscv/bit_cast.hpp | 567 ++++++++++++++++++ .../detail/function/simd/riscv/bitmask.hpp | 46 ++ .../detail/function/simd/riscv/combine.hpp | 59 ++ .../detail/function/simd/riscv/compounds.hpp | 316 ++++++++++ .../detail/function/simd/riscv/friends.hpp | 282 +++++++++ .../eve/detail/function/simd/riscv/load.hpp | 92 +++ .../eve/detail/function/simd/riscv/make.hpp | 148 +++++ .../eve/detail/function/simd/riscv/slice.hpp | 187 ++++++ .../detail/function/simd/riscv/subscript.hpp | 112 ++++ .../detail/function/simd/riscv/to_logical.hpp | 26 + include/eve/detail/function/slice.hpp | 4 + include/eve/detail/function/subscript.hpp | 4 + include/eve/detail/function/to_logical.hpp | 4 + include/eve/detail/spy.hpp | 153 +++-- include/eve/module/core/regular/if_else.hpp | 4 + .../core/regular/impl/simd/riscv/if_else.hpp | 63 ++ test/test.hpp | 24 +- test/unit/arch/is_supported.cpp | 7 + test/unit/arch/top_bits.cpp | 3 + 39 files changed, 2959 insertions(+), 63 deletions(-) create mode 100644 cmake/toolchain/clang.rvv128.cmake create mode 100755 cmake/toolchain/run_rvv128.sh create mode 100644 include/eve/arch/riscv/as_register.hpp create mode 100644 include/eve/arch/riscv/predef.hpp create mode 100644 include/eve/arch/riscv/rvv_common_masks.hpp create mode 100644 include/eve/arch/riscv/spec.hpp create mode 100644 include/eve/arch/riscv/tags.hpp create mode 100644 include/eve/arch/riscv/top_bits.hpp create mode 100644 include/eve/detail/function/simd/riscv/bit_cast.hpp create mode 100644 include/eve/detail/function/simd/riscv/bitmask.hpp create mode 100644 include/eve/detail/function/simd/riscv/combine.hpp create mode 100644 include/eve/detail/function/simd/riscv/compounds.hpp create mode 100644 include/eve/detail/function/simd/riscv/friends.hpp create mode 100644 include/eve/detail/function/simd/riscv/load.hpp create mode 100644 include/eve/detail/function/simd/riscv/make.hpp create mode 100644 include/eve/detail/function/simd/riscv/slice.hpp create mode 100644 include/eve/detail/function/simd/riscv/subscript.hpp create mode 100644 include/eve/detail/function/simd/riscv/to_logical.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/if_else.hpp diff --git a/cmake/toolchain/clang.rvv128.cmake b/cmake/toolchain/clang.rvv128.cmake new file mode 100644 index 0000000000..c73ad14291 --- /dev/null +++ b/cmake/toolchain/clang.rvv128.cmake @@ -0,0 +1,19 @@ +##================================================================================================== +## EVE - Expressive Vector Engine +## Copyright : EVE Project Contributors +## SPDX-License-Identifier: BSL-1.0 +##================================================================================================== +set(CMAKE_SYSTEM_NAME Generic) +set(CMAKE_SYSTEM_PROCESSOR riscv64) + +set(CMAKE_C_COMPILER clang ) +set(CMAKE_CXX_COMPILER clang++ ) + + +if(NOT DEFINED ENV{RISCV_GCC}) + message(FATAL_ERROR "expected to have RISCV_GCC in environment") +endif() + +set(CMAKE_CXX_FLAGS "-O3 -march=rv64gcv -std=c++20 -mrvv-vector-bits=128 --sysroot=$ENV{RISCV_GCC}/sysroot --gcc-toolchain=$ENV{RISCV_GCC} --static --target=riscv64-unknown-linux-gnu ${EVE_OPTIONS}" ) + +set(CMAKE_CROSSCOMPILING_CMD ${PROJECT_SOURCE_DIR}/cmake/toolchain/run_rvv128.sh ) diff --git a/cmake/toolchain/run_rvv128.sh b/cmake/toolchain/run_rvv128.sh new file mode 100755 index 0000000000..45a54168b3 --- /dev/null +++ b/cmake/toolchain/run_rvv128.sh @@ -0,0 +1,8 @@ +##================================================================================================== +## EVE - Expressive Vector Engine +## Copyright : EVE Project Contributors +## SPDX-License-Identifier: BSL-1.0 +##================================================================================================== +#!/bin/sh + +qemu-riscv64 --cpu rv64,v=true,vlen=128 $@ diff --git a/include/eve/arch/abi_of.hpp b/include/eve/arch/abi_of.hpp index 0171f949fe..df0f646bb5 100644 --- a/include/eve/arch/abi_of.hpp +++ b/include/eve/arch/abi_of.hpp @@ -65,6 +65,7 @@ namespace eve else return emulated_{}; } } + else if constexpr( spy::simd_instruction_set == spy::rvv_ ) { return riscv_rvv_dyn_ {}; } else { return emulated_{}; diff --git a/include/eve/arch/as_register.hpp b/include/eve/arch/as_register.hpp index caeb48fd76..eb3ce582ba 100644 --- a/include/eve/arch/as_register.hpp +++ b/include/eve/arch/as_register.hpp @@ -10,9 +10,10 @@ #include #if !defined(EVE_NO_SIMD) -#include -#include -#include -#include +# include +# include +# include +# include +# include #endif diff --git a/include/eve/arch/riscv/as_register.hpp b/include/eve/arch/riscv/as_register.hpp new file mode 100644 index 0000000000..8738e30ffb --- /dev/null +++ b/include/eve/arch/riscv/as_register.hpp @@ -0,0 +1,457 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +#include +#if defined(EVE_HW_RISCV_SVE) +namespace eve +{ +template struct logical; +} + +namespace eve +{ +template struct as_register +{ + static constexpr auto lmul = riscv_rvv_dyn_::getLMUL(Size::value); + + static constexpr bool is_fp_v = std::is_floating_point_v; + static constexpr bool is_signed_v = std::is_signed_v; + + static consteval auto find_vint_mf8() + { + constexpr std::size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vint8mf8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 8))); + return type {}; + } + } + + static consteval auto find_vint_mf4() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vint8mf4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vint16mf4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4))); + return type {}; + } + } + + static consteval auto find_vint_mf2() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vint8mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vint16mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2))); + return type {}; + } else if constexpr( bin_size == 32 ) + { + using type = vint32mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2))); + return type {}; + } + } + + static consteval auto find_vint_m1() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vint8m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vint16m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } else if constexpr( bin_size == 32 ) + { + using type = vint32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vint64m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } + } + + static consteval auto find_vint_m2() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vint8m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vint16m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); + return type {}; + } else if constexpr( bin_size == 32 ) + { + using type = vint32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vint64m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); + return type {}; + } + } + + static consteval auto find_vint_m4() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vint8m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vint16m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); + return type {}; + } else if constexpr( bin_size == 32 ) + { + using type = vint32m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vint64m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); + return type {}; + } + } + + static consteval auto find_vint_m8() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vint8m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vint16m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); + return type {}; + } else if constexpr( bin_size == 32 ) + { + using type = vint32m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vint64m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); + return type {}; + } + } + + static consteval auto find_vint() + { + if constexpr( lmul == 1 ) return find_vint_m1(); + else if constexpr( lmul == 2 ) return find_vint_m2(); + else if constexpr( lmul == 4 ) return find_vint_m4(); + else if constexpr( lmul == 8 ) return find_vint_m8(); + else if constexpr( lmul == -2 ) return find_vint_mf2(); + else if constexpr( lmul == -4 ) return find_vint_mf4(); + else if constexpr( lmul == -8 ) return find_vint_mf8(); + } + + static consteval auto find_vuint_mf8() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vuint8mf8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 8))); + return type {}; + } + } + + static consteval auto find_vuint_mf4() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vuint8mf4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vuint16mf4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4))); + return type {}; + } + } + + static consteval auto find_vuint_mf2() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vuint8mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vuint16mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2))); + return type {}; + } else if constexpr( bin_size == 32 ) + { + using type = vuint32mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2))); + return type {}; + } + } + + static consteval auto find_vuint_m1() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vuint8m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vuint16m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } else if constexpr( bin_size == 32 ) + { + using type = vuint32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vuint64m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } + } + + static consteval auto find_vuint_m2() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vuint8m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vuint16m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); + return type {}; + } else if constexpr( bin_size == 32 ) + { + using type = vuint32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vuint64m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); + return type {}; + } + } + + static consteval auto find_vuint_m4() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vuint8m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vuint16m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); + return type {}; + } else if constexpr( bin_size == 32 ) + { + using type = vuint32m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vuint64m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); + return type {}; + } + } + + static consteval auto find_vuint_m8() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 8 ) + { + using type = vuint8m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); + return type {}; + } else if constexpr( bin_size == 16 ) + { + using type = vuint16m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); + return type {}; + } else if constexpr( bin_size == 32 ) + { + using type = vuint32m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vuint64m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); + return type {}; + } + } + + static consteval auto find_vuint() + { + if constexpr( lmul == 1 ) return find_vuint_m1(); + else if constexpr( lmul == 2 ) return find_vuint_m2(); + else if constexpr( lmul == 4 ) return find_vuint_m4(); + else if constexpr( lmul == 8 ) return find_vuint_m8(); + else if constexpr( lmul == -2 ) return find_vuint_mf2(); + else if constexpr( lmul == -4 ) return find_vuint_mf4(); + else if constexpr( lmul == -8 ) return find_vuint_mf8(); + } + + static consteval auto find_vfloat_mf4() + { + // For MF4 we have float16 types, but for now we do not support this, as + // it requires additional RISC-V extension Zvfh. + return; + } + + static consteval auto find_vfloat_mf2() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 32 ) + { + using type = vfloat32mf2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2))); + return type {}; + } + } + + static consteval auto find_vfloat_m1() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 32 ) + { + using type = vfloat32m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vfloat64m1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } + } + + static consteval auto find_vfloat_m2() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 32 ) + { + using type = vfloat32m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vfloat64m2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 2))); + return type {}; + } + } + + static consteval auto find_vfloat_m4() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 32 ) + { + using type = vfloat32m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vfloat64m4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 4))); + return type {}; + } + } + + static consteval auto find_vfloat_m8() + { + constexpr size_t bin_size = sizeof(Type) * 8; + if constexpr( bin_size == 32 ) + { + using type = vfloat32m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); + return type {}; + } else if constexpr( bin_size == 64 ) + { + using type = vfloat64m8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen * 8))); + return type {}; + } + } + + static consteval auto find_vfloat() + { + if constexpr( lmul == 1 ) return find_vfloat_m1(); + else if constexpr( lmul == 2 ) return find_vfloat_m2(); + else if constexpr( lmul == 4 ) return find_vfloat_m4(); + else if constexpr( lmul == 8 ) return find_vfloat_m8(); + else if constexpr( lmul == -2 ) return find_vfloat_mf2(); + else if constexpr( lmul == -4 ) return find_vfloat_mf4(); + // MF8 for float not supported. + } + + public: + static consteval auto find() + { + constexpr auto width = sizeof(Type) * Size::value * 8; + + static_assert(width <= __riscv_v_fixed_vlen * ABI::MaxLmul, + "[eve riscv] - Type is not usable in SIMD register (too big)"); + if constexpr( is_fp_v ) return find_vfloat(); + else if constexpr( !is_fp_v && is_signed_v ) return find_vint(); + else if constexpr( !is_fp_v && !is_signed_v ) return find_vuint(); + } + using type = decltype(find()); + static_assert(!std::is_void_v, + "[eve riscv] - Type is not usable in a SIMD register (unknown type)"); +}; + +// --------------------------------------------------------------------------------------------- +// logical cases +template struct as_logical_register +{ + static constexpr auto lmul = riscv_rvv_dyn_::getLMUL(Size::value); + static constexpr size_t size = sizeof(Type) * 8; + static constexpr size_t bit_size = lmul > 0 ? size / lmul : size * (-lmul); + + static constexpr auto find() + { + if constexpr( bit_size == 1 ) + { + using type = vbool1_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen))); + return type {}; + } else if constexpr( bit_size == 2 ) + { + using type = vbool2_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 2))); + return type {}; + } else if constexpr( bit_size == 4 ) + { + using type = vbool4_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 4))); + return type {}; + } else if constexpr( bit_size == 8 ) + { + using type = vbool8_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 8))); + return type {}; + } else if constexpr( bit_size == 16 ) + { + using type = vbool16_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 16))); + return type {}; + } else if constexpr( bit_size == 32 ) + { + using type = vbool32_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 32))); + return type {}; + } else if constexpr( bit_size == 64 ) + { + using type = vbool64_t __attribute__((riscv_rvv_vector_bits(__riscv_v_fixed_vlen / 64))); + return type {}; + } + } + + using type = decltype(find()); + static_assert(!std::is_void_v, + "[eve riscv] - Type is not usable as logical(mask) SIMD register"); +}; +} +#endif diff --git a/include/eve/arch/riscv/predef.hpp b/include/eve/arch/riscv/predef.hpp new file mode 100644 index 0000000000..7afae91f52 --- /dev/null +++ b/include/eve/arch/riscv/predef.hpp @@ -0,0 +1,17 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include + +// We successfully detected some native SIMD +#if defined(SPY_SIMD_IS_RISCV_FLEXIBLE) && !defined(EVE_NO_SIMD) +# define EVE_SUPPORTS_NATIVE_SIMD +# define EVE_HW_RISCV_SVE +# define EVE_INCLUDE_RISCV_HEADER +#endif diff --git a/include/eve/arch/riscv/rvv_common_masks.hpp b/include/eve/arch/riscv/rvv_common_masks.hpp new file mode 100644 index 0000000000..73d6bf1ab4 --- /dev/null +++ b/include/eve/arch/riscv/rvv_common_masks.hpp @@ -0,0 +1,54 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +namespace eve::detail +{ +template +EVE_FORCEINLINE logical> + rvv_true() +{ + static constexpr auto lmul = riscv_rvv_dyn_::getLMUL(N::value); + static constexpr size_t size = sizeof(T) * 8; + static constexpr size_t ratio = lmul > 0 ? size / lmul : size * (-lmul); + if constexpr( ratio == 1 ) return __riscv_vmset_m_b1(N::value); + else if constexpr( ratio == 2 ) return __riscv_vmset_m_b2(N::value); + else if constexpr( ratio == 4 ) return __riscv_vmset_m_b4(N::value); + else if constexpr( ratio == 8 ) return __riscv_vmset_m_b8(N::value); + else if constexpr( ratio == 16 ) return __riscv_vmset_m_b16(N::value); + else if constexpr( ratio == 32 ) return __riscv_vmset_m_b32(N::value); + else if constexpr( ratio == 64 ) return __riscv_vmset_m_b64(N::value); +} + +template +EVE_FORCEINLINE logical> + rvv_none() +{ + static constexpr auto lmul = riscv_rvv_dyn_::getLMUL(N::value); + static constexpr size_t size = sizeof(T) * 8; + static constexpr size_t ratio = lmul > 0 ? size / lmul : size * (-lmul); + if constexpr( ratio == 1 ) return __riscv_vmclr_m_b1(N::value); + else if constexpr( ratio == 2 ) return __riscv_vmclr_m_b2(N::value); + else if constexpr( ratio == 4 ) return __riscv_vmclr_m_b4(N::value); + else if constexpr( ratio == 8 ) return __riscv_vmclr_m_b8(N::value); + else if constexpr( ratio == 16 ) return __riscv_vmclr_m_b16(N::value); + else if constexpr( ratio == 32 ) return __riscv_vmclr_m_b32(N::value); + else if constexpr( ratio == 64 ) return __riscv_vmclr_m_b64(N::value); +} + +template +EVE_FORCEINLINE logical> + rvv_one(size_t i) +{ + std::array Vals = {}; + Vals[i] = 1; + wide Vector(Vals.begin(), Vals.end()); + return Vector != static_cast(0); +} + +} diff --git a/include/eve/arch/riscv/spec.hpp b/include/eve/arch/riscv/spec.hpp new file mode 100644 index 0000000000..af7ca6e661 --- /dev/null +++ b/include/eve/arch/riscv/spec.hpp @@ -0,0 +1,39 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include + +#include + +//================================================================================================== +// Register count +//================================================================================================== +#if defined(EVE_HW_RISCV_SVE) + +namespace eve +{ +struct register_count +{ + static constexpr std::size_t general = 32; + static constexpr std::size_t simd = 32; +}; +} + +//================================================================================================== +// RVV SIMD ABI +//================================================================================================== +# if !defined(EVE_CURRENT_API) +# include +# define EVE_CURRENT_ABI ::eve::riscv_rvv_dyn_ +# define EVE_CURRENT_API ::eve::rvv_api_ +# define EVE_ABI_NAMESPACE riscv_abi_namespace +# define EVE_ABI_DETECTED +# endif + +#endif diff --git a/include/eve/arch/riscv/tags.hpp b/include/eve/arch/riscv/tags.hpp new file mode 100644 index 0000000000..9ef925e61d --- /dev/null +++ b/include/eve/arch/riscv/tags.hpp @@ -0,0 +1,115 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace eve +{ +//================================================================================================ +// ABI tags for all RISCV bits SIMD registers +//================================================================================================ +template struct rvv_abi_ +{ + static constexpr std::size_t MaxLmul = 8; + static constexpr std::size_t bits = Size * MaxLmul; + static constexpr std::size_t bytes = bits / 8; + static constexpr bool is_wide_logical = false; + + template + static constexpr bool is_full = ((Type::size() * sizeof(typename Type::value_type)) >= bytes); + + template + static constexpr std::size_t fundamental_cardinal = Size / 8 / sizeof(Type); + + template + static constexpr std::size_t expected_cardinal = Size / 8 / sizeof(Type) * MaxLmul; + + template static consteval int get_min_frac_lmul() + { + auto type_size = sizeof(Type); + if( type_size == 1 ) return 8; + if( type_size == 2 ) return 4; + if( type_size == 4 ) return 2; + // for bigger types we can not use frac lmul. + // Return bigger than size of vector register value. + return Size + 1; + } + + template static consteval int get_max_frac_lmul() + { + auto type_size = sizeof(Type); + if( type_size <= 4 ) return 2; + // for bigger types we can not use frac lmul. + // Return bigger than size of vector register value. + return Size + 1; + } + + // return natural lmul if > 0, frac otherwise + template static consteval int getLMUL(size_t cardinal) + { + auto binary_size = cardinal * sizeof(Type) * 8; + auto min_frac_lmul = get_min_frac_lmul(); + auto max_frac_lmul = get_max_frac_lmul(); + + auto frac_lmul_border = Size / max_frac_lmul; + if( binary_size <= frac_lmul_border ) + { + auto frac_lmul = Size / binary_size; + frac_lmul = std::bit_floor(frac_lmul); + if( frac_lmul > min_frac_lmul ) frac_lmul = min_frac_lmul; + return -frac_lmul; + } + auto lmul = binary_size / Size; + if( binary_size % Size ) ++lmul; + return std::bit_ceil(lmul); + } + + template using m1_fixed = fixed; +}; + +#ifdef __riscv_v_fixed_vlen +struct riscv_rvv_dyn_ : rvv_abi_<__riscv_v_fixed_vlen> +{}; +#else +struct riscv_rvv_dyn_ : rvv_abi_<1> +{}; +#endif + +//================================================================================================ +// Dispatching tag for RISC-V SIMD implementation +//================================================================================================ +struct rvv_ : simd_api +{ + using is_rvv = void; +}; +struct rvv_api_ : simd_api +{ + using is_rvv = void; +}; + +//================================================================================================ +// RISC-V extensions tag objects +//================================================================================================ +inline constexpr rvv_ rvv = {}; +inline constexpr rvv_api_ rvv_api = {}; + +//================================================================================================ +// RISC-V RVV ABI concept +//================================================================================================ +template +concept rvv_abi = detail::is_one_of(detail::types {}); +template +concept rvv_tag = requires(T) { typename T::is_rvv; }; +} diff --git a/include/eve/arch/riscv/top_bits.hpp b/include/eve/arch/riscv/top_bits.hpp new file mode 100644 index 0000000000..b6d985412c --- /dev/null +++ b/include/eve/arch/riscv/top_bits.hpp @@ -0,0 +1,146 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +namespace eve +{ + +template +requires(current_api == rvv && !has_aggregated_abi_v) +struct top_bits +{ + using logical_type = Logical; + using scalar_type = typename as_arithmetic_t::value_type; + using abi_type = typename as_arithmetic_t::abi_type; + + static constexpr std::ptrdiff_t static_size = logical_type::size(); + static constexpr bool is_aggregated = false; + + static constexpr auto half_size = (static_size / 2 > 0) ? static_size / 2 : 1; + using half_logical = logical>>; + using storage_type = logical>>; + + static constexpr std::ptrdiff_t bits_per_element = 1; + static constexpr std::ptrdiff_t static_bits_size = static_size * bits_per_element; + static constexpr bool is_cheap = true; + + storage_type storage; + + // constructors --------------------------------- + + EVE_FORCEINLINE constexpr top_bits() = default; + + EVE_FORCEINLINE constexpr explicit top_bits(storage_type storage) : storage(storage) {} + + EVE_FORCEINLINE constexpr explicit top_bits(logical_type p) + requires(!std::same_as) + : storage {bit_cast(p, eve::as {})} + { + operator&=(top_bits(ignore_none_ {})); + } + + // -- constructor(ignore) + template + EVE_FORCEINLINE constexpr explicit top_bits(C c) : storage {c.mask(eve::as {})} + {} + + // -- constructor: logical + ignore + + EVE_FORCEINLINE explicit top_bits(logical_type p, relative_conditional_expr auto ignore) + : top_bits(p) + { + operator&=(top_bits(ignore)); + } + + // -- slicing + + EVE_FORCEINLINE + kumi::tuple, top_bits> slice() const + requires(Logical::size() > 1) + { + auto [l, h] = to_logical(*this).slice(); + return {top_bits {l}, top_bits {h}}; + } + + template EVE_FORCEINLINE top_bits slice(slice_t) const + { + auto [l, h] = slice(); + + if constexpr( Slice == 0 ) return l; + else return h; + } + + // getters/setter ---------------------- + static constexpr std::ptrdiff_t size() { return static_size; } + + EVE_FORCEINLINE constexpr void set(std::ptrdiff_t i, bool x) { storage.set(i, x); } + EVE_FORCEINLINE constexpr bool get(std::ptrdiff_t i) const { return storage.get(i); } + + EVE_FORCEINLINE constexpr explicit operator bool() + { + return __riscv_vcpop(storage, static_size) != 0; + } + + EVE_FORCEINLINE constexpr auto as_int() const + requires(static_bits_size <= 64) + { + constexpr size_t size = __riscv_v_fixed_vlen / 8; + std::array Values = {0}; + __riscv_vsm(Values.data(), storage, static_size); + std::uint64_t to_return = 0; + for( int id = size - 1; id >= 0; --id ) + { + to_return <<= 8; + to_return |= Values.at(id); + } + // we need to clear result from agnostic values. + std::bitset<64> to_clean; + for( int i = 0; i < static_size; ++i ) to_clean.set(i, true); + + to_return &= to_clean.to_ullong(); + return to_return; + } + + EVE_FORCEINLINE constexpr bool operator==(top_bits const& x) const + { + auto neq_res = __riscv_vmxor(storage, x.storage, static_size); + return __riscv_vcpop(neq_res, static_size) == 0; + } + + EVE_FORCEINLINE top_bits& operator&=(top_bits x) + { + storage = storage && x.storage; + return *this; + } + + EVE_FORCEINLINE top_bits& operator|=(top_bits x) + { + storage = storage || x.storage; + return *this; + } + + EVE_FORCEINLINE top_bits& operator^=(top_bits x) + { + storage = storage != x.storage; + return *this; + } + + EVE_FORCEINLINE constexpr top_bits operator~() const + { + return top_bits {!storage} & top_bits {ignore_none_ {}}; + } + + // streaming ---------------------------------- + + EVE_FORCEINLINE friend std::ostream& operator<<(std::ostream& o, const top_bits& x) + { + return o << x.storage << "\n"; + } +}; +} diff --git a/include/eve/arch/spec.hpp b/include/eve/arch/spec.hpp index b487a2f268..49deb2d22a 100644 --- a/include/eve/arch/spec.hpp +++ b/include/eve/arch/spec.hpp @@ -8,10 +8,11 @@ #pragma once #if !defined(EVE_NO_SIMD) -# include -# include -# include # include +# include +# include +# include +# include #endif #include diff --git a/include/eve/arch/tags.hpp b/include/eve/arch/tags.hpp index b9c1060fdd..38f857d91f 100644 --- a/include/eve/arch/tags.hpp +++ b/include/eve/arch/tags.hpp @@ -7,8 +7,9 @@ //================================================================================================== #pragma once +#include +#include #include -#include #include -#include -#include +#include +#include diff --git a/include/eve/arch/top_bits.hpp b/include/eve/arch/top_bits.hpp index b82b7e2a47..5e0680ecc0 100644 --- a/include/eve/arch/top_bits.hpp +++ b/include/eve/arch/top_bits.hpp @@ -39,3 +39,7 @@ namespace eve { #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/function/bit_cast.hpp b/include/eve/detail/function/bit_cast.hpp index 131ddba6a0..bf40c2c93e 100644 --- a/include/eve/detail/function/bit_cast.hpp +++ b/include/eve/detail/function/bit_cast.hpp @@ -21,6 +21,15 @@ namespace eve { return EVE_DISPATCH_CALL(a,tgt); } + // In riscv we may have different underlay type sizes +#if defined(EVE_INCLUDE_RISCV_HEADER) + template + requires(sizeof(T) != sizeof(Target)) + EVE_FORCEINLINE Target operator()(T const& a, as const& tgt) const noexcept + { + return EVE_DISPATCH_CALL(a, tgt); + } +#endif EVE_CALLABLE_OBJECT(bit_cast_t, bit_cast_); }; @@ -33,3 +42,7 @@ namespace eve #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/function/bitmask.hpp b/include/eve/detail/function/bitmask.hpp index d27f1a0d8e..d0d6ce0a3a 100644 --- a/include/eve/detail/function/bitmask.hpp +++ b/include/eve/detail/function/bitmask.hpp @@ -17,3 +17,7 @@ #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/function/combine.hpp b/include/eve/detail/function/combine.hpp index 898990aeb3..8f539a0156 100644 --- a/include/eve/detail/function/combine.hpp +++ b/include/eve/detail/function/combine.hpp @@ -25,3 +25,8 @@ #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include + +#endif diff --git a/include/eve/detail/function/compounds.hpp b/include/eve/detail/function/compounds.hpp index e763baaf36..7d63b0cbcc 100644 --- a/include/eve/detail/function/compounds.hpp +++ b/include/eve/detail/function/compounds.hpp @@ -31,3 +31,7 @@ # include # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/function/friends.hpp b/include/eve/detail/function/friends.hpp index 02a6cfa3f4..60f2d5d3f5 100644 --- a/include/eve/detail/function/friends.hpp +++ b/include/eve/detail/function/friends.hpp @@ -25,3 +25,7 @@ #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/function/load.hpp b/include/eve/detail/function/load.hpp index 5c6bf5e246..f6ffd17281 100644 --- a/include/eve/detail/function/load.hpp +++ b/include/eve/detail/function/load.hpp @@ -35,3 +35,7 @@ namespace eve #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/function/make.hpp b/include/eve/detail/function/make.hpp index f8d18c9252..643fd6dae5 100644 --- a/include/eve/detail/function/make.hpp +++ b/include/eve/detail/function/make.hpp @@ -25,3 +25,7 @@ #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/function/simd/riscv/bit_cast.hpp b/include/eve/detail/function/simd/riscv/bit_cast.hpp new file mode 100644 index 0000000000..bf85778739 --- /dev/null +++ b/include/eve/detail/function/simd/riscv/bit_cast.hpp @@ -0,0 +1,567 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include +#include + +namespace eve::detail +{ +template +EVE_FORCEINLINE wide + riscv_lmul_ext(wide a) noexcept +requires rvv_abi> +{ + constexpr auto c = categorize>(); + static_assert(N::combined_type::value != N::value); + constexpr auto in_lmul = riscv_rvv_dyn_::getLMUL(N::value); + constexpr auto out_lmul = riscv_rvv_dyn_::getLMUL(N::combined_type::value); + if constexpr( out_lmul == in_lmul ) return a.storage(); + else if constexpr( match(c, category::float64) ) + { + if constexpr( out_lmul == 2 ) return __riscv_vlmul_ext_f64m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_ext_f64m4(a); + else if constexpr( out_lmul == 8 ) return __riscv_vlmul_ext_f64m8(a); + } + else if constexpr( match(c, category::int64) ) + { + if constexpr( out_lmul == 2 ) return __riscv_vlmul_ext_i64m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_ext_i64m4(a); + else if constexpr( out_lmul == 8 ) return __riscv_vlmul_ext_i64m8(a); + } + else if constexpr( match(c, category::uint64) ) + { + if constexpr( out_lmul == 2 ) return __riscv_vlmul_ext_u64m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_ext_u64m4(a); + else if constexpr( out_lmul == 8 ) return __riscv_vlmul_ext_u64m8(a); + } + else if constexpr( match(c, category::float32) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vlmul_ext_f32m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_ext_f32m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_ext_f32m4(a); + else if constexpr( out_lmul == 8 ) return __riscv_vlmul_ext_f32m8(a); + } + else if constexpr( match(c, category::int32) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vlmul_ext_i32m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_ext_i32m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_ext_i32m4(a); + else if constexpr( out_lmul == 8 ) return __riscv_vlmul_ext_i32m8(a); + } + else if constexpr( match(c, category::uint32) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vlmul_ext_u32m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_ext_u32m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_ext_u32m4(a); + else if constexpr( out_lmul == 8 ) return __riscv_vlmul_ext_u32m8(a); + } + else if constexpr( match(c, category::int16) ) + { + if constexpr( out_lmul == -2 ) return __riscv_vlmul_ext_i16mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_ext_i16m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_ext_i16m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_ext_i16m4(a); + else if constexpr( out_lmul == 8 ) return __riscv_vlmul_ext_i16m8(a); + } + else if constexpr( match(c, category::uint16) ) + { + if constexpr( out_lmul == -2 ) return __riscv_vlmul_ext_u16mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_ext_u16m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_ext_u16m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_ext_u16m4(a); + else if constexpr( out_lmul == 8 ) return __riscv_vlmul_ext_u16m8(a); + } + else if constexpr( match(c, category::int8) ) + { + if constexpr( out_lmul == -4 ) return __riscv_vlmul_ext_i8mf4(a); + else if constexpr( out_lmul == -2 ) return __riscv_vlmul_ext_i8mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_ext_i8m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_ext_i8m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_ext_i8m4(a); + else if constexpr( out_lmul == 8 ) return __riscv_vlmul_ext_i8m8(a); + } + else if constexpr( match(c, category::uint8) ) + { + if constexpr( out_lmul == -4 ) return __riscv_vlmul_ext_u8mf4(a); + else if constexpr( out_lmul == -2 ) return __riscv_vlmul_ext_u8mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_ext_u8m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_ext_u8m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_ext_u8m4(a); + else if constexpr( out_lmul == 8 ) return __riscv_vlmul_ext_u8m8(a); + } +} + +template +concept same_wide_size = sizeof(wide) == sizeof(wide); + +template +concept same_raw_wide_size = sizeof(T) * +N::value == sizeof(U) * M::value; + +template +concept same_element_size = sizeof(T) == sizeof(U); + +template +concept different_type = ! +std::is_same_v; + +template +concept same_type_types = (match(categorize>(), category::int_) + && match(categorize>(), category::int_)) + || (match(categorize>(), category::uint_) + && match(categorize>(), category::uint_)) + || (match(categorize>(), category::float_) + && match(categorize>(), category::float_)); + +template +EVE_FORCEINLINE wide +bit_cast_(EVE_REQUIRES(rvv_), O const&, const wide& x, as> const&) noexcept +requires rvv_abi> && rvv_abi> && same_wide_size + && same_element_size && different_type + && (sizeof(T) * N::value == sizeof(U) * N::value) +{ + using out_wide = wide; + constexpr auto out_c = categorize(); + constexpr auto out_lmul = riscv_rvv_dyn_::getLMUL(N::value); + if constexpr( match(out_c, category::float64) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_f64m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_f64m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_f64m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_f64m8(x); + } + else if constexpr( match(out_c, category::int64) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_i64m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_i64m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_i64m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_i64m8(x); + } + else if constexpr( match(out_c, category::uint64) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_u64m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_u64m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_u64m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_u64m8(x); + } + else if constexpr( match(out_c, category::float32) ) + { + if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_f32mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_f32m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_f32m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_f32m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_f32m8(x); + } + else if constexpr( match(out_c, category::int32) ) + { + if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_i32mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_i32m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_i32m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_i32m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_i32m8(x); + } + else if constexpr( match(out_c, category::uint32) ) + { + if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_u32mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_u32m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_u32m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_u32m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_u32m8(x); + } + else if constexpr( match(out_c, category::int16) ) + { + if constexpr( out_lmul == -4 ) return __riscv_vreinterpret_i16mf4(x); + else if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_i16mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_i16m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_i16m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_i16m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_i16m8(x); + } + else if constexpr( match(out_c, category::uint16) ) + { + if constexpr( out_lmul == -4 ) return __riscv_vreinterpret_u16mf4(x); + else if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_u16mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_u16m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_u16m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_u16m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_u16m8(x); + } + else if constexpr( match(out_c, category::int8) ) + { + if constexpr( out_lmul == -8 ) return __riscv_vreinterpret_i8mf8(x); + else if constexpr( out_lmul == -4 ) return __riscv_vreinterpret_i8mf4(x); + else if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_i8mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_i8m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_i8m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_i8m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_i8m8(x); + } + else if constexpr( match(out_c, category::uint8) ) + { + if constexpr( out_lmul == -8 ) return __riscv_vreinterpret_u8mf8(x); + else if constexpr( out_lmul == -4 ) return __riscv_vreinterpret_u8mf4(x); + else if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_u8mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_u8m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_u8m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_u8m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_u8m8(x); + } +} + +// change sew only. +template +EVE_FORCEINLINE wide +bit_cast_(EVE_REQUIRES(rvv_), const O&, const wide& x, as> const&) noexcept +requires rvv_abi> && rvv_abi> && same_wide_size + && (!same_element_size) && same_type_types + && same_raw_wide_size +{ + using in_wide = wide; + using out_wide = wide; + constexpr auto out_lmul = riscv_rvv_dyn_::getLMUL(M::value); + + constexpr auto in_c = categorize(); + constexpr auto out_c = categorize(); + + if constexpr( match(out_c, category::int8) ) + { + if constexpr( out_lmul == -8 ) return __riscv_vreinterpret_i8mf8(x); + else if constexpr( out_lmul == -4 ) return __riscv_vreinterpret_i8mf4(x); + else if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_i8mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_i8m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_i8m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_i8m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_i8m8(x); + } + else if constexpr( match(out_c, category::uint8) ) + { + if constexpr( out_lmul == -8 ) return __riscv_vreinterpret_u8mf8(x); + else if constexpr( out_lmul == -4 ) return __riscv_vreinterpret_u8mf4(x); + else if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_u8mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_u8m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_u8m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_u8m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_u8m8(x); + } + else if constexpr( match(out_c, category::int16) ) + { + if constexpr( out_lmul == -4 ) return __riscv_vreinterpret_i16mf4(x); + else if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_i16mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_i16m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_i16m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_i16m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_i16m8(x); + } + else if constexpr( match(out_c, category::uint16) ) + { + if constexpr( out_lmul == -4 ) return __riscv_vreinterpret_u16mf4(x); + else if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_u16mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_u16m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_u16m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_u16m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_u16m8(x); + } + else if constexpr( match(out_c, category::int32) ) + { + if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_i32mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_i32m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_i32m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_i32m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_i32m8(x); + } + else if constexpr( match(out_c, category::uint32) ) + { + if constexpr( out_lmul == -2 ) return __riscv_vreinterpret_u32mf2(x); + else if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_u32m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_u32m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_u32m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_u32m8(x); + } + else if constexpr( match(out_c, category::float32) ) + { + if constexpr( out_lmul == -2 ) + { + auto casted_to_32 = bit_cast(x, as> {}); + return __riscv_vreinterpret_f32mf2(casted_to_32); + } + else if constexpr( out_lmul == 1 ) + { + auto casted_to_32 = bit_cast(x, as> {}); + return __riscv_vreinterpret_f32m1(casted_to_32); + } + else if constexpr( out_lmul == 2 ) + { + auto casted_to_32 = bit_cast(x, as> {}); + return __riscv_vreinterpret_f32m2(casted_to_32); + } + else if constexpr( out_lmul == 4 ) + { + auto casted_to_32 = bit_cast(x, as> {}); + return __riscv_vreinterpret_f32m4(casted_to_32); + } + else if constexpr( out_lmul == 8 ) + { + auto casted_to_32 = bit_cast(x, as> {}); + return __riscv_vreinterpret_f32m8(casted_to_32); + } + } + else if constexpr( match(out_c, category::int64) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_i64m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_i64m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_i64m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_i64m8(x); + } + else if constexpr( match(out_c, category::uint64) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vreinterpret_u64m1(x); + else if constexpr( out_lmul == 2 ) return __riscv_vreinterpret_u64m2(x); + else if constexpr( out_lmul == 4 ) return __riscv_vreinterpret_u64m4(x); + else if constexpr( out_lmul == 8 ) return __riscv_vreinterpret_u64m8(x); + } + if constexpr( match(out_c, category::float64) ) + { + if constexpr( out_lmul == 1 ) + { + auto casted_to_64 = bit_cast(x, as> {}); + return __riscv_vreinterpret_f64m1(casted_to_64); + } + else if constexpr( out_lmul == 2 ) + { + auto casted_to_64 = bit_cast(x, as> {}); + return __riscv_vreinterpret_f64m2(casted_to_64); + } + else if constexpr( out_lmul == 4 ) + { + auto casted_to_64 = bit_cast(x, as> {}); + return __riscv_vreinterpret_f64m4(casted_to_64); + } + else if constexpr( out_lmul == 8 ) + { + auto casted_to_64 = bit_cast(x, as> {}); + return __riscv_vreinterpret_f64m8(casted_to_64); + } + } +} + +template +EVE_FORCEINLINE wide + bit_cast_(EVE_REQUIRES(rvv_), + const O&, + const kumi::tuple> &x, + as> const &to_as) noexcept +requires rvv_abi> && rvv_abi> +{ + return bit_cast(get<0>(x), to_as); +} + +template +EVE_FORCEINLINE auto +bit_cast_(EVE_REQUIRES(rvv_), + const O&, + const kumi::tuple>>& x, + as>> const & to_as) noexcept +requires rvv_abi> && rvv_abi> +{ + return bit_cast(get<0>(x), to_as); +} + +template +EVE_FORCEINLINE wide +bit_cast_(EVE_REQUIRES(rvv_), const O&, const wide& x, as> const& to_as) noexcept +requires rvv_abi> && rvv_abi> && same_wide_size + && same_raw_wide_size +{ + using in_wide = wide; + using out_wide = wide; + + constexpr auto in_c = categorize(); + constexpr auto out_c = categorize(); + + if constexpr( match(in_c, category::float_) ) + { + // float. We need cast to the to intype with the same width. + using sign = default_as_integer_sign_t; + using out_part_scalar = as_integer_t; + using out_part_wide = wide; + auto part_done = bit_cast(x, as {}); + + return bit_cast(part_done, to_as); + } + else + { + // first change sew, then cast to type + using sign = default_as_integer_sign_t; + using out_part_scalar = as_integer_t; + using out_part_wide = wide; + auto part_done = bit_cast(x, as {}); + return bit_cast(part_done, to_as); + } +} + +// we need to fill other values by zero +template +EVE_FORCEINLINE wide +bit_cast_(EVE_REQUIRES(rvv_), const O&, const wide& x, as> const& to_as) noexcept +requires(!same_wide_size) && rvv_abi> && rvv_abi> +{ + constexpr size_t in_size = sizeof(wide); + constexpr size_t out_size = sizeof(wide); + constexpr auto in_lmul = riscv_rvv_dyn_::getLMUL(N::value); + constexpr auto out_lmul = riscv_rvv_dyn_::getLMUL(M::value); + if constexpr( in_lmul < out_lmul ) + { + static_assert(in_size < out_size); + // extend lmul and go again + // out lmul > in_lmul + auto extended_input = riscv_lmul_ext(x); + for( auto id = N::value; id < N::combined_type::value; ++id ) + { + extended_input.set(id, static_cast(0)); + } + auto to_ret = bit_cast(extended_input, to_as); + return to_ret; + } + else + { + static_assert(in_lmul != out_lmul); + static_assert(in_size > out_size); + // in_lmul > out_lmul + auto bigger_cast = bit_cast(x, as> {}); + return riscv_lmul_trunc(bigger_cast); + } +} + +// Logical-wide conversions. +template +EVE_FORCEINLINE logical> + bit_cast_(EVE_REQUIRES(rvv_), + const O&, + const wide &x, + as>> const &tgt) noexcept +requires rvv_abi> && rvv_abi> +{ + constexpr auto in_lmul = riscv_rvv_dyn_::getLMUL(N::value); + if constexpr( in_lmul > 1 ) return bit_cast(riscv_lmul_trunc(x), tgt); + else if constexpr( in_lmul < 0 ) + { + auto extended_in = bit_cast(x, as> {}); + return bit_cast(extended_in, tgt); + } + else if constexpr( std::is_floating_point_v ) + return bit_cast(bit_cast(x, as, N>> {}), tgt); + else + { + static_assert(in_lmul == 1, "[riscv eve] Can not bitcast to logical not m1 vector register"); + constexpr auto out_lmul = riscv_rvv_dyn_::getLMUL(M::value); + constexpr auto size = sizeof(U) * 8; + constexpr auto bit_size = out_lmul > 0 ? size / out_lmul : size * (-out_lmul); + if constexpr( bit_size == 1 ) return __riscv_vreinterpret_b1(x); + if constexpr( bit_size == 2 ) return __riscv_vreinterpret_b2(x); + if constexpr( bit_size == 4 ) return __riscv_vreinterpret_b4(x); + if constexpr( bit_size == 8 ) return __riscv_vreinterpret_b8(x); + if constexpr( bit_size == 16 ) return __riscv_vreinterpret_b16(x); + if constexpr( bit_size == 32 ) return __riscv_vreinterpret_b32(x); + if constexpr( bit_size == 64 ) return __riscv_vreinterpret_b64(x); + } +} + +template +EVE_FORCEINLINE wide + bit_cast_(EVE_REQUIRES(rvv_), + const O&, + const logical> &x, + as> const&) noexcept +requires rvv_abi> +{ + constexpr auto out_lmul = riscv_rvv_dyn_::getLMUL(M::value); + static_assert(out_lmul == 1, + "[riscv eve] Can not bitcast from logical not to m1 vector register"); + constexpr auto c = categorize>(); + if constexpr( match(c, category::int64) ) return __riscv_vreinterpret_i64m1(x); + else if constexpr( match(c, category::uint64) ) return __riscv_vreinterpret_u64m1(x); + else if constexpr( match(c, category::int32) ) return __riscv_vreinterpret_i32m1(x); + else if constexpr( match(c, category::uint32) ) return __riscv_vreinterpret_u32m1(x); + else if constexpr( match(c, category::int16) ) return __riscv_vreinterpret_i16m1(x); + else if constexpr( match(c, category::uint16) ) return __riscv_vreinterpret_u16m1(x); + else if constexpr( match(c, category::int8) ) return __riscv_vreinterpret_i8m1(x); + else if constexpr( match(c, category::uint8) ) return __riscv_vreinterpret_u8m1(x); + else if constexpr( match(c, category::float32) ) + return __riscv_vreinterpret_f32m1(__riscv_vreinterpret_u32m1(x)); + else if constexpr( match(c, category::float64) ) + return __riscv_vreinterpret_f64m1(__riscv_vreinterpret_u64m1(x)); +} + +// logical-logical - with logical-wide-logical +template +EVE_FORCEINLINE logical> + bit_cast_(EVE_REQUIRES(rvv_), + const O&, + const logical> &x, + as>> const &tgt) noexcept +requires rvv_abi> || rvv_abi> +{ + if constexpr( is_aggregated_v> || is_aggregated_v> ) + { + auto [lv, hv] = x.slice(); + auto half_tgt = as>> {}; + auto to_ret = logical> {bit_cast(lv, half_tgt), bit_cast(hv, half_tgt)}; + return to_ret; + } + else + { + // combine. + using u_m1_cardinal = typename riscv_rvv_dyn_::m1_fixed; + using part_type_cast = wide; + auto u_casted_in = bit_cast(x, as {}); + // Note. We can have some bits in unspecified state. + // And we need to zero them. + constexpr size_t u_size = sizeof(unsigned) * 8; + constexpr size_t valid_elements = N::value / u_size; + constexpr size_t valid_bits_in_last_element = N::value % u_size; + if constexpr ( constexpr auto element = valid_elements; element < u_m1_cardinal::value ) + { + auto part_valid_element = u_casted_in.get(element); + unsigned Mask = (1 << (valid_bits_in_last_element)) - 1; + auto valid_element = Mask & part_valid_element; + u_casted_in.set(element, valid_element); + } + for( size_t element = valid_elements + 1; element < u_m1_cardinal::value; ++element ) + { + u_casted_in.set(element, 0); + } + auto to_ret = bit_cast(u_casted_in, tgt); + return to_ret; + } +} + +// equality. +template +EVE_FORCEINLINE logical> + bit_cast_(EVE_REQUIRES(rvv_), + const O&, + const logical> &x, + as>> const&) noexcept +requires rvv_abi> +{ + return x; +} + +template +EVE_FORCEINLINE wide +bit_cast_(EVE_REQUIRES(rvv_), const O&, const wide& x, as> const& tgt) noexcept +requires rvv_abi> +{ + return x; +} + +} diff --git a/include/eve/detail/function/simd/riscv/bitmask.hpp b/include/eve/detail/function/simd/riscv/bitmask.hpp new file mode 100644 index 0000000000..7e7465f6c6 --- /dev/null +++ b/include/eve/detail/function/simd/riscv/bitmask.hpp @@ -0,0 +1,46 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +#include + +namespace eve::detail +{ +//================================================================================================ +// Logical to Bits +//================================================================================================ +template +EVE_FORCEINLINE auto +to_bits(rvv_ const&, logical> p) noexcept +requires rvv_abi> +{ + // As return by arm. + using int_type = as_integer_t; + using u_wide_t = wide; + using u_mask_t = logical; + auto u_mask = bit_cast(p, as {}); + u_wide_t if_else_res = if_else(u_mask, + /* True value */ static_cast(-1), + /* False Value */ static_cast(0)); + return if_else_res; +} + +//================================================================================================ +// Logical to Mask +//================================================================================================ +template +EVE_FORCEINLINE auto +to_mask(rvv_ const&, logical> p) noexcept +{ + return bit_cast(p.bits(), as>::mask_type> {}); +} + +} diff --git a/include/eve/detail/function/simd/riscv/combine.hpp b/include/eve/detail/function/simd/riscv/combine.hpp new file mode 100644 index 0000000000..b73c08f67e --- /dev/null +++ b/include/eve/detail/function/simd/riscv/combine.hpp @@ -0,0 +1,59 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include + +namespace eve::detail +{ +template +EVE_FORCEINLINE auto +combine(rvv_ const&, wide const& l, wide const& h) noexcept +requires rvv_abi> +{ + using that_t = wide; + + constexpr size_t combined_vl = N::combined_type::value; + if constexpr( eve::has_aggregated_abi_v ) + { + that_t that; + that.storage().assign_parts(l, h); + return that; + } + else + { + auto wider_l = bit_cast(l, as {}); + auto wider_h = bit_cast(h, as {}); + constexpr auto shift_size = N::value; + that_t wider_h_placed = __riscv_vslideup(wider_h, wider_h, shift_size, combined_vl); + auto mask_all_ones = rvv_true(); + auto wider_mask = bit_cast(mask_all_ones, as> {}); + return if_else(wider_mask, wider_l, wider_h_placed); + } +} + +template +EVE_FORCEINLINE auto +combine(rvv_ const&, logical> const& l, logical> const& h) noexcept +requires rvv_abi> +{ + constexpr size_t combined_vl = N::combined_type::value; + using that_t = logical>; + + if constexpr( eve::has_aggregated_abi_v ) + { + that_t that; + that.storage().assign_parts(l, h); + return that; + } + else { return to_logical(eve::combine(l.mask(), h.mask())); } +} +} diff --git a/include/eve/detail/function/simd/riscv/compounds.hpp b/include/eve/detail/function/simd/riscv/compounds.hpp new file mode 100644 index 0000000000..3c48b92b99 --- /dev/null +++ b/include/eve/detail/function/simd/riscv/compounds.hpp @@ -0,0 +1,316 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include + +namespace eve::detail +{ + +template +EVE_FORCEINLINE auto& +self_mul(wide& self, U const& other) noexcept +requires std::same_as, U> && rvv_abi> +{ + constexpr auto c = categorize>(); + if constexpr( match(c, category::integer_) ) return self = __riscv_vmul(self, other, N::value); + else if constexpr( match(c, category::float_) ) return self = __riscv_vfmul(self, other, N::value); +} + +template +EVE_FORCEINLINE auto& +self_mul(wide& self, U const& other) noexcept +requires scalar_value && rvv_abi> +{ + constexpr auto c = categorize>(); + auto y = static_cast(other); + if constexpr( match(c, category::integer_) ) return self = __riscv_vmul(self, y, N::value); + else if constexpr( match(c, category::float_) ) return self = __riscv_vfmul(self, y, N::value); +} + +template +EVE_FORCEINLINE auto& +self_add(wide& self, U const& other) noexcept +requires std::same_as, U> && rvv_abi> +{ + constexpr auto c = categorize>(); + if constexpr( match(c, category::integer_) ) return self = __riscv_vadd(self, other, N::value); + else if constexpr( match(c, category::float_) ) return self = __riscv_vfadd(self, other, N::value); +} + +template +EVE_FORCEINLINE auto& +self_add(wide& self, U const& other) noexcept +requires scalar_value && rvv_abi> +{ + constexpr auto c = categorize>(); + auto y = static_cast(other); + if constexpr( match(c, category::integer_) ) return self = __riscv_vadd(self, y, N::value); + else if constexpr( match(c, category::float_) ) return self = __riscv_vfadd(self, y, N::value); +} + +template +EVE_FORCEINLINE auto& +self_sub(wide& self, U const& other) noexcept +requires std::same_as, U> && rvv_abi> +{ + constexpr auto c = categorize>(); + if constexpr( match(c, category::integer_) ) return self = __riscv_vsub(self, other, N::value); + else if constexpr( match(c, category::float_) ) return self = __riscv_vfsub(self, other, N::value); +} + +template +EVE_FORCEINLINE auto& +self_sub(wide& self, U const& other) noexcept +requires scalar_value && rvv_abi> +{ + constexpr auto c = categorize>(); + auto y = static_cast(other); + if constexpr( match(c, category::integer_) ) return self = __riscv_vsub(self, y, N::value); + else if constexpr( match(c, category::float_) ) return self = __riscv_vfsub(self, y, N::value); +} + +template +EVE_FORCEINLINE auto& +self_div(wide& self, U const& other) noexcept +requires std::same_as, U> && rvv_abi> +{ + constexpr auto c = categorize>(); + if constexpr( match(c, category::int_) ) return self = __riscv_vdiv(self, other, N::value); + else if constexpr( match(c, category::uint_) ) return self = __riscv_vdivu(self, other, N::value); + else if constexpr( match(c, category::float_) ) return self = __riscv_vfdiv(self, other, N::value); +} + +template +EVE_FORCEINLINE auto& +self_div(wide& self, U const& other) noexcept +requires scalar_value && rvv_abi> +{ + constexpr auto c = categorize>(); + auto y = static_cast(other); + if constexpr( match(c, category::int_) ) return self = __riscv_vdiv(self, y, N::value); + else if constexpr( match(c, category::uint_) ) return self = __riscv_vdivu(self, y, N::value); + else if constexpr( match(c, category::float_) ) return self = __riscv_vfdiv(self, y, N::value); +} + +template +EVE_FORCEINLINE auto& +self_bitand(wide& self, wide const& other) noexcept +requires rvv_abi> && (sizeof(T) * N::value == sizeof(U) * M::value) +{ + using wide_t = wide; + constexpr auto cat = categorize(); + if constexpr( match(cat, category::unsigned_) ) + { + auto other_cast = eve::bit_cast(other, as()); + return self = __riscv_vand(self, other_cast, N::value); + } + else + { + using scalar_t = as_integer_t; + using i_t = wide_t::template rebind; + constexpr auto tgt = as(); + auto self_cast = bit_cast(self, tgt); + self = bit_cast(self_bitand(self_cast, other), as(self)); + return self; + } +} + +template +EVE_FORCEINLINE wide & +self_bitand(wide &self, P const &other) noexcept +requires rvv_abi> && (sizeof(T) == sizeof(P)) +{ + using wide_t = wide; + constexpr auto cat = categorize(); + if constexpr( match(cat, category::unsigned_) ) + { + auto other_cast = std::bit_cast(other); + return self = __riscv_vand(self, other_cast, N::value); + } + else + { + using scalar_t = as_integer_t; + using i_t = wide_t::template rebind; + constexpr auto tgt = as(); + auto self_cast = bit_cast(self, tgt); + return self = bit_cast(self_bitand(self_cast, other), as(self)); + } +} + +template +EVE_FORCEINLINE wide & +self_bitxor(wide &self, wide const &other) noexcept +requires rvv_abi> && (sizeof(T) * N::value == sizeof(U) * M::value) +{ + using wide_t = wide; + constexpr auto cat = categorize(); + if constexpr( match(cat, category::unsigned_) ) + { + auto other_cast = eve::bit_cast(other, as()); + return self = __riscv_vxor(self, other_cast, N::value); + } + else + { + using scalar_t = as_integer_t; + using i_t = wide_t::template rebind; + constexpr auto tgt = as(); + auto self_cast = bit_cast(self, tgt); + return self = bit_cast(self_bitxor(self_cast, other), as(self)); + } +} + +template +EVE_FORCEINLINE auto& +self_bitxor(wide& self, P const& other) noexcept +requires rvv_abi> && (sizeof(T) == sizeof(P)) +{ + using wide_t = wide; + constexpr auto cat = categorize(); + if constexpr( match(cat, category::unsigned_) ) + { + auto other_cast = std::bit_cast(other); + return self = __riscv_vxor(self, other_cast, N::value); + } + else + { + using scalar_t = as_integer_t; + using i_t = wide_t::template rebind; + constexpr auto tgt = as(); + auto self_cast = bit_cast(self, tgt); + return self = bit_cast(self_bitxor(self_cast, other), as(self)); + } +} + +template +EVE_FORCEINLINE wide & +self_bitor(wide &self, wide const &other) noexcept +requires rvv_abi> && (sizeof(T) * N::value == sizeof(U) * M::value) +{ + using wide_t = wide; + constexpr auto cat = categorize(); + if constexpr( match(cat, category::unsigned_) ) + { + auto other_cast = eve::bit_cast(other, as()); + return self = __riscv_vor(self, other_cast, N::value); + } + else + { + using scalar_t = as_integer_t; + using i_t = wide_t::template rebind; + constexpr auto tgt = as(); + auto self_cast = bit_cast(self, tgt); + return self = bit_cast(self_bitor(self_cast, other), as(self)); + } +} + +template +EVE_FORCEINLINE wide & +self_bitor(wide &self, P const &other) noexcept +requires rvv_abi> && (sizeof(T) == sizeof(P)) +{ + using wide_t = wide; + constexpr auto cat = categorize(); + if constexpr( match(cat, category::unsigned_) ) + { + auto other_cast = std::bit_cast(other); + return self = __riscv_vor(self, other_cast, N::value); + } + else + { + using scalar_t = as_integer_t; + using i_t = wide_t::template rebind; + constexpr auto tgt = as(); + auto self_cast = bit_cast(self, tgt); + return self = bit_cast(self_bitor(self_cast, other), as(self)); + } +} + +template +EVE_FORCEINLINE wide & +self_bitnot(wide &self) +requires rvv_abi> +{ + using wide_t = wide; + constexpr auto cat = categorize(); + if constexpr( match(cat, category::unsigned_) ) return self = __riscv_vnot(self, N::value); + else + { + using scalar_t = as_integer_t; + using i_t = wide_t::template rebind; + constexpr auto tgt = as(); + auto self_cast = bit_cast(self, tgt); + return self = bit_cast(self_bitnot(self_cast), as(self)); + } +} + +template +EVE_FORCEINLINE auto& +self_shl(wide& self, wide shift) noexcept +requires(rvv_abi> && sizeof(T) == sizeof(U)) +{ + using i_t = typename wide::template rebind, N>; + auto const si = bit_cast(shift, as()); + + self = __riscv_vsll(self, si, N::value); + return self; +} + +template +EVE_FORCEINLINE auto& +self_shl(wide& self, U shift) noexcept +requires rvv_abi> +{ + self = __riscv_vsll(self, shift, N::value); + return self; +} + +template +EVE_FORCEINLINE auto& +self_shl(wide& self, index_t const&) noexcept +requires rvv_abi> +{ + self = __riscv_vsll(self, U, N::value); + return self; +} + +template +EVE_FORCEINLINE auto& +self_shr(wide& self, wide shift) noexcept +requires(rvv_abi> && sizeof(T) == sizeof(U)) +{ + constexpr auto c = categorize>(); + auto shift_casted = convert(shift, as>()); + if constexpr( match(c, category::uint_) ) + return self = __riscv_vsrl(self, shift_casted, N::value); + else if constexpr( match(c, category::int_) ) return self = __riscv_vsra(self, shift_casted, N::value); +} + +template +EVE_FORCEINLINE auto& +self_shr(wide& self, U shift) noexcept +requires rvv_abi> +{ + constexpr auto c = categorize>(); + if constexpr( match(c, category::uint_) ) return self = __riscv_vsrl(self, shift, N::value); + else if constexpr( match(c, category::int_) ) return self = __riscv_vsra(self, shift, N::value); +} + +template +EVE_FORCEINLINE auto& +self_shr(wide& self, index_t const&) noexcept +requires rvv_abi> +{ + constexpr auto c = categorize>(); + if constexpr( match(c, category::uint_) ) return self = __riscv_vsrl(self, U, N::value); + else if constexpr( match(c, category::int_) ) return self = __riscv_vsra(self, U, N::value); +} + +} diff --git a/include/eve/detail/function/simd/riscv/friends.hpp b/include/eve/detail/function/simd/riscv/friends.hpp new file mode 100644 index 0000000000..10a0839acb --- /dev/null +++ b/include/eve/detail/function/simd/riscv/friends.hpp @@ -0,0 +1,282 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +namespace eve::detail +{ + +template +EVE_FORCEINLINE auto +self_greater(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + if constexpr( std::is_signed_v ) return __riscv_vmsgt(lhs, rhs, N::value); + else return __riscv_vmsgtu(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_greater(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + auto rhs_cast = static_cast(rhs); + if constexpr( std::is_signed_v ) return __riscv_vmsgt(lhs, rhs_cast, N::value); + else return __riscv_vmsgtu(lhs, rhs_cast, N::value); +} + +template +EVE_FORCEINLINE auto +self_greater(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmfgt(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_greater(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmfgt(lhs, static_cast(rhs), N::value); +} + +template +EVE_FORCEINLINE auto +self_less(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + if constexpr( std::is_signed_v ) return __riscv_vmslt(lhs, rhs, N::value); + else return __riscv_vmsltu(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_less(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + auto rhs_cast = static_cast(rhs); + if constexpr( std::is_signed_v ) return __riscv_vmslt(lhs, rhs_cast, N::value); + else return __riscv_vmsltu(lhs, rhs_cast, N::value); +} + +template +EVE_FORCEINLINE auto +self_less(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmflt(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_less(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmflt(lhs, static_cast(rhs), N::value); +} + +template +EVE_FORCEINLINE auto +self_geq(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + if constexpr( std::is_signed_v ) return __riscv_vmsge(lhs, rhs, N::value); + else return __riscv_vmsgeu(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_geq(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + auto rhs_cast = static_cast(rhs); + if constexpr( std::is_signed_v ) return __riscv_vmsge(lhs, rhs_cast, N::value); + else return __riscv_vmsgeu(lhs, rhs_cast, N::value); +} + +template +EVE_FORCEINLINE auto +self_geq(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmfge(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_geq(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmfge(lhs, static_cast(rhs), N::value); +} + +template +EVE_FORCEINLINE auto +self_leq(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + if constexpr( std::is_signed_v ) return __riscv_vmsle(lhs, rhs, N::value); + else return __riscv_vmsleu(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_leq(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + auto rhs_cast = static_cast(rhs); + if constexpr( std::is_signed_v ) return __riscv_vmsle(lhs, rhs_cast, N::value); + else return __riscv_vmsleu(lhs, rhs_cast, N::value); +} + +template +EVE_FORCEINLINE auto +self_leq(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmfle(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_leq(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmfle(lhs, static_cast(rhs), N::value); +} + +template +EVE_FORCEINLINE auto +self_eq(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + return __riscv_vmseq(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_eq(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + return __riscv_vmseq(lhs, static_cast(rhs), N::value); +} + +template +EVE_FORCEINLINE auto +self_eq(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmfeq(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_eq(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmfeq(lhs, static_cast(rhs), N::value); +} + +template +EVE_FORCEINLINE auto +self_neq(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + return __riscv_vmsne(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_neq(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (!std::is_floating_point_v) +{ + return __riscv_vmsne(lhs, static_cast(rhs), N::value); +} + +template +EVE_FORCEINLINE auto +self_neq(wide lhs, wide rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmfne(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_neq(wide lhs, std::convertible_to auto rhs) noexcept -> logical> +requires rvv_abi> && (std::is_floating_point_v) +{ + return __riscv_vmfne(lhs, static_cast(rhs), N::value); +} + +template +EVE_FORCEINLINE auto +self_eq(logical> lhs, logical> rhs) noexcept -> logical> +requires rvv_abi> +{ + return __riscv_vmxnor(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_neq(logical> lhs, logical> rhs) noexcept -> logical> +requires rvv_abi> +{ + return __riscv_vmxor(lhs, rhs, N::value); +} + +template +EVE_FORCEINLINE auto +self_logand(rvv_ const&, logical> v, logical> w) noexcept + -> logical> +requires(rvv_abi> || rvv_abi>) +{ + if constexpr( !is_aggregated_v> && !is_aggregated_v> ) + { + auto casted_w = bit_cast(w, as>> {}); + logical> to_ret = __riscv_vmand(v, casted_w, N::value); + return to_ret; + } + else + { + auto [lv, hv] = v.slice(); + auto [lw, hw] = w.slice(); + auto res = logical> {lv && lw, hv && hw}; + return res; + } +} + +template +EVE_FORCEINLINE auto +self_logor(rvv_ const&, logical> v, logical> w) noexcept + -> logical> +requires(rvv_abi> || rvv_abi>) +{ + if constexpr( !is_aggregated_v> && !is_aggregated_v> ) + { + auto casted_w = bit_cast(w, as>> {}); + logical> to_ret = __riscv_vmor(v, casted_w, N::value); + return to_ret; + } + else + { + auto [lv, hv] = v.slice(); + auto [lw, hw] = w.slice(); + return logical> {lv || lw, hv || hw}; + } +} + +template +EVE_FORCEINLINE auto +self_lognot(logical> v) noexcept -> logical> +requires rvv_abi> +{ + return __riscv_vmnot(v, N::value); +} + +} diff --git a/include/eve/detail/function/simd/riscv/load.hpp b/include/eve/detail/function/simd/riscv/load.hpp new file mode 100644 index 0000000000..2513e94a8d --- /dev/null +++ b/include/eve/detail/function/simd/riscv/load.hpp @@ -0,0 +1,92 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace eve::detail +{ + +template +EVE_FORCEINLINE wide + perform_load(logical> mask, as> tgt, PtrTy p) +{ + wide zero_init {0}; + constexpr auto c = categorize>(); + if constexpr( match(c, category::size8_) ) return __riscv_vle8_tumu(mask, zero_init, p, N::value); + else if constexpr( match(c, category::size16_) ) + return __riscv_vle16_tumu(mask, zero_init, p, N::value); + else if constexpr( match(c, category::size32_) ) + return __riscv_vle32_tumu(mask, zero_init, p, N::value); + else if constexpr( match(c, category::size64_) ) + return __riscv_vle64_tumu(mask, zero_init, p, N::value); +} + +template> Ptr> +EVE_FORCEINLINE wide +load_(EVE_SUPPORTS(rvv_), C const& cond, safe_type const& s, eve::as> const& tgt, Ptr p) +requires(rvv_abi>) +{ + auto ptr = unalign(p); + + if constexpr( C::has_alternative ) + { + auto res = load(drop_alternative(cond), s, tgt, p); + return eve::replace_ignored(res, cond, cond.alternative); + } + else if constexpr( C::is_complete && !C::is_inverted ) return wide(0); + else if constexpr( C::is_complete && C::is_inverted && N() == expected_cardinal_v ) + { + return perform_load(rvv_true(), tgt, ptr); + } + else return perform_load(cond.mask(tgt), tgt, ptr); +} + +template>> Pointer> +EVE_FORCEINLINE logical> + load_(EVE_SUPPORTS(rvv_), + C const &cond, + safe_type const&, + eve::as>> const&, + Pointer ptr) noexcept +requires rvv_abi> +{ + auto const c1 = map_alternative(cond, [](auto alt) { return alt.mask(); }); + auto const block = load(c1, safe, eve::as> {}, ptr_cast(ptr)); + return block != 0; +} + +template +EVE_FORCEINLINE logical> + load_(EVE_SUPPORTS(rvv_), + C const &cond, + safe_type const&, + eve::as>> const&, + Iterator b, + Iterator e) noexcept +requires rvv_abi> +{ + auto const c1 = map_alternative(cond, [](auto alt) { return alt.mask(); }); + auto const block = load(c1, safe, eve::as> {}, b, e); + return to_logical(block); +} + +} diff --git a/include/eve/detail/function/simd/riscv/make.hpp b/include/eve/detail/function/simd/riscv/make.hpp new file mode 100644 index 0000000000..260e2853b1 --- /dev/null +++ b/include/eve/detail/function/simd/riscv/make.hpp @@ -0,0 +1,148 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace eve::detail +{ +//================================================================================================ +// Enumerated make +//================================================================================================ +template +EVE_FORCEINLINE auto +make(eve::as>, Vs... vs) noexcept +requires rvv_abi> +{ + static_assert(sizeof...(Vs) == N::value, "[eve::make] - Invalid number of arguments"); + + std::array on_stack {static_cast(vs)...}; + return load(ignore_none, safe, as> {}, on_stack.data()); +} + +//================================================================================================ +// splat make +//================================================================================================ + +template +EVE_FORCEINLINE wide + make(eve::as>, T x) noexcept +requires rvv_abi> +{ + constexpr auto c = categorize>(); + constexpr auto lmul = riscv_rvv_dyn_::getLMUL(N::value); + if constexpr( match(c, category::float64) ) + { + if constexpr( lmul == 1 ) return __riscv_vfmv_v_f_f64m1(x, N::value); + else if constexpr( lmul == 2 ) return __riscv_vfmv_v_f_f64m2(x, N::value); + else if constexpr( lmul == 4 ) return __riscv_vfmv_v_f_f64m4(x, N::value); + else if constexpr( lmul == 8 ) return __riscv_vfmv_v_f_f64m8(x, N::value); + } + else if constexpr( match(c, category::float32) ) + { + if constexpr( lmul == -2 ) return __riscv_vfmv_v_f_f32mf2(x, N::value); + else if constexpr( lmul == 1 ) return __riscv_vfmv_v_f_f32m1(x, N::value); + else if constexpr( lmul == 2 ) return __riscv_vfmv_v_f_f32m2(x, N::value); + else if constexpr( lmul == 4 ) return __riscv_vfmv_v_f_f32m4(x, N::value); + else if constexpr( lmul == 8 ) return __riscv_vfmv_v_f_f32m8(x, N::value); + } + else if constexpr( match(c, category::int64) ) + { + if constexpr( lmul == 1 ) return __riscv_vmv_v_x_i64m1(x, N::value); + else if constexpr( lmul == 2 ) return __riscv_vmv_v_x_i64m2(x, N::value); + else if constexpr( lmul == 4 ) return __riscv_vmv_v_x_i64m4(x, N::value); + else if constexpr( lmul == 8 ) return __riscv_vmv_v_x_i64m8(x, N::value); + } + else if constexpr( match(c, category::uint64) ) + { + if constexpr( lmul == 1 ) return __riscv_vmv_v_x_u64m1(x, N::value); + else if constexpr( lmul == 2 ) return __riscv_vmv_v_x_u64m2(x, N::value); + else if constexpr( lmul == 4 ) return __riscv_vmv_v_x_u64m4(x, N::value); + else if constexpr( lmul == 8 ) return __riscv_vmv_v_x_u64m8(x, N::value); + } + else if constexpr( match(c, category::int32) ) + { + if constexpr( lmul == -2 ) return __riscv_vmv_v_x_i32mf2(x, N::value); + else if constexpr( lmul == 1 ) return __riscv_vmv_v_x_i32m1(x, N::value); + else if constexpr( lmul == 2 ) return __riscv_vmv_v_x_i32m2(x, N::value); + else if constexpr( lmul == 4 ) return __riscv_vmv_v_x_i32m4(x, N::value); + else if constexpr( lmul == 8 ) return __riscv_vmv_v_x_i32m8(x, N::value); + } + else if constexpr( match(c, category::uint32) ) + { + if constexpr( lmul == -2 ) return __riscv_vmv_v_x_u32mf2(x, N::value); + else if constexpr( lmul == 1 ) return __riscv_vmv_v_x_u32m1(x, N::value); + else if constexpr( lmul == 2 ) return __riscv_vmv_v_x_u32m2(x, N::value); + else if constexpr( lmul == 4 ) return __riscv_vmv_v_x_u32m4(x, N::value); + else if constexpr( lmul == 8 ) return __riscv_vmv_v_x_u32m8(x, N::value); + } + else if constexpr( match(c, category::int16) ) + { + if constexpr( lmul == -4 ) return __riscv_vmv_v_x_i16mf4(x, N::value); + else if constexpr( lmul == -2 ) return __riscv_vmv_v_x_i16mf2(x, N::value); + else if constexpr( lmul == 1 ) return __riscv_vmv_v_x_i16m1(x, N::value); + else if constexpr( lmul == 2 ) return __riscv_vmv_v_x_i16m2(x, N::value); + else if constexpr( lmul == 4 ) return __riscv_vmv_v_x_i16m4(x, N::value); + else if constexpr( lmul == 8 ) return __riscv_vmv_v_x_i16m8(x, N::value); + } + else if constexpr( match(c, category::uint16) ) + { + if constexpr( lmul == -4 ) return __riscv_vmv_v_x_u16mf4(x, N::value); + else if constexpr( lmul == -2 ) return __riscv_vmv_v_x_u16mf2(x, N::value); + else if constexpr( lmul == 1 ) return __riscv_vmv_v_x_u16m1(x, N::value); + else if constexpr( lmul == 2 ) return __riscv_vmv_v_x_u16m2(x, N::value); + else if constexpr( lmul == 4 ) return __riscv_vmv_v_x_u16m4(x, N::value); + else if constexpr( lmul == 8 ) return __riscv_vmv_v_x_u16m8(x, N::value); + } + else if constexpr( match(c, category::int8) ) + { + if constexpr( lmul == -8 ) return __riscv_vmv_v_x_i8mf8(x, N::value); + else if constexpr( lmul == -4 ) return __riscv_vmv_v_x_i8mf4(x, N::value); + else if constexpr( lmul == -2 ) return __riscv_vmv_v_x_i8mf2(x, N::value); + else if constexpr( lmul == 1 ) return __riscv_vmv_v_x_i8m1(x, N::value); + else if constexpr( lmul == 2 ) return __riscv_vmv_v_x_i8m2(x, N::value); + else if constexpr( lmul == 4 ) return __riscv_vmv_v_x_i8m4(x, N::value); + else if constexpr( lmul == 8 ) return __riscv_vmv_v_x_i8m8(x, N::value); + } + else if constexpr( match(c, category::uint8) ) + { + if constexpr( lmul == -8 ) return __riscv_vmv_v_x_u8mf8(x, N::value); + else if constexpr( lmul == -4 ) return __riscv_vmv_v_x_u8mf4(x, N::value); + else if constexpr( lmul == -2 ) return __riscv_vmv_v_x_u8mf2(x, N::value); + else if constexpr( lmul == 1 ) return __riscv_vmv_v_x_u8m1(x, N::value); + else if constexpr( lmul == 2 ) return __riscv_vmv_v_x_u8m2(x, N::value); + else if constexpr( lmul == 4 ) return __riscv_vmv_v_x_u8m4(x, N::value); + else if constexpr( lmul == 8 ) return __riscv_vmv_v_x_u8m8(x, N::value); + } +} + +//================================================================================================ +// logical cases +//================================================================================================ +template +EVE_FORCEINLINE logical> + make(as>>, Vs... vs) noexcept +requires rvv_abi> +{ + using bits_type = typename logical>::bits_type; + using e_t = element_type_t; + + auto bits = make(as {}, (vs ? static_cast(-1) : static_cast(0))...); + return bit_cast(self_greater(bits, static_cast(0)), as>> {}); +} + +} diff --git a/include/eve/detail/function/simd/riscv/slice.hpp b/include/eve/detail/function/simd/riscv/slice.hpp new file mode 100644 index 0000000000..d4908a0049 --- /dev/null +++ b/include/eve/detail/function/simd/riscv/slice.hpp @@ -0,0 +1,187 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include + +// Return first or second part of vector. + +namespace eve::detail +{ + +template +EVE_FORCEINLINE wide + riscv_lmul_trunc(wide a) noexcept +requires rvv_abi> +{ + constexpr auto c = categorize>(); + constexpr auto out_lmul = riscv_rvv_dyn_::getLMUL(N::split_type::value); + constexpr auto in_lmul = riscv_rvv_dyn_::getLMUL(N::value); + if constexpr( out_lmul == in_lmul ) return a.storage(); + else + { + static_assert(in_lmul > out_lmul); + + if constexpr( match(c, category::float64) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vlmul_trunc_f64m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_trunc_f64m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_trunc_f64m4(a); + } + else if constexpr( match(c, category::int64) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vlmul_trunc_i64m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_trunc_i64m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_trunc_i64m4(a); + } + else if constexpr( match(c, category::uint64) ) + { + if constexpr( out_lmul == 1 ) return __riscv_vlmul_trunc_u64m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_trunc_u64m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_trunc_u64m4(a); + } + else if constexpr( match(c, category::float32) ) + { + if constexpr( out_lmul == -2 ) return __riscv_vlmul_trunc_f32mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_trunc_f32m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_trunc_f32m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_trunc_f32m4(a); + } + else if constexpr( match(c, category::int32) ) + { + if constexpr( out_lmul == -2 ) return __riscv_vlmul_trunc_i32mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_trunc_i32m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_trunc_i32m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_trunc_i32m4(a); + } + else if constexpr( match(c, category::uint32) ) + { + if constexpr( out_lmul == -2 ) return __riscv_vlmul_trunc_u32mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_trunc_u32m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_trunc_u32m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_trunc_u32m4(a); + } + else if constexpr( match(c, category::int16) ) + { + if constexpr( out_lmul == -4 ) return __riscv_vlmul_trunc_i16mf4(a); + else if constexpr( out_lmul == -2 ) return __riscv_vlmul_trunc_i16mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_trunc_i16m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_trunc_i16m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_trunc_i16m4(a); + } + else if constexpr( match(c, category::uint16) ) + { + if constexpr( out_lmul == -4 ) return __riscv_vlmul_trunc_u16mf4(a); + else if constexpr( out_lmul == -2 ) return __riscv_vlmul_trunc_u16mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_trunc_u16m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_trunc_u16m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_trunc_u16m4(a); + } + else if constexpr( match(c, category::int8) ) + { + if constexpr( out_lmul == -8 ) return __riscv_vlmul_trunc_i8mf8(a); + else if constexpr( out_lmul == -4 ) return __riscv_vlmul_trunc_i8mf4(a); + else if constexpr( out_lmul == -2 ) return __riscv_vlmul_trunc_i8mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_trunc_i8m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_trunc_i8m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_trunc_i8m4(a); + } + else if constexpr( match(c, category::uint8) ) + { + if constexpr( out_lmul == -8 ) return __riscv_vlmul_trunc_u8mf8(a); + else if constexpr( out_lmul == -4 ) return __riscv_vlmul_trunc_u8mf4(a); + else if constexpr( out_lmul == -2 ) return __riscv_vlmul_trunc_u8mf2(a); + else if constexpr( out_lmul == 1 ) return __riscv_vlmul_trunc_u8m1(a); + else if constexpr( out_lmul == 2 ) return __riscv_vlmul_trunc_u8m2(a); + else if constexpr( out_lmul == 4 ) return __riscv_vlmul_trunc_u8m4(a); + } + } +} + +//================================================================================================ +// Single slice +//================================================================================================ + +template +EVE_FORCEINLINE wide + slice(wide a, lower_slice_t) noexcept +requires rvv_abi> +{ + constexpr auto in_lmul = riscv_rvv_dyn_::getLMUL(N::value); + constexpr auto out_lmul = riscv_rvv_dyn_::getLMUL(N::split_type::value); + if constexpr( in_lmul == out_lmul ) return a.storage(); + else + { + // we need to lower lmul - call lmul trunc. + return riscv_lmul_trunc(a); + } +} + +template +EVE_FORCEINLINE wide + slice(wide a, upper_slice_t) noexcept +requires rvv_abi> +{ + constexpr auto shift_size = N::split_type::value; + wide res = __riscv_vslidedown(a, shift_size, N::value); + constexpr auto in_lmul = riscv_rvv_dyn_::getLMUL(N::value); + constexpr auto out_lmul = riscv_rvv_dyn_::getLMUL(N::split_type::value); + if constexpr( in_lmul == out_lmul ) return res.storage(); + else + { + // we need to lower lmul - call lmul trunc. + return riscv_lmul_trunc(res); + } +} + +template +EVE_FORCEINLINE logical> + slice(logical> a, lower_slice_t) noexcept +requires rvv_abi> +{ + return bit_cast(a, as>> {}); +} + +template +EVE_FORCEINLINE logical> + slice(logical> a, upper_slice_t) noexcept +requires rvv_abi> +{ + auto bits_slice = a.bits().slice(upper_slice_t {}); + + logical, typename N::split_type>> neq = bits_slice != 0; + return bit_cast(neq, as>> {}); +} + +//================================================================================================ +// Both slice +//================================================================================================ +template +EVE_FORCEINLINE auto +slice(wide a) noexcept +requires rvv_abi> +{ + std::array, 2> that {slice(a, lower_), slice(a, upper_)}; + return that; +} + +template +EVE_FORCEINLINE auto +slice(logical> a) noexcept +requires rvv_abi> +{ + logical> lower = slice(a, lower_); + logical> upper = slice(a, upper_); + + std::array>, 2> that {lower, upper}; + return that; +} + +} diff --git a/include/eve/detail/function/simd/riscv/subscript.hpp b/include/eve/detail/function/simd/riscv/subscript.hpp new file mode 100644 index 0000000000..7a3098de27 --- /dev/null +++ b/include/eve/detail/function/simd/riscv/subscript.hpp @@ -0,0 +1,112 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include + +namespace eve::detail +{ +template +EVE_FORCEINLINE T +extract(wide const& v, std::size_t i) noexcept +requires rvv_abi> +{ + auto OnFirstNeeded = __riscv_vslidedown_tu(v, v, i, N::value); + if constexpr( std::is_floating_point_v ) return __riscv_vfmv_f(OnFirstNeeded); + else return __riscv_vmv_x(OnFirstNeeded); +} + +template +EVE_FORCEINLINE void +insert(wide& v, std::size_t i, std::convertible_to auto x) noexcept +requires rvv_abi> +{ + // get mask with 1 on i'th element. + auto mask = rvv_one(i); + v = if_else(mask, static_cast(x), v); +} + +template +EVE_FORCEINLINE logical + extract(logical> const &v, std::size_t i) noexcept +requires rvv_abi> +{ + using u_m1_cardinal = typename riscv_rvv_dyn_::m1_fixed; + using inter_type = wide; + auto uint_wide = bit_cast(v, as {}); + constexpr size_t sew = sizeof(unsigned) * 8; + size_t expected_element = i / sew; + auto val = extract(uint_wide, expected_element); + bool to_ret = std::bitset(val)[i % sew]; + return to_ret; +} + +// For riscv logical we can not rely on common algorithm. +template +EVE_FORCEINLINE logical + extract(logical> const &v, std::size_t i) noexcept +{ + using Wide = logical>; + using abi_t = typename Wide::abi_type; + + if constexpr( has_bundle_abi_v ) + { + return kumi::apply([=](auto const&...m) { return typename Wide::value_type {m.get(i)...}; }, + v.storage()); + } + else if constexpr( has_aggregated_abi_v ) + { + constexpr auto sz = Wide::size() / 2; + if( i < sz ) return extract(v.slice(lower_), i); + else return extract(v.slice(upper_), i - sz); + } + else { static_assert("[eve riscv] -- should not be called"); } +} +// For riscv logical we can not rely on common algorithm. +template +EVE_FORCEINLINE void +insert(logical>& p, std::size_t i, Value v) noexcept +{ + using Wide = logical>; + if constexpr( has_aggregated_abi_v ) + { + constexpr auto sz = Wide::size() / 2; + auto [l, h] = p.slice(); + + if( i < sz ) insert(l, i, v); + else insert(h, i - sz, v); + + p = Wide {l, h}; + } + else if constexpr( has_emulated_abi_v ) { p.storage()[i] = v; } + else if constexpr( has_bundle_abi_v ) + { + kumi::for_each([i](auto& m, auto w) { m.set(i, w); }, p.storage(), v); + } + else static_assert("[eve riscv] -- should not be called"); +} + +template +EVE_FORCEINLINE void +insert(logical>& v, std::size_t i, std::convertible_to auto x) noexcept +requires rvv_abi> +{ + using u_m1_cardinal = typename riscv_rvv_dyn_::m1_fixed; + using inter_type = wide; + auto uint_wide = bit_cast(v, as {}); + constexpr size_t sew = sizeof(unsigned) * 8; + size_t expected_element = i / sew; + auto val = extract(uint_wide, expected_element); + auto new_bits = std::bitset(val); + new_bits[i % sew] = static_cast(x); + unsigned new_val = new_bits.to_ullong(); + insert(uint_wide, expected_element, new_val); + v = bit_cast(uint_wide, as>> {}); +} + +} diff --git a/include/eve/detail/function/simd/riscv/to_logical.hpp b/include/eve/detail/function/simd/riscv/to_logical.hpp new file mode 100644 index 0000000000..7658a20d52 --- /dev/null +++ b/include/eve/detail/function/simd/riscv/to_logical.hpp @@ -0,0 +1,26 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include +#include + +namespace eve::detail +{ +template +EVE_FORCEINLINE auto +to_logical(wide const& v) noexcept +requires rvv_abi> +{ + return v != static_cast(0); +} + +} diff --git a/include/eve/detail/function/slice.hpp b/include/eve/detail/function/slice.hpp index 32161a975d..f858eb9c18 100644 --- a/include/eve/detail/function/slice.hpp +++ b/include/eve/detail/function/slice.hpp @@ -25,3 +25,7 @@ #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/function/subscript.hpp b/include/eve/detail/function/subscript.hpp index 7d6af94dbe..799617e6a2 100644 --- a/include/eve/detail/function/subscript.hpp +++ b/include/eve/detail/function/subscript.hpp @@ -18,3 +18,7 @@ #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/function/to_logical.hpp b/include/eve/detail/function/to_logical.hpp index 9547656b2c..5b6bda5809 100644 --- a/include/eve/detail/function/to_logical.hpp +++ b/include/eve/detail/function/to_logical.hpp @@ -25,3 +25,7 @@ #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/spy.hpp b/include/eve/detail/spy.hpp index 7b7b8d6d3a..2752c87091 100644 --- a/include/eve/detail/spy.hpp +++ b/include/eve/detail/spy.hpp @@ -10,21 +10,24 @@ #include namespace spy::detail { - enum class archs { undefined_ = -1 - , x86_ = 10, amd64_ = 11 - , ppc_ = 20, arm_ = 30 - , wasm_ = 40 - }; - template struct arch_info +enum class archs { + undefined_ = -1, + x86_ = 10, + amd64_ = 11, + ppc_ = 20, + arm_ = 30, + wasm_ = 40, + riscv_ = 50 +}; +template struct arch_info +{ + static constexpr archs vendor = Arch; + inline constexpr explicit operator bool() const noexcept; + template constexpr bool operator==(arch_info const&) const noexcept { - static constexpr archs vendor = Arch; - inline constexpr explicit operator bool() const noexcept; - template - constexpr bool operator==(arch_info const&) const noexcept - { - return A2 == vendor; - } - }; + return A2 == vendor; + } +}; template std::ostream& operator<<(std::ostream& os, arch_info const&) { @@ -33,6 +36,7 @@ namespace spy::detail if(Arch == archs::ppc_ ) return os << "PowerPC"; if(Arch == archs::arm_ ) return os << "ARM"; if(Arch == archs::wasm_ ) return os << "WebAssembly"; + if( Arch == archs::riscv_ ) return os << "RISCV"; return os << "Undefined Architecture"; } } @@ -58,6 +62,9 @@ namespace spy #elif defined(__wasm__) using arch_type = detail::arch_info; #define SPY_ARCH_IS_WASM +#elif defined(__riscv) +# define SPY_ARCH_IS_RISCV +using arch_type = detail::arch_info; #else #define SPY_ARCH_IS_UNKNOWN using arch_type = detail::arch_info; @@ -79,6 +86,7 @@ namespace spy constexpr inline auto ppc_ = detail::arch_info{}; constexpr inline auto arm_ = detail::arch_info{}; constexpr inline auto wasm_ = detail::arch_info{}; + constexpr inline auto riscv_ = detail::arch_info {}; } #include namespace spy::detail @@ -808,6 +816,11 @@ namespace avx512 # endif # endif #endif +#if !defined(SPY_SIMD_DETECTED) && defined(__riscv) && defined(__riscv_vector) +# define SPY_SIMD_DETECTED ::spy::detail::simd_version::rvv_ +# define SPY_SIMD_IS_RISCV_FLEXIBLE +# define SPY_SIMD_VENDOR ::spy::detail::simd_isa::riscv_ +#endif #if !defined(SPY_SIMD_DETECTED) && defined(__aarch64__) # define SPY_SIMD_IS_ARM_ASIMD # define SPY_SIMD_DETECTED ::spy::detail::simd_version::asimd_ @@ -872,48 +885,80 @@ namespace avx512 #endif namespace spy::detail { - enum class simd_isa { undefined_ = -1, x86_ = 1000, ppc_ = 2000, arm_ = 3000, wasm_ = 4000 }; - enum class simd_version { undefined_ = -1 - , sse1_ = 1110, sse2_ = 1120, sse3_ = 1130, ssse3_ = 1131 - , sse41_ = 1141, sse42_ = 1142 - , avx_ = 1201, avx2_ = 1202 - , avx512_ = 1300 - , vmx_ = 2000 - , vmx_2_03_ = 2203, vmx_2_05_ = 2205, vmx_2_06_ = 2206 - , vmx_2_07_ = 2207, vmx_3_00_ = 2300, vmx_3_01_ = 2301 - , vsx_ = 3000 - , vsx_2_06_ = 3206, vsx_2_07_ = 3207, vsx_3_00_ = 3300, vsx_3_01_ = 3301 - , neon_ = 4001, asimd_ = 4002 - , sve_ = 5000, fixed_sve_ = 5100 - , simd128_ = 6000 - }; - template - struct simd_info +enum class simd_isa { + undefined_ = -1, + x86_ = 1000, + ppc_ = 2000, + arm_ = 3000, + wasm_ = 4000, + riscv_ = 5000 +}; +enum class simd_version { + undefined_ = -1, + sse1_ = 1110, + sse2_ = 1120, + sse3_ = 1130, + ssse3_ = 1131, + sse41_ = 1141, + sse42_ = 1142, + avx_ = 1201, + avx2_ = 1202, + avx512_ = 1300, + vmx_ = 2000, + vmx_2_03_ = 2203, + vmx_2_05_ = 2205, + vmx_2_06_ = 2206, + vmx_2_07_ = 2207, + vmx_3_00_ = 2300, + vmx_3_01_ = 2301, + vsx_ = 3000, + vsx_2_06_ = 3206, + vsx_2_07_ = 3207, + vsx_3_00_ = 3300, + vsx_3_01_ = 3301, + neon_ = 4001, + asimd_ = 4002, + sve_ = 5000, + fixed_sve_ = 5100, + simd128_ = 6000, + rvv_ = 7000 +}; +template +struct simd_info +{ + static constexpr auto isa = InsSetArch; + static constexpr auto version = Version; + static constexpr std::ptrdiff_t width = []() { - static constexpr auto isa = InsSetArch; - static constexpr auto version = Version; - static constexpr std::ptrdiff_t width = []() + if constexpr( Version == simd_version::simd128_ + || (Version >= simd_version::sse1_ && Version <= simd_version::sse42_) + || Version == simd_version::neon_ || Version == simd_version::asimd_ + || (Version >= simd_version::vmx_2_03_ && Version <= simd_version::vsx_3_01_) ) + return 128; + else if constexpr( Version == simd_version::avx_ || Version == simd_version::avx2_ ) return 256; + else if constexpr( Version == simd_version::avx512_ ) return 512; + else if constexpr( Version == simd_version::fixed_sve_ ) { - if constexpr( Version == simd_version::simd128_ - || (Version >= simd_version::sse1_ && Version <= simd_version::sse42_) - || Version == simd_version::neon_ || Version == simd_version::asimd_ - || (Version >= simd_version::vmx_2_03_ && Version <= simd_version::vsx_3_01_) - ) return 128; - else if constexpr(Version == simd_version::avx_ || Version == simd_version::avx2_) return 256; - else if constexpr(Version == simd_version::avx512_ ) return 512; - else if constexpr(Version == simd_version::fixed_sve_ ) - { #if defined(__ARM_FEATURE_SVE_BITS) - return __ARM_FEATURE_SVE_BITS; + return __ARM_FEATURE_SVE_BITS; #else - return -1; + return -1; #endif - } - else return -1; - }(); - friend std::ostream& operator<<(std::ostream& os, simd_info const&) + } + else if constexpr( Version == simd_version::rvv_ ) { - if constexpr ( Version == simd_version::simd128_ ) os << "WASM SIMD128"; +#if defined(__riscv_v_fixed_vlen) + return __riscv_v_fixed_vlen; +#else + return -1; +#endif + } + else return -1; + }(); + friend std::ostream& operator<<(std::ostream& os, simd_info const&) + { + if constexpr( Version == simd_version::simd128_ ) os << "WASM SIMD128"; else if constexpr ( Version == simd_version::sse1_ ) os << "X86 SSE"; else if constexpr ( Version == simd_version::sse2_ ) os << "X86 SSE2"; else if constexpr ( Version == simd_version::sse3_ ) os << "X86 SSE3"; @@ -939,12 +984,13 @@ namespace spy::detail else if constexpr ( Version == simd_version::fixed_sve_) os << "ARM SVE (" << simd_info::width << " bits)"; + else if constexpr( Version == simd_version::rvv_ ) os << "RISCV SVE (dyn. bits)"; else return os << "Undefined SIMD instructions set"; if constexpr (spy::supports::fma_) os << " (with FMA3 support)"; if constexpr (spy::supports::fma4_) os << " (with FMA4 support)"; if constexpr (spy::supports::xop_) os << " (with XOP support)"; return os; - } + } template constexpr bool operator==(simd_info const&) const noexcept { return OInsSetArch == InsSetArch; } template @@ -979,7 +1025,7 @@ namespace spy::detail { return (Version >= OVersion) && (OInsSetArch == InsSetArch); } - }; +}; } namespace spy { @@ -1027,6 +1073,9 @@ namespace spy constexpr inline auto asimd_ = arm_simd_info{}; constexpr inline auto sve_ = arm_simd_info{}; constexpr inline auto fixed_sve_ = arm_simd_info{}; + template + using riscv_simd_info = detail::simd_info; + constexpr inline auto rvv_ = riscv_simd_info {}; } #include namespace spy::detail diff --git a/include/eve/module/core/regular/if_else.hpp b/include/eve/module/core/regular/if_else.hpp index a26639e7bf..21211e2086 100644 --- a/include/eve/module/core/regular/if_else.hpp +++ b/include/eve/module/core/regular/if_else.hpp @@ -81,3 +81,7 @@ EVE_MAKE_CALLABLE(if_else_, if_else); #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/impl/simd/riscv/if_else.hpp b/include/eve/module/core/regular/impl/simd/riscv/if_else.hpp new file mode 100644 index 0000000000..f8375f0182 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/if_else.hpp @@ -0,0 +1,63 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include + +namespace eve::detail +{ + +template +EVE_FORCEINLINE wide + if_else_(EVE_SUPPORTS(rvv_), + logical> const &c, + wide const &vt, + wide const &vf) noexcept +requires rvv_abi> +{ + return __riscv_vmerge_tu(vt, vf, vt, c, N::value); +} + +template +EVE_FORCEINLINE wide + if_else_(EVE_SUPPORTS(rvv_), + logical> const &c, + T const &vt, + wide const &vf) noexcept +requires rvv_abi> +{ + if constexpr( std::is_floating_point_v ) return __riscv_vfmerge(vf, vt, c, N::value); + else return __riscv_vmerge(vf, vt, c, N::value); +} + +template +EVE_FORCEINLINE wide +if_else_(EVE_SUPPORTS(rvv_), logical> const& c, T const& vt, T const& vf) noexcept +requires rvv_abi> +{ + auto fwide = make(as> {}, vf); + return if_else(c, vt, fwide); +} + +template +EVE_FORCEINLINE logical> + if_else_(EVE_SUPPORTS(rvv_), + logical> const &c, + logical> const &vt, + logical> const &vf) noexcept +requires rvv_abi> +{ + auto needed_vt = __riscv_vmand(c, vt, N::value); + auto neg_mask = __riscv_vmnot(c, N::value); + auto needed_vf = __riscv_vmand(neg_mask, vf, N::value); + return __riscv_vmor(needed_vt, needed_vf, N::value); +} + +} diff --git a/test/test.hpp b/test/test.hpp index 512da8f328..2584ba0d94 100644 --- a/test/test.hpp +++ b/test/test.hpp @@ -158,6 +158,9 @@ namespace eve::test case 128: return {0,6,5,0,4,0,0,0,3}; case 256: return {0,7,6,0,5,0,0,0,4}; case 512: return {0,8,7,0,6,0,0,0,5}; + case 1024: return {0, 6, 5, 0, 4, 0, 0, 0, 3}; + // Unfortunately, this is too long on simulation + // case 1024: return {0,9,8,0,7,0,0,0,6}; default : return {}; }; }; @@ -278,7 +281,8 @@ namespace tts // Add garbage at the end of sub-native registers // For emulated type, there is no such gap so we don't do anything - if constexpr( (W::size() < eve::fundamental_cardinal_v) && !eve::has_emulated_abi_v ) + if constexpr( (W::size() < eve::fundamental_cardinal_v)&&!eve::has_emulated_abi_v + && eve::current_api != eve::rvv ) { using p_t = eve::as_arithmetic_t>; using ftype = eve::as_wide_t>; @@ -294,6 +298,24 @@ namespace tts return W(these.storage()); } + else if constexpr( (W::size() < eve::fundamental_cardinal_v)&&!eve::has_emulated_abi_v + && eve::current_api == eve::rvv ) + { + // for riscv we can not just cast ftypes from storage type, so use bitcast. + using p_t = eve::as_arithmetic_t>; + using ftype = eve::as_wide_t>; + + ftype these = eve::bit_cast(data, eve::as {}); + + // Compute a recognizable filler + for( std::ptrdiff_t i = data.size(); i < these.size(); ++i ) + { + p_t filler = eve::Constant(0xDEADBEEFBABE0000)>() + p_t(i); + these.set(i, eve::bit_cast(filler, eve::as())); + } + + return eve::bit_cast(these, eve::as {}); + } else { return data; diff --git a/test/unit/arch/is_supported.cpp b/test/unit/arch/is_supported.cpp index 99996b12a9..38e4a7cd2b 100644 --- a/test/unit/arch/is_supported.cpp +++ b/test/unit/arch/is_supported.cpp @@ -60,6 +60,10 @@ TTS_CASE("Static detections of API") std::cout << "ARM SIMD extensions\n"; std::cout << "NEON : " << std::boolalpha << (eve::current_api >= eve::neon ) << "\n"; std::cout << "ASIMD : " << std::boolalpha << (eve::current_api >= eve::asimd ) << "\n"; + std::cout << "========================\n"; + std::cout << "RISCV SIMD extensions\n"; + std::cout << "RVV : " << std::boolalpha << (eve::current_api == eve::rvv ) << "\n"; + std::cout << "\n"; std::cout << "\n"; TTS_PASS("All static detections - done"); @@ -90,6 +94,9 @@ TTS_CASE("Dynamic detections of API") std::cout << "ARM SIMD extensions\n"; std::cout << "NEON : " << std::boolalpha << eve::is_supported(eve::neon) << "\n"; std::cout << "ASIMD : " << std::boolalpha << eve::is_supported(eve::asimd) << "\n"; + std::cout << "========================\n"; + std::cout << "RISCV SIMD extensions\n"; + std::cout << "RVV : " << std::boolalpha << eve::is_supported(eve::rvv) << "\n"; TTS_PASS("All dynamic detections - done"); }; diff --git a/test/unit/arch/top_bits.cpp b/test/unit/arch/top_bits.cpp index e3cc9ed51b..9ea1a84c85 100644 --- a/test/unit/arch/top_bits.cpp +++ b/test/unit/arch/top_bits.cpp @@ -38,12 +38,15 @@ TTS_CASE_TPL( "Check top bits raw type", eve::test::simd::all_types) { using v_t = eve::element_type_t; using logical = eve::logical; + using c_t = eve::cardinal_t; + using rvv_logical_type = eve::logical>; using tb_storage = typename eve::top_bits::storage_type; using ABI = typename logical::abi_type; if constexpr (eve::has_aggregated_abi_v) TTS_EXPECT(expect_array(tb_storage{})); else if constexpr (std::same_as) TTS_TYPE_IS(tb_storage, std::uint64_t); else if constexpr (eve::current_api >= eve::sve ) TTS_TYPE_IS(tb_storage, eve::logical>); + else if constexpr( eve::current_api == eve::rvv ) TTS_TYPE_IS(tb_storage, rvv_logical_type); else if constexpr (eve::current_api >= eve::avx512 ) { constexpr std::ptrdiff_t min_size = sizeof(v_t) == 1 ? 16 : 8; From d4092f526ed778124828fd066f9898caab99de23 Mon Sep 17 00:00:00 2001 From: Tetyushkin Ivan Date: Fri, 16 Aug 2024 13:11:34 +0300 Subject: [PATCH 2/8] [riscv] Add support for unit.internals testing With this patch all unit.internals tests are passed for RISC-V. --- test/unit/internals/category.cpp | 66 +++++++++++++++++++++----------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/test/unit/internals/category.cpp b/test/unit/internals/category.cpp index f15930ee93..ce31c46eb9 100644 --- a/test/unit/internals/category.cpp +++ b/test/unit/internals/category.cpp @@ -70,7 +70,7 @@ TTS_CASE_TPL("Test category matching for std::uint64", natives) { // All types below fundamental cardinal categorize with the same # of lanes using enum eve::detail::category; - constexpr auto lanes = + [[maybe_unused]] constexpr auto lanes = static_cast(eve::fundamental_cardinal_v); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), integer_)); @@ -91,14 +91,18 @@ TTS_CASE_TPL("Test category matching for float", natives) { // All types below fundamental cardinal categorize with the same # of lanes using enum eve::detail::category; - constexpr auto lanes = static_cast(eve::fundamental_cardinal_v); + [[maybe_unused]] constexpr auto lanes = + static_cast(eve::fundamental_cardinal_v); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), float_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), signed_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), size32_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), float32)); - TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); - TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), float32 | lanes); + if constexpr( eve::current_api != eve::rvv ) + { + TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); + TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), float32 | lanes); + } } else { TTS_PASS("wide is not native and therefore can't be categorized."); } }; @@ -110,7 +114,7 @@ TTS_CASE_TPL("Test category matching for std::int32", natives) { // All types below fundamental cardinal categorize with the same # of lanes using enum eve::detail::category; - constexpr auto lanes = + [[maybe_unused]] constexpr auto lanes = static_cast(eve::fundamental_cardinal_v); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), integer_)); @@ -118,8 +122,11 @@ TTS_CASE_TPL("Test category matching for std::int32", natives) TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), size32_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), int_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), int32)); - TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); - TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), int32 | lanes); + if constexpr( eve::current_api != eve::rvv ) + { + TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); + TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), int32 | lanes); + } } else { TTS_PASS("wide is not native and therefore can't be categorized."); } }; @@ -131,7 +138,7 @@ TTS_CASE_TPL("Test category matching for std::uint32", natives) { // All types below fundamental cardinal categorize with the same # of lanes using enum eve::detail::category; - constexpr auto lanes = + [[maybe_unused]] constexpr auto lanes = static_cast(eve::fundamental_cardinal_v); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), integer_)); @@ -139,8 +146,11 @@ TTS_CASE_TPL("Test category matching for std::uint32", natives) TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), size32_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), uint_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), uint32)); - TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); - TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), uint32 | lanes); + if constexpr( eve::current_api != eve::rvv ) + { + TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); + TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), uint32 | lanes); + } } else { TTS_PASS("wide is not native and therefore can't be categorized."); } }; @@ -152,7 +162,7 @@ TTS_CASE_TPL("Test category matching for std::int16", natives) { // All types below fundamental cardinal categorize with the same # of lanes using enum eve::detail::category; - constexpr auto lanes = + [[maybe_unused]] constexpr auto lanes = static_cast(eve::fundamental_cardinal_v); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), integer_)); @@ -160,8 +170,11 @@ TTS_CASE_TPL("Test category matching for std::int16", natives) TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), size16_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), int_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), int16)); - TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); - TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), int16 | lanes); + if constexpr( eve::current_api != eve::rvv ) + { + TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); + TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), int16 | lanes); + } } else { TTS_PASS("wide is not native and therefore can't be categorized."); } }; @@ -173,7 +186,7 @@ TTS_CASE_TPL("Test category matching for std::uint16", natives) { // All types below fundamental cardinal categorize with the same # of lanes using enum eve::detail::category; - constexpr auto lanes = + [[maybe_unused]] constexpr auto lanes = static_cast(eve::fundamental_cardinal_v); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), integer_)); @@ -181,8 +194,11 @@ TTS_CASE_TPL("Test category matching for std::uint16", natives) TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), size16_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), uint_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), uint16)); - TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); - TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), uint16 | lanes); + if constexpr( eve::current_api != eve::rvv ) + { + TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); + TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), uint16 | lanes); + } } else { TTS_PASS("wide is not native and therefore can't be categorized."); } }; @@ -194,7 +210,7 @@ TTS_CASE_TPL("Test category matching for std::int8", natives) { // All types below fundamental cardinal categorize with the same # of lanes using enum eve::detail::category; - constexpr auto lanes = + [[maybe_unused]] constexpr auto lanes = static_cast(eve::fundamental_cardinal_v); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), integer_)); @@ -202,8 +218,11 @@ TTS_CASE_TPL("Test category matching for std::int8", natives) TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), size8_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), int_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), int8)); - TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); - TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), int8 | lanes); + if constexpr( eve::current_api != eve::rvv ) + { + TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); + TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), int8 | lanes); + } } else { TTS_PASS("wide is not native and therefore can't be categorized."); } }; @@ -215,7 +234,7 @@ TTS_CASE_TPL("Test category matching for std::uint8", natives) { // All types below fundamental cardinal categorize with the same # of lanes using enum eve::detail::category; - constexpr auto lanes = + [[maybe_unused]] constexpr auto lanes = static_cast(eve::fundamental_cardinal_v); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), integer_)); @@ -223,8 +242,11 @@ TTS_CASE_TPL("Test category matching for std::uint8", natives) TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), size8_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), uint_)); TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), uint8)); - TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); - TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), uint8 | lanes); + if constexpr( eve::current_api != eve::rvv ) + { + TTS_CONSTEXPR_EXPECT(match(eve::detail::categorize(), lanes)); + TTS_CONSTEXPR_EQUAL(eve::detail::categorize(), uint8 | lanes); + } } else { TTS_PASS("wide is not native and therefore can't be categorized."); } }; From 666cbfebf16ebc02bed0d0d6f123c8b979e4f81a Mon Sep 17 00:00:00 2001 From: Tetyushkin Ivan Date: Fri, 16 Aug 2024 13:38:49 +0300 Subject: [PATCH 3/8] [riscv] Add support unit.core testing With this patch all unit.core tests passed for RISC-V. --- include/eve/detail/shuffle_v2/shuffle_l2.hpp | 4 + .../shuffle_v2/simd/riscv/shuffle_l2.hpp | 53 +++++++ include/eve/module/core/regular/convert.hpp | 4 + .../core/regular/impl/simd/riscv/convert.hpp | 150 ++++++++++++++++++ .../regular/impl/simd/riscv/slide_right.hpp | 49 ++++++ .../core/regular/impl/simd/riscv/store.hpp | 58 +++++++ .../eve/module/core/regular/slide_right.hpp | 4 + include/eve/module/core/regular/store.hpp | 4 + 8 files changed, 326 insertions(+) create mode 100644 include/eve/detail/shuffle_v2/simd/riscv/shuffle_l2.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/convert.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/slide_right.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/store.hpp diff --git a/include/eve/detail/shuffle_v2/shuffle_l2.hpp b/include/eve/detail/shuffle_v2/shuffle_l2.hpp index 5c004043b7..1fd1325ba8 100644 --- a/include/eve/detail/shuffle_v2/shuffle_l2.hpp +++ b/include/eve/detail/shuffle_v2/shuffle_l2.hpp @@ -40,3 +40,7 @@ EVE_CALLABLE_API(shuffle_l2_, shuffle_l2) #if defined(EVE_INCLUDE_POWERPC_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/detail/shuffle_v2/simd/riscv/shuffle_l2.hpp b/include/eve/detail/shuffle_v2/simd/riscv/shuffle_l2.hpp new file mode 100644 index 0000000000..ddc15c2695 --- /dev/null +++ b/include/eve/detail/shuffle_v2/simd/riscv/shuffle_l2.hpp @@ -0,0 +1,53 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +namespace eve::detail +{ +template +EVE_FORCEINLINE auto +shuffle_l2_(EVE_SUPPORTS(rvv_), P p, fixed g, wide x) +{ + using int_ty = as_integer_t; + if constexpr( G == N::value ) + { + wide, N> offsets(P::idxs); + wide res = __riscv_vrgather_tu(x, x, offsets.storage(), N::value); + return res; + } + else + { + std::array ids; + for( auto id = 0; id < N::value; ++id ) + { + size_t id_from_pattern = id / G; + ids[id] = P::idxs[id_from_pattern] * G + id % G; + } + wide, N> offsets(ids); + wide res = __riscv_vrgather_tu(x, x, offsets.storage(), N::value); + return res; + } +} + +template +EVE_FORCEINLINE auto +shuffle_l2_(EVE_SUPPORTS(rvv_), P p, fixed g, wide x, wide y) +{ + if constexpr( !idxm::is_blend(P::idxs, N::value / G) ) return no_matching_shuffle; + else + { + eve::logical> m([](int i, int size) { return P::idxs[i / G] < size / G; }); + auto ret = if_else(m, x, y); + // there we not have zeroing indexes, add zeros. + wide zeros(0); + eve::logical> not_zero([](int i, int size) { return P::idxs[i / G] != na_; }); + return if_else(not_zero, ret, zeros); + } +} + +} diff --git a/include/eve/module/core/regular/convert.hpp b/include/eve/module/core/regular/convert.hpp index 43e6cf51e0..fc6f7da51a 100644 --- a/include/eve/module/core/regular/convert.hpp +++ b/include/eve/module/core/regular/convert.hpp @@ -83,3 +83,7 @@ EVE_CALLABLE_API(convert_, convert) #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/impl/simd/riscv/convert.hpp b/include/eve/module/core/regular/impl/simd/riscv/convert.hpp new file mode 100644 index 0000000000..042f31484c --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/convert.hpp @@ -0,0 +1,150 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include +#include + +namespace eve::detail +{ +template +EVE_FORCEINLINE wide + convert_impl(EVE_SUPPORTS(rvv_), wide const &v, as const &tgt) noexcept +requires rvv_abi> && (sizeof(T) == sizeof(U)) +{ + constexpr auto in_c = categorize>(); + constexpr auto out_c = categorize>(); + if constexpr( match(in_c, category::float_) ) + { + if constexpr( match(out_c, category::int_) ) return __riscv_vfcvt_rtz_x(v, N::value); + else if constexpr( match(out_c, category::uint_) ) return __riscv_vfcvt_rtz_xu(v, N::value); + } + else if constexpr( match(out_c, category::float_) ) { return __riscv_vfcvt_f(v, N::value); } + else return bit_cast(v, as> {}); +} + +// Narrow +template +EVE_FORCEINLINE wide + convert_impl(EVE_SUPPORTS(rvv_), wide const &v, as const &tgt) noexcept +requires rvv_abi> && (sizeof(T) > sizeof(U)) +{ + constexpr auto in_c = categorize>(); + constexpr auto out_c = categorize>(); + + constexpr int Scale = sizeof(T) / sizeof(U); + if constexpr( Scale == 2 ) + { + // we can perform operation. + if constexpr( match(in_c, category::float_) ) + { + if constexpr( match(out_c, category::int_) ) return __riscv_vfncvt_rtz_x(v, N::value); + else if constexpr( match(out_c, category::uint_) ) return __riscv_vfncvt_rtz_xu(v, N::value); + else if constexpr( match(out_c, category::float_) ) return __riscv_vfncvt_f(v, N::value); + } + if constexpr( match(in_c, category::int_) ) + { + if constexpr( match(out_c, category::int_) ) return __riscv_vncvt_x(v, N::value); + else if constexpr( match(out_c, category::uint_) ) + { + auto part_done = convert(v, as> {}); + return convert(part_done, tgt); + } + else if constexpr( match(out_c, category::float_) ) return __riscv_vfncvt_f(v, N::value); + } + if constexpr( match(in_c, category::uint_) ) + { + if constexpr( match(out_c, category::uint_) ) return __riscv_vncvt_x(v, N::value); + else if constexpr( match(out_c, category::int_) ) + { + auto part_done = convert(v, as> {}); + return convert(part_done, tgt); + } + else if constexpr( match(out_c, category::float_) ) return __riscv_vfncvt_f(v, N::value); + } + } + else + { + // we need to cast to the lower type. Then call convert again. + using down_ty = downgrade_t; + if constexpr( sizeof(T) == sizeof(down_ty) ) + { + // this means downgrade_t reached limit (for fp for example). + using int_ty = as_integer_t; + auto part_done = convert(v, as {}); + return convert(part_done, tgt); + } + else + { + auto part_done = convert(v, as {}); + return convert(part_done, tgt); + } + } +} +// Widen +template +EVE_FORCEINLINE wide + convert_impl(EVE_SUPPORTS(rvv_), wide const &v, as const &tgt) noexcept +requires rvv_abi> && (sizeof(T) < sizeof(U)) + && (riscv_rvv_dyn_::getLMUL(N::value) <= 8) +{ + constexpr auto in_c = categorize>(); + constexpr auto out_c = categorize>(); + constexpr int Scale = sizeof(U) / sizeof(T); + if constexpr( Scale == 2 ) + { + if constexpr( match(in_c, category::int_) ) + { + if constexpr( match(out_c, category::int_) ) return __riscv_vwcvt_x(v, N::value); + else if constexpr( match(out_c, category::uint_) ) + { + using cast_ty = as_integer_t; + auto casted = bit_cast(v, as> {}); + return convert(casted, tgt); + } + else if constexpr( match(out_c, category::float_) ) return __riscv_vfwcvt_f(v, N::value); + } + else if constexpr( match(in_c, category::uint_) ) + { + if constexpr( match(out_c, category::uint_) ) return __riscv_vwcvtu_x(v, N::value); + else if constexpr( match(out_c, category::int_) ) + { + using cast_ty = as_integer_t; + auto casted = bit_cast(v, as> {}); + return convert(casted, tgt); + } + else if constexpr( match(out_c, category::float_) ) return __riscv_vfwcvt_f(v, N::value); + } + else if constexpr( match(in_c, category::float_) ) + { + if constexpr( match(out_c, category::int_) ) return __riscv_vfwcvt_rtz_x(v, N::value); + else if constexpr( match(out_c, category::uint_) ) return __riscv_vfwcvt_rtz_xu(v, N::value); + else if constexpr( match(out_c, category::float_) ) return __riscv_vfwcvt_f(v, N::value); + } + } + else + { + // we need to have widen operation before. + using bigger_t = upgrade_t; + auto part_done = convert(v, as {}); + return convert(part_done, tgt); + } +} + +template +EVE_FORCEINLINE logical> +convert_impl(EVE_SUPPORTS(rvv_), logical> const& v, as> const& tgt) noexcept +requires rvv_abi> +{ + return convert(v.bits(), as {}) != 0; +} + +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/slide_right.hpp b/include/eve/module/core/regular/impl/simd/riscv/slide_right.hpp new file mode 100644 index 0000000000..2a5d29136c --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/slide_right.hpp @@ -0,0 +1,49 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +namespace eve::detail +{ +template +EVE_FORCEINLINE auto +slide_right_(EVE_SUPPORTS(rvv_), logical> v, index_t) noexcept +requires(Shift > 0 && Shift < N::value && rvv_abi>) +{ + logical> res {0}; + for( size_t id = Shift; id < N::value; ++id ) + { + auto val = v.get((id + Shift) % N::value); + res.set(id, val); + } + return res; +} +template +EVE_FORCEINLINE auto +slide_right_(EVE_SUPPORTS(rvv_), + logical> v, + logical> w, + index_t) noexcept +requires(Shift > 0 && Shift < N::value && rvv_abi>) +{ + logical> res {0}; + for( size_t id = 0; (id) < N::value; ++id ) + { + if( id < Shift ) + { + auto val = v.get(N::value - Shift + id); + res.set(id, val); + } + else + { + auto val = w.get(id - Shift); + res.set(id, val); + } + } + return res; +} +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/store.hpp b/include/eve/module/core/regular/impl/simd/riscv/store.hpp new file mode 100644 index 0000000000..33bea95c44 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/store.hpp @@ -0,0 +1,58 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once +#include +#include +#include +#include +#include + +namespace eve::detail +{ + +template> Ptr> +EVE_FORCEINLINE void +riscv_store(logical> mask, wide v, Ptr p) +requires rvv_abi> +{ + constexpr auto c = categorize>(); + if constexpr( match(c, category::size8_) ) return __riscv_vse8(mask, p, v, N::value); + if constexpr( match(c, category::size16_) ) return __riscv_vse16(mask, p, v, N::value); + if constexpr( match(c, category::size32_) ) return __riscv_vse32(mask, p, v, N::value); + if constexpr( match(c, category::size64_) ) return __riscv_vse64(mask, p, v, N::value); +} + +// Regular store +template> Ptr> +EVE_FORCEINLINE void +store_(EVE_SUPPORTS(rvv_), wide v, Ptr p) +requires(rvv_abi> && !has_store_equivalent, Ptr>) +{ + auto const tgt = as> {}; + auto ptr = unalign(p); + + return riscv_store(rvv_true(), v, p); +} + +// Conditional store +template> Ptr> +EVE_FORCEINLINE void +store_(EVE_SUPPORTS(rvv_), C const& cond, wide const& v, Ptr ptr) noexcept +requires rvv_abi> && (!has_store_equivalent, Ptr>) +{ + if constexpr( C::is_complete || C::has_alternative || N() != expected_cardinal_v ) + { + store_(EVE_RETARGET(cpu_), cond, v, ptr); + } + else riscv_store(cond.mask(as> {}), v, unalign(ptr)); +} + +} diff --git a/include/eve/module/core/regular/slide_right.hpp b/include/eve/module/core/regular/slide_right.hpp index 5357c58203..77cb942c11 100644 --- a/include/eve/module/core/regular/slide_right.hpp +++ b/include/eve/module/core/regular/slide_right.hpp @@ -65,3 +65,7 @@ namespace detail #if defined(EVE_INCLUDE_POWERPC_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/store.hpp b/include/eve/module/core/regular/store.hpp index cf1bb2da6c..33aaf3e04f 100644 --- a/include/eve/module/core/regular/store.hpp +++ b/include/eve/module/core/regular/store.hpp @@ -90,3 +90,7 @@ EVE_MAKE_CALLABLE(store_, store); #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif From dc3687ef3ee3f0bd1497c7bbec94a61313aca921 Mon Sep 17 00:00:00 2001 From: Tetyushkin Ivan Date: Sat, 17 Aug 2024 13:22:20 +0300 Subject: [PATCH 4/8] [riscv] Fix test unit.memory.load.tuple for RISC-V As RISC-V default cardinal for some types bigger that 8, this test earlier resulted in reading non-initialized data. With this patch all tests from unit.memory are passed for RISC-V with vlen=128. --- test/unit/memory/load/tuple.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/test/unit/memory/load/tuple.cpp b/test/unit/memory/load/tuple.cpp index 312ad95a00..6012d7f7e3 100644 --- a/test/unit/memory/load/tuple.cpp +++ b/test/unit/memory/load/tuple.cpp @@ -29,7 +29,8 @@ TTS_CASE_TPL( "Check load behavior with soa_ptr", eve::test::scalar::all_types) { using s_t = tuple_t; using w_t = eve::wide>; - using w8_t = eve::wide, eve::fixed<8>>; + using expected_num_elements = eve::fixed<16>; + using we_t = eve::wide, expected_num_elements>; auto const filler = [](auto i, auto) { return s_t{ static_cast(1+i) , static_cast(i) @@ -38,29 +39,28 @@ TTS_CASE_TPL( "Check load behavior with soa_ptr", eve::test::scalar::all_types) }; w_t reference = filler; - w8_t reference8 = filler; + we_t reference_expected = filler; auto il = eve::ignore_last(w_t::size()/4); auto ireference = w_t{kumi::map ( [=](M m){ return m & il.mask(eve::as(m)).mask(); } , reference.storage() )}; - auto [data0,idx0] = page(); - auto [data1,idx1] = page(); - auto [data2,idx2] = page(); - - auto src = eve::soa_ptr ( eve::as_aligned(&data0[idx0],typename w8_t::cardinal_type{}) - , &data1[idx1] - 1 - , eve::as_aligned(&data2[idx2],typename w8_t::cardinal_type{}) - ); + auto [data0, idx0] = page(); + auto [data1, idx1] = page(); + auto [data2, idx2] = page(); + auto src = eve::soa_ptr(eve::as_aligned(&data0[idx0], typename we_t::cardinal_type {}), + &data1[idx1] - 1, + eve::as_aligned(&data2[idx2], typename we_t::cardinal_type {})); w_t constructed(src); TTS_EQUAL(constructed , reference ); TTS_EQUAL(eve::load(src) , reference ); - TTS_EQUAL(eve::load(src, eve::lane<8>) , reference8 ); + TTS_EQUAL(eve::load(src, eve::lane), reference_expected); TTS_EQUAL(eve::unsafe(eve::load)(src) , reference ); - TTS_EQUAL(eve::unsafe(eve::load)(src, eve::lane<8>) , reference8 ); + TTS_EQUAL(eve::unsafe(eve::load)(src, eve::lane), + reference_expected); auto loaded = eve::load[il](src); kumi::for_each( [=](M& m){ m &= il.mask(eve::as(m)).mask(); }, loaded); From 6b38369febee3d8373f5787d65ae6ee6f0d242d8 Mon Sep 17 00:00:00 2001 From: Tetyushkin Ivan Date: Fri, 16 Aug 2024 17:02:12 +0300 Subject: [PATCH 5/8] [riscv] Impelement right variant of first_true With this patch, all tests form examples are passed for RISC-V with vlen=128. --- .../eve/module/core/regular/first_true.hpp | 4 ++ .../regular/impl/simd/riscv/first_true.hpp | 51 +++++++++++++++++++ 2 files changed, 55 insertions(+) create mode 100644 include/eve/module/core/regular/impl/simd/riscv/first_true.hpp diff --git a/include/eve/module/core/regular/first_true.hpp b/include/eve/module/core/regular/first_true.hpp index 58d89b9a80..4903850f26 100644 --- a/include/eve/module/core/regular/first_true.hpp +++ b/include/eve/module/core/regular/first_true.hpp @@ -77,3 +77,7 @@ EVE_MAKE_CALLABLE(first_true_, first_true); #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/impl/simd/riscv/first_true.hpp b/include/eve/module/core/regular/impl/simd/riscv/first_true.hpp new file mode 100644 index 0000000000..9df8a21b65 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/first_true.hpp @@ -0,0 +1,51 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once +namespace eve::detail +{ +template +EVE_FORCEINLINE std::optional + first_true_(EVE_SUPPORTS(rvv_), C c, L m) noexcept +{ + using e_t = element_type_t; + using cardinal = cardinal_t; + if constexpr( C::is_complete && !C::is_inverted ) return std::nullopt; + else if constexpr( has_aggregated_abi_v ) + { + if constexpr( !C::is_complete ) m = m && c.mask(as {}); + auto [lo, hi] = m.slice(); + auto lo_res = first_true[ignore_none](lo); + auto hi_res = first_true[ignore_none](hi); + if( lo_res ) return lo_res; + if( hi_res ) *hi_res += lo.size(); + return hi_res; + } + else + { + auto mask = c.mask(as {}); + auto res = __riscv_vfirst(mask, m, cardinal::value); + if( res == -1 ) return std::nullopt; + return res; + } +} + +template +EVE_FORCEINLINE std::optional + first_true_(EVE_SUPPORTS(rvv_), L m) noexcept +{ + return first_true[ignore_none](m); +} + +template +EVE_FORCEINLINE std::optional + first_true_(EVE_SUPPORTS(rvv_), top_bits m) noexcept +{ + return first_true(to_logical(m)); +} + +} From 707f5f3b04d982ad7c41f01478d6db6aaa85210c Mon Sep 17 00:00:00 2001 From: Tetyushkin Ivan Date: Sat, 17 Aug 2024 18:42:08 +0300 Subject: [PATCH 6/8] [riscv] Add support for unit.api tests for RISC-V With this patch all unit.api testing is passed. Test that requires constuction wide from storage for riscv is removed, as by default we can not construct wide with different cardinal type with the same underlying type. --- include/eve/module/core/named_shuffles/blend.hpp | 1 + include/eve/module/core/named_shuffles/broadcast_lane.hpp | 1 + include/eve/module/core/named_shuffles/reverse.hpp | 2 ++ .../eve/module/core/named_shuffles/reverse_in_subgroups.hpp | 1 + include/eve/module/core/named_shuffles/swap_adjacent.hpp | 2 ++ test/unit/api/regular/shuffle_v2/shuffle_v2_8x1.cpp | 2 ++ test/unit/api/regular/wide.cpp | 6 ++++-- 7 files changed, 13 insertions(+), 2 deletions(-) diff --git a/include/eve/module/core/named_shuffles/blend.hpp b/include/eve/module/core/named_shuffles/blend.hpp index 499488e26f..35270f8417 100644 --- a/include/eve/module/core/named_shuffles/blend.hpp +++ b/include/eve/module/core/named_shuffles/blend.hpp @@ -100,6 +100,7 @@ struct blend_t if( ((I == 1) && ...) ) return 0; if( current_api >= sve ) return logical_simd_value ? 6 : 2; + if( current_api == rvv ) return logical_simd_value ? 6 : 2; if( current_api >= avx512 ) return logical_simd_value ? 6 : 2; if( current_api >= vmx ) return 3; diff --git a/include/eve/module/core/named_shuffles/broadcast_lane.hpp b/include/eve/module/core/named_shuffles/broadcast_lane.hpp index 613fcfc9d6..90f072a026 100644 --- a/include/eve/module/core/named_shuffles/broadcast_lane.hpp +++ b/include/eve/module/core/named_shuffles/broadcast_lane.hpp @@ -78,6 +78,7 @@ struct broadcast_lane_t using half_t = decltype(T {}.slice(lower_)); return level(as {}, g, i); } + if( current_api == rvv ) return logical_simd_value ? 6 : 2; else if constexpr( current_api >= vmx ) return 2; else if constexpr( current_api >= sve ) { diff --git a/include/eve/module/core/named_shuffles/reverse.hpp b/include/eve/module/core/named_shuffles/reverse.hpp index 8acd8ba4a6..a842a261bf 100644 --- a/include/eve/module/core/named_shuffles/reverse.hpp +++ b/include/eve/module/core/named_shuffles/reverse.hpp @@ -88,6 +88,8 @@ struct reverse_t return level(detail::mask_type(tgt), g) + 4; } + if( current_api == rvv ) return logical_simd_value ? 6 : 2; + if (current_api >= neon) { if ( reg_size <= 8 ) return 2; if ( g_size == 8 ) return 2; diff --git a/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp b/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp index 696dae90ee..2529062a0b 100644 --- a/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp +++ b/include/eve/module/core/named_shuffles/reverse_in_subgroups.hpp @@ -92,6 +92,7 @@ struct reverse_in_subgroups_t using half_t = decltype(T {}.slice(lower_)); return level(as {}, g, sub_g); } + if( current_api == rvv ) return logical_simd_value ? 6 : 2; if( current_api >= sve ) { diff --git a/include/eve/module/core/named_shuffles/swap_adjacent.hpp b/include/eve/module/core/named_shuffles/swap_adjacent.hpp index d07bddbc21..9894d57327 100644 --- a/include/eve/module/core/named_shuffles/swap_adjacent.hpp +++ b/include/eve/module/core/named_shuffles/swap_adjacent.hpp @@ -72,6 +72,8 @@ struct swap_adjacent_t const std::size_t reg_size = sizeof(element_type_t) * T::size(); const std::size_t fund_size = eve::fundamental_cardinal_v; + if( current_api == rvv ) return logical_simd_value ? 6 : 2; + if( current_api >= sve ) { if( arithmetic_simd_value ) diff --git a/test/unit/api/regular/shuffle_v2/shuffle_v2_8x1.cpp b/test/unit/api/regular/shuffle_v2/shuffle_v2_8x1.cpp index 6aa85d0767..df79b60f70 100644 --- a/test/unit/api/regular/shuffle_v2/shuffle_v2_8x1.cpp +++ b/test/unit/api/regular/shuffle_v2/shuffle_v2_8x1.cpp @@ -28,6 +28,8 @@ TTS_CASE("shuffle_v2: 8x1") return 3; } + if( eve::current_api == eve::rvv ) return 2; + if (eve::current_api >= eve::sve) { if (idxm::is_lane_broadcast(p)) return 2; diff --git a/test/unit/api/regular/wide.cpp b/test/unit/api/regular/wide.cpp index 101374ed94..b58f6a19d3 100644 --- a/test/unit/api/regular/wide.cpp +++ b/test/unit/api/regular/wide.cpp @@ -32,7 +32,8 @@ TTS_CASE_WITH( "Check eve::wide enumerating constructor" // Test smaller size wide for non-garbage using v_t = typename T::value_type; - if constexpr( T::size() < eve::fundamental_cardinal_v && !eve::has_emulated_abi_v ) + if constexpr( T::size() < eve::fundamental_cardinal_v && !eve::has_emulated_abi_v + && eve::current_api != eve::rvv ) { using w_t = eve::as_wide_t>; using wl_t = eve::as_logical_t; @@ -72,7 +73,8 @@ TTS_CASE_TPL( "Check eve::wide splat constructor", eve::test::simd::all_types) // Test smaller size wide for non-garbage using v_t = typename T::value_type; - if constexpr( T::size() < eve::fundamental_cardinal_v && !eve::has_emulated_abi_v ) + if constexpr( T::size() < eve::fundamental_cardinal_v && !eve::has_emulated_abi_v + && eve::current_api != eve::rvv ) { using w_t = eve::as_wide_t>; using wl_t = eve::as_logical_t; From 979fa91d833d0a20b47669a021d797d60c5dc51f Mon Sep 17 00:00:00 2001 From: Tetyushkin Ivan Date: Sat, 17 Aug 2024 17:35:23 +0300 Subject: [PATCH 7/8] [riscv] Add support for testing RISC-V for unit.algo testsuite With this patch all unit.algo tests are passed for RISC-V. --- include/eve/module/core/compress/compress.hpp | 4 ++ .../compress/simd/common/compress_copy.hpp | 3 +- .../core/compress/simd/riscv/compress.hpp | 27 ++++++++++++ .../eve/module/core/regular/count_true.hpp | 4 ++ .../regular/impl/simd/riscv/count_true.hpp | 39 +++++++++++++++++ .../impl/simd/riscv/iterate_selected.hpp | 26 +++++++++++ .../regular/impl/simd/riscv/last_true.hpp | 43 +++++++++++++++++++ .../module/core/regular/iterate_selected.hpp | 4 ++ include/eve/module/core/regular/last_true.hpp | 4 ++ include/eve/module/core/regular/unalign.hpp | 32 +++++++------- .../algorithm/min_element_special_cases.cpp | 16 ++++++- ...nsform_keep_if_and_remove_generic_test.hpp | 2 +- 12 files changed, 184 insertions(+), 20 deletions(-) create mode 100644 include/eve/module/core/compress/simd/riscv/compress.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/count_true.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/iterate_selected.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/last_true.hpp diff --git a/include/eve/module/core/compress/compress.hpp b/include/eve/module/core/compress/compress.hpp index e9d7b59112..c8a70076bd 100644 --- a/include/eve/module/core/compress/compress.hpp +++ b/include/eve/module/core/compress/compress.hpp @@ -98,3 +98,7 @@ namespace eve #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/compress/simd/common/compress_copy.hpp b/include/eve/module/core/compress/simd/common/compress_copy.hpp index b52f5ed524..af6069e2c9 100644 --- a/include/eve/module/core/compress/simd/common/compress_copy.hpp +++ b/include/eve/module/core/compress/simd/common/compress_copy.hpp @@ -20,7 +20,8 @@ struct compress_copy_core { using COut = typename Settings::cond_out_t; if constexpr( eve::has_emulated_abi_v ) return compress_copy_scalar; - else if constexpr( eve::current_api >= sve || eve::current_api >= avx512 ) + else if constexpr( eve::current_api >= sve || eve::current_api >= avx512 + || eve::current_api == rvv ) return compress_copy_simd; else if constexpr( Settings::is_sparse || !COut::is_complete ) return compress_copy_scalar; else return compress_copy_simd; diff --git a/include/eve/module/core/compress/simd/riscv/compress.hpp b/include/eve/module/core/compress/simd/riscv/compress.hpp new file mode 100644 index 0000000000..6f455f4117 --- /dev/null +++ b/include/eve/module/core/compress/simd/riscv/compress.hpp @@ -0,0 +1,27 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once +#include +namespace eve::detail +{ + +template +EVE_FORCEINLINE auto +compress_(EVE_SUPPORTS(rvv_), C c, wide v, logical> mask) noexcept +requires rvv_abi> +{ + auto c_mask = expand_mask(c, as> {}); + auto formated_mask = bit_cast(mask, as>> {}); + logical> real_mask = __riscv_vmand(c_mask, formated_mask, N::value); + wide init = make(as> {}, static_cast(0)); + wide compressed = __riscv_vcompress_tu(init, v, real_mask, N::value); + auto new_element_num = count_true(real_mask); + kumi::tuple cur {compressed, new_element_num}; + return kumi::tuple {cur}; +} +} diff --git a/include/eve/module/core/regular/count_true.hpp b/include/eve/module/core/regular/count_true.hpp index 5d0d50fd6d..bdf58d83d8 100644 --- a/include/eve/module/core/regular/count_true.hpp +++ b/include/eve/module/core/regular/count_true.hpp @@ -79,3 +79,7 @@ EVE_MAKE_CALLABLE(count_true_, count_true); #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/impl/simd/riscv/count_true.hpp b/include/eve/module/core/regular/impl/simd/riscv/count_true.hpp new file mode 100644 index 0000000000..0e7441b7c4 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/count_true.hpp @@ -0,0 +1,39 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include + +namespace eve::detail +{ +template +EVE_FORCEINLINE std::ptrdiff_t + count_true_(EVE_SUPPORTS(cpu_), C cond, logical> v) noexcept +requires rvv_abi> +{ + auto const m = cond.mask(as> {}); + return __riscv_vcpop(m, v, N::value); +} + +template +EVE_FORCEINLINE std::ptrdiff_t + count_true_(EVE_SUPPORTS(rvv_), logical> v) noexcept +requires rvv_abi> +{ + return __riscv_vcpop(v, N::value); +} +template +EVE_FORCEINLINE std::ptrdiff_t + count_true_(EVE_SUPPORTS(rvv_), top_bits>> v) noexcept +requires rvv_abi> +{ + return __riscv_vcpop(v.storage, N::value); +} +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/iterate_selected.hpp b/include/eve/module/core/regular/impl/simd/riscv/iterate_selected.hpp new file mode 100644 index 0000000000..4a747ffed9 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/iterate_selected.hpp @@ -0,0 +1,26 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + +template +EVE_FORCEINLINE bool +iterate_selected_(EVE_REQUIRES(rvv_), O const& opts, top_bits l, F&& f) +requires(L::size() >= 64) +{ + auto [lo, hi] = l.slice(); + if( iterate_selected(lo, f) ) return true; + return iterate_selected(hi, detail::plus_offset_lambda {f, L::size() / 2}); +} + +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/last_true.hpp b/include/eve/module/core/regular/impl/simd/riscv/last_true.hpp new file mode 100644 index 0000000000..922b6a5262 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/last_true.hpp @@ -0,0 +1,43 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include + +namespace eve::detail +{ + +template +EVE_FORCEINLINE std::optional +last_true_(EVE_SUPPORTS(rvv_), C const& cond, top_bits>> const& v0) noexcept +requires rvv_abi> +{ + if constexpr( C::is_complete && !C::is_inverted ) return std::nullopt; + else + { + auto m = cond.mask(as> {}); + logical> masked = __riscv_vmand(v0.storage, m, N::value); + return last_true(masked); + } +} + +template +EVE_FORCEINLINE std::optional + last_true_(EVE_SUPPORTS(rvv_), top_bits>> const &v0) noexcept +requires rvv_abi> +{ + // TODO: optimize. + for( int i = N::value - 1; i > -1; --i ) + { + if( v0.get(i) ) return i; + } + return {}; +} +} diff --git a/include/eve/module/core/regular/iterate_selected.hpp b/include/eve/module/core/regular/iterate_selected.hpp index 619aa143ce..5ebc22dd02 100644 --- a/include/eve/module/core/regular/iterate_selected.hpp +++ b/include/eve/module/core/regular/iterate_selected.hpp @@ -94,3 +94,7 @@ struct iterate_selected_t #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/last_true.hpp b/include/eve/module/core/regular/last_true.hpp index 075faaf3a3..e6782c392f 100644 --- a/include/eve/module/core/regular/last_true.hpp +++ b/include/eve/module/core/regular/last_true.hpp @@ -20,3 +20,7 @@ EVE_MAKE_CALLABLE(last_true_, last_true); #if defined(EVE_INCLUDE_ARM_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/unalign.hpp b/include/eve/module/core/regular/unalign.hpp index effb08dd14..27e851651d 100644 --- a/include/eve/module/core/regular/unalign.hpp +++ b/include/eve/module/core/regular/unalign.hpp @@ -95,22 +95,22 @@ namespace eve //! @} //================================================================================================ -//================================================================================================ -//! @addtogroup memory -//! @{ -//! @typedef unaligned_t -//! @brief Compute the unaligned pointer type associated to a given type. -//! @tparam T Type to process -//! -//! **Required header:** `#include ` -//! -//! @code{.cpp} -//! template -//! using unaligned_t = decltype(unalign(std::declval())); -//! @endcode -//! @} -//================================================================================================ - template using unaligned_t = decltype(unalign(std::declval())); + //================================================================================================ + //! @addtogroup memory + //! @{ + //! @typedef unaligned_t + //! @brief Compute the unaligned pointer type associated to a given type. + //! @tparam T Type to process + //! + //! **Required header:** `#include ` + //! + //! @code{.cpp} + //! template + //! using unaligned_t = decltype(unalign(static_cast(std::declval())_); + //! @endcode + //! @} + //================================================================================================ + template using unaligned_t = decltype(unalign(static_cast(std::declval()))); } namespace eve::detail { diff --git a/test/unit/module/algo/algorithm/min_element_special_cases.cpp b/test/unit/module/algo/algorithm/min_element_special_cases.cpp index 16022e01ab..aa0ea2ee2a 100644 --- a/test/unit/module/algo/algorithm/min_element_special_cases.cpp +++ b/test/unit/module/algo/algorithm/min_element_special_cases.cpp @@ -13,10 +13,17 @@ TTS_CASE("Min element one pass, uint8 index") { - auto alg = eve::algo::min_element // + auto alg0 = eve::algo::min_element // [eve::algo::single_pass] // [eve::algo::index_type] // [eve::algo::unroll<2>]; + + auto alg = [&] + { + if constexpr( eve::expected_cardinal_v < 128 ) { return alg0; } + else { return alg0[eve::algo::force_cardinal<64>]; } + }(); + { std::vector v {1, 2, 3}; TTS_EQUAL(0, alg(v) - v.begin()); @@ -60,10 +67,15 @@ TTS_CASE("Min element one pass, uint8 index") }; TTS_CASE("Min element one pass, uint8 index, first one is the answer") { - auto alg = eve::algo::min_element // + auto alg0 = eve::algo::min_element // [eve::algo::single_pass] // [eve::algo::index_type] // [eve::algo::unroll<2>]; + auto alg = [&] + { + if constexpr( eve::expected_cardinal_v < 128 ) { return alg0; } + else { return alg0[eve::algo::force_cardinal<64>]; } + }(); std::vector v; v.resize(300); diff --git a/test/unit/module/algo/algorithm/transform_keep_if_and_remove_generic_test.hpp b/test/unit/module/algo/algorithm/transform_keep_if_and_remove_generic_test.hpp index f6245a8a17..638ffb908f 100644 --- a/test/unit/module/algo/algorithm/transform_keep_if_and_remove_generic_test.hpp +++ b/test/unit/module/algo/algorithm/transform_keep_if_and_remove_generic_test.hpp @@ -24,7 +24,7 @@ template void transform_keep_if_generic_test_aligned_ptr(eve::as, Algo alg, Op op) { using e_t = eve::element_type_t; - alignas(64) std::array data; + alignas(128) std::array data; const e_t keep{5}; const e_t drop{1}; From af4327d886da3d5bdba9de972b7145a36685789f Mon Sep 17 00:00:00 2001 From: Tetyushkin Ivan Date: Sat, 17 Aug 2024 18:39:34 +0300 Subject: [PATCH 8/8] [riscv] Optimizations for RISC-V --- include/eve/module/core/regular/abs.hpp | 4 ++ include/eve/module/core/regular/add.hpp | 4 ++ include/eve/module/core/regular/all.hpp | 4 ++ include/eve/module/core/regular/any.hpp | 4 ++ include/eve/module/core/regular/bit_and.hpp | 4 ++ include/eve/module/core/regular/div.hpp | 4 ++ .../core/regular/impl/simd/riscv/abs.hpp | 55 ++++++++++++++++++ .../core/regular/impl/simd/riscv/add.hpp | 34 +++++++++++ .../core/regular/impl/simd/riscv/all.hpp | 28 +++++++++ .../core/regular/impl/simd/riscv/any.hpp | 27 +++++++++ .../core/regular/impl/simd/riscv/bit_and.hpp | 58 +++++++++++++++++++ .../core/regular/impl/simd/riscv/div.hpp | 34 +++++++++++ .../regular/impl/simd/riscv/logical_xor.hpp | 20 +++++++ .../core/regular/impl/simd/riscv/max.hpp | 35 +++++++++++ .../core/regular/impl/simd/riscv/min.hpp | 43 ++++++++++++++ .../core/regular/impl/simd/riscv/shl.hpp | 32 ++++++++++ .../eve/module/core/regular/logical_xor.hpp | 4 ++ include/eve/module/core/regular/max.hpp | 3 + include/eve/module/core/regular/min.hpp | 4 ++ include/eve/module/core/regular/shl.hpp | 4 ++ 20 files changed, 405 insertions(+) create mode 100644 include/eve/module/core/regular/impl/simd/riscv/abs.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/add.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/all.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/any.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/bit_and.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/div.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/logical_xor.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/max.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/min.hpp create mode 100644 include/eve/module/core/regular/impl/simd/riscv/shl.hpp diff --git a/include/eve/module/core/regular/abs.hpp b/include/eve/module/core/regular/abs.hpp index 6805076417..71aae44290 100644 --- a/include/eve/module/core/regular/abs.hpp +++ b/include/eve/module/core/regular/abs.hpp @@ -103,3 +103,7 @@ namespace eve #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/add.hpp b/include/eve/module/core/regular/add.hpp index 584e40797d..b23d93101e 100644 --- a/include/eve/module/core/regular/add.hpp +++ b/include/eve/module/core/regular/add.hpp @@ -106,3 +106,7 @@ namespace eve #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/all.hpp b/include/eve/module/core/regular/all.hpp index 4471d2f799..61085a5318 100644 --- a/include/eve/module/core/regular/all.hpp +++ b/include/eve/module/core/regular/all.hpp @@ -72,3 +72,7 @@ EVE_MAKE_CALLABLE(all_, all); #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/any.hpp b/include/eve/module/core/regular/any.hpp index ef3e27a1c0..6329dc4e03 100644 --- a/include/eve/module/core/regular/any.hpp +++ b/include/eve/module/core/regular/any.hpp @@ -70,3 +70,7 @@ EVE_MAKE_CALLABLE(any_, any); #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/bit_and.hpp b/include/eve/module/core/regular/bit_and.hpp index c4d60fac19..b80c505187 100644 --- a/include/eve/module/core/regular/bit_and.hpp +++ b/include/eve/module/core/regular/bit_and.hpp @@ -104,3 +104,7 @@ namespace eve #if defined(EVE_INCLUDE_X86_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/div.hpp b/include/eve/module/core/regular/div.hpp index 695b0c320e..e3f3337203 100644 --- a/include/eve/module/core/regular/div.hpp +++ b/include/eve/module/core/regular/div.hpp @@ -131,3 +131,7 @@ namespace eve #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/impl/simd/riscv/abs.hpp b/include/eve/module/core/regular/impl/simd/riscv/abs.hpp new file mode 100644 index 0000000000..9eef6ed897 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/abs.hpp @@ -0,0 +1,55 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include + +namespace eve::detail +{ + +template +EVE_FORCEINLINE auto +abs_(EVE_SUPPORTS(rvv_), wide const& a) noexcept -> wide +requires rvv_abi> && (match(categorize>(), category::float_)) +{ + return __riscv_vfabs(a, N::value); +} + +template +EVE_FORCEINLINE auto +abs_(EVE_SUPPORTS(rvv_), wide const& a) noexcept -> wide +requires rvv_abi> && (match(categorize>(), category::int_)) +{ + wide negative_values = __riscv_vneg(a, N::value); + logical> need_to_change_mask = self_less(a, static_cast(0)); + return if_else(need_to_change_mask, negative_values, a); +} + +template +EVE_FORCEINLINE auto +abs_(EVE_SUPPORTS(rvv_), C const& cx, wide const& v) noexcept -> wide +requires rvv_abi> +{ + constexpr auto c = categorize>(); + if constexpr( C::is_complete ) return abs_(EVE_RETARGET(cpu_), cx, v); + else + { + auto mask = expand_mask(cx, as> {}); + if constexpr( match(c, category::float_) ) { return __riscv_vfabs_tumu(mask, v, v, N::value); } + if constexpr( match(c, category::int_) ) + { + wide negative_values = __riscv_vneg(v, N::value); + logical> need_to_change_mask = self_less(v, static_cast(0)); + logical> mask_to_update = __riscv_vmand(mask, need_to_change_mask, N::value); + return if_else(mask_to_update, negative_values, v); + } + } +} +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/add.hpp b/include/eve/module/core/regular/impl/simd/riscv/add.hpp new file mode 100644 index 0000000000..9670444063 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/add.hpp @@ -0,0 +1,34 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + +template +EVE_FORCEINLINE wide + add_(EVE_SUPPORTS(rvv_), C const &cx, wide v, wide w) noexcept +requires rvv_abi> +{ + if constexpr( C::is_complete ) { return add_(EVE_RETARGET(cpu_), cx, v, w); } + else + { + if constexpr( !C::has_alternative ) + { + auto m = expand_mask(cx, as> {}); + constexpr auto c = categorize>(); + if constexpr( match(c, category::float_) ) return __riscv_vfadd_tumu(m, v, v, w, N::value); + else return __riscv_vadd_tumu(m, v, v, w, N::value); + } + else return add_(EVE_RETARGET(cpu_), cx, v, w); + } +} +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/all.hpp b/include/eve/module/core/regular/impl/simd/riscv/all.hpp new file mode 100644 index 0000000000..c6f3e542c3 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/all.hpp @@ -0,0 +1,28 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include + +namespace eve::detail +{ +template +EVE_FORCEINLINE bool +all_(EVE_SUPPORTS(rvv_), C const& cond, logical> const& v) noexcept +requires rvv_abi> +{ + if constexpr( C::is_complete ) + { + if constexpr( C::is_inverted ) return __riscv_vcpop(v, N::value) == N::value; + else return true; + } + else return count_true(cond, v) == cond.count(as>()); +} +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/any.hpp b/include/eve/module/core/regular/impl/simd/riscv/any.hpp new file mode 100644 index 0000000000..6a66dbd1df --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/any.hpp @@ -0,0 +1,27 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ +template +EVE_FORCEINLINE bool +any_(EVE_SUPPORTS(rvv_), C const& cond, logical> v) noexcept +requires rvv_abi> +{ + if constexpr( C::is_complete && !C::is_inverted ) return false; + else + { + auto m = expand_mask(cond, as> {}); + return __riscv_vcpop(m, v, N::value) > 0; + } +} +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/bit_and.hpp b/include/eve/module/core/regular/impl/simd/riscv/bit_and.hpp new file mode 100644 index 0000000000..feb6cb70a4 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/bit_and.hpp @@ -0,0 +1,58 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include +#include +namespace eve ::detail +{ +// ----------------------------------------------------------------------------------------------- +// Masked case +template +EVE_FORCEINLINE wide +bit_and_(EVE_SUPPORTS(rvv_), C const& cx, wide const& v0, wide const& v1) noexcept +requires rvv_abi> +{ + constexpr auto c = categorize>(); + + if constexpr( C::is_complete || abi_t::is_wide_logical ) + { + return bit_and_(EVE_RETARGET(cpu_), cx, v0, v1); + } + else + { + auto m = expand_mask(cx, as> {}); + using sign = unsigned; + using out_part_scalar = as_integer_t; + using out_part_wide = wide; + auto part_tgt = as {}; + auto v0_int = bit_cast(v0, part_tgt); + auto v1_int = bit_cast(v1, part_tgt); + out_part_wide part_res = __riscv_vand_tumu(m, v0_int, v0_int, v1_int, N::value); + return bit_cast(part_res, as> {}); + } +} +// ----------------------------------------------------------------------------------------------- +// Masked case +template +EVE_FORCEINLINE wide + bit_and_(EVE_SUPPORTS(rvv_), wide const &v0, wide const &v1) noexcept +requires rvv_abi> +{ + using sign = unsigned; + using out_part_scalar = as_integer_t; + using out_part_wide = wide; + auto part_tgt = as {}; + auto v0_int = bit_cast(v0, part_tgt); + auto v1_int = bit_cast(v1, part_tgt); + out_part_wide part_res = __riscv_vand(v0_int, v1_int, N::value); + return bit_cast(part_res, as> {}); +} +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/div.hpp b/include/eve/module/core/regular/impl/simd/riscv/div.hpp new file mode 100644 index 0000000000..acd352d3de --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/div.hpp @@ -0,0 +1,34 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ +template +EVE_FORCEINLINE wide +div_(EVE_SUPPORTS(rvv_), C const& cx, wide const& v, wide const& w) noexcept +requires rvv_abi> +{ + if constexpr( C::is_complete ) { return div_(EVE_RETARGET(cpu_), cx, v, w); } + else + { + if constexpr( !C::has_alternative ) + { + auto m = expand_mask(cx, as> {}); + constexpr auto c = categorize>(); + if constexpr( match(c, category::int_) ) return __riscv_vdiv_tumu(m, v, v, w, N::value); + else if constexpr( match(c, category::uint_) ) return __riscv_vdivu_tumu(m, v, v, w, N::value); + else if constexpr( match(c, category::float_) ) return __riscv_vfdiv_tumu(m, v, v, w, N::value); + } + else return div_(EVE_RETARGET(cpu_), cx, v, w); + } +} +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/logical_xor.hpp b/include/eve/module/core/regular/impl/simd/riscv/logical_xor.hpp new file mode 100644 index 0000000000..4fe12fd81e --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/logical_xor.hpp @@ -0,0 +1,20 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once +namespace eve::detail +{ +template +EVE_FORCEINLINE logical> + logical_xor_(EVE_SUPPORTS(rvv_), + logical> const &a, + logical> const &b) noexcept +requires rvv_abi> +{ + return a != bit_cast(b, as>> {}); +} +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/max.hpp b/include/eve/module/core/regular/impl/simd/riscv/max.hpp new file mode 100644 index 0000000000..fcceea64c9 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/max.hpp @@ -0,0 +1,35 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include + +namespace eve::detail +{ + +template +EVE_FORCEINLINE wide + max_(EVE_SUPPORTS(rvv_), C const &cx, wide v, wide w) noexcept +requires rvv_abi> +{ + if constexpr( C::is_complete ) { return max_(EVE_RETARGET(cpu_), cx, v, w); } + else + { + if constexpr( !C::has_alternative ) + { + auto m = expand_mask(cx, as> {}); + constexpr auto c = categorize>(); + if constexpr( match(c, category::float_) ) return __riscv_vfmax_tumu(m, v, v, w, N::value); + else if constexpr( match(c, category::int_) ) return __riscv_vmax_tumu(m, v, v, w, N::value); + else if constexpr( match(c, category::uint_) ) return __riscv_vmaxu_tumu(m, v, v, w, N::value); + } + else return max_(EVE_RETARGET(cpu_), cx, v, w); + } +} +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/min.hpp b/include/eve/module/core/regular/impl/simd/riscv/min.hpp new file mode 100644 index 0000000000..e60b8bace4 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/min.hpp @@ -0,0 +1,43 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once + +#include +#include +#include + +namespace eve::detail +{ +template +EVE_FORCEINLINE auto +min_(EVE_SUPPORTS(rvv_), wide const& a, wide const& b) noexcept -> wide +requires rvv_abi> +{ + constexpr auto c = categorize>(); + if constexpr( match(c, category::float_) ) return __riscv_vfmin_tu(a, a, b, N::value); + else if constexpr( match(c, category::int_) ) + { + wide res = __riscv_vmin_tu(a, a, b, N::value); + return res; + } + else if constexpr( match(c, category::uint_) ) return __riscv_vminu_tu(a, a, b, N::value); +} + +template +EVE_FORCEINLINE auto +min_(EVE_SUPPORTS(rvv_), C const& cx, wide const& v, wide const& w) noexcept + -> wide +{ + auto mask = expand_mask(cx, as> {}); + constexpr auto c = categorize>(); + if constexpr( match(c, category::float_) ) return __riscv_vfmin_tumu(mask, v, v, w, N::value); + else if constexpr( match(c, category::int_) ) return __riscv_vmin_tumu(mask, v, v, w, N::value); + else if constexpr( match(c, category::uint_) ) return __riscv_vminu_tumu(mask, v, v, w, N::value); +} + +} diff --git a/include/eve/module/core/regular/impl/simd/riscv/shl.hpp b/include/eve/module/core/regular/impl/simd/riscv/shl.hpp new file mode 100644 index 0000000000..a63aa8f3e8 --- /dev/null +++ b/include/eve/module/core/regular/impl/simd/riscv/shl.hpp @@ -0,0 +1,32 @@ +//================================================================================================== +/* + EVE - Expressive Vector Engine + Copyright : EVE Project Contributors + SPDX-License-Identifier: BSL-1.0 +*/ +//================================================================================================== +#pragma once +namespace eve::detail +{ + +template +EVE_FORCEINLINE wide + shl_(EVE_REQUIRES(rvv_), O const&, wide a, U s) noexcept +requires rvv_abi> +{ + constexpr auto c = categorize>(); + if constexpr( match(c, category::float_) ) + { + using i_t = typename wide::template rebind, N>; + return shl(bit_cast(a, as {}), s); + } + else return __riscv_vsll(a, s, N::value); +} +template +EVE_FORCEINLINE auto +shl_(EVE_REQUIRES(rvv_), O const&, wide a, index_t const& s) noexcept +requires rvv_abi> +{ + return shl(a, V); +} +} diff --git a/include/eve/module/core/regular/logical_xor.hpp b/include/eve/module/core/regular/logical_xor.hpp index a5b595c0a7..ec57f3b143 100644 --- a/include/eve/module/core/regular/logical_xor.hpp +++ b/include/eve/module/core/regular/logical_xor.hpp @@ -128,3 +128,7 @@ namespace eve #if defined(EVE_INCLUDE_X86_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/max.hpp b/include/eve/module/core/regular/max.hpp index 8fd807ed6f..72c3dcecbc 100644 --- a/include/eve/module/core/regular/max.hpp +++ b/include/eve/module/core/regular/max.hpp @@ -113,3 +113,6 @@ namespace eve #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/min.hpp b/include/eve/module/core/regular/min.hpp index 485543cdde..2f969474e4 100644 --- a/include/eve/module/core/regular/min.hpp +++ b/include/eve/module/core/regular/min.hpp @@ -114,3 +114,7 @@ namespace eve #if defined(EVE_INCLUDE_SVE_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif diff --git a/include/eve/module/core/regular/shl.hpp b/include/eve/module/core/regular/shl.hpp index 80fdacecc9..a667b4692b 100644 --- a/include/eve/module/core/regular/shl.hpp +++ b/include/eve/module/core/regular/shl.hpp @@ -123,3 +123,7 @@ namespace eve #if defined(EVE_INCLUDE_X86_HEADER) # include #endif + +#if defined(EVE_INCLUDE_RISCV_HEADER) +# include +#endif