From 063e01a6426571151b649e43615231820aaf3518 Mon Sep 17 00:00:00 2001 From: Steffen Larsen Date: Wed, 8 Jan 2025 08:42:28 +0100 Subject: [PATCH] Revert "[SYCL] Refactor `sycl::vec`'s operators implementation (#16529)" This reverts commit 16c2c2161c9a67a1519a719f867cb5ad1bdd4533. --- .../detail/type_traits/vec_marray_traits.hpp | 12 +- sycl/include/sycl/detail/vector_arith.hpp | 135 ++++----- .../vector/vector_math_ops.cpp | 284 +++++++++--------- 3 files changed, 217 insertions(+), 214 deletions(-) diff --git a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp index 86e8764dc87a..8097451352e7 100644 --- a/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp +++ b/sycl/include/sycl/detail/type_traits/vec_marray_traits.hpp @@ -31,24 +31,14 @@ template constexpr bool is_vec_v = is_vec::value; template struct is_ext_vector : std::false_type {}; -template -struct is_valid_type_for_ext_vector : std::false_type {}; #if defined(__has_extension) #if __has_extension(attribute_ext_vector_type) template -using ext_vector = T __attribute__((ext_vector_type(N))); -template -struct is_ext_vector> : std::true_type {}; -template -struct is_valid_type_for_ext_vector>> - : std::true_type {}; +struct is_ext_vector : std::true_type {}; #endif #endif template inline constexpr bool is_ext_vector_v = is_ext_vector::value; -template -inline constexpr bool is_valid_type_for_ext_vector_v = - is_valid_type_for_ext_vector::value; template struct is_swizzle : std::false_type {}; template -#include -#include -#include -#include +#include // for half, cl_char, cl_int +#include // for is_sigeninteger, is_s... +#include // for is_floating_point + +#include // bfloat16 + +#include +#include // for enable_if_t, is_same namespace sycl { inline namespace _V1 { @@ -47,7 +50,13 @@ struct UnaryPlus { }; struct VecOperators { - template +#ifdef __SYCL_DEVICE_ONLY__ + static constexpr bool is_host = false; +#else + static constexpr bool is_host = true; +#endif + + template static constexpr auto apply(const ArgTys &...Args) { using Self = nth_type_t<0, ArgTys...>; static_assert(is_vec_v); @@ -56,50 +65,41 @@ struct VecOperators { using element_type = typename Self::element_type; constexpr int N = Self::size(); constexpr bool is_logical = check_type_in_v< - OpTy, std::equal_to, std::not_equal_to, std::less, + BinOp, std::equal_to, std::not_equal_to, std::less, std::greater, std::less_equal, std::greater_equal, std::logical_and, std::logical_or, std::logical_not>; using result_t = std::conditional_t< is_logical, vec, N>, Self>; - OpTy Op{}; -#ifdef __has_extension -#if __has_extension(attribute_ext_vector_type) - // ext_vector_type's bool vectors are mapped onto and have - // different memory layout than sycl::vec (which has 1 byte per - // element). As such we perform operation on int8_t and then need to - // create bit pattern that can be bit-casted back to the original - // sycl::vec. This is a hack actually, but we've been doing - // that for a long time using sycl::vec::vector_t type. - using vec_elem_ty = - typename detail::map_type*/ std::int8_t, -#if (!defined(_HAS_STD_BYTE) || _HAS_STD_BYTE != 0) - std::byte, /*->*/ std::uint8_t, -#endif -#ifdef __SYCL_DEVICE_ONLY__ - half, /*->*/ _Float16, -#endif - element_type, /*->*/ element_type>::type; - if constexpr (N != 1 && - detail::is_valid_type_for_ext_vector_v) { - using vec_t = ext_vector; - auto tmp = [&](auto... xs) { + BinOp Op{}; + if constexpr (is_host || N == 1 || + std::is_same_v) { + result_t res{}; + for (size_t i = 0; i < N; ++i) + if constexpr (is_logical) + res[i] = Op(Args[i]...) ? -1 : 0; + else + res[i] = Op(Args[i]...); + return res; + } else { + using vector_t = typename Self::vector_t; + + auto res = [&](auto... xs) { // Workaround for https://github.com/llvm/llvm-project/issues/119617. if constexpr (sizeof...(Args) == 2) { return [&](auto x, auto y) { - if constexpr (std::is_same_v>) + if constexpr (std::is_same_v>) return x == y; - else if constexpr (std::is_same_v>) + else if constexpr (std::is_same_v>) return x != y; - else if constexpr (std::is_same_v>) + else if constexpr (std::is_same_v>) return x < y; - else if constexpr (std::is_same_v>) + else if constexpr (std::is_same_v>) return x <= y; - else if constexpr (std::is_same_v>) + else if constexpr (std::is_same_v>) return x > y; - else if constexpr (std::is_same_v>) + else if constexpr (std::is_same_v>) return x >= y; else return Op(x, y); @@ -107,45 +107,46 @@ struct VecOperators { } else { return Op(xs...); } - }(bit_cast(Args)...); + }(bit_cast(Args)...); + if constexpr (std::is_same_v) { - // Some operations are known to produce the required bit patterns and - // the following post-processing isn't necessary for them: + // vec(vector_t) ctor does a simple bit_cast and the way "bool" is + // stored is that only one bit matters. vector_t, however, is a char + // type and it can have non-zero value with lowest bit unset. E.g., + // consider this: + // + // auto x = true + true; // int x = 2 + // bool y = true + true; // bool y = true + // + // and the vec has to behave in a similar way. As such, current + // implementation needs to do some extra processing for operators that + // can result in this scenario. + // if constexpr (!is_logical && - !check_type_in_v, + !check_type_in_v, std::divides, std::bit_or, std::bit_and, std::bit_xor, ShiftRight, UnaryPlus>) { - // Extra cast is needed because: - static_assert(std::is_same_v); - static_assert(!std::is_same_v< - decltype(std::declval>() != 0), - ext_vector>); - static_assert(std::is_same_v< - decltype(std::declval>() != 0), - ext_vector>); - - // `... * -1` is needed because ext_vector_type's comparison follows - // OpenCL binary representation for "true" (-1). - // `std::array` is different and LLVM annotates its - // elements with [0, 2) range metadata when loaded, so we need to - // ensure we generate 0/1 only (and not 2/-1/etc.). - static_assert((ext_vector{1, 0} == 0)[1] == -1); - - tmp = reinterpret_cast((tmp != 0) * -1); + // TODO: Not sure why the following doesn't work + // (test-e2e/Basic/vector/bool.cpp fails). + // + // res = (decltype(res))(res != 0); + for (size_t i = 0; i < N; ++i) + res[i] = bit_cast(res[i]) != 0; } } - return bit_cast(tmp); + // The following is true: + // + // using char2 = char __attribute__((ext_vector_type(2))); + // using uchar2 = unsigned char __attribute__((ext_vector_type(2))); + // static_assert(std::is_same_v() == + // std::declval()), + // char2>); + // + // so we need some extra casts. Also, static_cast(char2{}) + // isn't allowed either. + return result_t{(typename result_t::vector_t)res}; } -#endif -#endif - result_t res{}; - for (size_t i = 0; i < N; ++i) - if constexpr (is_logical) - res[i] = Op(Args[i]...) ? -1 : 0; - else - res[i] = Op(Args[i]...); - return res; } }; diff --git a/sycl/test/check_device_code/vector/vector_math_ops.cpp b/sycl/test/check_device_code/vector/vector_math_ops.cpp index 2f24e0c1aa2a..89e2af23db1c 100644 --- a/sycl/test/check_device_code/vector/vector_math_ops.cpp +++ b/sycl/test/check_device_code/vector/vector_math_ops.cpp @@ -26,48 +26,48 @@ using namespace sycl; // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 8, !tbaa [[TBAA14:![0-9]+]], !noalias [[META17:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META17]] // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <2 x i32> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META18:![0-9]+]] +// CHECK-NEXT: store <2 x i32> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA14]], !alias.scope [[META17]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIfLi3EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.1") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META21:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.1") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.1") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META18:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META28:![0-9]+]] -// CHECK-NEXT: [[LOADVEC4_I6_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META28]] -// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVEC4_I_I_I]], [[LOADVEC4_I6_I_I]] -// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META28]] +// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x float>, ptr [[A]], align 16, !noalias [[META25:![0-9]+]] +// CHECK-NEXT: [[LOADVEC4_I7_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META25]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVEC4_I_I_I]], [[LOADVEC4_I7_I_I]] +// CHECK-NEXT: [[EXTRACTVEC_I9_I_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I9_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META25]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.9") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.9") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.9") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META29:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.9") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.9") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.9") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META26:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META36:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META36]] +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META33:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META33]] // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <16 x i8> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META36]] +// CHECK-NEXT: store <16 x i8> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META33]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.17") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META37:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.17") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.17") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META34:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA14]], !noalias [[META44:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META44]] +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA14]], !noalias [[META41:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META41]] // CHECK-NEXT: [[XOR_I_I_I_I_I:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <8 x i8> [[XOR_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] +// CHECK-NEXT: store <8 x i8> [[XOR_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA14]], !alias.scope [[META41]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestXor(vec a, vec b) { @@ -75,45 +75,57 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META48:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.25") align 4 [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META42:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META55:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B]], align 4, !tbaa [[TBAA14]], !noalias [[META55]] -// CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP1]] -// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne <4 x i8> [[TMP0]], [[ADD_I_I_I_I_I]] -// CHECK-NEXT: [[SEXT_NEG_I_I:%.*]] = zext <4 x i1> [[CMP_I_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_NEG_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META56:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META46:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META49:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[B]], align 4, !tbaa [[TBAA14]], !noalias [[META49]] +// CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] +// CHECK: for.cond.i.i: +// CHECK-NEXT: [[RES_0_I_I:%.*]] = phi <4 x i8> [ [[ADD_I_I_I_I_I]], [[ENTRY:%.*]] ], [ [[VECINS_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] +// CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I]] ] +// CHECK-NEXT: [[CMP_I_I:%.*]] = icmp samesign ult i64 [[I_0_I_I]], 4 +// CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILPLIBEENS0_3VECIBLI4EEERKS4_S6__EXIT:%.*]] +// CHECK: for.body.i.i: +// CHECK-NEXT: [[VECEXT_I_I:%.*]] = extractelement <4 x i8> [[RES_0_I_I]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[CMP8_I_I:%.*]] = icmp ne i8 [[VECEXT_I_I]], 0 +// CHECK-NEXT: [[CONV9_I_I:%.*]] = zext i1 [[CMP8_I_I]] to i8 +// CHECK-NEXT: [[VECINS_I_I]] = insertelement <4 x i8> [[RES_0_I_I]], i8 [[CONV9_I_I]], i64 [[I_0_I_I]] +// CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP50:![0-9]+]] +// CHECK: _ZN4sycl3_V16detailplIbEENS0_3vecIbLi4EEERKS4_S6_.exit: +// CHECK-NEXT: store <4 x i8> [[RES_0_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA14]], !alias.scope [[META49]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.33") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.33") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.33") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META59:![0-9]+]] !sycl_used_aspects [[META60:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.33") align 8 initializes((0, 8)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.33") align 8 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.33") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META52:![0-9]+]] !sycl_used_aspects [[META53:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META68:![0-9]+]] -// CHECK-NEXT: [[LOADVEC4_I6_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META68]] -// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVEC4_I_I_I]], [[LOADVEC4_I6_I_I]] -// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> -// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META69:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x half>, ptr [[A]], align 8, !noalias [[META61:![0-9]+]] +// CHECK-NEXT: [[LOADVEC4_I7_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META61]] +// CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVEC4_I_I_I]], [[LOADVEC4_I7_I_I]] +// CHECK-NEXT: [[EXTRACTVEC_I9_I_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> +// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I9_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA14]], !alias.scope [[META61]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.41") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] !srcloc [[META72:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.41") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] !srcloc [[META62:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.41", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META76:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META73]] -// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META79:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META63:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META63]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META69:![0-9]+]] // CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK: for.cond.i.i: @@ -123,21 +135,21 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK: for.body.i.i: // CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] // CHECK-NEXT: [[ARRAYIDX_I_I_I12_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META80:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8:[0-9]+]], !noalias [[META83:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I12_I_I]]) #[[ATTR8]], !noalias [[META83]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META70:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR9:[0-9]+]], !noalias [[META73:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I12_I_I]]) #[[ATTR9]], !noalias [[META73]] // CHECK-NEXT: [[ADD_I_I_I_I:%.*]] = fadd float [[CALL_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I]] -// CHECK-NEXT: store float [[ADD_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA86:![0-9]+]], !noalias [[META83]] -// CHECK-NEXT: [[CALL_I_I3_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR8]], !noalias [[META83]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META80]] +// CHECK-NEXT: store float [[ADD_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA76:![0-9]+]], !noalias [[META73]] +// CHECK-NEXT: [[CALL_I_I3_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR9]], !noalias [[META73]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META70]] // CHECK-NEXT: [[ARRAYIDX_I_I_I14_I_I:%.*]] = getelementptr inbounds [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I_I_I14_I_I]], align 2, !tbaa [[TBAA88:![0-9]+]], !noalias [[META79]] +// CHECK-NEXT: store i16 [[CALL_I_I3_I_I_I_I]], ptr [[ARRAYIDX_I_I_I14_I_I]], align 2, !tbaa [[TBAA78:![0-9]+]], !noalias [[META69]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP90:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP80:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplINS0_3ext6oneapi8bfloat16EEENS0_3vecIS5_Li3EEERKS7_S9_.exit: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META79]] -// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META79]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META73]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META69]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META69]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META63]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, @@ -148,15 +160,15 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.47") align 64 initializes((0, 64)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.47") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.47") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META92:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.47") align 64 initializes((0, 64)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.47") align 64 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.47") align 64 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META81:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, !tbaa [[TBAA14]], !noalias [[META99:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, !tbaa [[TBAA14]], !noalias [[META99]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>, ptr [[A]], align 64, !tbaa [[TBAA14]], !noalias [[META88:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, !tbaa [[TBAA14]], !noalias [[META88]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <16 x i32> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <16 x i1> [[CMP_I_I_I_I]] to <16 x i32> -// CHECK-NEXT: store <16 x i32> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 64, !alias.scope [[META99]] +// CHECK-NEXT: store <16 x i32> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 64, !tbaa [[TBAA14]], !alias.scope [[META88]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -164,7 +176,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func noundef range(i8 -1, 1) <3 x i8> @_Z15TestGreaterThanN4sycl3_V13vecISt4byteLi3EEES3_( -// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.56") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.56") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] !srcloc [[META100:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.56") align 4 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.56") align 4 [[B:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] !srcloc [[META89:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[LOADVEC4_I_I:%.*]] = load <4 x i8>, ptr [[A]], align 1 // CHECK-NEXT: [[LOADVEC4_I_I2:%.*]] = load <4 x i8>, ptr [[B]], align 1 @@ -178,15 +190,15 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.62") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.68") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.68") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META101:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.62") align 2 initializes((0, 2)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.68") align 2 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.68") align 2 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META90:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, !tbaa [[TBAA14]], !noalias [[META108:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, !tbaa [[TBAA14]], !noalias [[META108]] -// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <2 x i8> [[TMP0]], [[TMP1]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META91:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META94:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x i8>, ptr [[A]], align 2, !tbaa [[TBAA14]], !noalias [[META97:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, !tbaa [[TBAA14]], !noalias [[META97]] +// CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp ugt <2 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i8> -// CHECK-NEXT: store <2 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !alias.scope [[META109:![0-9]+]] +// CHECK-NEXT: store <2 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !tbaa [[TBAA14]], !alias.scope [[META97]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -194,15 +206,15 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.78") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META112:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.78") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[A:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META98:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META119:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META119]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META105:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META105]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp ogt <8 x half> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <8 x i1> [[CMP_I_I_I_I]] to <8 x i16> -// CHECK-NEXT: store <8 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META119]] +// CHECK-NEXT: store <8 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META105]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -210,15 +222,15 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.94") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.100") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.100") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META120:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.94") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.100") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.100") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META106:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.94", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) // CHECK-NEXT: [[B_ASCAST:%.*]] = addrspacecast ptr [[B]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META121:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META124:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META121]] -// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META127:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META107]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META113:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK: for.cond.i.i: // CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] @@ -227,18 +239,18 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK: for.body.i.i: // CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] // CHECK-NEXT: [[ARRAYIDX_I_I_I14_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[B_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8]], !noalias [[META127]] -// CHECK-NEXT: [[CALL_I_I2_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I14_I_I]]) #[[ATTR8]], !noalias [[META127]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR9]], !noalias [[META113]] +// CHECK-NEXT: [[CALL_I_I2_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I14_I_I]]) #[[ATTR9]], !noalias [[META113]] // CHECK-NEXT: [[CMP_I_I_I_I_I:%.*]] = fcmp ogt float [[CALL_I_I_I_I_I_I_I]], [[CALL_I_I2_I_I_I_I_I]] // CHECK-NEXT: [[CONV6_I_I:%.*]] = sext i1 [[CMP_I_I_I_I_I]] to i16 // CHECK-NEXT: [[ARRAYIDX_I_I_I16_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CONV6_I_I]], ptr [[ARRAYIDX_I_I_I16_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META127]] +// CHECK-NEXT: store i16 [[CONV6_I_I]], ptr [[ARRAYIDX_I_I_I16_I_I]], align 2, !tbaa [[TBAA78]], !noalias [[META113]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP128:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP114:![0-9]+]] // CHECK: _ZN4sycl3_V16detailgtINS0_3ext6oneapi8bfloat16EEENS0_3vecIsLi4EEERKNS6_IS5_Li4EEESA_.exit: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META127]] -// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META127]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META121]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META113]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META113]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META107]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, @@ -249,92 +261,92 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.105") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.105") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META129:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.105") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.105") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META115:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) -// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META136:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META119:![0-9]+]]) +// CHECK-NEXT: [[LOADVEC4_I_I_I:%.*]] = load <4 x i32>, ptr [[A]], align 16, !noalias [[META122:![0-9]+]] // CHECK-NEXT: [[EXTRACTVEC_I_I_I:%.*]] = shufflevector <4 x i32> [[LOADVEC4_I_I_I]], <4 x i32> poison, <3 x i32> // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I_I]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <3 x i1> [[CMP_I_I_I_I]] to <3 x i32> -// CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <3 x i32> [[SEXT_I_I_I_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META136]] +// CHECK-NEXT: [[EXTRACTVEC_I4_I_I:%.*]] = shufflevector <3 x i32> [[SEXT_I_I_I_I]], <3 x i32> poison, <4 x i32> +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I4_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META122]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.112") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.112") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META137:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.112") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.112") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META123:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META144:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META124:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META127:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META130:![0-9]+]] // CHECK-NEXT: [[SUB_I_I_I_I:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]] -// CHECK-NEXT: store <4 x i32> [[SUB_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META144]] +// CHECK-NEXT: store <4 x i32> [[SUB_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META130]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.118") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.118") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META145:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.118") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.118") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META131:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META146:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META149:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META152:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META132:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META135:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META138:![0-9]+]] // CHECK-NEXT: [[NOT_I_I_I_I:%.*]] = xor <16 x i8> [[TMP0]], splat (i8 -1) -// CHECK-NEXT: store <16 x i8> [[NOT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META152]] +// CHECK-NEXT: store <16 x i8> [[NOT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META138]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.125") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META153:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.125") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.25") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META139:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META157:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META160:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META140:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META143:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META146:![0-9]+]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <4 x i1> [[CMP_I_I_I_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META161:![0-9]+]] +// CHECK-NEXT: store <4 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA14]], !alias.scope [[META146]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.132") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.138") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META164:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.132") align 4 initializes((0, 4)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.138") align 4 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META147:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META171:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META148:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META151:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META154:![0-9]+]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp oeq <2 x half> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i16> -// CHECK-NEXT: store <2 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META172:![0-9]+]] +// CHECK-NEXT: store <2 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !tbaa [[TBAA14]], !alias.scope [[META154]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.84") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META175:![0-9]+]] !sycl_used_aspects [[META60]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.84") align 16 initializes((0, 16)) [[AGG_RESULT:%.*]], ptr nocapture noundef readonly byval(%"class.sycl::_V1::vec.84") align 16 [[A:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META155:![0-9]+]] !sycl_used_aspects [[META53]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META176:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META179:![0-9]+]]) -// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META182:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META156:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META159:![0-9]+]]) +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META162:![0-9]+]] // CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg <8 x half> [[TMP0]] -// CHECK-NEXT: store <8 x half> [[FNEG_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META182]] +// CHECK-NEXT: store <8 x half> [[FNEG_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !tbaa [[TBAA14]], !alias.scope [[META162]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META183:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.41") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META163:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.146", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META184:![0-9]+]]) -// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META187:![0-9]+]]) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META184]] -// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META190:![0-9]+]] +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META164:![0-9]+]]) +// CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META167:![0-9]+]]) +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META164]] +// CHECK-NEXT: store i64 0, ptr [[RES_I_I]], align 8, !noalias [[META170:![0-9]+]] // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK: for.cond.i.i: // CHECK-NEXT: [[I_0_I_I:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC_I_I:%.*]], [[FOR_BODY_I_I:%.*]] ] @@ -342,29 +354,29 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNTERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI3EEE_EXIT:%.*]] // CHECK: for.body.i.i: // CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8]], !noalias [[META190]] +// CHECK-NEXT: [[CALL_I_I_I_I_I:%.*]] = call spir_func noundef float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) dereferenceable_or_null(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR9]], !noalias [[META170]] // CHECK-NEXT: [[TOBOOL_I_I_I:%.*]] = fcmp oeq float [[CALL_I_I_I_I_I]], 0.000000e+00 // CHECK-NEXT: [[CONV2_I_I:%.*]] = sext i1 [[TOBOOL_I_I_I]] to i16 // CHECK-NEXT: [[ARRAYIDX_I_I_I9_I_I:%.*]] = getelementptr inbounds [4 x i16], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CONV2_I_I]], ptr [[ARRAYIDX_I_I_I9_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META190]] +// CHECK-NEXT: store i16 [[CONV2_I_I]], ptr [[ARRAYIDX_I_I_I9_I_I]], align 2, !tbaa [[TBAA78]], !noalias [[META170]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP191:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP171:![0-9]+]] // CHECK: _ZN4sycl3_V16detailntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: -// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META190]] -// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META190]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META184]] +// CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META170]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META170]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META164]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.151") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.151") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] !srcloc [[META192:![0-9]+]] !sycl_fixed_targets [[META7]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias nocapture writable writeonly sret(%"class.sycl::_V1::vec.151") align 32 [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.151") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR2]] !srcloc [[META172:![0-9]+]] !sycl_fixed_targets [[META7]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.151", align 32 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193:![0-9]+]] -// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[RES_I_I]], i8 0, i64 32, i1 false), !noalias [[META196:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META173:![0-9]+]] +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[RES_I_I]], i8 0, i64 32, i1 false), !noalias [[META176:![0-9]+]] // CHECK-NEXT: [[REF_TMP_ASCAST_I_I_I_I:%.*]] = addrspacecast ptr [[REF_TMP_I_I_I_I]] to ptr addrspace(4) // CHECK-NEXT: br label [[FOR_COND_I_I:%.*]] // CHECK: for.cond.i.i: @@ -373,19 +385,19 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: br i1 [[CMP_I_I]], label [[FOR_BODY_I_I]], label [[_ZN4SYCL3_V16DETAILNGERKNS0_3VECINS0_3EXT6ONEAPI8BFLOAT16ELI16EEE_EXIT:%.*]] // CHECK: for.body.i.i: // CHECK-NEXT: [[ARRAYIDX_I_I_I_I_I:%.*]] = getelementptr inbounds nuw [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr addrspace(4) [[A_ASCAST]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META199:![0-9]+]] -// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR8]], !noalias [[META202:![0-9]+]] +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META179:![0-9]+]] +// CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func float @__devicelib_ConvertBF16ToFINTEL(ptr addrspace(4) noundef align 2 dereferenceable(2) [[ARRAYIDX_I_I_I_I_I]]) #[[ATTR9]], !noalias [[META182:![0-9]+]] // CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg float [[CALL_I_I_I_I]] -// CHECK-NEXT: store float [[FNEG_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA86]], !noalias [[META202]] -// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR8]], !noalias [[META202]] -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META199]] +// CHECK-NEXT: store float [[FNEG_I_I_I_I]], ptr [[REF_TMP_I_I_I_I]], align 4, !tbaa [[TBAA76]], !noalias [[META182]] +// CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef zeroext i16 @__devicelib_ConvertFToBF16INTEL(ptr addrspace(4) noundef align 4 dereferenceable(4) [[REF_TMP_ASCAST_I_I_I_I]]) #[[ATTR9]], !noalias [[META182]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[REF_TMP_I_I_I_I]]), !noalias [[META179]] // CHECK-NEXT: [[ARRAYIDX_I_I_I7_I_I:%.*]] = getelementptr inbounds [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], ptr [[RES_I_I]], i64 0, i64 [[I_0_I_I]] -// CHECK-NEXT: store i16 [[CALL_I_I_I_I_I_I]], ptr [[ARRAYIDX_I_I_I7_I_I]], align 2, !tbaa [[TBAA88]], !noalias [[META196]] +// CHECK-NEXT: store i16 [[CALL_I_I_I_I_I_I]], ptr [[ARRAYIDX_I_I_I7_I_I]], align 2, !tbaa [[TBAA78]], !noalias [[META176]] // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 -// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP205:![0-9]+]] +// CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP185:![0-9]+]] // CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: // CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 32 [[AGG_RESULT]], ptr align 32 [[RES_I_I]], i64 32, i1 false) -// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193]] +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META173]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; }