["SSSE3"]
-
- * [ ] [`_mm_abs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi16)
- * [ ] [`_mm_abs_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi32)
- * [ ] [`_mm_abs_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi8)
- * [ ] [`_mm_alignr_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_pi8)
- * [ ] [`_mm_hadd_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi16)
- * [ ] [`_mm_hadd_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi32)
- * [ ] [`_mm_hadds_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_pi16)
- * [ ] [`_mm_hsub_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pi16)
- * [ ] [`_mm_hsub_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pi32)
- * [ ] [`_mm_hsubs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_pi16)
- * [ ] [`_mm_maddubs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_pi16)
- * [ ] [`_mm_mulhrs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_pi16)
- * [ ] [`_mm_shuffle_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pi8)
- * [ ] [`_mm_sign_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi16)
- * [ ] [`_mm_sign_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi32)
- * [ ] [`_mm_sign_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi8)
+["SSE4.1"]
+
+ * [ ] [`_mm_stream_load_si128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_load_si128)
diff --git a/crates/core_arch/src/x86/avx.rs b/crates/core_arch/src/x86/avx.rs
index 3a993bf785..db17be9f21 100644
--- a/crates/core_arch/src/x86/avx.rs
+++ b/crates/core_arch/src/x86/avx.rs
@@ -1715,11 +1715,11 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx")]
-#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
+#[cfg_attr(test, assert_instr(vmovntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
crate::arch::asm!(
- "vmovntps [{mem_addr}], {a}",
+ "vmovntdq [{mem_addr}], {a}",
mem_addr = in(reg) mem_addr,
a = in(ymm_reg) a,
options(nostack, preserves_flags),
@@ -1742,12 +1742,12 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx")]
-#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
+#[cfg_attr(test, assert_instr(vmovntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
crate::arch::asm!(
- "vmovntps [{mem_addr}], {a}",
+ "vmovntpd [{mem_addr}], {a}",
mem_addr = in(reg) mem_addr,
a = in(ymm_reg) a,
options(nostack, preserves_flags),
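
These two hunks make the emitted instruction match the intrinsic's element type: `vmovntdq` for the integer store and `vmovntpd` for the `f64` store, where both previously used `vmovntps`. Non-temporal stores bypass the cache and are weakly ordered, so callers normally follow them with `_mm_sfence`. A minimal usage sketch (not part of the patch, assuming an AVX-capable x86_64 target):

```rust
use core::arch::x86_64::{__m256i, _mm256_stream_si256, _mm_sfence};

/// Fill `dst` with `v` using non-temporal stores, then fence so the
/// stores become visible to other threads in order.
#[target_feature(enable = "avx")]
unsafe fn stream_fill(dst: &mut [__m256i], v: __m256i) {
    for slot in dst.iter_mut() {
        // `&mut __m256i` is 32-byte aligned, satisfying vmovntdq's requirement.
        _mm256_stream_si256(slot, v);
    }
    _mm_sfence();
}
```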
diff --git a/crates/core_arch/src/x86/avx2.rs b/crates/core_arch/src/x86/avx2.rs
index 5227b549dd..f3dd0c8e4a 100644
--- a/crates/core_arch/src/x86/avx2.rs
+++ b/crates/core_arch/src/x86/avx2.rs
@@ -3124,8 +3124,6 @@ pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
transmute(psrlvq256(a.as_i64x4(), count.as_i64x4()))
}
-// TODO _mm256_stream_load_si256 (__m256i const* mem_addr)
-
/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16)
diff --git a/crates/core_arch/src/x86/avx512bw.rs b/crates/core_arch/src/x86/avx512bw.rs
index 936d4a0041..2e3d2cc913 100644
--- a/crates/core_arch/src/x86/avx512bw.rs
+++ b/crates/core_arch/src/x86/avx512bw.rs
@@ -4583,7 +4583,7 @@ pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *con
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4603,7 +4603,7 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4623,7 +4623,7 @@ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *cons
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4643,7 +4643,7 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4663,7 +4663,7 @@ pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *con
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4683,7 +4683,7 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4703,7 +4703,7 @@ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *cons
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4723,7 +4723,7 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4743,7 +4743,7 @@ pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4763,7 +4763,7 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4783,7 +4783,7 @@ pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4803,7 +4803,7 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -4821,7 +4821,7 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(zmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -4838,7 +4838,7 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(zmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -4855,7 +4855,7 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(ymm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -4872,7 +4872,7 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(ymm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -4889,7 +4889,7 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(xmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -4906,7 +4906,7 @@ pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(xmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
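
The masked load/store intrinsics in this file are written as inline `asm!` blocks, and the hunks above add `preserves_flags` to their options: the masked `vmovdqu8`/`vmovdqu16` forms do not write EFLAGS, so declaring that lets the compiler keep flag-dependent values live across the asm instead of conservatively rematerializing them. An illustrative block (not from the patch) showing the same option on another flag-neutral instruction:

```rust
use core::arch::asm;

/// `lea` computes an address-style sum without touching EFLAGS, so the
/// block can declare `preserves_flags` (plus `pure`/`nomem`, since it
/// neither reads nor writes memory).
#[cfg(target_arch = "x86_64")]
fn add_via_lea(x: u64, y: u64) -> u64 {
    let out: u64;
    unsafe {
        asm!(
            "lea {out}, [{x} + {y}]",
            x = in(reg) x,
            y = in(reg) y,
            out = lateout(reg) out,
            options(pure, nomem, nostack, preserves_flags),
        );
    }
    out
}
```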
@@ -8761,7 +8761,7 @@ pub unsafe fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) ->
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] //should be kmovq
-pub unsafe fn _store_mask64(mem_addr: *mut u64, a: __mmask64) {
+pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
ptr::write(mem_addr as *mut __mmask64, a);
}
@@ -8772,7 +8772,7 @@ pub unsafe fn _store_mask64(mem_addr: *mut u64, a: __mmask64) {
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] //should be kmovd
-pub unsafe fn _store_mask32(mem_addr: *mut u32, a: __mmask32) {
+pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
ptr::write(mem_addr as *mut __mmask32, a);
}
@@ -8783,7 +8783,7 @@ pub unsafe fn _store_mask32(mem_addr: *mut u32, a: __mmask32) {
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] //should be kmovq
-pub unsafe fn _load_mask64(mem_addr: *const u64) -> __mmask64 {
+pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
ptr::read(mem_addr as *const __mmask64)
}
@@ -8794,7 +8794,7 @@ pub unsafe fn _load_mask64(mem_addr: *const u64) -> __mmask64 {
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] //should be kmovd
-pub unsafe fn _load_mask32(mem_addr: *const u32) -> __mmask32 {
+pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
ptr::read(mem_addr as *const __mmask32)
}
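
The `_store_mask*`/`_load_mask*` signatures now take `*mut __mmask64` / `*const __mmask32` rather than bare `u64`/`u32` pointers; since the mask types are integer aliases, the bodies are unchanged and the internal casts become no-ops. A round-trip sketch under the new signatures (nightly-only, assuming the unstable `stdarch_x86_avx512` feature and an AVX512BW-capable CPU):

```rust
use core::arch::x86_64::{__mmask64, _load_mask64, _store_mask64};

#[target_feature(enable = "avx512bw")]
unsafe fn roundtrip(m: __mmask64) -> __mmask64 {
    let mut slot: __mmask64 = 0;
    _store_mask64(&mut slot, m); // pointer type now matches, no cast needed
    _load_mask64(&slot)
}
```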
diff --git a/crates/core_arch/src/x86/avx512f.rs b/crates/core_arch/src/x86/avx512f.rs
index 99d63734fd..20ca9d2a6d 100644
--- a/crates/core_arch/src/x86/avx512f.rs
+++ b/crates/core_arch/src/x86/avx512f.rs
@@ -50,7 +50,7 @@ pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
let a = a.as_i32x16();
// all-0 is a properly initialized i32x16
let zero: i32x16 = mem::zeroed();
- let sub = simd_sub(zero, a);
+ let sub = simd_neg(a);
let cmp: i32x16 = simd_gt(a, zero);
transmute(simd_select(cmp, a, sub))
}
@@ -145,7 +145,7 @@ pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
let a = a.as_i64x8();
// all-0 is a properly initialized i64x8
let zero: i64x8 = mem::zeroed();
- let sub = simd_sub(zero, a);
+ let sub = simd_neg(a);
let cmp: i64x8 = simd_gt(a, zero);
transmute(simd_select(cmp, a, sub))
}
@@ -186,7 +186,7 @@ pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i {
let a = a.as_i64x4();
// all-0 is a properly initialized i64x4
let zero: i64x4 = mem::zeroed();
- let sub = simd_sub(zero, a);
+ let sub = simd_neg(a);
let cmp: i64x4 = simd_gt(a, zero);
transmute(simd_select(cmp, a, sub))
}
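
`simd_neg(a)` is the dedicated lane-wise negation and replaces the equivalent but noisier `simd_sub(zero, a)`. Combined with the surrounding `simd_gt`/`simd_select`, the sequence computes an absolute value; element-wise it amounts to this sketch of the semantics (not the actual lowering):

```rust
/// What `simd_gt` + `simd_select` + `simd_neg` compute per lane.
/// Note that `i32::MIN` maps to itself, matching `vpabsd`'s wrapping behavior.
fn abs_lanewise(a: [i32; 16]) -> [i32; 16] {
    a.map(|x| if x > 0 { x } else { x.wrapping_neg() })
}
```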
@@ -3414,11 +3414,13 @@ pub unsafe fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
- transmute(vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub))
+ transmute(vfmadd132ps(
+ a.as_f32x16(),
+ b.as_f32x16(),
+ simd_neg(c.as_f32x16()),
+ ))
}
/// Multiply packed single-precision (32-bit) floating-point elements in a and b, subtract packed elements in c from the intermediate result, and store the results in dst using writemask k (elements are copied from a when the corresponding mask bit is not set).
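
The fmsub family is routed through `vfmadd132ps` using the identity fmsub(a, b, c) = fma(a, b, −c); `simd_neg` supplies the negated `c`, and the assert now checks for the `vfmsub` mnemonic the backend actually emits. Scalar form of the identity:

```rust
/// fmsub via fma: a * b - c with a single rounding step.
fn fmsub_scalar(a: f32, b: f32, c: f32) -> f32 {
    a.mul_add(b, -c)
}
```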
@@ -3427,7 +3429,7 @@ pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
@@ -3439,7 +3441,7 @@ pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
@@ -3452,7 +3454,7 @@ pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m51
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generate vfmadd, gcc generate vfmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
@@ -3538,10 +3540,9 @@ pub unsafe fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
+ let sub = simd_neg(c.as_f64x8());
transmute(vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub))
}
@@ -3551,7 +3552,7 @@ pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
@@ -3563,7 +3564,7 @@ pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m51
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
@@ -3576,7 +3577,7 @@ pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m5
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang fmadd, gcc fmsub
+#[cfg_attr(test, assert_instr(vfmsub))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
@@ -3916,10 +3917,9 @@ pub unsafe fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mma
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
+ let sub = simd_neg(c.as_f32x16());
transmute(vfmaddsub213ps(
a.as_f32x16(),
b.as_f32x16(),
@@ -3934,7 +3934,7 @@ pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
@@ -3946,7 +3946,7 @@ pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
@@ -3959,7 +3959,7 @@ pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
@@ -4045,10 +4045,9 @@ pub unsafe fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
+ let sub = simd_neg(c.as_f64x8());
transmute(vfmaddsub213pd(
a.as_f64x8(),
b.as_f64x8(),
@@ -4063,7 +4062,7 @@ pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
@@ -4075,7 +4074,7 @@ pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
@@ -4088,7 +4087,7 @@ pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: _
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
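
Similarly, fmsubadd is expressed through the available `vfmaddsub213*` by negating `c`: fmsubadd(a, b, c) = fmaddsub(a, b, −c), which flips each lane's add into a subtract and vice versa. Per-lane sketch for one even/odd pair:

```rust
/// fmsubadd adds `c` on even lanes and subtracts it on odd lanes.
fn fmsubadd_pair(a: [f32; 2], b: [f32; 2], c: [f32; 2]) -> [f32; 2] {
    [
        a[0].mul_add(b[0], c[0]),  // even lane: a*b + c
        a[1].mul_add(b[1], -c[1]), // odd lane:  a*b - c
    ]
}
```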
@@ -4174,10 +4173,9 @@ pub unsafe fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mma
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f32x16());
+ let sub = simd_neg(a.as_f32x16());
transmute(vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16()))
}
@@ -4187,7 +4185,7 @@ pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
@@ -4199,7 +4197,7 @@ pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m51
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
@@ -4212,7 +4210,7 @@ pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m5
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
@@ -4298,10 +4296,9 @@ pub unsafe fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f64x8());
+ let sub = simd_neg(a.as_f64x8());
transmute(vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8()))
}
@@ -4311,7 +4308,7 @@ pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
@@ -4323,7 +4320,7 @@ pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m5
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
@@ -4336,7 +4333,7 @@ pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
@@ -4422,11 +4419,10 @@ pub unsafe fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
- let zero: f32x16 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f32x16());
- let subc = simd_sub(zero, c.as_f32x16());
+ let suba = simd_neg(a.as_f32x16());
+ let subc = simd_neg(c.as_f32x16());
transmute(vfmadd132ps(suba, b.as_f32x16(), subc))
}
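
The fnmadd/fnmsub variants apply the same trick to the other operand: fnmadd(a, b, c) = fma(−a, b, c) and fnmsub(a, b, c) = fma(−a, b, −c), hence the `simd_neg` on `a` (and also on `c` for fnmsub). Scalar forms:

```rust
fn fnmadd_scalar(a: f32, b: f32, c: f32) -> f32 {
    (-a).mul_add(b, c) // -(a*b) + c
}

fn fnmsub_scalar(a: f32, b: f32, c: f32) -> f32 {
    (-a).mul_add(b, -c) // -(a*b) - c
}
```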
@@ -4436,7 +4432,7 @@ pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
@@ -4448,7 +4444,7 @@ pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m51
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
@@ -4461,7 +4457,7 @@ pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m5
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
@@ -4547,11 +4543,10 @@ pub unsafe fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8)
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
- let zero: f64x8 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f64x8());
- let subc = simd_sub(zero, c.as_f64x8());
+ let suba = simd_neg(a.as_f64x8());
+ let subc = simd_neg(c.as_f64x8());
transmute(vfmadd132pd(suba, b.as_f64x8(), subc))
}
@@ -4561,7 +4556,7 @@ pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
@@ -4573,7 +4568,7 @@ pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m5
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
@@ -4586,7 +4581,7 @@ pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
@@ -8377,7 +8372,7 @@ pub unsafe fn _mm512_mask3_fmadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fmsub_round_ps(
a: __m512,
@@ -8385,8 +8380,7 @@ pub unsafe fn _mm512_fmsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
+ let sub = simd_neg(c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
let r = vfmadd132psround(a, b, sub, ROUNDING);
@@ -8406,7 +8400,7 @@ pub unsafe fn _mm512_fmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fmsub_round_ps(
a: __m512,
@@ -8415,8 +8409,7 @@ pub unsafe fn _mm512_mask_fmsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
+ let sub = simd_neg(c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
let r = vfmadd132psround(a, b, sub, ROUNDING);
@@ -8436,7 +8429,7 @@ pub unsafe fn _mm512_mask_fmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fmsub_round_ps(
k: __mmask16,
@@ -8446,7 +8439,7 @@ pub unsafe fn _mm512_maskz_fmsub_round_ps(
) -> __m512 {
static_assert_rounding!(ROUNDING);
let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
+ let sub = simd_neg(c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
let r = vfmadd132psround(a, b, sub, ROUNDING);
@@ -8466,7 +8459,7 @@ pub unsafe fn _mm512_maskz_fmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132ps or vfmsub213ps or vfmsub231ps, clang generates vfmadd, gcc generates vfmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fmsub_round_ps(
a: __m512,
@@ -8475,9 +8468,8 @@ pub unsafe fn _mm512_mask3_fmsub_round_ps(
k: __mmask16,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
let c = c.as_f32x16();
- let sub = simd_sub(zero, c);
+ let sub = simd_neg(c);
let a = a.as_f32x16();
let b = b.as_f32x16();
let r = vfmadd132psround(a, b, sub, ROUNDING);
@@ -8497,7 +8489,7 @@ pub unsafe fn _mm512_mask3_fmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fmsub_round_pd(
a: __m512d,
@@ -8505,8 +8497,7 @@ pub unsafe fn _mm512_fmsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
+ let sub = simd_neg(c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
let r = vfmadd132pdround(a, b, sub, ROUNDING);
@@ -8526,7 +8517,7 @@ pub unsafe fn _mm512_fmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fmsub_round_pd(
a: __m512d,
@@ -8535,8 +8526,7 @@ pub unsafe fn _mm512_mask_fmsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
+ let sub = simd_neg(c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
let r = vfmadd132pdround(a, b, sub, ROUNDING);
@@ -8556,7 +8546,7 @@ pub unsafe fn _mm512_mask_fmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fmsub_round_pd(
k: __mmask8,
@@ -8566,7 +8556,7 @@ pub unsafe fn _mm512_maskz_fmsub_round_pd(
) -> __m512d {
static_assert_rounding!(ROUNDING);
let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
+ let sub = simd_neg(c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
let r = vfmadd132pdround(a, b, sub, ROUNDING);
@@ -8586,7 +8576,7 @@ pub unsafe fn _mm512_maskz_fmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
+#[cfg_attr(test, assert_instr(vfmsub, ROUNDING = 8))] //vfmsub132pd or vfmsub213pd or vfmsub231pd. clang generates fmadd, gcc generates fmsub
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fmsub_round_pd(
a: __m512d,
@@ -8595,9 +8585,8 @@ pub unsafe fn _mm512_mask3_fmsub_round_pd(
k: __mmask8,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
let c = c.as_f64x8();
- let sub = simd_sub(zero, c);
+ let sub = simd_neg(c);
let a = a.as_f64x8();
let b = b.as_f64x8();
let r = vfmadd132pdround(a, b, sub, ROUNDING);
@@ -8849,7 +8838,7 @@ pub unsafe fn _mm512_mask3_fmaddsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fmsubadd_round_ps(
a: __m512,
@@ -8857,8 +8846,7 @@ pub unsafe fn _mm512_fmsubadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
+ let sub = simd_neg(c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
let r = vfmaddsub213ps(a, b, sub, ROUNDING);
@@ -8878,7 +8866,7 @@ pub unsafe fn _mm512_fmsubadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fmsubadd_round_ps(
a: __m512,
@@ -8887,8 +8875,7 @@ pub unsafe fn _mm512_mask_fmsubadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
+ let sub = simd_neg(c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
let r = vfmaddsub213ps(a, b, sub, ROUNDING);
@@ -8908,7 +8895,7 @@ pub unsafe fn _mm512_mask_fmsubadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
k: __mmask16,
@@ -8918,7 +8905,7 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
) -> __m512 {
static_assert_rounding!(ROUNDING);
let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f32x16());
+ let sub = simd_neg(c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
let r = vfmaddsub213ps(a, b, sub, ROUNDING);
@@ -8938,7 +8925,7 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132ps or vfmsubadd213ps or vfmsubadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
a: __m512,
@@ -8947,9 +8934,8 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
k: __mmask16,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
let c = c.as_f32x16();
- let sub = simd_sub(zero, c);
+ let sub = simd_neg(c);
let a = a.as_f32x16();
let b = b.as_f32x16();
let r = vfmaddsub213ps(a, b, sub, ROUNDING);
@@ -8969,7 +8955,7 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fmsubadd_round_pd(
a: __m512d,
@@ -8977,8 +8963,7 @@ pub unsafe fn _mm512_fmsubadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
+ let sub = simd_neg(c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
let r = vfmaddsub213pd(a, b, sub, ROUNDING);
@@ -8998,7 +8983,7 @@ pub unsafe fn _mm512_fmsubadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fmsubadd_round_pd(
a: __m512d,
@@ -9007,8 +8992,7 @@ pub unsafe fn _mm512_mask_fmsubadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
+ let sub = simd_neg(c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
let r = vfmaddsub213pd(a, b, sub, ROUNDING);
@@ -9028,7 +9012,7 @@ pub unsafe fn _mm512_mask_fmsubadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
k: __mmask8,
@@ -9038,7 +9022,7 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
) -> __m512d {
static_assert_rounding!(ROUNDING);
let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, c.as_f64x8());
+ let sub = simd_neg(c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
let r = vfmaddsub213pd(a, b, sub, ROUNDING);
@@ -9058,7 +9042,7 @@ pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmaddsub, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
+#[cfg_attr(test, assert_instr(vfmsubadd, ROUNDING = 8))] //vfmsubadd132pd or vfmsubadd213pd or vfmsubadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
a: __m512d,
@@ -9067,9 +9051,8 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
k: __mmask8,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
let c = c.as_f64x8();
- let sub = simd_sub(zero, c);
+ let sub = simd_neg(c);
let a = a.as_f64x8();
let b = b.as_f64x8();
let r = vfmaddsub213pd(a, b, sub, ROUNDING);
@@ -9089,7 +9072,7 @@ pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fnmadd_round_ps(
a: __m512,
@@ -9097,8 +9080,7 @@ pub unsafe fn _mm512_fnmadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f32x16());
+ let sub = simd_neg(a.as_f32x16());
let b = b.as_f32x16();
let c = c.as_f32x16();
let r = vfmadd132psround(sub, b, c, ROUNDING);
@@ -9118,7 +9100,7 @@ pub unsafe fn _mm512_fnmadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fnmadd_round_ps(
a: __m512,
@@ -9127,8 +9109,7 @@ pub unsafe fn _mm512_mask_fnmadd_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f32x16());
+ let sub = simd_neg(a.as_f32x16());
let b = b.as_f32x16();
let c = c.as_f32x16();
let r = vfmadd132psround(sub, b, c, ROUNDING);
@@ -9148,7 +9129,7 @@ pub unsafe fn _mm512_mask_fnmadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fnmadd_round_ps(
k: __mmask16,
@@ -9158,7 +9139,7 @@ pub unsafe fn _mm512_maskz_fnmadd_round_ps(
) -> __m512 {
static_assert_rounding!(ROUNDING);
let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f32x16());
+ let sub = simd_neg(a.as_f32x16());
let b = b.as_f32x16();
let c = c.as_f32x16();
let r = vfmadd132psround(sub, b, c, ROUNDING);
@@ -9178,7 +9159,7 @@ pub unsafe fn _mm512_maskz_fnmadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132ps or vfnmadd213ps or vfnmadd231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fnmadd_round_ps(
a: __m512,
@@ -9187,8 +9168,7 @@ pub unsafe fn _mm512_mask3_fnmadd_round_ps(
k: __mmask16,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f32x16());
+ let sub = simd_neg(a.as_f32x16());
let b = b.as_f32x16();
let c = c.as_f32x16();
let r = vfmadd132psround(sub, b, c, ROUNDING);
@@ -9208,7 +9188,7 @@ pub unsafe fn _mm512_mask3_fnmadd_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fnmadd_round_pd(
a: __m512d,
@@ -9216,8 +9196,7 @@ pub unsafe fn _mm512_fnmadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f64x8());
+ let sub = simd_neg(a.as_f64x8());
let b = b.as_f64x8();
let c = c.as_f64x8();
let r = vfmadd132pdround(sub, b, c, ROUNDING);
@@ -9237,7 +9216,7 @@ pub unsafe fn _mm512_fnmadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fnmadd_round_pd(
a: __m512d,
@@ -9246,9 +9225,8 @@ pub unsafe fn _mm512_mask_fnmadd_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
let a = a.as_f64x8();
- let sub = simd_sub(zero, a);
+ let sub = simd_neg(a);
let b = b.as_f64x8();
let c = c.as_f64x8();
let r = vfmadd132pdround(sub, b, c, ROUNDING);
@@ -9268,7 +9246,7 @@ pub unsafe fn _mm512_mask_fnmadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fnmadd_round_pd(
k: __mmask8,
@@ -9278,7 +9256,7 @@ pub unsafe fn _mm512_maskz_fnmadd_round_pd(
) -> __m512d {
static_assert_rounding!(ROUNDING);
let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f64x8());
+ let sub = simd_neg(a.as_f64x8());
let b = b.as_f64x8();
let c = c.as_f64x8();
let r = vfmadd132pdround(sub, b, c, ROUNDING);
@@ -9298,7 +9276,7 @@ pub unsafe fn _mm512_maskz_fnmadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
+#[cfg_attr(test, assert_instr(vfnmadd, ROUNDING = 8))] //vfnmadd132pd or vfnmadd213pd or vfnmadd231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fnmadd_round_pd(
a: __m512d,
@@ -9307,8 +9285,7 @@ pub unsafe fn _mm512_mask3_fnmadd_round_pd(
k: __mmask8,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let sub = simd_sub(zero, a.as_f64x8());
+ let sub = simd_neg(a.as_f64x8());
let b = b.as_f64x8();
let c = c.as_f64x8();
let r = vfmadd132pdround(sub, b, c, ROUNDING);
@@ -9328,7 +9305,7 @@ pub unsafe fn _mm512_mask3_fnmadd_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fnmsub_round_ps(
a: __m512,
@@ -9336,9 +9313,8 @@ pub unsafe fn _mm512_fnmsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f32x16());
- let subc = simd_sub(zero, c.as_f32x16());
+ let suba = simd_neg(a.as_f32x16());
+ let subc = simd_neg(c.as_f32x16());
let b = b.as_f32x16();
let r = vfmadd132psround(suba, b, subc, ROUNDING);
transmute(r)
@@ -9357,7 +9333,7 @@ pub unsafe fn _mm512_fnmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fnmsub_round_ps(
a: __m512,
@@ -9366,10 +9342,9 @@ pub unsafe fn _mm512_mask_fnmsub_round_ps(
c: __m512,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
let a = a.as_f32x16();
- let suba = simd_sub(zero, a);
- let subc = simd_sub(zero, c.as_f32x16());
+ let suba = simd_neg(a);
+ let subc = simd_neg(c.as_f32x16());
let b = b.as_f32x16();
let r = vfmadd132psround(suba, b, subc, ROUNDING);
transmute(simd_select_bitmask(k, r, a))
@@ -9388,7 +9363,7 @@ pub unsafe fn _mm512_mask_fnmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_ps(
k: __mmask16,
@@ -9398,8 +9373,8 @@ pub unsafe fn _mm512_maskz_fnmsub_round_ps(
) -> __m512 {
static_assert_rounding!(ROUNDING);
let zero: f32x16 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f32x16());
- let subc = simd_sub(zero, c.as_f32x16());
+ let suba = simd_neg(a.as_f32x16());
+ let subc = simd_neg(c.as_f32x16());
let b = b.as_f32x16();
let r = vfmadd132psround(suba, b, subc, ROUNDING);
transmute(simd_select_bitmask(k, r, zero))
@@ -9418,7 +9393,7 @@ pub unsafe fn _mm512_maskz_fnmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132ps or vfnmsub213ps or vfnmsub231ps
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_ps(
a: __m512,
@@ -9427,10 +9402,9 @@ pub unsafe fn _mm512_mask3_fnmsub_round_ps(
k: __mmask16,
) -> __m512 {
static_assert_rounding!(ROUNDING);
- let zero: f32x16 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f32x16());
+ let suba = simd_neg(a.as_f32x16());
let c = c.as_f32x16();
- let subc = simd_sub(zero, c);
+ let subc = simd_neg(c);
let b = b.as_f32x16();
let r = vfmadd132psround(suba, b, subc, ROUNDING);
transmute(simd_select_bitmask(k, r, c))
@@ -9449,7 +9423,7 @@ pub unsafe fn _mm512_mask3_fnmsub_round_ps(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_fnmsub_round_pd(
a: __m512d,
@@ -9457,9 +9431,8 @@ pub unsafe fn _mm512_fnmsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f64x8());
- let subc = simd_sub(zero, c.as_f64x8());
+ let suba = simd_neg(a.as_f64x8());
+ let subc = simd_neg(c.as_f64x8());
let b = b.as_f64x8();
let r = vfmadd132pdround(suba, b, subc, ROUNDING);
transmute(r)
@@ -9478,7 +9451,7 @@ pub unsafe fn _mm512_fnmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_fnmsub_round_pd(
a: __m512d,
@@ -9487,10 +9460,9 @@ pub unsafe fn _mm512_mask_fnmsub_round_pd(
c: __m512d,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
let a = a.as_f64x8();
- let suba = simd_sub(zero, a);
- let subc = simd_sub(zero, c.as_f64x8());
+ let suba = simd_neg(a);
+ let subc = simd_neg(c.as_f64x8());
let b = b.as_f64x8();
let r = vfmadd132pdround(suba, b, subc, ROUNDING);
transmute(simd_select_bitmask(k, r, a))
@@ -9509,7 +9481,7 @@ pub unsafe fn _mm512_mask_fnmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_pd(
k: __mmask8,
@@ -9519,8 +9491,8 @@ pub unsafe fn _mm512_maskz_fnmsub_round_pd(
) -> __m512d {
static_assert_rounding!(ROUNDING);
let zero: f64x8 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f64x8());
- let subc = simd_sub(zero, c.as_f64x8());
+ let suba = simd_neg(a.as_f64x8());
+ let subc = simd_neg(c.as_f64x8());
let b = b.as_f64x8();
let r = vfmadd132pdround(suba, b, subc, ROUNDING);
transmute(simd_select_bitmask(k, r, zero))
@@ -9539,7 +9511,7 @@ pub unsafe fn _mm512_maskz_fnmsub_round_pd(
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vfmadd, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
+#[cfg_attr(test, assert_instr(vfnmsub, ROUNDING = 8))] //vfnmsub132pd or vfnmsub213pd or vfnmsub231pd
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_pd(
a: __m512d,
@@ -9548,10 +9520,9 @@ pub unsafe fn _mm512_mask3_fnmsub_round_pd(
k: __mmask8,
) -> __m512d {
static_assert_rounding!(ROUNDING);
- let zero: f64x8 = mem::zeroed();
- let suba = simd_sub(zero, a.as_f64x8());
+ let suba = simd_neg(a.as_f64x8());
let c = c.as_f64x8();
- let subc = simd_sub(zero, c);
+ let subc = simd_neg(c);
let b = b.as_f64x8();
let r = vfmadd132pdround(suba, b, subc, ROUNDING);
transmute(simd_select_bitmask(k, r, c))
@@ -28037,11 +28008,11 @@ pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovntps))] //should be vmovntpd
+#[cfg_attr(test, assert_instr(vmovntpd))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
crate::arch::asm!(
- "vmovntps [{mem_addr}], {a}",
+ "vmovntpd [{mem_addr}], {a}",
mem_addr = in(reg) mem_addr,
a = in(zmm_reg) a,
options(nostack, preserves_flags),
@@ -28063,11 +28034,11 @@ pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
#[inline]
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
-#[cfg_attr(test, assert_instr(vmovntps))] //should be vmovntdq
+#[cfg_attr(test, assert_instr(vmovntdq))]
#[allow(clippy::cast_ptr_alignment)]
-pub unsafe fn _mm512_stream_si512(mem_addr: *mut i64, a: __m512i) {
+pub unsafe fn _mm512_stream_si512(mem_addr: *mut i32, a: __m512i) {
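+ // use the integer non-temporal store (vmovntdq); vmovntps is the packed-f32 form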
crate::arch::asm!(
- "vmovntps [{mem_addr}], {a}",
+ "vmovntdq [{mem_addr}], {a}",
mem_addr = in(reg) mem_addr,
a = in(zmm_reg) a,
options(nostack, preserves_flags),
@@ -31306,7 +31277,14 @@ pub unsafe fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_reduce_add_ps(a: __m512) -> f32 {
- simd_reduce_add_unordered(a.as_f32x16())
+ // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` requires AVX512DQ, not just AVX512F
+ let a = _mm256_add_ps(
+ simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
+ simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
+ );
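+ // fold 256 -> 128 bits, swap the remaining pairs, then sum the last two lanes as scalars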
+ let a = _mm_add_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
+ let a = _mm_add_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
+ simd_extract::<_, f32>(a, 0) + simd_extract::<_, f32>(a, 1)
}
/// Reduce the packed single-precision (32-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
@@ -31316,11 +31294,7 @@ pub unsafe fn _mm512_reduce_add_ps(a: __m512) -> f32 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
- simd_reduce_add_unordered(simd_select_bitmask(
- k,
- a.as_f32x16(),
- _mm512_setzero_ps().as_f32x16(),
- ))
+ _mm512_reduce_add_ps(simd_select_bitmask(k, a, _mm512_setzero_ps()))
}
/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition. Returns the sum of all elements in a.
@@ -31330,7 +31304,12 @@ pub unsafe fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
- simd_reduce_add_unordered(a.as_f64x8())
+ let a = _mm256_add_pd(
+ _mm512_extractf64x4_pd::<0>(a),
+ _mm512_extractf64x4_pd::<1>(a),
+ );
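+ // fold 256 -> 128 bits, then add the two remaining lanes as scalars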
+ let a = _mm_add_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
+ simd_extract::<_, f64>(a, 0) + simd_extract::<_, f64>(a, 1)
}
/// Reduce the packed double-precision (64-bit) floating-point elements in a by addition using mask k. Returns the sum of all active elements in a.
@@ -31340,11 +31319,7 @@ pub unsafe fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
- simd_reduce_add_unordered(simd_select_bitmask(
- k,
- a.as_f64x8(),
- _mm512_setzero_pd().as_f64x8(),
- ))
+ _mm512_reduce_add_pd(simd_select_bitmask(k, a, _mm512_setzero_pd()))
}
/// Reduce the packed 32-bit integers in a by multiplication. Returns the product of all elements in a.
@@ -31402,7 +31377,14 @@ pub unsafe fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
- simd_reduce_mul_unordered(a.as_f32x16())
+ // we have to use `simd_shuffle` here because `_mm512_extractf32x8_ps` requires AVX512DQ, not just AVX512F
+ let a = _mm256_mul_ps(
+ simd_shuffle!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]),
+ simd_shuffle!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]),
+ );
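+ // same tree shape as `_mm512_reduce_add_ps`, multiplying at each level instead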
+ let a = _mm_mul_ps(_mm256_extractf128_ps::<0>(a), _mm256_extractf128_ps::<1>(a));
+ let a = _mm_mul_ps(a, simd_shuffle!(a, a, [2, 3, 0, 1]));
+ simd_extract::<_, f32>(a, 0) * simd_extract::<_, f32>(a, 1)
}
/// Reduce the packed single-precision (32-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -31412,11 +31394,7 @@ pub unsafe fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
- simd_reduce_mul_unordered(simd_select_bitmask(
- k,
- a.as_f32x16(),
- _mm512_set1_ps(1.).as_f32x16(),
- ))
+ _mm512_reduce_mul_ps(simd_select_bitmask(k, a, _mm512_set1_ps(1.)))
}
/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication. Returns the product of all elements in a.
@@ -31426,7 +31404,12 @@ pub unsafe fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
- simd_reduce_mul_unordered(a.as_f64x8())
+ let a = _mm256_mul_pd(
+ _mm512_extractf64x4_pd::<0>(a),
+ _mm512_extractf64x4_pd::<1>(a),
+ );
+ let a = _mm_mul_pd(_mm256_extractf128_pd::<0>(a), _mm256_extractf128_pd::<1>(a));
+ simd_extract::<_, f64>(a, 0) * simd_extract::<_, f64>(a, 1)
}
/// Reduce the packed double-precision (64-bit) floating-point elements in a by multiplication using mask k. Returns the product of all active elements in a.
@@ -31436,11 +31419,7 @@ pub unsafe fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
#[target_feature(enable = "avx512f")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
- simd_reduce_mul_unordered(simd_select_bitmask(
- k,
- a.as_f64x8(),
- _mm512_set1_pd(1.).as_f64x8(),
- ))
+ _mm512_reduce_mul_pd(simd_select_bitmask(k, a, _mm512_set1_pd(1.)))
}
/// Reduce the packed signed 32-bit integers in a by maximum. Returns the maximum of all elements in a.
@@ -32794,7 +32773,7 @@ pub unsafe fn _mm512_mask_loadu_epi32(src: __m512i, k: __mmask16, mem_addr: *con
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
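+ // the masked move never touches EFLAGS, so the compiler may keep flags live across the asm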
);
dst
}
@@ -32814,7 +32793,7 @@ pub unsafe fn _mm512_maskz_loadu_epi32(k: __mmask16, mem_addr: *const i32) -> __
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -32834,7 +32813,7 @@ pub unsafe fn _mm512_mask_loadu_epi64(src: __m512i, k: __mmask8, mem_addr: *cons
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -32854,7 +32833,7 @@ pub unsafe fn _mm512_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -32874,7 +32853,7 @@ pub unsafe fn _mm512_mask_loadu_ps(src: __m512, k: __mmask16, mem_addr: *const f
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -32894,7 +32873,7 @@ pub unsafe fn _mm512_maskz_loadu_ps(k: __mmask16, mem_addr: *const f32) -> __m51
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -32914,7 +32893,7 @@ pub unsafe fn _mm512_mask_loadu_pd(src: __m512d, k: __mmask8, mem_addr: *const f
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -32934,7 +32913,7 @@ pub unsafe fn _mm512_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m512
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -32954,7 +32933,7 @@ pub unsafe fn _mm256_mask_loadu_epi32(src: __m256i, k: __mmask8, mem_addr: *cons
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -32974,7 +32953,7 @@ pub unsafe fn _mm256_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -32994,7 +32973,7 @@ pub unsafe fn _mm256_mask_loadu_epi64(src: __m256i, k: __mmask8, mem_addr: *cons
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33014,7 +32993,7 @@ pub unsafe fn _mm256_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33034,7 +33013,7 @@ pub unsafe fn _mm256_mask_loadu_ps(src: __m256, k: __mmask8, mem_addr: *const f3
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33054,7 +33033,7 @@ pub unsafe fn _mm256_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m256
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33074,7 +33053,7 @@ pub unsafe fn _mm256_mask_loadu_pd(src: __m256d, k: __mmask8, mem_addr: *const f
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33094,7 +33073,7 @@ pub unsafe fn _mm256_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m256
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33114,7 +33093,7 @@ pub unsafe fn _mm_mask_loadu_epi32(src: __m128i, k: __mmask8, mem_addr: *const i
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33134,7 +33113,7 @@ pub unsafe fn _mm_maskz_loadu_epi32(k: __mmask8, mem_addr: *const i32) -> __m128
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33154,7 +33133,7 @@ pub unsafe fn _mm_mask_loadu_epi64(src: __m128i, k: __mmask8, mem_addr: *const i
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33174,7 +33153,7 @@ pub unsafe fn _mm_maskz_loadu_epi64(k: __mmask8, mem_addr: *const i64) -> __m128
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33194,7 +33173,7 @@ pub unsafe fn _mm_mask_loadu_ps(src: __m128, k: __mmask8, mem_addr: *const f32)
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33214,7 +33193,7 @@ pub unsafe fn _mm_maskz_loadu_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33234,7 +33213,7 @@ pub unsafe fn _mm_mask_loadu_pd(src: __m128d, k: __mmask8, mem_addr: *const f64)
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33254,7 +33233,7 @@ pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -33266,17 +33245,13 @@ pub unsafe fn _mm_maskz_loadu_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovdqu32))] // FIXME: should be vmovdqa32
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *const i32) -> __m512i {
- let mut dst: __m512i = src;
- asm!(
- vpl!("vmovdqa32 {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
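+ // expand the bitmask into a per-lane vector mask (all-ones where k is set), as simd_masked_load expects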
+ let ones = _mm512_set1_epi32(-1).as_i32x16();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ transmute(simd_masked_load(mask, mem_addr, src.as_i32x16()))
}
/// Load packed 32-bit integers from memory into dst using zeromask k
@@ -33286,17 +33261,10 @@ pub unsafe fn _mm512_mask_load_epi32(src: __m512i, k: __mmask16, mem_addr: *cons
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovdqu32))] // FIXME: should be vmovdqa32
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m512i {
- let mut dst: __m512i;
- asm!(
- vpl!("vmovdqa32 {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
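+ // a zeromask load is just a writemask load with an all-zeros source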
+ _mm512_mask_load_epi32(_mm512_setzero_epi32(), k, mem_addr)
}
/// Load packed 64-bit integers from memory into dst using writemask k
@@ -33306,17 +33274,13 @@ pub unsafe fn _mm512_maskz_load_epi32(k: __mmask16, mem_addr: *const i32) -> __m
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovdqu64))] // FIXME: should be vmovdqa64
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const i64) -> __m512i {
- let mut dst: __m512i = src;
- asm!(
- vpl!("vmovdqa64 {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm512_set1_epi64(-1).as_i64x8();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ transmute(simd_masked_load(mask, mem_addr, src.as_i64x8()))
}
/// Load packed 64-bit integers from memory into dst using zeromask k
@@ -33326,17 +33290,10 @@ pub unsafe fn _mm512_mask_load_epi64(src: __m512i, k: __mmask8, mem_addr: *const
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovdqu64))] // FIXME: should be vmovdqa64
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m512i {
- let mut dst: __m512i;
- asm!(
- vpl!("vmovdqa64 {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm512_mask_load_epi64(_mm512_setzero_epi32(), k, mem_addr)
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
@@ -33346,17 +33303,13 @@ pub unsafe fn _mm512_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m5
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_ps)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))] // FIXME: should be vmovaps
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f32) -> __m512 {
- let mut dst: __m512 = src;
- asm!(
- vpl!("vmovaps {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm512_set1_epi32(-1).as_i32x16();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ simd_masked_load(mask, mem_addr, src)
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@@ -33366,17 +33319,10 @@ pub unsafe fn _mm512_mask_load_ps(src: __m512, k: __mmask16, mem_addr: *const f3
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_ps)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))] // FIXME: should be vmovaps
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512 {
- let mut dst: __m512;
- asm!(
- vpl!("vmovaps {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm512_mask_load_ps(_mm512_setzero_ps(), k, mem_addr)
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
@@ -33386,17 +33332,13 @@ pub unsafe fn _mm512_maskz_load_ps(k: __mmask16, mem_addr: *const f32) -> __m512
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_load_pd)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovupd))] // FIXME: should be vmovapd
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f64) -> __m512d {
- let mut dst: __m512d = src;
- asm!(
- vpl!("vmovapd {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm512_set1_epi64(-1).as_i64x8();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ simd_masked_load(mask, mem_addr, src)
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@@ -33406,17 +33348,10 @@ pub unsafe fn _mm512_mask_load_pd(src: __m512d, k: __mmask8, mem_addr: *const f6
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_maskz_load_pd)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovupd))] // FIXME: should be vmovapd
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d {
- let mut dst: __m512d;
- asm!(
- vpl!("vmovapd {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm512_mask_load_pd(_mm512_setzero_pd(), k, mem_addr)
}
/// Load packed 32-bit integers from memory into dst using writemask k
@@ -33425,18 +33360,14 @@ pub unsafe fn _mm512_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m512d
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi32)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu32))] // FIXME: should be vmovdqa32
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const i32) -> __m256i {
- let mut dst: __m256i = src;
- asm!(
- vpl!("vmovdqa32 {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm256_set1_epi32(-1).as_i32x8();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ transmute(simd_masked_load(mask, mem_addr, src.as_i32x8()))
}
/// Load packed 32-bit integers from memory into dst using zeromask k
@@ -33445,18 +33376,11 @@ pub unsafe fn _mm256_mask_load_epi32(src: __m256i, k: __mmask8, mem_addr: *const
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi32)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu32))] // FIXME: should be vmovdqa32
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m256i {
- let mut dst: __m256i;
- asm!(
- vpl!("vmovdqa32 {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm256_mask_load_epi32(_mm256_setzero_si256(), k, mem_addr)
}
/// Load packed 64-bit integers from memory into dst using writemask k
@@ -33465,18 +33389,14 @@ pub unsafe fn _mm256_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m2
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_epi64)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu64))] // FIXME: should be vmovdqa64
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const i64) -> __m256i {
- let mut dst: __m256i = src;
- asm!(
- vpl!("vmovdqa64 {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm256_set1_epi64x(-1).as_i64x4();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ transmute(simd_masked_load(mask, mem_addr, src.as_i64x4()))
}
/// Load packed 64-bit integers from memory into dst using zeromask k
@@ -33485,18 +33405,11 @@ pub unsafe fn _mm256_mask_load_epi64(src: __m256i, k: __mmask8, mem_addr: *const
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_epi64)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu64))] // FIXME: should be vmovdqa64
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m256i {
- let mut dst: __m256i;
- asm!(
- vpl!("vmovdqa64 {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm256_mask_load_epi64(_mm256_setzero_si256(), k, mem_addr)
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
@@ -33505,18 +33418,14 @@ pub unsafe fn _mm256_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m2
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_ps)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovups))] // FIXME: should be vmovaps
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32) -> __m256 {
- let mut dst: __m256 = src;
- asm!(
- vpl!("vmovaps {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm256_set1_epi32(-1).as_i32x8();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ simd_masked_load(mask, mem_addr, src)
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@@ -33525,18 +33434,11 @@ pub unsafe fn _mm256_mask_load_ps(src: __m256, k: __mmask8, mem_addr: *const f32
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_ps)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovups))] // FIXME: should be vmovaps
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256 {
- let mut dst: __m256;
- asm!(
- vpl!("vmovaps {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm256_mask_load_ps(_mm256_setzero_ps(), k, mem_addr)
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
@@ -33545,18 +33447,14 @@ pub unsafe fn _mm256_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m256
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_load_pd)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovupd))] // FIXME: should be vmovapd
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f64) -> __m256d {
- let mut dst: __m256d = src;
- asm!(
- vpl!("vmovapd {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm256_set1_epi64x(-1).as_i64x4();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ simd_masked_load(mask, mem_addr, src)
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@@ -33565,18 +33463,11 @@ pub unsafe fn _mm256_mask_load_pd(src: __m256d, k: __mmask8, mem_addr: *const f6
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_maskz_load_pd)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovupd))] // FIXME: should be vmovapd
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d {
- let mut dst: __m256d;
- asm!(
- vpl!("vmovapd {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm256_mask_load_pd(_mm256_setzero_pd(), k, mem_addr)
}
/// Load packed 32-bit integers from memory into dst using writemask k
@@ -33585,18 +33476,14 @@ pub unsafe fn _mm256_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m256d
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi32)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu32))] // FIXME: should be vmovdqa32
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i32) -> __m128i {
- let mut dst: __m128i = src;
- asm!(
- vpl!("vmovdqa32 {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm_set1_epi32(-1).as_i32x4();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ transmute(simd_masked_load(mask, mem_addr, src.as_i32x4()))
}
/// Load packed 32-bit integers from memory into dst using zeromask k
@@ -33605,18 +33492,11 @@ pub unsafe fn _mm_mask_load_epi32(src: __m128i, k: __mmask8, mem_addr: *const i3
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi32)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu32))] // FIXME: should be vmovdqa32
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i {
- let mut dst: __m128i;
- asm!(
- vpl!("vmovdqa32 {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm_mask_load_epi32(_mm_setzero_si128(), k, mem_addr)
}
/// Load packed 64-bit integers from memory into dst using writemask k
@@ -33625,18 +33505,14 @@ pub unsafe fn _mm_maskz_load_epi32(k: __mmask8, mem_addr: *const i32) -> __m128i
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_epi64)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu64))] // FIXME: should be vmovdqa64
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i64) -> __m128i {
- let mut dst: __m128i = src;
- asm!(
- vpl!("vmovdqa64 {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm_set1_epi64x(-1).as_i64x2();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ transmute(simd_masked_load(mask, mem_addr, src.as_i64x2()))
}
/// Load packed 64-bit integers from memory into dst using zeromask k
@@ -33645,18 +33521,11 @@ pub unsafe fn _mm_mask_load_epi64(src: __m128i, k: __mmask8, mem_addr: *const i6
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_epi64)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu64))] // FIXME: should be vmovdqa64
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i {
- let mut dst: __m128i;
- asm!(
- vpl!("vmovdqa64 {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm_mask_load_epi64(_mm_setzero_si128(), k, mem_addr)
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using writemask k
@@ -33665,18 +33534,14 @@ pub unsafe fn _mm_maskz_load_epi64(k: __mmask8, mem_addr: *const i64) -> __m128i
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_ps)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovups))] // FIXME: should be vmovaps
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -> __m128 {
- let mut dst: __m128 = src;
- asm!(
- vpl!("vmovaps {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm_set1_epi32(-1).as_i32x4();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ simd_masked_load(mask, mem_addr, src)
}
/// Load packed single-precision (32-bit) floating-point elements from memory into dst using zeromask k
@@ -33685,18 +33550,11 @@ pub unsafe fn _mm_mask_load_ps(src: __m128, k: __mmask8, mem_addr: *const f32) -
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_ps)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovups))] // FIXME: should be vmovaps
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
- let mut dst: __m128;
- asm!(
- vpl!("vmovaps {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm_mask_load_ps(_mm_setzero_ps(), k, mem_addr)
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using writemask k
@@ -33705,18 +33563,14 @@ pub unsafe fn _mm_maskz_load_ps(k: __mmask8, mem_addr: *const f32) -> __m128 {
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_load_pd)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovupd))] // FIXME: should be vmovapd
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64) -> __m128d {
- let mut dst: __m128d = src;
- asm!(
- vpl!("vmovapd {dst}{{{k}}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ let ones = _mm_set1_epi64x(-1).as_i64x2();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(k, ones, zero);
+ simd_masked_load(mask, mem_addr, src)
}
/// Load packed double-precision (64-bit) floating-point elements from memory into dst using zeromask k
@@ -33725,18 +33579,11 @@ pub unsafe fn _mm_mask_load_pd(src: __m128d, k: __mmask8, mem_addr: *const f64)
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maskz_load_pd)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovupd))] // FIXME: should be vmovapd
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_maskz_load_pd(k: __mmask8, mem_addr: *const f64) -> __m128d {
- let mut dst: __m128d;
- asm!(
- vpl!("vmovapd {dst}{{{k}}} {{z}}"),
- p = in(reg) mem_addr,
- k = in(kreg) k,
- dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
- );
- dst
+ _mm_mask_load_pd(_mm_setzero_pd(), k, mem_addr)
}
/// Store packed 32-bit integers from a into memory using writemask k.
@@ -33752,7 +33599,7 @@ pub unsafe fn _mm512_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask16, a: _
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(zmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33769,7 +33616,7 @@ pub unsafe fn _mm512_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(zmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33786,7 +33633,7 @@ pub unsafe fn _mm512_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask16, a: __m5
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(zmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33803,7 +33650,7 @@ pub unsafe fn _mm512_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m51
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(zmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33820,7 +33667,7 @@ pub unsafe fn _mm256_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(ymm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33837,7 +33684,7 @@ pub unsafe fn _mm256_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(ymm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33854,7 +33701,7 @@ pub unsafe fn _mm256_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m25
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(ymm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33871,7 +33718,7 @@ pub unsafe fn _mm256_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m25
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(ymm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33888,7 +33735,7 @@ pub unsafe fn _mm_mask_storeu_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m12
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(xmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33905,7 +33752,7 @@ pub unsafe fn _mm_mask_storeu_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m12
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(xmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33922,7 +33769,7 @@ pub unsafe fn _mm_mask_storeu_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128)
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(xmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33939,7 +33786,7 @@ pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d)
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(xmm_reg) a,
- options(nostack)
+ options(nostack, preserves_flags)
);
}
@@ -33949,15 +33796,13 @@ pub unsafe fn _mm_mask_storeu_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d)
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi32)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovdqu32))] // FIXME: should be vmovdqa32
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __m512i) {
- asm!(
- vps!("vmovdqa32", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(zmm_reg) a,
- options(nostack)
- );
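+ // mirror of the masked loads: widen k into a per-lane mask for simd_masked_store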
+ let ones = _mm512_set1_epi32(-1).as_i32x16();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a.as_i32x16())
}
/// Store packed 64-bit integers from a into memory using writemask k.
@@ -33966,15 +33811,13 @@ pub unsafe fn _mm512_mask_store_epi32(mem_addr: *mut i32, mask: __mmask16, a: __
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_epi64)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovdqu64))] // FIXME: should be vmovdqa64
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m512i) {
- asm!(
- vps!("vmovdqa64", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(zmm_reg) a,
- options(nostack)
- );
+ let ones = _mm512_set1_epi64(-1).as_i64x8();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a.as_i64x8())
}
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@@ -33983,15 +33826,13 @@ pub unsafe fn _mm512_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_ps)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovups))] // FIXME: should be vmovaps
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m512) {
- asm!(
- vps!("vmovaps", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(zmm_reg) a,
- options(nostack)
- );
+ let ones = _mm512_set1_epi32(-1).as_i32x16();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a)
}
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@@ -34000,15 +33841,13 @@ pub unsafe fn _mm512_mask_store_ps(mem_addr: *mut f32, mask: __mmask16, a: __m51
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm512_mask_store_pd)
#[inline]
#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vmovupd))] // FIXME: should be vmovapd
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512d) {
- asm!(
- vps!("vmovapd", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(zmm_reg) a,
- options(nostack)
- );
+ let ones = _mm512_set1_epi64(-1).as_i64x8();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a)
}
/// Store packed 32-bit integers from a into memory using writemask k.
@@ -34016,16 +33855,14 @@ pub unsafe fn _mm512_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m512
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi32)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu32))] // FIXME: should be vmovdqa32
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m256i) {
- asm!(
- vps!("vmovdqa32", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(ymm_reg) a,
- options(nostack)
- );
+ let ones = _mm256_set1_epi32(-1).as_i32x8();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a.as_i32x8())
}
/// Store packed 64-bit integers from a into memory using writemask k.
@@ -34033,16 +33870,14 @@ pub unsafe fn _mm256_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_epi64)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu64))] // FIXME: should be vmovdqa64
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m256i) {
- asm!(
- vps!("vmovdqa64", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(ymm_reg) a,
- options(nostack)
- );
+ let ones = _mm256_set1_epi64x(-1).as_i64x4();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a.as_i64x4())
}
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@@ -34050,16 +33885,14 @@ pub unsafe fn _mm256_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_ps)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovups))] // FIXME: should be vmovaps
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256) {
- asm!(
- vps!("vmovaps", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(ymm_reg) a,
- options(nostack)
- );
+ let ones = _mm256_set1_epi32(-1).as_i32x8();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a)
}
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@@ -34067,16 +33900,14 @@ pub unsafe fn _mm256_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m256
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_mask_store_pd)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovupd))] // FIXME: should be vmovapd
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256d) {
- asm!(
- vps!("vmovapd", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(ymm_reg) a,
- options(nostack)
- );
+ let ones = _mm256_set1_epi64x(-1).as_i64x4();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a)
}
/// Store packed 32-bit integers from a into memory using writemask k.
@@ -34084,16 +33915,14 @@ pub unsafe fn _mm256_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m256
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi32)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu32))] // FIXME: should be vmovdqa32
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128i) {
- asm!(
- vps!("vmovdqa32", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(xmm_reg) a,
- options(nostack)
- );
+ let ones = _mm_set1_epi32(-1).as_i32x4();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a.as_i32x4())
}
/// Store packed 64-bit integers from a into memory using writemask k.
@@ -34101,16 +33930,14 @@ pub unsafe fn _mm_mask_store_epi32(mem_addr: *mut i32, mask: __mmask8, a: __m128
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_epi64)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovdqu64))] // FIXME: should be vmovdqa64
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128i) {
- asm!(
- vps!("vmovdqa64", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(xmm_reg) a,
- options(nostack)
- );
+ let ones = _mm_set1_epi64x(-1).as_i64x2();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a.as_i64x2())
}
/// Store packed single-precision (32-bit) floating-point elements from a into memory using writemask k.
@@ -34118,16 +33945,14 @@ pub unsafe fn _mm_mask_store_epi64(mem_addr: *mut i64, mask: __mmask8, a: __m128
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_ps)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovups))] // FIXME: should be vmovaps
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
- asm!(
- vps!("vmovaps", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(xmm_reg) a,
- options(nostack)
- );
+ let ones = _mm_set1_epi32(-1).as_i32x4();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a)
}
/// Store packed double-precision (64-bit) floating-point elements from a into memory using writemask k.
@@ -34135,16 +33960,14 @@ pub unsafe fn _mm_mask_store_ps(mem_addr: *mut f32, mask: __mmask8, a: __m128) {
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mask_store_pd)
#[inline]
-#[target_feature(enable = "avx512f,avx512vl,avx,sse")]
+#[target_feature(enable = "avx512f,avx512vl")]
+#[cfg_attr(test, assert_instr(vmovupd))] // FIXME: should be vmovapd
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
pub unsafe fn _mm_mask_store_pd(mem_addr: *mut f64, mask: __mmask8, a: __m128d) {
- asm!(
- vps!("vmovapd", "{{{mask}}}, {a}"),
- p = in(reg) mem_addr,
- mask = in(kreg) mask,
- a = in(xmm_reg) a,
- options(nostack)
- );
+ let ones = _mm_set1_epi64x(-1).as_i64x2();
+ let zero = mem::zeroed();
+ let mask = simd_select_bitmask(mask, ones, zero);
+ simd_masked_store(mask, mem_addr, a)
}
/// Load contiguous active 32-bit integers from unaligned memory at mem_addr (those with their respective bit set in mask k), and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -34164,7 +33987,7 @@ pub unsafe fn _mm512_mask_expandloadu_epi32(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34182,7 +34005,7 @@ pub unsafe fn _mm512_maskz_expandloadu_epi32(k: __mmask16, mem_addr: *const i32)
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34204,7 +34027,7 @@ pub unsafe fn _mm256_mask_expandloadu_epi32(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34222,7 +34045,7 @@ pub unsafe fn _mm256_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32)
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34244,7 +34067,7 @@ pub unsafe fn _mm_mask_expandloadu_epi32(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34262,7 +34085,7 @@ pub unsafe fn _mm_maskz_expandloadu_epi32(k: __mmask8, mem_addr: *const i32) ->
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34284,7 +34107,7 @@ pub unsafe fn _mm512_mask_expandloadu_epi64(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34302,7 +34125,7 @@ pub unsafe fn _mm512_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64)
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34324,7 +34147,7 @@ pub unsafe fn _mm256_mask_expandloadu_epi64(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34342,7 +34165,7 @@ pub unsafe fn _mm256_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64)
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34364,7 +34187,7 @@ pub unsafe fn _mm_mask_expandloadu_epi64(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34382,7 +34205,7 @@ pub unsafe fn _mm_maskz_expandloadu_epi64(k: __mmask8, mem_addr: *const i64) ->
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34404,7 +34227,7 @@ pub unsafe fn _mm512_mask_expandloadu_ps(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34422,7 +34245,7 @@ pub unsafe fn _mm512_maskz_expandloadu_ps(k: __mmask16, mem_addr: *const f32) ->
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34440,7 +34263,7 @@ pub unsafe fn _mm256_mask_expandloadu_ps(src: __m256, k: __mmask8, mem_addr: *co
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34458,7 +34281,7 @@ pub unsafe fn _mm256_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) ->
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34476,7 +34299,7 @@ pub unsafe fn _mm_mask_expandloadu_ps(src: __m128, k: __mmask8, mem_addr: *const
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34494,7 +34317,7 @@ pub unsafe fn _mm_maskz_expandloadu_ps(k: __mmask8, mem_addr: *const f32) -> __m
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34516,7 +34339,7 @@ pub unsafe fn _mm512_mask_expandloadu_pd(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34534,7 +34357,7 @@ pub unsafe fn _mm512_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) ->
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34556,7 +34379,7 @@ pub unsafe fn _mm256_mask_expandloadu_pd(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34574,7 +34397,7 @@ pub unsafe fn _mm256_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) ->
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34592,7 +34415,7 @@ pub unsafe fn _mm_mask_expandloadu_pd(src: __m128d, k: __mmask8, mem_addr: *cons
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -34610,7 +34433,7 @@ pub unsafe fn _mm_maskz_expandloadu_pd(k: __mmask8, mem_addr: *const f64) -> __m
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
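The same one-line change repeats across every masked expand-load variant above: each asm block executes a single `vpexpandd`/`vpexpandq` (and, in the `avx512vbmi2` hunks below, `vpexpandw`/`vpexpandb`), none of which write EFLAGS, so `preserves_flags` is sound and lets the compiler keep flag values live across the block instead of conservatively re-materializing them. A minimal standalone sketch of the same option set (the `load_u64` helper is hypothetical, not part of this patch):

```rust
use core::arch::asm;

// `pure` + `readonly` mark the block as a side-effect-free read;
// `preserves_flags` additionally promises the template never writes
// EFLAGS, so a pending comparison can stay live across the asm.
#[cfg(target_arch = "x86_64")]
unsafe fn load_u64(p: *const u64) -> u64 {
    let out: u64;
    asm!(
        "mov {out}, qword ptr [{p}]", // MOV does not modify EFLAGS
        p = in(reg) p,
        out = out(reg) out,
        options(pure, readonly, nostack, preserves_flags),
    );
    out
}
```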
@@ -54387,6 +54210,7 @@ mod tests {
}
#[simd_test(enable = "avx512f")]
+ #[cfg_attr(miri, ignore)]
unsafe fn test_mm512_stream_ps() {
#[repr(align(64))]
struct Memory {
@@ -54401,6 +54225,38 @@ mod tests {
}
}
+ #[simd_test(enable = "avx512f")]
+ #[cfg_attr(miri, ignore)]
+ unsafe fn test_mm512_stream_pd() {
+ #[repr(align(64))]
+ struct Memory {
+ pub data: [f64; 8],
+ }
+ let a = _mm512_set1_pd(7.0);
+ let mut mem = Memory { data: [-1.0; 8] };
+
+ _mm512_stream_pd(mem.data.as_mut_ptr(), a);
+ for i in 0..8 {
+ assert_eq!(mem.data[i], get_m512d(a, i));
+ }
+ }
+
+ #[simd_test(enable = "avx512f")]
+ #[cfg_attr(miri, ignore)]
+ unsafe fn test_mm512_stream_si512() {
+ #[repr(align(64))]
+ struct Memory {
+ pub data: [i64; 8],
+ }
+ let a = _mm512_set1_epi64(7);
+ let mut mem = Memory { data: [-1; 8] };
+
+ _mm512_stream_si512(mem.data.as_mut_ptr().cast(), a);
+ for i in 0..8 {
+ assert_eq!(mem.data[i], get_m512i(a, i));
+ }
+ }
+
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_add_epi32() {
let a = _mm512_set1_epi32(1);
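The stream tests are moved here from the `x86_64` module (see the removal further down) and gain `#[cfg_attr(miri, ignore)]`: non-temporal stores bypass the cache-coherent write path, which Miri does not model. In real code a streaming store is typically followed by `_mm_sfence` before the buffer is handed off; a hypothetical usage sketch (`stream_fill` is illustrative, not part of this patch):

```rust
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f")]
unsafe fn stream_fill(dst: *mut f64) {
    use core::arch::x86_64::{_mm512_set1_pd, _mm512_stream_pd, _mm_sfence};
    // `dst` must be 64-byte aligned, with room for 8 f64 values.
    _mm512_stream_pd(dst, _mm512_set1_pd(7.0)); // non-temporal store
    _mm_sfence(); // order the streaming store before later stores
}
```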
diff --git a/crates/core_arch/src/x86/avx512vbmi2.rs b/crates/core_arch/src/x86/avx512vbmi2.rs
index 979cad7284..4bbb6f44c2 100644
--- a/crates/core_arch/src/x86/avx512vbmi2.rs
+++ b/crates/core_arch/src/x86/avx512vbmi2.rs
@@ -24,7 +24,7 @@ pub unsafe fn _mm512_mask_expandloadu_epi16(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -42,7 +42,7 @@ pub unsafe fn _mm512_maskz_expandloadu_epi16(k: __mmask32, mem_addr: *const i16)
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -64,7 +64,7 @@ pub unsafe fn _mm256_mask_expandloadu_epi16(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -82,7 +82,7 @@ pub unsafe fn _mm256_maskz_expandloadu_epi16(k: __mmask16, mem_addr: *const i16)
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -104,7 +104,7 @@ pub unsafe fn _mm_mask_expandloadu_epi16(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -122,7 +122,7 @@ pub unsafe fn _mm_maskz_expandloadu_epi16(k: __mmask8, mem_addr: *const i16) ->
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -144,7 +144,7 @@ pub unsafe fn _mm512_mask_expandloadu_epi8(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -162,7 +162,7 @@ pub unsafe fn _mm512_maskz_expandloadu_epi8(k: __mmask64, mem_addr: *const i8) -
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -184,7 +184,7 @@ pub unsafe fn _mm256_mask_expandloadu_epi8(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -202,7 +202,7 @@ pub unsafe fn _mm256_maskz_expandloadu_epi8(k: __mmask32, mem_addr: *const i8) -
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -224,7 +224,7 @@ pub unsafe fn _mm_mask_expandloadu_epi8(
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
@@ -242,7 +242,7 @@ pub unsafe fn _mm_maskz_expandloadu_epi8(k: __mmask16, mem_addr: *const i8) -> _
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
- options(pure, readonly, nostack)
+ options(pure, readonly, nostack, preserves_flags)
);
dst
}
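For reference, the expand-load family touched by all of these hunks reads consecutive elements from memory and places them into the destination lanes selected by the mask; unselected lanes keep `src` (or zero in the `maskz` form). A scalar model of the 128-bit `epi16` variant (illustrative only; the real intrinsic is a single `vpexpandw`):

```rust
// Scalar reference model of a masked expand-load. Lane `i` of the
// result takes the next unread memory element when bit `i` of `k` is
// set, and keeps the corresponding `src` lane otherwise.
fn mask_expandloadu_epi16_ref(src: [i16; 8], k: u8, mem: &[i16]) -> [i16; 8] {
    let mut dst = src;
    let mut next = 0; // index of the next contiguous element in `mem`
    for lane in 0..8 {
        if k & (1 << lane) != 0 {
            dst[lane] = mem[next]; // memory is consumed in order
            next += 1;
        }
    }
    dst
}
```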
diff --git a/crates/core_arch/src/x86_64/avx512f.rs b/crates/core_arch/src/x86_64/avx512f.rs
index ce6bc89f5b..aa501bec3d 100644
--- a/crates/core_arch/src/x86_64/avx512f.rs
+++ b/crates/core_arch/src/x86_64/avx512f.rs
@@ -12012,36 +12012,6 @@ mod tests {
assert_eq!(r, e);
}
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_stream_pd() {
- #[repr(align(64))]
- struct Memory {
- pub data: [f64; 8],
- }
- let a = _mm512_set1_pd(7.0);
- let mut mem = Memory { data: [-1.0; 8] };
-
- _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
- for i in 0..8 {
- assert_eq!(mem.data[i], get_m512d(a, i));
- }
- }
-
- #[simd_test(enable = "avx512f")]
- unsafe fn test_mm512_stream_si512() {
- #[repr(align(64))]
- struct Memory {
- pub data: [i64; 8],
- }
- let a = _mm512_set1_epi64(7);
- let mut mem = Memory { data: [-1; 8] };
-
- _mm512_stream_si512(&mut mem.data[0] as *mut i64, a);
- for i in 0..8 {
- assert_eq!(mem.data[i], get_m512i(a, i));
- }
- }
-
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_set1_epi64() {
let src = _mm512_set1_epi64(2);
diff --git a/crates/stdarch-verify/tests/x86-intel.rs b/crates/stdarch-verify/tests/x86-intel.rs
index a69d17e71c..c5773a0984 100644
--- a/crates/stdarch-verify/tests/x86-intel.rs
+++ b/crates/stdarch-verify/tests/x86-intel.rs
@@ -346,7 +346,10 @@ fn verify_all_signatures() {
// unsigned integer, but all other _mm_shuffle_.. intrinsics
// take a signed-integer. This breaks `_MM_SHUFFLE` for
// `_mm_shuffle_ps`:
- "_mm_shuffle_ps" => continue,
+ name@"_mm_shuffle_ps" => {
+ map.remove(name);
+ continue;
+ },
_ => {}
}
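Binding the matched literal with `name @ ...` lets the arm remove `_mm_shuffle_ps` from the verification map before skipping it, so `print_missing` below no longer reports an entry that was in fact handled. A sketch of the pattern with hypothetical map contents:

```rust
use std::collections::HashMap;

// `name @ "literal"` both tests the scrutinee against the literal and
// binds it, so the same &str can be reused to update the map.
fn skip_handled(intrinsic: &str, map: &mut HashMap<&str, u32>) {
    match intrinsic {
        name @ "_mm_shuffle_ps" => {
            map.remove(name); // drop it so it is not reported later
        }
        _ => {}
    }
}
```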
@@ -391,13 +394,19 @@ fn verify_all_signatures() {
fn print_missing(map: &HashMap<&str, Vec<&Intrinsic>>, mut f: impl Write) -> io::Result<()> {
let mut missing = BTreeMap::new(); // BTreeMap to keep the cpuids ordered
- // we cannot use SVML and MMX, and MPX is not in LLVM, and intrinsics without any cpuid requirement
- // are accessible from safe rust
+
+ // we cannot use SVML and MMX (so `__m64` returns/parameters are excluded below),
+ // MPX is not in LLVM, and intrinsics without any cpuid requirement are accessible from safe Rust
for intrinsic in map.values().flatten().filter(|intrinsic| {
intrinsic.tech != "SVML"
&& intrinsic.tech != "MMX"
&& !intrinsic.cpuid.is_empty()
&& !intrinsic.cpuid.contains(&"MPX".to_string())
+ && intrinsic.return_.type_ != "__m64"
+ && !intrinsic
+ .parameters
+ .iter()
+ .any(|param| param.type_.contains("__m64"))
}) {
missing
.entry(&intrinsic.cpuid)
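The extended predicate also skips any intrinsic whose return type or parameters mention the MMX register type `__m64`, which `core::arch` does not expose. Illustrative shape of the new checks, with hypothetical field names standing in for the verifier's `Intrinsic` type (the real code reads `intrinsic.return_.type_`):

```rust
// Hypothetical mirror of the filter: exact match on the return type,
// substring match on parameters so `*mut __m64` etc. are also caught.
struct Param { type_: String }
struct Intrinsic { return_type: String, parameters: Vec<Param> }

fn uses_m64(i: &Intrinsic) -> bool {
    i.return_type == "__m64"
        || i.parameters.iter().any(|p| p.type_.contains("__m64"))
}
```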