Skip to content

Commit

Permalink
Fixed many intrinsics
Browse files Browse the repository at this point in the history
fixed avx512-fma, mask-load/store stream, reduce-add and reduce-mul. and load/store of mask32 and mask64. added preserves-flags to load and store asm. fixed the missing list
  • Loading branch information
sayantn committed Jun 25, 2024
1 parent 258f489 commit 7387776
Show file tree
Hide file tree
Showing 8 changed files with 475 additions and 710 deletions.
80 changes: 6 additions & 74 deletions crates/core_arch/missing-x86.md
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@

<details><summary>["AVX2"]</summary><p>

* [ ] [`_mm256_stream_load_si256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_stream_load_si256)
* [ ] [`_mm_broadcastsi128_si256`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastsi128_si256)
</p></details>

Expand Down Expand Up @@ -174,6 +175,7 @@
* [ ] [`_mm512_mask_i32logather_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32logather_pd)
* [ ] [`_mm512_mask_i32loscatter_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_epi64)
* [ ] [`_mm512_mask_i32loscatter_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_i32loscatter_pd)
* [ ] [`_mm512_stream_load_si512`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_stream_load_si512)
* [ ] [`_mm_mask_load_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_sd)
* [ ] [`_mm_mask_load_ss`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_load_ss)
* [ ] [`_mm_mask_store_sd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_store_sd)
Expand Down Expand Up @@ -1539,95 +1541,25 @@

<details><summary>["SSE"]</summary><p>

* [ ] [`_m_maskmovq`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_maskmovq)
* [ ] [`_m_pavgb`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pavgb)
* [ ] [`_m_pavgw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pavgw)
* [ ] [`_m_pextrw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pextrw)
* [ ] [`_m_pinsrw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pinsrw)
* [ ] [`_m_pmaxsw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmaxsw)
* [ ] [`_m_pmaxub`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmaxub)
* [ ] [`_m_pminsw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pminsw)
* [ ] [`_m_pminub`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pminub)
* [ ] [`_m_pmovmskb`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmovmskb)
* [ ] [`_m_pmulhuw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pmulhuw)
* [ ] [`_m_psadbw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_psadbw)
* [ ] [`_m_pshufw`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_m_pshufw)
* [ ] [`_mm_avg_pu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_pu16)
* [ ] [`_mm_avg_pu8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_pu8)
* [ ] [`_mm_cvt_pi2ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_pi2ps)
* [ ] [`_mm_cvt_ps2pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_ps2pi)
* [ ] [`_mm_cvtpi16_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi16_ps)
* [ ] [`_mm_cvtpi32_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32_ps)
* [ ] [`_mm_cvtpi32x2_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32x2_ps)
* [ ] [`_mm_cvtpi8_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi8_ps)
* [ ] [`_mm_cvtps_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi16)
* [ ] [`_mm_cvtps_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi32)
* [ ] [`_mm_cvtps_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pi8)
* [ ] [`_mm_cvtpu16_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpu16_ps)
* [ ] [`_mm_cvtpu8_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpu8_ps)
* [ ] [`_mm_cvtt_ps2pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_ps2pi)
* [ ] [`_mm_cvttps_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_pi32)
* [ ] [`_mm_extract_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_pi16)
* [ ] [`_mm_free`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_free)
* [ ] [`_mm_insert_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_pi16)
* [ ] [`_mm_loadh_pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pi)
* [ ] [`_mm_loadl_pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pi)
* [ ] [`_mm_malloc`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_malloc)
* [ ] [`_mm_maskmove_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmove_si64)
* [ ] [`_mm_max_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pi16)
* [ ] [`_mm_max_pu8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pu8)
* [ ] [`_mm_min_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pi16)
* [ ] [`_mm_min_pu8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pu8)
* [ ] [`_mm_movemask_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pi8)
* [ ] [`_mm_mulhi_pu16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_pu16)
* [ ] [`_mm_sad_pu8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_pu8)
* [ ] [`_mm_shuffle_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pi16)
* [ ] [`_mm_shuffle_ps`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_ps)
* [ ] [`_mm_storeh_pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pi)
* [ ] [`_mm_storel_pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pi)
* [ ] [`_mm_stream_pi`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pi)
</p></details>


<details><summary>["SSE2"]</summary><p>

* [ ] [`_mm_add_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_si64)
* [ ] [`_mm_cvtpd_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_pi32)
* [ ] [`_mm_cvtpi32_pd`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpi32_pd)
* [ ] [`_mm_cvttpd_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_pi32)
* [ ] [`_mm_loadu_si16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si16)
* [ ] [`_mm_loadu_si32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si32)
* [ ] [`_mm_movepi64_pi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movepi64_pi64)
* [ ] [`_mm_movpi64_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movpi64_epi64)
* [ ] [`_mm_mul_su32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_su32)
* [ ] [`_mm_set1_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64)
* [ ] [`_mm_set_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64)
* [ ] [`_mm_setr_epi64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi64)
* [ ] [`_mm_loadu_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si64)
* [ ] [`_mm_storeu_si16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si16)
* [ ] [`_mm_storeu_si32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si32)
* [ ] [`_mm_storeu_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si64)
* [ ] [`_mm_sub_si64`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_si64)
</p></details>


<details><summary>["SSSE3"]</summary><p>

* [ ] [`_mm_abs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi16)
* [ ] [`_mm_abs_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi32)
* [ ] [`_mm_abs_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_pi8)
* [ ] [`_mm_alignr_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_pi8)
* [ ] [`_mm_hadd_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi16)
* [ ] [`_mm_hadd_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pi32)
* [ ] [`_mm_hadds_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_pi16)
* [ ] [`_mm_hsub_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pi16)
* [ ] [`_mm_hsub_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pi32)
* [ ] [`_mm_hsubs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_pi16)
* [ ] [`_mm_maddubs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_pi16)
* [ ] [`_mm_mulhrs_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_pi16)
* [ ] [`_mm_shuffle_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pi8)
* [ ] [`_mm_sign_pi16`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi16)
* [ ] [`_mm_sign_pi32`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi32)
* [ ] [`_mm_sign_pi8`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_pi8)
<details><summary>["SSE4.1"]</summary><p>

* [ ] [`_mm_stream_load_si128`](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_load_si128)
</p></details>


Expand Down
8 changes: 4 additions & 4 deletions crates/core_arch/src/x86/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1715,11 +1715,11 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i {
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntdq
#[cfg_attr(test, assert_instr(vmovntdq))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
crate::arch::asm!(
"vmovntps [{mem_addr}], {a}",
"vmovntdq [{mem_addr}], {a}",
mem_addr = in(reg) mem_addr,
a = in(ymm_reg) a,
options(nostack, preserves_flags),
Expand All @@ -1742,12 +1742,12 @@ pub unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i) {
/// See [`_mm_sfence`] for details.
#[inline]
#[target_feature(enable = "avx")]
#[cfg_attr(test, assert_instr(vmovntps))] // FIXME vmovntpd
#[cfg_attr(test, assert_instr(vmovntpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d) {
crate::arch::asm!(
"vmovntps [{mem_addr}], {a}",
"vmovntpd [{mem_addr}], {a}",
mem_addr = in(reg) mem_addr,
a = in(ymm_reg) a,
options(nostack, preserves_flags),
Expand Down
2 changes: 0 additions & 2 deletions crates/core_arch/src/x86/avx2.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3124,8 +3124,6 @@ pub unsafe fn _mm256_srlv_epi64(a: __m256i, count: __m256i) -> __m256i {
transmute(psrlvq256(a.as_i64x4(), count.as_i64x4()))
}

// TODO _mm256_stream_load_si256 (__m256i const* mem_addr)

/// Subtract packed 16-bit integers in `b` from packed 16-bit integers in `a`
///
/// [Intel's documentation](https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_sub_epi16)
Expand Down
44 changes: 22 additions & 22 deletions crates/core_arch/src/x86/avx512bw.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4583,7 +4583,7 @@ pub unsafe fn _mm512_mask_loadu_epi16(src: __m512i, k: __mmask32, mem_addr: *con
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4603,7 +4603,7 @@ pub unsafe fn _mm512_maskz_loadu_epi16(k: __mmask32, mem_addr: *const i16) -> __
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4623,7 +4623,7 @@ pub unsafe fn _mm512_mask_loadu_epi8(src: __m512i, k: __mmask64, mem_addr: *cons
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(zmm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4643,7 +4643,7 @@ pub unsafe fn _mm512_maskz_loadu_epi8(k: __mmask64, mem_addr: *const i8) -> __m5
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(zmm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4663,7 +4663,7 @@ pub unsafe fn _mm256_mask_loadu_epi16(src: __m256i, k: __mmask16, mem_addr: *con
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4683,7 +4683,7 @@ pub unsafe fn _mm256_maskz_loadu_epi16(k: __mmask16, mem_addr: *const i16) -> __
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4703,7 +4703,7 @@ pub unsafe fn _mm256_mask_loadu_epi8(src: __m256i, k: __mmask32, mem_addr: *cons
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(ymm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4723,7 +4723,7 @@ pub unsafe fn _mm256_maskz_loadu_epi8(k: __mmask32, mem_addr: *const i8) -> __m2
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(ymm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4743,7 +4743,7 @@ pub unsafe fn _mm_mask_loadu_epi16(src: __m128i, k: __mmask8, mem_addr: *const i
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4763,7 +4763,7 @@ pub unsafe fn _mm_maskz_loadu_epi16(k: __mmask8, mem_addr: *const i16) -> __m128
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4783,7 +4783,7 @@ pub unsafe fn _mm_mask_loadu_epi8(src: __m128i, k: __mmask16, mem_addr: *const i
p = in(reg) mem_addr,
k = in(kreg) k,
dst = inout(xmm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4803,7 +4803,7 @@ pub unsafe fn _mm_maskz_loadu_epi8(k: __mmask16, mem_addr: *const i8) -> __m128i
p = in(reg) mem_addr,
k = in(kreg) k,
dst = out(xmm_reg) dst,
options(pure, readonly, nostack)
options(pure, readonly, nostack, preserves_flags)
);
dst
}
Expand All @@ -4821,7 +4821,7 @@ pub unsafe fn _mm512_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask32, a: _
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(zmm_reg) a,
options(nostack)
options(nostack, preserves_flags)
);
}

Expand All @@ -4838,7 +4838,7 @@ pub unsafe fn _mm512_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask64, a: __m
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(zmm_reg) a,
options(nostack)
options(nostack, preserves_flags)
);
}

Expand All @@ -4855,7 +4855,7 @@ pub unsafe fn _mm256_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask16, a: _
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(ymm_reg) a,
options(nostack)
options(nostack, preserves_flags)
);
}

Expand All @@ -4872,7 +4872,7 @@ pub unsafe fn _mm256_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask32, a: __m
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(ymm_reg) a,
options(nostack)
options(nostack, preserves_flags)
);
}

Expand All @@ -4889,7 +4889,7 @@ pub unsafe fn _mm_mask_storeu_epi16(mem_addr: *mut i16, mask: __mmask8, a: __m12
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(xmm_reg) a,
options(nostack)
options(nostack, preserves_flags)
);
}

Expand All @@ -4906,7 +4906,7 @@ pub unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a: __m128
p = in(reg) mem_addr,
mask = in(kreg) mask,
a = in(xmm_reg) a,
options(nostack)
options(nostack, preserves_flags)
);
}

Expand Down Expand Up @@ -8761,7 +8761,7 @@ pub unsafe fn _mm_mask_testn_epi8_mask(k: __mmask16, a: __m128i, b: __m128i) ->
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] //should be kmovq
pub unsafe fn _store_mask64(mem_addr: *mut u64, a: __mmask64) {
pub unsafe fn _store_mask64(mem_addr: *mut __mmask64, a: __mmask64) {
ptr::write(mem_addr as *mut __mmask64, a);
}

Expand All @@ -8772,7 +8772,7 @@ pub unsafe fn _store_mask64(mem_addr: *mut u64, a: __mmask64) {
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] //should be kmovd
pub unsafe fn _store_mask32(mem_addr: *mut u32, a: __mmask32) {
pub unsafe fn _store_mask32(mem_addr: *mut __mmask32, a: __mmask32) {
ptr::write(mem_addr as *mut __mmask32, a);
}

Expand All @@ -8783,7 +8783,7 @@ pub unsafe fn _store_mask32(mem_addr: *mut u32, a: __mmask32) {
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] //should be kmovq
pub unsafe fn _load_mask64(mem_addr: *const u64) -> __mmask64 {
pub unsafe fn _load_mask64(mem_addr: *const __mmask64) -> __mmask64 {
ptr::read(mem_addr as *const __mmask64)
}

Expand All @@ -8794,7 +8794,7 @@ pub unsafe fn _load_mask64(mem_addr: *const u64) -> __mmask64 {
#[target_feature(enable = "avx512bw")]
#[unstable(feature = "stdarch_x86_avx512", issue = "111137")]
#[cfg_attr(test, assert_instr(mov))] //should be kmovd
pub unsafe fn _load_mask32(mem_addr: *const u32) -> __mmask32 {
pub unsafe fn _load_mask32(mem_addr: *const __mmask32) -> __mmask32 {
ptr::read(mem_addr as *const __mmask32)
}

Expand Down
Loading

0 comments on commit 7387776

Please sign in to comment.