From d25d603e0b708d856e4cafca7dac1e6b7126c320 Mon Sep 17 00:00:00 2001
From: Quentin Khan
Date: Wed, 31 Jul 2024 04:35:05 -0700
Subject: [PATCH] Protect polyfills to only be defined when their instruction set is available.

`_mm512_dpbusd_epi32_madd`'s implementation uses the following AVX512BW intrinsics:
- `_mm512_sub_epi8`
- `_mm512_maddubs_epi16`
- `_mm512_madd_epi16`

`_mm256_dpbusd_epi32_bw`'s implementation uses the following AVX2 intrinsics:
- `_mm256_srli_epi32`
- `_mm256_xor_si256`
- `_mm256_sub_epi8`
- `_mm256_maddubs_epi16`
- `_mm256_madd_epi16`
- `_mm256_add_epi32`

PiperOrigin-RevId: 657957490
---
 src/xnnpack/intrinsics-polyfill.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/xnnpack/intrinsics-polyfill.h b/src/xnnpack/intrinsics-polyfill.h
index 73dc2d61397..2188c071cf3 100644
--- a/src/xnnpack/intrinsics-polyfill.h
+++ b/src/xnnpack/intrinsics-polyfill.h
@@ -170,6 +170,7 @@ void _mm512_storeu_epi32 (void* mem_addr, __m512i a) {
 }
 #endif // GCC pre-10
 
+#ifdef __AVX512BW__
 // VNNI replacement that uses vpmaddubsw.
 // u4 is uint4 in lower 4 bits.
 static XNN_INTRINSIC
@@ -181,9 +182,10 @@ __m512i _mm512_dpbusd_epi32_madd(__m512i i32, const __m512i u8, const __m512i u4
   const __m512i v = _mm512_madd_epi16(i12, vsixteen); // convert 16 bits to 32 bits
   return _mm512_add_epi32(i32, v);
 }
+#endif // __AVX512BW__
 #endif // __AVX512F__
 
-#ifdef __AVX__
+#ifdef __AVX2__
 
 // AVXVNNI replacement that uses vpmaddubsw.
 // i4h is int4 in upper 4 bits. Low bits are zero.
@@ -199,7 +201,7 @@ __m256i _mm256_dpbusd_epi32_bw(__m256i i32, const __m256i u8, const __m256i i4h)
   return _mm256_add_epi32(i32, v);
 }
 
-#endif // __AVX__
+#endif // __AVX2__
 
 #if XNN_ARCH_ARM
 