From 02a12d098f54307fae18895b96f09ebe7281d4cf Mon Sep 17 00:00:00 2001 From: supersjgk Date: Tue, 13 Feb 2024 14:39:46 -0600 Subject: [PATCH 1/2] Fixed issue #394-missing macro --- src/strategies/avx2/quant-avx2.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index 9d05fc39..aae3cb4b 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -54,6 +54,14 @@ #include "transform.h" #include "fast_coeff_cost.h" +// added missing header +#define _mm256_storeu2_m128i(/* __m128i* */ hiaddr, /* __m128i* */ loaddr, /* __m256i */ a) \ + do { __m256i _a = (a); \ + _mm_storeu_si128((__m128i*)(loaddr), _mm256_castsi256_si128(_a)); \ + _mm_storeu_si128((__m128i*)(hiaddr), _mm256_extractf128_si256(_a, 0x1)); \ + } while (0) + + static INLINE int32_t hsum32_8x32i(__m256i src) { __m128i a = _mm256_extracti128_si256(src, 0); From 203d3be6966484ed86f9671ddddd3255dd8aadb4 Mon Sep 17 00:00:00 2001 From: supersjgk Date: Thu, 15 Feb 2024 12:45:19 -0600 Subject: [PATCH 2/2] Fixed issue #394 - missing macro --- src/strategies/avx2/quant-avx2.c | 10 ++-------- src/strategies/missing-intel-intrinsics.h | 10 ++++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/strategies/avx2/quant-avx2.c b/src/strategies/avx2/quant-avx2.c index aae3cb4b..582823da 100644 --- a/src/strategies/avx2/quant-avx2.c +++ b/src/strategies/avx2/quant-avx2.c @@ -40,6 +40,8 @@ #include #include +#include "strategies/missing-intel-intrinsics.h" + #include "avx2_common_functions.h" #include "cu.h" #include "encoder.h" @@ -54,14 +56,6 @@ #include "transform.h" #include "fast_coeff_cost.h" -// added missing header -#define _mm256_storeu2_m128i(/* __m128i* */ hiaddr, /* __m128i* */ loaddr, /* __m256i */ a) \ - do { __m256i _a = (a); \ - _mm_storeu_si128((__m128i*)(loaddr), _mm256_castsi256_si128(_a)); \ - _mm_storeu_si128((__m128i*)(hiaddr), _mm256_extractf128_si256(_a, 0x1)); \ - } while (0) - - static INLINE int32_t hsum32_8x32i(__m256i src) { __m128i a = _mm256_extracti128_si256(src, 0); diff --git a/src/strategies/missing-intel-intrinsics.h b/src/strategies/missing-intel-intrinsics.h index f132f033..af8b805f 100644 --- a/src/strategies/missing-intel-intrinsics.h +++ b/src/strategies/missing-intel-intrinsics.h @@ -30,6 +30,16 @@ #ifndef _mm256_extract_epi32 #define _mm256_extract_epi32(a, index) (_mm_extract_epi32(_mm256_extracti128_si256((a), (index) >> 2), (index) & 3)) #endif + + #ifndef _mm256_storeu2_m128i + #define _mm256_storeu2_m128i(/* __m128i* */ hiaddr, /* __m128i* */ loaddr, /* __m256i */ a) \ + do { __m256i _a = (a); \ + _mm_storeu_si128((__m128i*)(loaddr), _mm256_castsi256_si128(_a)); \ + _mm_storeu_si128((__m128i*)(hiaddr), _mm256_extractf128_si256(_a, 0x1)); \ + } while (0) + #endif + #endif #endif +