From 91fbdde9a14b2ea884b52c9e4f7d9fa776d9eea7 Mon Sep 17 00:00:00 2001 From: nitheshsrikanth-mcw Date: Thu, 19 Dec 2024 12:25:49 -0800 Subject: [PATCH] Copybara import of the project: -- fd9d24dc76700b8aba75016ed5e5743f65631f8c by nithesh : VMulCAddC-Replaced yaml files with header table Replaced yaml files with header table for vmulcadd op -- 4fb020d493b463d600c5c2216f9c5234a286a061 by nithesh : Addressed review comments -- eb27915579882d90fe4910b2f9ba61edbb3025ea by nithesh : Addressed review comments -- b06ecd1e7fed14c604de184f5662aa7d4b0c1ce9 by nithesh : Address CI failed checks -- 2fb29c5430aee022076e300295e8f2fe53685c91 by nithesh : Addressed kernel declarations comment -- a4d5e628dd00de1071bff944d5b01af48e504564 by nithesh : Resolve "illegal instructions" for AVX512F tests -- 76e4518fd2da412517e6afc43cc197f10ee4015e by nithesh : Add support for build bazel -- a472377ec4ef9d10377ffb15d29221a7b8dbac8a by nithesh : Defined kernel declarations to use all datatypes -- e6882d455cda223f450802c583ccc5129b08ad25 by nithesh : Addressed the merge issue -- 7799d9a7460cafa31abe204452602c206f7672ab by nithesh : Addressed the test case issue -- 6a2b4d2593ffc5fa443628ee7f8fe36020436b49 by nithesh : Resolve check fails for ARM Arch FUTURE_COPYBARA_INTEGRATE_REVIEW=https://github.com/google/XNNPACK/pull/7411 from nitheshsrikanth-mcw:xnn_vmulcadd 6a2b4d2593ffc5fa443628ee7f8fe36020436b49 PiperOrigin-RevId: 707992277 --- BUILD.bazel | 2 + scripts/generate-tests.sh | 4 - src/f16-vmulcaddc/f16-vmulcaddc.h | 35 + src/f32-vmulcaddc/f32-vmulcaddc.h | 61 + src/xnnpack/vmulcaddc.h | 70 +- test/f16-vmulcaddc-minmax.cc | 634 +----- test/f16-vmulcaddc-minmax.yaml | 16 - test/f32-vmulcaddc-minmax.cc | 2882 +-------------------------- test/f32-vmulcaddc-minmax.yaml | 51 - test/vmulcaddc-microkernel-tester.h | 289 ++- tools/generate-vmulcaddc-test.py | 280 --- 11 files changed, 363 insertions(+), 3961 deletions(-) create mode 100644 src/f16-vmulcaddc/f16-vmulcaddc.h create mode 100644 
src/f32-vmulcaddc/f32-vmulcaddc.h delete mode 100644 test/f16-vmulcaddc-minmax.yaml delete mode 100644 test/f32-vmulcaddc-minmax.yaml delete mode 100755 tools/generate-vmulcaddc-test.py diff --git a/BUILD.bazel b/BUILD.bazel index 6b443921e0f..39c2847d453 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -91,6 +91,7 @@ MICROKERNEL_DEFS = [ "src/f16-velu/f16-velu.h", "src/f16-vhswish/f16-vhswish.h", "src/f16-vlrelu/f16-vlrelu.h", + "src/f16-vmulcaddc/f16-vmulcaddc.h", "src/f16-vneg/f16-vneg.h", "src/f16-vrnd/f16-vrndd.h", "src/f16-vrnd/f16-vrndne.h", @@ -143,6 +144,7 @@ MICROKERNEL_DEFS = [ "src/f32-vhswish/f32-vhswish.h", "src/f32-vlog/f32-vlog.h", "src/f32-vlrelu/f32-vlrelu.h", + "src/f32-vmulcaddc/f32-vmulcaddc.h", "src/f32-vneg/f32-vneg.h", "src/f32-vrelu/f32-vrelu.h", "src/f32-vrnd/f32-vrndd.h", diff --git a/scripts/generate-tests.sh b/scripts/generate-tests.sh index d48def9d6dc..58e6de3133f 100755 --- a/scripts/generate-tests.sh +++ b/scripts/generate-tests.sh @@ -252,10 +252,6 @@ tools/generate-raddexpminusmax-test.py --spec test/f32-raddexpminusmax.yaml --ou tools/generate-raddstoreexpminusmax-test.py --spec test/f16-raddstoreexpminusmax.yaml --output test/f16-raddstoreexpminusmax.cc & tools/generate-raddstoreexpminusmax-test.py --spec test/f32-raddstoreexpminusmax.yaml --output test/f32-raddstoreexpminusmax.cc & -### Tests for VMulCAddC micro-kernels -tools/generate-vmulcaddc-test.py --spec test/f16-vmulcaddc-minmax.yaml --output test/f16-vmulcaddc-minmax.cc & -tools/generate-vmulcaddc-test.py --spec test/f32-vmulcaddc-minmax.yaml --output test/f32-vmulcaddc-minmax.cc & - ### Tests for the portable SIMD wrappers. 
tools/xngen test/f32-simd.cc.in -D ARCH=scalar -D ARCH_MACRO="" -D TEST_REQUIRES="" -o test/f32-simd-scalar.cc & tools/xngen test/f32-simd.cc.in -D ARCH=sse2 -D ARCH_MACRO="XNN_ARCH_X86 || XNN_ARCH_X86_64" -D TEST_REQUIRES=TEST_REQUIRES_X86_SSE2 -o test/f32-simd-sse2.cc & diff --git a/src/f16-vmulcaddc/f16-vmulcaddc.h b/src/f16-vmulcaddc/f16-vmulcaddc.h new file mode 100644 index 00000000000..1a9ad29f4c2 --- /dev/null +++ b/src/f16-vmulcaddc/f16-vmulcaddc.h @@ -0,0 +1,35 @@ +// Copyright 2023 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. +#ifndef XNN_UKERNEL_WITH_PARAMS +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type, init_params) \ + XNN_UKERNEL(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type) +#define XNN_DEFINED_UKERNEL_WITH_PARAMS +#endif + +#ifndef XNN_UKERNEL +#define XNN_UKERNEL(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type) \ + XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, row_tile, channel_tile, datatype, void, /*init_params=*/nullptr) +#define XNN_DEFINED_UKERNEL +#endif + +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, 2, 8, xnn_float16, union xnn_f16_minmax_params, xnn_init_f16_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(xnn_arch_x86, xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, 2, 16, xnn_float16, union xnn_f16_minmax_params, xnn_init_f16_minmax_scalar_params) +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 + +#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, 2, 8, xnn_float16, union xnn_f16_minmax_params, xnn_init_f16_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, 2, 16, xnn_float16, union 
xnn_f16_minmax_params, xnn_init_f16_minmax_scalar_params) +#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) + +#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_UKERNEL_WITH_PARAMS +#endif + +#ifdef XNN_DEFINED_UKERNEL +#undef XNN_DEFINED_UKERNEL +#undef XNN_UKERNEL +#endif diff --git a/src/f32-vmulcaddc/f32-vmulcaddc.h b/src/f32-vmulcaddc/f32-vmulcaddc.h new file mode 100644 index 00000000000..3dd8bba9490 --- /dev/null +++ b/src/f32-vmulcaddc/f32-vmulcaddc.h @@ -0,0 +1,61 @@ +// Copyright 2023 Google LLC +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. +#ifndef XNN_UKERNEL_WITH_PARAMS +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type, init_params) \ + XNN_UKERNEL(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type) +#define XNN_DEFINED_UKERNEL_WITH_PARAMS +#endif + +#ifndef XNN_UKERNEL +#define XNN_UKERNEL(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type) \ + XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, row_tile, channel_tile, datatype, void, /*init_params=*/nullptr) +#define XNN_DEFINED_UKERNEL +#endif + +#if XNN_ARCH_X86 || XNN_ARCH_X86_64 +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_X86, xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_X86, xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 + +#if XNN_ARCH_ARM || XNN_ARCH_ARM64 +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, 2, 4, float, union xnn_f32_minmax_params, 
xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 + +#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +#endif //XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD + +#if XNN_ARCH_WASMRELAXEDSIMD +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMRELAXEDSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMRELAXEDSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMRELAXEDSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) + XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMRELAXEDSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) 
+#endif // XNN_ARCH_WASMRELAXEDSIMD + +#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASM, xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, 2, 1, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASM, xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, 2, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASM, xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD + +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, 2, 1, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, 2, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) +XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params) + +#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_DEFINED_UKERNEL_WITH_PARAMS +#undef XNN_UKERNEL_WITH_PARAMS +#endif + +#ifdef XNN_DEFINED_UKERNEL +#undef XNN_DEFINED_UKERNEL +#undef XNN_UKERNEL +#endif diff --git a/src/xnnpack/vmulcaddc.h b/src/xnnpack/vmulcaddc.h index 953a250401c..b7ef9573b02 100644 --- a/src/xnnpack/vmulcaddc.h +++ b/src/xnnpack/vmulcaddc.h @@ -15,63 +15,19 @@ extern "C" { #endif - -#define DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(fn_name) \ - XNN_INTERNAL void fn_name( \ - size_t m, \ - size_t c, \ - const float* x, \ - size_t x_stride, \ - const float* w, \ - float* y, \ - size_t y_stride, \ - const union xnn_f32_minmax_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]); - -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x) 
-DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x) - -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x) -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x) - -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x) -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x) - -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x) -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x) - -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x) -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x) - -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x) -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x) - -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x) -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x) - -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x) -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x) -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x) - -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x) -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x) -DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x) - -#define DECLARE_F16_VMULCADDC_MINMAX_UKERNEL_FUNCTION(fn_name) \ - XNN_INTERNAL void fn_name( \ - size_t m, \ - size_t 
c, \ - const xnn_float16* x, \ - size_t x_stride, \ - const xnn_float16* w, \ - xnn_float16* y, \ - size_t y_stride, \ - const union xnn_f16_minmax_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]); - -DECLARE_F16_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x) -DECLARE_F16_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x) - -DECLARE_F16_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x) -DECLARE_F16_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x) +#define XNN_UKERNEL(arch_flags, fn_name, row_tile, channel_tile, datatype, params_type) \ + XNN_INTERNAL void fn_name( \ + size_t m, \ + size_t c, \ + const datatype* x, \ + size_t x_stride, \ + const datatype* w, \ + datatype* y, \ + size_t y_stride, \ + const params_type params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]); +#include "f32-vmulcaddc/f32-vmulcaddc.h" +#include "f16-vmulcaddc/f16-vmulcaddc.h" +#undef XNN_UKERNEL #ifdef __cplusplus } // extern "C" diff --git a/test/f16-vmulcaddc-minmax.cc b/test/f16-vmulcaddc-minmax.cc index 64b7e60fc24..e8788571aac 100644 --- a/test/f16-vmulcaddc-minmax.cc +++ b/test/f16-vmulcaddc-minmax.cc @@ -4,629 +4,27 @@ // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit! 
-// Specification: test/f16-vmulcaddc-minmax.yaml +// Specification: f16-vmulcaddc // Generator: tools/generate-vmulcaddc-test.py #include #include "xnnpack/common.h" #include "xnnpack/isa-checks.h" -#include "xnnpack/microparams-init.h" #include "xnnpack/vmulcaddc.h" +#include "xnnpack/microparams-init.h" #include "vmulcaddc-microkernel-tester.h" - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_eq_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(8) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_div_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 16; channels < 80; channels += 8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_lt_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 1; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, channels_gt_8) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 9; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, rows_lt_2) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - 
VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, rows_div_2) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, rows_gt_2) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, input_stride) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .input_stride(43) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, output_stride) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .output_stride(43) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, inplace) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; 
rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, qmin) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__NEONFP16ARITH_2X, qmax) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_eq_16) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(16) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_div_16) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 32; channels < 160; channels += 16) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_lt_16) { 
- TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 1; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, channels_gt_16) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t channels = 17; channels < 32; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, rows_lt_2) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, rows_div_2) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, rows_gt_2) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, input_stride) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 
1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .input_stride(83) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, output_stride) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .output_stride(83) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, inplace) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, qmin) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__NEONFP16ARITH_2X, qmax) { - TEST_REQUIRES_ARM_NEON_FP16_ARITH; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, xnn_init_f16_minmax_scalar_params); 
- } - } - } -#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64) - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_eq_8) { - TEST_REQUIRES_X86_FMA3; - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(8) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_div_8) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 16; channels < 80; channels += 8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_lt_8) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 1; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, channels_gt_8) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 9; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, rows_lt_2) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, rows_div_2) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - 
.rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, rows_gt_2) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, input_stride) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .input_stride(43) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, output_stride) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .output_stride(43) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, inplace) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, qmin) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmin(128) - 
.Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C8__FMA3_2X, qmax) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_eq_16) { - TEST_REQUIRES_X86_FMA3; - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(16) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_div_16) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 32; channels < 160; channels += 16) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_lt_16) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 1; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, channels_gt_16) { - TEST_REQUIRES_X86_FMA3; - for (size_t channels = 17; channels < 32; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(2) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, rows_lt_2) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 80; 
channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, rows_div_2) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, rows_gt_2) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, input_stride) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .input_stride(83) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, output_stride) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .output_stride(83) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, inplace) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - 
VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, qmin) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } - - TEST(F16_VMULCADDC_MINMAX_C16__FMA3_2X, qmax) { - TEST_REQUIRES_X86_FMA3; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 80; channels += 15) { - VMulCAddCMicrokernelTester() - .channel_tile(16) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, xnn_init_f16_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type, init_params) XNN_TEST_VMULCADDC_ROW_DIV(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params);\ +XNN_TEST_VMULCADDC_ROW_LT(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_ROW_GT(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_CHANNEL_GT(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_CHANNEL_EQ(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_CHANNEL_DIV(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_CHANNEL_LT(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_INPUT_STRIDE(ukernel, arch_flags, 
row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_OUTPUT_STRIDE(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_INPLACE(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_QMAX(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_QMIN(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); +#include "f16-vmulcaddc/f16-vmulcaddc.h" +#undef XNN_UKERNEL_WITH_PARAMS diff --git a/test/f16-vmulcaddc-minmax.yaml b/test/f16-vmulcaddc-minmax.yaml deleted file mode 100644 index 0fb65eb6c9f..00000000000 --- a/test/f16-vmulcaddc-minmax.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# Copyright 2020 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. - -# ARM NEON+FP16ARITH -- name: xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x - init: xnn_init_f16_minmax_scalar_params -- name: xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x - init: xnn_init_f16_minmax_scalar_params - -# x86 FMA3 -- name: xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x - init: xnn_init_f16_minmax_scalar_params -- name: xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x - init: xnn_init_f16_minmax_scalar_params diff --git a/test/f32-vmulcaddc-minmax.cc b/test/f32-vmulcaddc-minmax.cc index 3d4a8d6865b..4b033be1f84 100644 --- a/test/f32-vmulcaddc-minmax.cc +++ b/test/f32-vmulcaddc-minmax.cc @@ -4,2877 +4,27 @@ // LICENSE file in the root directory of this source tree. // // Auto-generated file. Do not edit! 
-// Specification: test/f32-vmulcaddc-minmax.yaml +// Specification: f32-vmulcaddc // Generator: tools/generate-vmulcaddc-test.py #include #include "xnnpack/common.h" #include "xnnpack/isa-checks.h" -#include "xnnpack/microparams-init.h" #include "xnnpack/vmulcaddc.h" +#include "xnnpack/microparams-init.h" #include "vmulcaddc-microkernel-tester.h" - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, channels_eq_4) { - TEST_REQUIRES_ARM_NEON; - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(4) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, channels_div_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 8; channels < 40; channels += 4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, channels_lt_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 1; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, channels_gt_4) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 5; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, rows_lt_2) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - 
TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, rows_div_2) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, rows_gt_2) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, input_stride) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .input_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, output_stride) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .output_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, inplace) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, qmin) 
{ - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEON_2X, qmax) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, channels_eq_4) { - TEST_REQUIRES_ARM_NEON_FMA; - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(4) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, channels_div_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 8; channels < 40; channels += 4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, channels_lt_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 1; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, channels_gt_4) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 5; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - 
.Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, rows_lt_2) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, rows_div_2) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, rows_gt_2) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, input_stride) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .input_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, output_stride) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .output_stride(23) - 
.Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, inplace) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, qmin) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__NEONFMA_2X, qmax) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, channels_eq_8) { - TEST_REQUIRES_ARM_NEON; - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(8) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, channels_div_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 16; channels < 80; channels += 8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - - 
TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, channels_lt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 1; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, channels_gt_8) { - TEST_REQUIRES_ARM_NEON; - for (size_t channels = 9; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, rows_lt_2) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, rows_div_2) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, rows_gt_2) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, input_stride) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - 
.channel_tile(8) - .channels(channels) - .rows(rows) - .input_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, output_stride) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .output_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, inplace) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, qmin) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEON_2X, qmax) { - TEST_REQUIRES_ARM_NEON; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_ARM || XNN_ARCH_ARM64 - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, channels_eq_8) { - TEST_REQUIRES_ARM_NEON_FMA; - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(8) - .rows(2) - 
.Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, channels_div_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 16; channels < 80; channels += 8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, channels_lt_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 1; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, channels_gt_8) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t channels = 9; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, rows_lt_2) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, rows_div_2) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, rows_gt_2) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 3; rows < 4; 
rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, input_stride) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .input_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, output_stride) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .output_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, inplace) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, qmin) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__NEONFMA_2X, qmax) { - TEST_REQUIRES_ARM_NEON_FMA; - for (size_t rows = 1; 
rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, channels_eq_4) { - TEST_REQUIRES_X86_SSE; - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(4) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, channels_div_4) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 8; channels < 40; channels += 4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, channels_lt_4) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 1; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, channels_gt_4) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 5; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, rows_lt_2) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - 
TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, rows_div_2) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, rows_gt_2) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, input_stride) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .input_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, output_stride) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .output_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, inplace) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, qmin) { - 
TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__SSE_2X, qmax) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_X86 || XNN_ARCH_X86_64 - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, channels_eq_8) { - TEST_REQUIRES_X86_SSE; - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(8) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, channels_div_8) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 16; channels < 80; channels += 8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, channels_lt_8) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 1; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, channels_gt_8) { - TEST_REQUIRES_X86_SSE; - for (size_t channels = 9; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, 
xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, rows_lt_2) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, rows_div_2) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, rows_gt_2) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, input_stride) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .input_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, output_stride) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .output_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - 
TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, inplace) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, qmin) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__SSE_2X, qmax) { - TEST_REQUIRES_X86_SSE; - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64 - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, channels_eq_4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(4) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, channels_div_4) { - for (size_t channels = 8; channels < 40; channels += 4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - 
.channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, channels_gt_4) { - for (size_t channels = 5; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .input_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, 
output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .output_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_ARM_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, channels_eq_4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(4) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, channels_div_4) { - for (size_t channels = 8; channels < 40; channels += 4) { - VMulCAddCMicrokernelTester() - 
.channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, channels_gt_4) { - for (size_t channels = 5; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - 
for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .input_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .output_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMSIMD_X86_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, channels_eq_8) { - 
VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(8) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, channels_div_8) { - for (size_t channels = 16; channels < 80; channels += 8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, channels_lt_8) { - for (size_t channels = 1; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, channels_gt_8) { - for (size_t channels = 9; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; 
channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .input_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .output_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_ARM_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - 
.channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, channels_eq_8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(8) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, channels_div_8) { - for (size_t channels = 16; channels < 80; channels += 8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, channels_lt_8) { - for (size_t channels = 1; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, channels_gt_8) { - for (size_t channels = 9; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 40; channels += 7) { - 
VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .input_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .output_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmin(128) - 
.Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMSIMD_X86_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, channels_eq_4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(4) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, channels_div_4) { - for (size_t channels = 8; channels < 40; channels += 4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, channels_gt_4) { - for (size_t channels = 5; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - 
VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .input_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .output_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - 
.channel_tile(4) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_FMA_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, channels_eq_4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(4) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, channels_div_4) { - for (size_t channels = 8; channels < 40; channels += 4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, 
channels_gt_4) { - for (size_t channels = 5; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .input_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - 
.channel_tile(4) - .channels(channels) - .rows(rows) - .output_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASMRELAXEDSIMD_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, channels_eq_8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(8) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, channels_div_8) { - for (size_t channels = 16; channels < 80; channels += 8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } 
- } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, channels_lt_8) { - for (size_t channels = 1; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, channels_gt_8) { - for (size_t channels = 9; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - 
VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .input_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .output_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_FMA_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, channels_eq_8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - 
.channels(8) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, channels_div_8) { - for (size_t channels = 16; channels < 80; channels += 8) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, channels_lt_8) { - for (size_t channels = 1; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, channels_gt_8) { - for (size_t channels = 9; channels < 16; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 40; 
channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .input_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .output_stride(43) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - .channel_tile(8) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C8__WASMRELAXEDSIMD_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 40; channels += 7) { - VMulCAddCMicrokernelTester() - 
.channel_tile(8) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C1__WASM_2X, channels_eq_1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(1) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C1__WASM_2X, channels_gt_1) { - for (size_t channels = 2; channels < 10; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C1__WASM_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C1__WASM_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C1__WASM_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C1__WASM_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; 
channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .input_stride(7) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C1__WASM_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .output_stride(7) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C1__WASM_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C1__WASM_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C1__WASM_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, channels_eq_2) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(2) - .rows(2) - 
.Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, channels_div_2) { - for (size_t channels = 4; channels < 20; channels += 2) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, channels_lt_2) { - for (size_t channels = 1; channels < 2; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, channels_gt_2) { - for (size_t channels = 3; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, 
xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .input_stride(13) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .output_stride(13) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C2__WASM_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -#if XNN_ARCH_WASM || 
XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, channels_eq_4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(4) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, channels_div_4) { - for (size_t channels = 8; channels < 40; channels += 4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, channels_gt_4) { - for (size_t channels = 5; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 
1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .input_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .output_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } - - TEST(F32_VMULCADDC_MINMAX_C4__WASM_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmax(128) - 
.Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, xnn_init_f32_minmax_scalar_params); - } - } - } -#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD - - -TEST(F32_VMULCADDC_MINMAX_C1__SCALAR_2X, channels_eq_1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(1) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, xnn_init_f32_minmax_scalar_params); -} - -TEST(F32_VMULCADDC_MINMAX_C1__SCALAR_2X, channels_gt_1) { - for (size_t channels = 2; channels < 10; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, xnn_init_f32_minmax_scalar_params); - } -} - -TEST(F32_VMULCADDC_MINMAX_C1__SCALAR_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C1__SCALAR_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C1__SCALAR_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C1__SCALAR_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - 
.channels(channels) - .rows(rows) - .input_stride(7) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C1__SCALAR_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .output_stride(7) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C1__SCALAR_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C1__SCALAR_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C1__SCALAR_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 5; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(1) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, channels_eq_2) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(2) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, channels_div_2) { - for (size_t channels = 4; channels < 20; channels += 2) { - 
VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, channels_lt_2) { - for (size_t channels = 1; channels < 2; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, channels_gt_2) { - for (size_t channels = 3; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 10; channels += 
1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .input_stride(13) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .output_stride(13) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C2__SCALAR_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 10; channels += 1) { - VMulCAddCMicrokernelTester() - .channel_tile(2) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, channels_eq_4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(4) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, channels_div_4) { - for (size_t 
channels = 8; channels < 40; channels += 4) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, channels_lt_4) { - for (size_t channels = 1; channels < 4; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, channels_gt_4) { - for (size_t channels = 5; channels < 8; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(2) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, rows_lt_2) { - for (size_t rows = 1; rows < 2; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, rows_div_2) { - for (size_t rows = 4; rows <= 8; rows += 2) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, rows_gt_2) { - for (size_t rows = 3; rows < 4; rows++) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, input_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for 
(size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .input_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, output_stride) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .output_stride(23) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, inplace) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, qmin) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} - -TEST(F32_VMULCADDC_MINMAX_C4__SCALAR_2X, qmax) { - for (size_t rows = 1; rows <= 6; rows += 1) { - for (size_t channels = 1; channels <= 20; channels += 3) { - VMulCAddCMicrokernelTester() - .channel_tile(4) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, xnn_init_f32_minmax_scalar_params); - } - } -} \ No newline at end of file +#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type, init_params) XNN_TEST_VMULCADDC_ROW_DIV(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params);\ 
+XNN_TEST_VMULCADDC_ROW_LT(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_ROW_GT(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_CHANNEL_GT(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_CHANNEL_EQ(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_CHANNEL_DIV(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_CHANNEL_LT(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_INPUT_STRIDE(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_OUTPUT_STRIDE(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_INPLACE(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_QMAX(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); \ +XNN_TEST_VMULCADDC_QMIN(ukernel, arch_flags, row_tile, channel_tile, datatype, params_type, init_params); +#include "f32-vmulcaddc/f32-vmulcaddc.h" +#undef XNN_UKERNEL_WITH_PARAMS diff --git a/test/f32-vmulcaddc-minmax.yaml b/test/f32-vmulcaddc-minmax.yaml deleted file mode 100644 index 7bb7ff42c2e..00000000000 --- a/test/f32-vmulcaddc-minmax.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2019 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -# ARM NEON -- name: xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x - init: xnn_init_f32_minmax_scalar_params -# x86 SSE -- name: xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x - init: xnn_init_f32_minmax_scalar_params -# WAsm SIMD -- name: xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x - init: xnn_init_f32_minmax_scalar_params -# WAsm Relaxed SIMD -- name: xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x - init: xnn_init_f32_minmax_scalar_params -# WAsm -- name: xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x - init: xnn_init_f32_minmax_scalar_params -# Scalar -- name: xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x - init: xnn_init_f32_minmax_scalar_params -- name: xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x - init: xnn_init_f32_minmax_scalar_params -- 
name: xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x - init: xnn_init_f32_minmax_scalar_params diff --git a/test/vmulcaddc-microkernel-tester.h b/test/vmulcaddc-microkernel-tester.h index daf1359c719..429eaefe0c8 100644 --- a/test/vmulcaddc-microkernel-tester.h +++ b/test/vmulcaddc-microkernel-tester.h @@ -15,104 +15,99 @@ #include #include + +#include "next_prime.h" +#include "replicable_random_device.h" #include "xnnpack.h" #include "xnnpack/buffer.h" #include "xnnpack/math.h" #include "xnnpack/microfnptr.h" #include "xnnpack/microparams.h" #include "xnnpack/pack.h" -#include "replicable_random_device.h" class VMulCAddCMicrokernelTester { - public: - VMulCAddCMicrokernelTester& channel_tile(size_t channel_tile) { +public: + VMulCAddCMicrokernelTester& channel_tile(size_t channel_tile) + { this->channel_tile_ = channel_tile; return *this; } - size_t channel_tile() const { - return this->channel_tile_; - } + size_t channel_tile() const { return this->channel_tile_; } - VMulCAddCMicrokernelTester& channels(size_t channels) { + VMulCAddCMicrokernelTester& channels(size_t channels) + { assert(channels != 0); this->channels_ = channels; return *this; } - size_t channels() const { - return this->channels_; - } + size_t channels() const { return this->channels_; } - size_t packed_channels() const { + size_t packed_channels() const + { return channels() % channel_tile() == 0 ? channels() : (channels() / channel_tile() + 1) * channel_tile(); } - VMulCAddCMicrokernelTester& rows(size_t rows) { + VMulCAddCMicrokernelTester& rows(size_t rows) + { assert(rows != 0); this->rows_ = rows; return *this; } - size_t rows() const { - return this->rows_; - } + size_t rows() const { return this->rows_; } - VMulCAddCMicrokernelTester& input_stride(size_t input_stride) { + VMulCAddCMicrokernelTester& input_stride(size_t input_stride) + { this->input_stride_ = input_stride; return *this; } - size_t input_stride() const { - return this->input_stride_ == 0 ? 
channels() : this->input_stride_; - } + size_t input_stride() const { return this->input_stride_ == 0 ? channels() : this->input_stride_; } - VMulCAddCMicrokernelTester& output_stride(size_t output_stride) { + VMulCAddCMicrokernelTester& output_stride(size_t output_stride) + { this->output_stride_ = output_stride; return *this; } - size_t output_stride() const { - return this->output_stride_ == 0 ? channels() : this->output_stride_; - } + size_t output_stride() const { return this->output_stride_ == 0 ? channels() : this->output_stride_; } - VMulCAddCMicrokernelTester& inplace(bool inplace) { + VMulCAddCMicrokernelTester& inplace(bool inplace) + { this->inplace_ = inplace; return *this; } - bool inplace() const { - return this->inplace_; - } + bool inplace() const { return this->inplace_; } - VMulCAddCMicrokernelTester& qmin(uint8_t qmin) { + VMulCAddCMicrokernelTester& qmin(uint8_t qmin) + { this->qmin_ = qmin; return *this; } - uint8_t qmin() const { - return this->qmin_; - } + uint8_t qmin() const { return this->qmin_; } - VMulCAddCMicrokernelTester& qmax(uint8_t qmax) { + VMulCAddCMicrokernelTester& qmax(uint8_t qmax) + { this->qmax_ = qmax; return *this; } - uint8_t qmax() const { - return this->qmax_; - } + uint8_t qmax() const { return this->qmax_; } - VMulCAddCMicrokernelTester& iterations(size_t iterations) { + VMulCAddCMicrokernelTester& iterations(size_t iterations) + { this->iterations_ = iterations; return *this; } - size_t iterations() const { - return this->iterations_; - } + size_t iterations() const { return this->iterations_; } - void Test(xnn_f16_vmulcaddc_ukernel_fn vmulcaddc, xnn_init_f16_minmax_params_fn init_params) const { + void Test(xnn_f16_vmulcaddc_ukernel_fn vmulcaddc, xnn_init_f16_minmax_params_fn init_params) const + { xnnpack::ReplicableRandomDevice rng; std::uniform_real_distribution f32dist; @@ -123,9 +118,9 @@ class VMulCAddCMicrokernelTester { xnnpack::Buffer x((rows() - 1) * input_stride() + channels() + XNN_EXTRA_BYTES / 
sizeof(xnn_float16)); xnnpack::Buffer scale(channels()); xnnpack::Buffer bias(channels()); - xnnpack::Buffer packed_w(packed_channels() * - 2); - xnnpack::Buffer y((rows() - 1) * output_stride() + channels() + (inplace() ? XNN_EXTRA_BYTES / sizeof(xnn_float16) : 0)); + xnnpack::Buffer packed_w(packed_channels() * 2); + xnnpack::Buffer y( + (rows() - 1) * output_stride() + channels() + (inplace() ? XNN_EXTRA_BYTES / sizeof(xnn_float16) : 0)); xnnpack::Buffer y_ref(rows() * channels()); for (size_t iteration = 0; iteration < iterations(); iteration++) { @@ -137,11 +132,9 @@ class VMulCAddCMicrokernelTester { } const xnn_float16* x_data = inplace() ? y.data() : x.data(); - xnn_pack_f16_vmulcaddc_w(channels(), channel_tile(), - reinterpret_cast(scale.data()), - reinterpret_cast(bias.data()), - reinterpret_cast(packed_w.data()), - nullptr); + xnn_pack_f16_vmulcaddc_w( + channels(), channel_tile(), reinterpret_cast(scale.data()), + reinterpret_cast(bias.data()), reinterpret_cast(packed_w.data()), nullptr); // Compute reference results. for (size_t i = 0; i < rows(); i++) { @@ -164,24 +157,24 @@ class VMulCAddCMicrokernelTester { init_params(¶ms, static_cast(y_min), static_cast(y_max)); // Call optimized micro-kernel. - vmulcaddc(rows(), channels() * sizeof(xnn_float16), - x_data, input_stride() * sizeof(xnn_float16), - packed_w.data(), - y.data(), output_stride() * sizeof(xnn_float16), - ¶ms); + vmulcaddc( + rows(), channels() * sizeof(xnn_float16), x_data, input_stride() * sizeof(xnn_float16), packed_w.data(), + y.data(), output_stride() * sizeof(xnn_float16), ¶ms); // Verify results. 
for (size_t i = 0; i < rows(); i++) { for (size_t j = 0; j < channels(); j++) { - EXPECT_NEAR(y[i * output_stride() + j], y_ref[i * channels() + j], std::max(1.0e-4f, std::abs(y_ref[i * channels() + j]) * 1.0e-2f)) - << "at pixel " << i << " / " << rows() - << ", channel = " << j << " / " << channels(); + EXPECT_NEAR( + y[i * output_stride() + j], y_ref[i * channels() + j], + std::max(1.0e-4f, std::abs(y_ref[i * channels() + j]) * 1.0e-2f)) + << "at pixel " << i << " / " << rows() << ", channel = " << j << " / " << channels(); } } } } - void Test(xnn_f32_vmulcaddc_ukernel_fn vmulcaddc, xnn_init_f32_minmax_params_fn init_params) const { + void Test(xnn_f32_vmulcaddc_ukernel_fn vmulcaddc, xnn_init_f32_minmax_params_fn init_params) const + { xnnpack::ReplicableRandomDevice rng; std::uniform_real_distribution f32dist; @@ -193,7 +186,8 @@ class VMulCAddCMicrokernelTester { xnnpack::Buffer scale(channels()); xnnpack::Buffer bias(channels()); xnnpack::Buffer packed_w(packed_channels() * 2); - xnnpack::Buffer y((rows() - 1) * output_stride() + channels() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0)); + xnnpack::Buffer y( + (rows() - 1) * output_stride() + channels() + (inplace() ? XNN_EXTRA_BYTES / sizeof(float) : 0)); xnnpack::Buffer y_ref(rows() * channels()); for (size_t iteration = 0; iteration < iterations(); iteration++) { std::generate(scale.begin(), scale.end(), [&]() { return f32dist(rng); }); @@ -204,8 +198,7 @@ class VMulCAddCMicrokernelTester { } const float* x_data = inplace() ? y.data() : x.data(); - xnn_pack_f32_vmulcaddc_w(channels(), channel_tile(), - scale.data(), bias.data(), packed_w.data(), nullptr); + xnn_pack_f32_vmulcaddc_w(channels(), channel_tile(), scale.data(), bias.data(), packed_w.data(), nullptr); // Compute reference results. for (size_t i = 0; i < rows(); i++) { @@ -227,24 +220,22 @@ class VMulCAddCMicrokernelTester { init_params(¶ms, y_min, y_max); // Call optimized micro-kernel. 
- vmulcaddc(rows(), channels() * sizeof(float), - x_data, input_stride() * sizeof(float), - packed_w.data(), - y.data(), output_stride() * sizeof(float), - ¶ms); + vmulcaddc( + rows(), channels() * sizeof(float), x_data, input_stride() * sizeof(float), packed_w.data(), y.data(), + output_stride() * sizeof(float), ¶ms); // Verify results. for (size_t i = 0; i < rows(); i++) { for (size_t j = 0; j < channels(); j++) { - EXPECT_NEAR(y[i * output_stride() + j], y_ref[i * channels() + j], std::abs(y_ref[i * channels() + j]) * 1.0e-6f) - << "at pixel " << i << " / " << rows() - << ", channel = " << j << " / " << channels(); + EXPECT_NEAR( + y[i * output_stride() + j], y_ref[i * channels() + j], std::abs(y_ref[i * channels() + j]) * 1.0e-6f) + << "at pixel " << i << " / " << rows() << ", channel = " << j << " / " << channels(); } } } } - private: +private: size_t channel_tile_{1}; size_t channels_{1}; size_t rows_{1}; @@ -255,3 +246,163 @@ class VMulCAddCMicrokernelTester { uint8_t qmax_{255}; size_t iterations_{15}; }; + +#define XNN_TEST_VMULCADDC_ROW_DIV(ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, ROW_div) \ + { \ + for (size_t rows_ = row_tile_ * 2; rows_ <= row_tile_ * 4; rows_ += row_tile_) { \ + for (size_t channels_ = 1; channels_ <= channel_tile_ * 5; channels_ += std::max(1, channel_tile_ - 1)) { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(rows_) \ + .Test(ukernel, init_params); \ + } \ + } \ + } + +#define XNN_TEST_VMULCADDC_ROW_LT(ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, ROW_lt) \ + { \ + for (size_t rows_ = 1; rows_ < row_tile_; rows_++) { \ + for (size_t channels_ = 1; channels_ <= channel_tile_ * 5; channels_ += std::max(1, channel_tile_ - 1)) { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(rows_) \ + .Test(ukernel, init_params); \ 
+ } \ + } \ + } +#define XNN_TEST_VMULCADDC_ROW_GT(ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, ROW_gt) \ + { \ + for (size_t rows_ = row_tile_ + 1; rows_ < row_tile_ * 2; rows_++) { \ + for (size_t channels_ = 1; channels_ <= channel_tile_ * 5; channels_ += std::max(1, channel_tile_ - 1)) { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(rows_) \ + .Test(ukernel, init_params); \ + } \ + } \ + } +#define XNN_TEST_VMULCADDC_CHANNEL_GT( \ + ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, channels_gt_) \ + { \ + for (size_t channels_ = channel_tile_ + 1; channels_ < (channel_tile_ == 1 ? 10 : channel_tile_ * 2); \ + channels_++) { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(row_tile_) \ + .Test(ukernel, init_params); \ + } \ + } +#define XNN_TEST_VMULCADDC_CHANNEL_EQ( \ + ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, channels_eq_) \ + { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channel_tile_) \ + .rows(row_tile_) \ + .Test(ukernel, init_params); \ + } +#define XNN_TEST_VMULCADDC_CHANNEL_DIV( \ + ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, channels_div_) \ + { \ + for (size_t channels_ = channel_tile_ * 2; channels_ < channel_tile_ * 10; channels_ += channel_tile_) { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(row_tile_) \ + .Test(ukernel, init_params); \ + } \ + } +#define XNN_TEST_VMULCADDC_CHANNEL_LT( \ + ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, channels_lt_) \ + { \ + for (size_t channels_ = 1; channels_ < channel_tile_; channels_++) { \ + VMulCAddCMicrokernelTester() \ + 
.channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(row_tile_) \ + .Test(ukernel, init_params); \ + } \ + } +#define XNN_TEST_VMULCADDC_INPUT_STRIDE( \ + ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, input_stride) \ + { \ + for (size_t rows_ = 1; rows_ <= row_tile_ * 3; rows_ += std::max(1, row_tile_ - 1)) { \ + for (size_t channels_ = 1; channels_ <= channel_tile_ * 5; channels_ += std::max(1, channel_tile_ - 1)) { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(rows_) \ + .input_stride(xnnpack::NextPrime(channel_tile_ * 5 + 1)) \ + .Test(ukernel, init_params); \ + } \ + } \ + } +#define XNN_TEST_VMULCADDC_OUTPUT_STRIDE( \ + ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, output_stride) \ + { \ + for (size_t rows_ = 1; rows_ <= row_tile_ * 3; rows_ += std::max(1, row_tile_ - 1)) { \ + for (size_t channels_ = 1; channels_ <= channel_tile_ * 5; channels_ += std::max(1, channel_tile_ - 1)) { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(rows_) \ + .output_stride(xnnpack::NextPrime(channel_tile_ * 5 + 1)) \ + .Test(ukernel, init_params); \ + } \ + } \ + } +#define XNN_TEST_VMULCADDC_INPLACE(ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, inplace) \ + { \ + for (size_t rows_ = 1; rows_ <= row_tile_ * 3; rows_ += std::max(1, row_tile_ - 1)) { \ + for (size_t channels_ = 1; channels_ <= channel_tile_ * 5; channels_ += std::max(1, channel_tile_ - 1)) { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(rows_) \ + .inplace(true) \ + .Test(ukernel, init_params); \ + } \ + } \ + } +#define XNN_TEST_VMULCADDC_QMIN(ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, qmin) \ + { \ + for (size_t rows_ = 1; 
rows_ <= row_tile_ * 3; rows_ += std::max(1, row_tile_ - 1)) { \ + for (size_t channels_ = 1; channels_ <= channel_tile_ * 5; channels_ += std::max(1, channel_tile_ - 1)) { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(rows_) \ + .qmin(128) \ + .Test(ukernel, init_params); \ + } \ + } \ + } +#define XNN_TEST_VMULCADDC_QMAX(ukernel, arch_flags, row_tile_, channel_tile_, datatype, params_type, init_params) \ + TEST(ukernel, qmax) \ + { \ + for (size_t rows_ = 1; rows_ <= row_tile_ * 3; rows_ += std::max(1, row_tile_ - 1)) { \ + for (size_t channels_ = 1; channels_ <= channel_tile_ * 5; channels_ += std::max(1, channel_tile_ - 1)) { \ + VMulCAddCMicrokernelTester() \ + .channel_tile(channel_tile_) \ + .channels(channels_) \ + .rows(rows_) \ + .qmax(128) \ + .Test(ukernel, init_params); \ + } \ + } \ + } diff --git a/tools/generate-vmulcaddc-test.py b/tools/generate-vmulcaddc-test.py deleted file mode 100755 index faadc36f88e..00000000000 --- a/tools/generate-vmulcaddc-test.py +++ /dev/null @@ -1,280 +0,0 @@ -#!/usr/bin/env python -# Copyright 2019 Google LLC -# -# This source code is licensed under the BSD-style license found in the -# LICENSE file in the root directory of this source tree. 
- -import argparse -import codecs -import math -import os -import re -import sys -import yaml - -sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) -from primes import next_prime -import xngen -import xnncommon - - -parser = argparse.ArgumentParser( - description='VMulCAddC microkernel test generator') -parser.add_argument("-s", "--spec", metavar="FILE", required=True, - help="Specification (YAML) file") -parser.add_argument("-o", "--output", metavar="FILE", required=True, - help='Output (C++ source) file') -parser.set_defaults(defines=list()) - - -def split_ukernel_name(name): - match = re.fullmatch(r"xnn_(f16|f32)_vmulcaddc(_(minmax))?_ukernel_c(\d+)__(.+)_(\d+)x", name) - assert match is not None - channel_tile = int(match.group(4)) - row_tile = int(match.group(6)) - - arch, isa, assembly = xnncommon.parse_target_name(target_name=match.group(5)) - return channel_tile, row_tile, arch, isa - - -VMULCADDC_TEST_TEMPLATE = """\ -TEST(${TEST_NAME}, channels_eq_${CHANNEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(${CHANNEL_TILE}) - .rows(${ROW_TILE}) - .Test(${", ".join(TEST_ARGS)}); -} - -$if CHANNEL_TILE > 1: - TEST(${TEST_NAME}, channels_div_${CHANNEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t channels = ${CHANNEL_TILE*2}; channels < ${CHANNEL_TILE*10}; channels += ${CHANNEL_TILE}) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(${ROW_TILE}) - .Test(${", ".join(TEST_ARGS)}); - } - } - - TEST(${TEST_NAME}, channels_lt_${CHANNEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t channels = 1; channels < ${CHANNEL_TILE}; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(${ROW_TILE}) - .Test(${", ".join(TEST_ARGS)}); - } - } - -TEST(${TEST_NAME}, channels_gt_${CHANNEL_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t channels = ${CHANNEL_TILE+1}; channels < 
${10 if CHANNEL_TILE == 1 else CHANNEL_TILE*2}; channels++) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(${ROW_TILE}) - .Test(${", ".join(TEST_ARGS)}); - } -} - -$if ROW_TILE > 1: - TEST(${TEST_NAME}, rows_lt_${ROW_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t rows = 1; rows < ${ROW_TILE}; rows++) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE*5}; channels += ${max(1, CHANNEL_TILE-1)}) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(rows) - .Test(${", ".join(TEST_ARGS)}); - } - } - } - - TEST(${TEST_NAME}, rows_div_${ROW_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t rows = ${ROW_TILE*2}; rows <= ${ROW_TILE*4}; rows += ${ROW_TILE}) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE*5}; channels += ${max(1, CHANNEL_TILE-1)}) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(rows) - .Test(${", ".join(TEST_ARGS)}); - } - } - } - -TEST(${TEST_NAME}, rows_gt_${ROW_TILE}) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t rows = ${ROW_TILE+1}; rows < ${ROW_TILE*2}; rows++) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE*5}; channels += ${max(1, CHANNEL_TILE-1)}) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(rows) - .Test(${", ".join(TEST_ARGS)}); - } - } -} - -TEST(${TEST_NAME}, input_stride) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t rows = 1; rows <= ${ROW_TILE*3}; rows += ${max(1, ROW_TILE-1)}) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE*5}; channels += ${max(1, CHANNEL_TILE-1)}) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(rows) - .input_stride(${next_prime(CHANNEL_TILE*5+1)}) - .Test(${", ".join(TEST_ARGS)}); - } - } -} - -TEST(${TEST_NAME}, output_stride) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t rows = 1; rows <= ${ROW_TILE*3}; rows += ${max(1, 
ROW_TILE-1)}) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE*5}; channels += ${max(1, CHANNEL_TILE-1)}) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(rows) - .output_stride(${next_prime(CHANNEL_TILE*5+1)}) - .Test(${", ".join(TEST_ARGS)}); - } - } -} - -TEST(${TEST_NAME}, inplace) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t rows = 1; rows <= ${ROW_TILE*3}; rows += ${max(1, ROW_TILE-1)}) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE*5}; channels += ${max(1, CHANNEL_TILE-1)}) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(rows) - .inplace(true) - .Test(${", ".join(TEST_ARGS)}); - } - } -} - -TEST(${TEST_NAME}, qmin) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t rows = 1; rows <= ${ROW_TILE*3}; rows += ${max(1, ROW_TILE-1)}) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE*5}; channels += ${max(1, CHANNEL_TILE-1)}) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(rows) - .qmin(128) - .Test(${", ".join(TEST_ARGS)}); - } - } -} - -TEST(${TEST_NAME}, qmax) { - $if ISA_CHECK: - ${ISA_CHECK}; - for (size_t rows = 1; rows <= ${ROW_TILE*3}; rows += ${max(1, ROW_TILE-1)}) { - for (size_t channels = 1; channels <= ${CHANNEL_TILE*5}; channels += ${max(1, CHANNEL_TILE-1)}) { - VMulCAddCMicrokernelTester() - .channel_tile(${CHANNEL_TILE}) - .channels(channels) - .rows(rows) - .qmax(128) - .Test(${", ".join(TEST_ARGS)}); - } - } -} -""" - - -def generate_test_cases(ukernel, channel_tile, row_tile, init_fn, isa): - """Generates all tests cases for a VMULCADDC micro-kernel. - - Args: - ukernel: C name of the micro-kernel function. - channel_tile: Number of channels processed per one iteration of the inner - loop of the micro-kernel. - row_tile: Number of rows processed per one iteration of the outer loop of - the micro-kernel. - init_fn: C name of the function to initialize microkernel parameters. 
- isa: instruction set required to run the micro-kernel. Generated unit test - will skip execution if the host processor doesn't support this ISA. - - Returns: - Code for the test case. - """ - _, test_name = ukernel.split("_", 1) - _, datatype, ukernel_type, _ = ukernel.split("_", 3) - test_args = [ukernel] - if init_fn: - test_args.append(init_fn) - return xngen.preprocess(VMULCADDC_TEST_TEMPLATE, { - "TEST_NAME": test_name.upper().replace("UKERNEL_", ""), - "TEST_ARGS": test_args, - "DATATYPE": datatype, - "CHANNEL_TILE": channel_tile, - "ROW_TILE": row_tile, - "ISA_CHECK": xnncommon.generate_isa_check_macro(isa), - "next_prime": next_prime, - }) - - -def main(args): - options = parser.parse_args(args) - - with codecs.open(options.spec, "r", encoding="utf-8") as spechannels_file: - spechannels_yaml = yaml.safe_load(spechannels_file) - if not isinstance(spechannels_yaml, list): - raise ValueError("expected a list of micro-kernels in the spec") - - tests = """\ -// Copyright 2019 Google LLC -// -// This source code is licensed under the BSD-style license found in the -// LICENSE file in the root directory of this source tree. -// -// Auto-generated file. Do not edit! -// Specification: {specification} -// Generator: {generator} - - -#include -#include "xnnpack/common.h" -#include "xnnpack/isa-checks.h" -#include "xnnpack/microparams-init.h" -#include "xnnpack/vmulcaddc.h" -#include "vmulcaddc-microkernel-tester.h" -""".format(specification=options.spec, generator=sys.argv[0]) - - for ukernel_spec in spechannels_yaml: - name = ukernel_spec["name"] - init_fn = ukernel_spec.get("init") - channel_tile, row_tile, arch, isa = split_ukernel_name(name) - - test_case = generate_test_cases( - name, channel_tile, row_tile, init_fn, isa) - tests += "\n\n" + xnncommon.postprocess_test_case(test_case, arch, isa) - - xnncommon.overwrite_if_changed(options.output, tests) - - -if __name__ == "__main__": - main(sys.argv[1:])