Skip to content

Commit

Permalink
Copybara import of the project:
Browse files Browse the repository at this point in the history
--
fd9d24d by nithesh <[email protected]>:

VMulCAddC-Replaced yaml files with header table
Replaced yaml files with header table for vmulcadd op

--
4fb020d by nithesh <[email protected]>:

Addressed review comments

--
eb27915 by nithesh <[email protected]>:

Addressed review comments

--
b06ecd1 by nithesh <[email protected]>:

Address CI failed checks

--
2fb29c5 by nithesh <[email protected]>:

Addressed kernel declarations comment

--
a4d5e62 by nithesh <[email protected]>:

Resolve "illegal instructions" for AVX512F tests

--
76e4518 by nithesh <[email protected]>:

Add support for build bazel

--
a472377 by nithesh <[email protected]>:

Defined kernel declrations to use all datatype

--
e6882d4 by nithesh <[email protected]>:

Addressed the merge issue

--
7799d9a by nithesh <[email protected]>:

Addressed the test case issue

--
a4370dc by nithesh <[email protected]>:

Resolve check fails for ARM Arch

FUTURE_COPYBARA_INTEGRATE_REVIEW=#7411 from nitheshsrikanth-mcw:xnn_vmulcadd a4370dc
PiperOrigin-RevId: 707992277
  • Loading branch information
nitheshsrikanth-mcw authored and xnnpack-bot committed Dec 20, 2024
1 parent 08185b7 commit b46cdae
Show file tree
Hide file tree
Showing 11 changed files with 363 additions and 3,961 deletions.
2 changes: 2 additions & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ MICROKERNEL_DEFS = [
"src/f16-velu/f16-velu.h",
"src/f16-vhswish/f16-vhswish.h",
"src/f16-vlrelu/f16-vlrelu.h",
"src/f16-vmulcaddc/f16-vmulcaddc.h",
"src/f16-vneg/f16-vneg.h",
"src/f16-vrnd/f16-vrndd.h",
"src/f16-vrnd/f16-vrndne.h",
Expand Down Expand Up @@ -143,6 +144,7 @@ MICROKERNEL_DEFS = [
"src/f32-vhswish/f32-vhswish.h",
"src/f32-vlog/f32-vlog.h",
"src/f32-vlrelu/f32-vlrelu.h",
"src/f32-vmulcaddc/f32-vmulcaddc.h",
"src/f32-vneg/f32-vneg.h",
"src/f32-vrelu/f32-vrelu.h",
"src/f32-vrnd/f32-vrndd.h",
Expand Down
4 changes: 0 additions & 4 deletions scripts/generate-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,6 @@ tools/generate-raddexpminusmax-test.py --spec test/f32-raddexpminusmax.yaml --ou
tools/generate-raddstoreexpminusmax-test.py --spec test/f16-raddstoreexpminusmax.yaml --output test/f16-raddstoreexpminusmax.cc &
tools/generate-raddstoreexpminusmax-test.py --spec test/f32-raddstoreexpminusmax.yaml --output test/f32-raddstoreexpminusmax.cc &

### Tests for VMulCAddC micro-kernels
tools/generate-vmulcaddc-test.py --spec test/f16-vmulcaddc-minmax.yaml --output test/f16-vmulcaddc-minmax.cc &
tools/generate-vmulcaddc-test.py --spec test/f32-vmulcaddc-minmax.yaml --output test/f32-vmulcaddc-minmax.cc &

### Tests for the portable SIMD wrappers.
tools/xngen test/f32-simd.cc.in -D ARCH=scalar -D ARCH_MACRO="" -D TEST_REQUIRES="" -o test/f32-simd-scalar.cc &
tools/xngen test/f32-simd.cc.in -D ARCH=sse2 -D ARCH_MACRO="XNN_ARCH_X86 || XNN_ARCH_X86_64" -D TEST_REQUIRES=TEST_REQUIRES_X86_SSE2 -o test/f32-simd-sse2.cc &
Expand Down
35 changes: 35 additions & 0 deletions src/f16-vmulcaddc/f16-vmulcaddc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright 2023 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
#ifndef XNN_UKERNEL_WITH_PARAMS
#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type, init_params) \
XNN_UKERNEL(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type)
#define XNN_DEFINED_UKERNEL_WITH_PARAMS
#endif

#ifndef XNN_UKERNEL
#define XNN_UKERNEL(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type) \
XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, row_tile, channel_tile, datatype, void, /*init_params=*/nullptr)
#define XNN_DEFINED_UKERNEL
#endif

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_X86, xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x, 2, 8, xnn_float16, union xnn_f16_minmax_params, xnn_init_f16_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_X86, xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x, 2, 16, xnn_float16, union xnn_f16_minmax_params, xnn_init_f16_minmax_scalar_params)
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x, 2, 8, xnn_float16, union xnn_f16_minmax_params, xnn_init_f16_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x, 2, 16, xnn_float16, union xnn_f16_minmax_params, xnn_init_f16_minmax_scalar_params)
#endif // XNN_ENABLE_ARM_FP16_VECTOR && (XNN_ARCH_ARM || XNN_ARCH_ARM64)

#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS
#undef XNN_DEFINED_UKERNEL_WITH_PARAMS
#undef XNN_UKERNEL_WITH_PARAMS
#endif

#ifdef XNN_DEFINED_UKERNEL
#undef XNN_DEFINED_UKERNEL
#undef XNN_UKERNEL
#endif
61 changes: 61 additions & 0 deletions src/f32-vmulcaddc/f32-vmulcaddc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
// Copyright 2023 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.
#ifndef XNN_UKERNEL_WITH_PARAMS
#define XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type, init_params) \
XNN_UKERNEL(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type)
#define XNN_DEFINED_UKERNEL_WITH_PARAMS
#endif

#ifndef XNN_UKERNEL
#define XNN_UKERNEL(arch_flags, ukernel, row_tile, channel_tile, datatype, params_type) \
XNN_UKERNEL_WITH_PARAMS(arch_flags, ukernel, row_tile, channel_tile, datatype, void, /*init_params=*/nullptr)
#define XNN_DEFINED_UKERNEL
#endif

#if XNN_ARCH_X86 || XNN_ARCH_X86_64
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_X86, xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_X86, xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
#endif // XNN_ARCH_X86 || XNN_ARCH_X86_64

#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_ARM, xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
#endif // XNN_ARCH_ARM || XNN_ARCH_ARM64

#if XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
#endif //XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_WASMRELAXEDSIMD
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMRELAXEDSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMRELAXEDSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMRELAXEDSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASMRELAXEDSIMD, xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x, 2, 8, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
#endif // XNN_ARCH_WASMRELAXEDSIMD

#if XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASM, xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x, 2, 1, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASM, xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x, 2, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(XNN_ARCH_WASM, xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
#endif // XNN_ARCH_WASM || XNN_ARCH_WASMSIMD || XNN_ARCH_WASMRELAXEDSIMD

XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x, 2, 1, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x, 2, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x, 2, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)

#ifdef XNN_DEFINED_UKERNEL_WITH_PARAMS
#undef XNN_DEFINED_UKERNEL_WITH_PARAMS
#undef XNN_UKERNEL_WITH_PARAMS
#endif

#ifdef XNN_DEFINED_UKERNEL
#undef XNN_DEFINED_UKERNEL
#undef XNN_UKERNEL
#endif
70 changes: 13 additions & 57 deletions src/xnnpack/vmulcaddc.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,63 +15,19 @@
extern "C" {
#endif


#define DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(fn_name) \
XNN_INTERNAL void fn_name( \
size_t m, \
size_t c, \
const float* x, \
size_t x_stride, \
const float* w, \
float* y, \
size_t y_stride, \
const union xnn_f32_minmax_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__neon_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__neon_2x)

DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__neonfma_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__neonfma_2x)

DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__sse_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__sse_2x)

DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_arm_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x)

DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmsimd_x86_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x)

DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_2x)

DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasmrelaxedsimd_fma_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmrelaxedsimd_fma_2x)

DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c2__wasm_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__wasm_2x)

DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c2__scalar_2x)
DECLARE_F32_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f32_vmulcaddc_minmax_ukernel_c4__scalar_2x)

#define DECLARE_F16_VMULCADDC_MINMAX_UKERNEL_FUNCTION(fn_name) \
XNN_INTERNAL void fn_name( \
size_t m, \
size_t c, \
const xnn_float16* x, \
size_t x_stride, \
const xnn_float16* w, \
xnn_float16* y, \
size_t y_stride, \
const union xnn_f16_minmax_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

DECLARE_F16_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f16_vmulcaddc_minmax_ukernel_c8__neonfp16arith_2x)
DECLARE_F16_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f16_vmulcaddc_minmax_ukernel_c16__neonfp16arith_2x)

DECLARE_F16_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f16_vmulcaddc_minmax_ukernel_c8__fma3_2x)
DECLARE_F16_VMULCADDC_MINMAX_UKERNEL_FUNCTION(xnn_f16_vmulcaddc_minmax_ukernel_c16__fma3_2x)
#define XNN_UKERNEL(arch_flags, fn_name, row_tile, channel_tile, datatype, params_type) \
XNN_INTERNAL void fn_name( \
size_t m, \
size_t c, \
const datatype* x, \
size_t x_stride, \
const datatype* w, \
datatype* y, \
size_t y_stride, \
const params_type params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);
#include "f32-vmulcaddc/f32-vmulcaddc.h"
#include "f16-vmulcaddc/f16-vmulcaddc.h"
#undef XNN_UKERNEL

#ifdef __cplusplus
} // extern "C"
Expand Down
Loading

0 comments on commit b46cdae

Please sign in to comment.