Skip to content

Commit

Permalink
Copybara import of the project:
Browse files Browse the repository at this point in the history
--
5d5cd76 by RahulSudarMCW <[email protected]>:

Replace conv-hwc yaml with table header

--
ad05494 by RahulSudarMCW <[email protected]>:

Remove 'conv-hwc-microkernel-tester.h'

--
6d39a08 by RahulSudarMCW <[email protected]>:

Declare microkernel to use table header

--
93d6323 by RahulSudarMCW <[email protected]>:

Resolve failed checks

FUTURE_COPYBARA_INTEGRATE_REVIEW=#7445 from RahulSundarMCW:conv-hwc 93d6323
PiperOrigin-RevId: 695826548
  • Loading branch information
RahulSundarMCW authored and xnnpack-bot committed Nov 13, 2024
1 parent 689b350 commit 3d3c275
Show file tree
Hide file tree
Showing 8 changed files with 819 additions and 5,951 deletions.
1 change: 1 addition & 0 deletions BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ MICROKERNEL_DEFS = [
"src/f16-vsqrt/f16-vsqrt.h",
"src/f16-vtanh/f16-vtanh.h",
"src/f32-avgpool/f32-avgpool-minmax.h",
"src/f32-conv-hwc/f32-conv-hwc.h",
"src/f32-dwconv/f32-dwconv-minmax-multipass.h",
"src/f32-dwconv/f32-dwconv-minmax-unipass.h",
"src/f32-dwconv/f32-dwconv-multipass.h",
Expand Down
3 changes: 0 additions & 3 deletions scripts/generate-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -195,9 +195,6 @@ tools/generate-rdsum-test.py --spec test/qu8-rdsum.yaml --output test/qu8-rdsum.
### Tests for LUT micro-kernels
tools/generate-lut-test.py --spec test/x8-lut.yaml --output test/x8-lut.cc &

### Tests for Conv HWC layout micro-kernels
tools/generate-conv-hwc-test.py --spec test/f32-conv-hwc.yaml --output test/f32-conv-hwc.cc &

### Tests for Conv HWC2CHW layout micro-kernels
tools/generate-conv-hwc2chw-test.py --spec test/f16-conv-hwc2chw.yaml --output test/f16-conv-hwc2chw.cc &
tools/generate-conv-hwc2chw-test.py --spec test/f32-conv-hwc2chw.yaml --output test/f32-conv-hwc2chw.cc &
Expand Down
51 changes: 51 additions & 0 deletions src/f32-conv-hwc/f32-conv-hwc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Copyright 2023 Google LLC
//
// This source code is licensed under the BSD-style license found in the
// LICENSE file in the root directory of this source tree.

// Fallback for includers that did not define XNN_UKERNEL_WITH_PARAMS: forward
// every table row to XNN_UKERNEL, dropping the trailing params_type and
// init_params arguments. XNN_DEFINED_UKERNEL_WITH_PARAMS marks that the macro
// was defined here rather than by the includer.
#if !defined(XNN_UKERNEL_WITH_PARAMS)
#define XNN_DEFINED_UKERNEL_WITH_PARAMS
#define XNN_UKERNEL_WITH_PARAMS(                                          \
    arch, fn, ksize, subsample, pad_right, pad_left, in_channels,        \
    out_channels_tile, in_widths, type, params_type, init_params)        \
  XNN_UKERNEL(arch, fn, ksize, subsample, pad_right, pad_left,            \
              in_channels, out_channels_tile, in_widths, type)
#endif

// Fallback for includers that did not define XNN_UKERNEL: forward each
// XNN_UKERNEL row to XNN_UKERNEL_WITH_PARAMS, supplying `void` as the params
// type and a null init_params. XNN_DEFINED_UKERNEL marks that the macro was
// defined here rather than by the includer.
#if !defined(XNN_UKERNEL)
#define XNN_DEFINED_UKERNEL
#define XNN_UKERNEL(                                                      \
    arch, fn, ksize, subsample, pad_right, pad_left, in_channels,        \
    out_channels_tile, in_widths, type)                                  \
  XNN_UKERNEL_WITH_PARAMS(arch, fn, ksize, subsample, pad_right,          \
                          pad_left, in_channels, out_channels_tile,      \
                          in_widths, type, void,                         \
                          /*init_params=*/nullptr)
#endif

// Scalar kernels (arch_flags is 0).
// Columns: arch_flags, ukernel, kernel_size, subsampling, padding_right,
// padding_left, input_channels, output_channels_tile, input_widths, datatype,
// params_type, init_params.
// params_type is `union xnn_f32_minmax_params`, matching the `params` argument
// these kernels are declared with in src/xnnpack/conv.h.
XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1, 3, 2, 1, 0, 3, 4, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(0, xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1, 3, 2, 1, 1, 3, 4, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)

// NEON kernels, listed only when building for 32-bit or 64-bit ARM.
// Columns: arch_flags, ukernel, kernel_size, subsampling, padding_right,
// padding_left, input_channels, output_channels_tile, input_widths, datatype,
// params_type, init_params.
// params_type is `union xnn_f32_minmax_params`, matching the `params` argument
// these kernels are declared with in src/xnnpack/conv.h.
#if XNN_ARCH_ARM || XNN_ARCH_ARM64
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1, 3, 2, 1, 0, 3, 4, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2, 3, 2, 1, 0, 3, 4, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1, 3, 2, 1, 0, 3, 8, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2, 3, 2, 1, 0, 3, 8, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1, 3, 2, 1, 1, 3, 4, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2, 3, 2, 1, 1, 3, 4, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1, 3, 2, 1, 1, 3, 8, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2, 3, 2, 1, 1, 3, 8, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
#endif  // XNN_ARCH_ARM || XNN_ARCH_ARM64

// AArch64 NEON-FMA kernels, listed only when building for 64-bit ARM.
// Columns: arch_flags, ukernel, kernel_size, subsampling, padding_right,
// padding_left, input_channels, output_channels_tile, input_widths, datatype,
// params_type, init_params.
// params_type is `union xnn_f32_minmax_params`, matching the `params` argument
// these kernels are declared with in src/xnnpack/conv.h.
// NOTE(review): these neonfma kernels are tagged with the plain
// xnn_arch_arm_neon flag; confirm whether a dedicated NEON-FMA arch flag
// should be used here instead.
#if XNN_ARCH_ARM64
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__aarch64_neonfma_2x1, 3, 2, 1, 0, 3, 4, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__aarch64_neonfma_2x2, 3, 2, 1, 0, 3, 4, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__aarch64_neonfma_2x1, 3, 2, 1, 0, 3, 8, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__aarch64_neonfma_2x2, 3, 2, 1, 0, 3, 8, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__aarch64_neonfma_2x1, 3, 2, 1, 1, 3, 4, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__aarch64_neonfma_2x2, 3, 2, 1, 1, 3, 4, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__aarch64_neonfma_2x1, 3, 2, 1, 1, 3, 8, 2, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
XNN_UKERNEL_WITH_PARAMS(xnn_arch_arm_neon, xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__aarch64_neonfma_2x2, 3, 2, 1, 1, 3, 8, 4, float, union xnn_f32_minmax_params, xnn_init_f32_minmax_scalar_params)
#endif  // XNN_ARCH_ARM64

// Tear down the fallback macros, but only the ones flagged by an
// XNN_DEFINED_* marker; a macro without its marker is left untouched.
#if defined(XNN_DEFINED_UKERNEL_WITH_PARAMS)
#undef XNN_UKERNEL_WITH_PARAMS
#undef XNN_DEFINED_UKERNEL_WITH_PARAMS
#endif

#if defined(XNN_DEFINED_UKERNEL)
#undef XNN_UKERNEL
#undef XNN_DEFINED_UKERNEL
#endif
27 changes: 4 additions & 23 deletions src/xnnpack/conv.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ extern "C" {
#endif


#define DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(fn_name) \
#define XNN_UKERNEL(arch_flags, fn_name, kernel_size, subsampling, padding_right, padding_left, input_channels, output_channels_tile, input_widths, datatype) \
XNN_INTERNAL void fn_name( \
size_t input_height, \
size_t input_width, \
Expand All @@ -33,28 +33,9 @@ extern "C" {
size_t output_channels, \
size_t output_height_stride, \
size_t output_width_stride, \
const union xnn_f32_minmax_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);

DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__aarch64_neonfma_2x1)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__aarch64_neonfma_2x2)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x1)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__scalar_1x1)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__aarch64_neonfma_2x1)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__aarch64_neonfma_2x2)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x2)

DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__aarch64_neonfma_2x1)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__aarch64_neonfma_2x2)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x1)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__neon_2x2)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x4__scalar_1x1)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__aarch64_neonfma_2x1)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__aarch64_neonfma_2x2)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x1)
DECLARE_F32_CONV_HWC_UKERNEL_FUNCTION(xnn_f32_conv_hwc_ukernel_3x3s2p0p1c3x8__neon_2x2)

const union xnn_f32_minmax_params params[XNN_RESTRICT XNN_MIN_ELEMENTS(1)]);
#include "f32-conv-hwc/f32-conv-hwc.h"
#undef XNN_UKERNEL

#define DECLARE_F32_CONV_HWC2CHW_UKERNEL_FUNCTION(fn_name) \
XNN_INTERNAL void fn_name( \
Expand Down
Loading

0 comments on commit 3d3c275

Please sign in to comment.