Skip to content

Commit

Permalink
Integration of Kleidi F32 SME kernels
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 688984945
  • Loading branch information
alankelly authored and xnnpack-bot committed Oct 23, 2024
1 parent 4a121f0 commit 7670f71
Show file tree
Hide file tree
Showing 39 changed files with 1,190 additions and 254 deletions.
5 changes: 4 additions & 1 deletion BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@ MICROKERNEL_DEFS = [
"src/x16-transposec/x16-transposec.h",
"src/x24-transposec/x24-transposec.h",
"src/x32-packb/x32-packb.h",
"src/x32-pack-lh/x32-pack-lh.h",
"src/x32-packw/x32-packw.h",
"src/x32-packx/x32-packx.h",
"src/x32-transposec/x32-transposec.h",
Expand All @@ -231,6 +232,7 @@ MICROKERNEL_HDRS = [
"src/xnnpack/packw.h",
"src/xnnpack/packx.h",
"src/xnnpack/pad.h",
"src/xnnpack/pack-lh.h",
"src/xnnpack/pavgpool.h",
"src/xnnpack/ppmm.h",
"src/xnnpack/quantization.h",
Expand Down Expand Up @@ -805,8 +807,9 @@ xnnpack_cxx_library(
"@KleidiAI//kai/ukernels/matmul",
"@KleidiAI//kai/ukernels/matmul:rhs_pack_kxn_qsi4cxp_qs4cxs1s0",
"@KleidiAI//kai/ukernels/matmul:rhs_pack_nxk_qsi4cxp_qs4cxs1s0",
"@KleidiAI//kai/ukernels/matmul:rhs_pack_nxk_qsi4c32p_qsu4c32s1s0",
"@KleidiAI//kai/ukernels/matmul:rhs_pack_kxn_f32p2vlx1biasf32_f32_f32_sme",
"@KleidiAI//kai/ukernels/matmul:rhs_pack_kxn_qsi4c32p_qsu4c32s1s0",
"@KleidiAI//kai/ukernels/matmul:rhs_pack_nxk_qsi4c32p_qsu4c32s1s0",
]),
)

Expand Down
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ SET(OPERATOR_SRCS
src/operators/global-average-pooling-nwc.c
src/operators/lut-elementwise-nc.c
src/operators/max-pooling-nhwc.c
src/operators/pack-lh.c
src/operators/reduce-nd.c
src/operators/resize-bilinear-nchw.c
src/operators/resize-bilinear-nhwc.c
Expand Down Expand Up @@ -474,6 +475,7 @@ SET(SUBGRAPH_SRCS
src/subgraph/log.c
src/subgraph/max-pooling-2d.c
src/subgraph/negate.c
src/subgraph/pack-lh.c
src/subgraph/reciprocal-square-root.c
src/subgraph/reshape-helpers.c
src/subgraph/scaled-dot-product-attention.c
Expand Down Expand Up @@ -517,6 +519,7 @@ SET(XNNPACK_SRCS
src/configs/lut32norm-config.c
src/configs/maxpool-config.c
src/configs/pavgpool-config.c
src/configs/pack-lh-config.c
src/configs/raddstoreexpminusmax-config.c
src/configs/reduce-config.c
src/configs/rmax-config.c
Expand Down
4 changes: 4 additions & 0 deletions build_params.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -490,6 +490,10 @@ XNNPACK_PARAMS_FOR_ARCH = {
"neonsme2": _create_params(
cond = "//:arm_sme2_enabled",
copts = ["-march=armv8.2-a+sve+sve2"],
extra_deps = xnnpack_if_kleidiai_enabled([
"@KleidiAI//kai/ukernels/matmul:lhs_pack_f32p2vlx1_f32_sme",
"@KleidiAI//kai/ukernels/matmul:clamp_f32_f32p_f32p",
]),
),
"aarch32": _create_params(
cond = "//build_config:aarch32",
Expand Down
3 changes: 3 additions & 0 deletions build_srcs.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ OPERATOR_SRCS = [
"src/operators/global-average-pooling-nwc.c",
"src/operators/lut-elementwise-nc.c",
"src/operators/max-pooling-nhwc.c",
"src/operators/pack-lh.c",
"src/operators/reduce-nd.c",
"src/operators/resize-bilinear-nchw.c",
"src/operators/resize-bilinear-nhwc.c",
Expand Down Expand Up @@ -70,6 +71,7 @@ SUBGRAPH_SRCS = [
"src/subgraph/log.c",
"src/subgraph/max-pooling-2d.c",
"src/subgraph/negate.c",
"src/subgraph/pack-lh.c",
"src/subgraph/reciprocal-square-root.c",
"src/subgraph/reshape-helpers.c",
"src/subgraph/rope.c",
Expand Down Expand Up @@ -118,6 +120,7 @@ XNNPACK_SRCS = [
"src/configs/lut32norm-config.c",
"src/configs/maxpool-config.c",
"src/configs/pavgpool-config.c",
"src/configs/pack-lh-config.c",
"src/configs/raddstoreexpminusmax-config.c",
"src/configs/reduce-config.c",
"src/configs/rmax-config.c",
Expand Down
4 changes: 3 additions & 1 deletion cmake/gen/neonsme2_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
# Generator: tools/update-microkernels.py


SET(PROD_NEONSME2_MICROKERNEL_SRCS)
SET(PROD_NEONSME2_MICROKERNEL_SRCS
src/pf32-gemm/pf32-gemm-32x32-minmax-neonsme2.c
src/x32-pack-lh/x32-packlh-neonsme2.c)

SET(NON_PROD_NEONSME2_MICROKERNEL_SRCS)

Expand Down
17 changes: 17 additions & 0 deletions cmake/gen/sme_aarch64_microkernels.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright 2022 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Description: microkernel filename lists for sme_aarch64
#
# Auto-generated file. Do not edit!
# Generator: tools/update-microkernels.py


# PROD list for the sme_aarch64 group. SET() with no value leaves the variable
# unset, so it expands to nothing below.
SET(PROD_SME_AARCH64_MICROKERNEL_SRCS)

# NON_PROD list for the sme_aarch64 group.
SET(NON_PROD_SME_AARCH64_MICROKERNEL_SRCS
  src/x32-packx-w/x32-packx-w-aarch64-sme-u2.c)

# Concatenation of the PROD and NON_PROD lists. CMake joins lists simply by
# placing the expansions next to each other; there is no `+` operator, and a
# bare `+` argument would be stored as a list element (a bogus source file
# named "+").
# NOTE(review): this file is marked as generated by
# tools/update-microkernels.py, so the generator presumably needs the same
# change -- confirm before regenerating.
SET(ALL_SME_AARCH64_MICROKERNEL_SRCS
  ${PROD_SME_AARCH64_MICROKERNEL_SRCS}
  ${NON_PROD_SME_AARCH64_MICROKERNEL_SRCS})
16 changes: 16 additions & 0 deletions cmake/gen/sme_microkernels.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Copyright 2022 Google LLC
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
# Description: microkernel filename lists for sme
#
# Auto-generated file. Do not edit!
# Generator: tools/update-microkernels.py


# PROD list for the sme group. SET() with no value leaves the variable unset,
# so it expands to nothing below.
SET(PROD_SME_MICROKERNEL_SRCS)

# NON_PROD list for the sme group; likewise empty.
SET(NON_PROD_SME_MICROKERNEL_SRCS)

# Concatenation of the PROD and NON_PROD lists. CMake joins lists simply by
# placing the expansions next to each other; there is no `+` operator, and a
# bare `+` argument would be stored as a list element (a bogus source file
# named "+"). With both inputs empty this leaves the variable unset/empty.
# NOTE(review): this file is marked as generated by
# tools/update-microkernels.py, so the generator presumably needs the same
# change -- confirm before regenerating.
SET(ALL_SME_MICROKERNEL_SRCS
  ${PROD_SME_MICROKERNEL_SRCS}
  ${NON_PROD_SME_MICROKERNEL_SRCS})
2 changes: 2 additions & 0 deletions gen/neonsme2_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ Auto-generated file. Do not edit!
"""

PROD_NEONSME2_MICROKERNEL_SRCS = [
"src/pf32-gemm/pf32-gemm-32x32-minmax-neonsme2.c",
"src/x32-pack-lh/x32-packlh-neonsme2.c",
]

NON_PROD_NEONSME2_MICROKERNEL_SRCS = [
Expand Down
15 changes: 15 additions & 0 deletions gen/sme_aarch64_microkernels.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
"""
Microkernel filename lists for sme_aarch64.
Auto-generated file. Do not edit!
Generator: tools/update-microkernels.py
"""

# PROD list for the sme_aarch64 group (no entries).
PROD_SME_AARCH64_MICROKERNEL_SRCS = []

# NON_PROD list for the sme_aarch64 group.
NON_PROD_SME_AARCH64_MICROKERNEL_SRCS = ["src/x32-packx-w/x32-packx-w-aarch64-sme-u2.c"]

# PROD entries followed by NON_PROD entries.
ALL_SME_AARCH64_MICROKERNEL_SRCS = (
    PROD_SME_AARCH64_MICROKERNEL_SRCS +
    NON_PROD_SME_AARCH64_MICROKERNEL_SRCS
)
14 changes: 14 additions & 0 deletions gen/sme_microkernels.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
"""
Microkernel filename lists for sme.
Auto-generated file. Do not edit!
Generator: tools/update-microkernels.py
"""

# PROD list for the sme group (no entries).
PROD_SME_MICROKERNEL_SRCS = []

# NON_PROD list for the sme group (no entries).
NON_PROD_SME_MICROKERNEL_SRCS = []

# PROD entries followed by NON_PROD entries.
ALL_SME_MICROKERNEL_SRCS = (
    PROD_SME_MICROKERNEL_SRCS +
    NON_PROD_SME_MICROKERNEL_SRCS
)
2 changes: 2 additions & 0 deletions include/xnnpack.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,8 @@ enum xnn_datatype {
/// Quantized 4-bit signed integer with shared per-channel-block quantization
/// parameters.
xnn_datatype_qbint4 = 12,
/// IEEE754 single-precision packed floating-point.
xnn_datatype_pfp32 = 13,
};

/// Define a tensor-type Value and add it to a Subgraph.
Expand Down
Loading

0 comments on commit 7670f71

Please sign in to comment.