diff --git a/BUILD.bazel b/BUILD.bazel index 16fbf7adac47..8a7601b8c111 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -766,6 +766,7 @@ xnnpack_microkernel_cc_library( srcs_prod = ["src/amalgam/gen/scalar.c"], srcs_test = ALL_SCALAR_MICROKERNEL_SRCS, deps = MICROKERNEL_DEPS + [ + ":config_hdrs", "@FP16", "@FXdiv", ], @@ -855,6 +856,7 @@ xnnpack_microkernel_cc_library( msvc_copts = xnnpack_msvc_std_copts(), deps = MICROKERNEL_DEPS + xnnpack_if_kleidiai_enabled([ "@KleidiAI//kai/ukernels/matmul", + "@KleidiAI//kai/ukernels/matmul:lhs_quant_pack_qai8dxp_f32", ]), ) @@ -977,6 +979,7 @@ xnnpack_microkernel_cc_library( msvc_copts = xnnpack_msvc_std_copts(), deps = MICROKERNEL_DEPS + xnnpack_if_kleidiai_enabled([ "@KleidiAI//kai/ukernels/matmul", + "@KleidiAI//kai/ukernels/matmul:clamp_f32_qai8dxp1x8_qsi4cxp8x8_1x8x32_neon_dotprod", ]), ) @@ -1000,6 +1003,7 @@ xnnpack_microkernel_cc_library( msvc_copts = xnnpack_msvc_std_copts(), deps = MICROKERNEL_DEPS + xnnpack_if_kleidiai_enabled([ "@KleidiAI//kai/ukernels/matmul", + "@KleidiAI//kai/ukernels/matmul:clamp_f32_qai8dxp4x8_qsi4cxp8x8_8x8x32_neon_i8mm", ]), ) @@ -1786,10 +1790,15 @@ xnnpack_cc_library( ":config_hdrs", ":logging", ":math", + ":microparams", + ":params", ":unaligned", + ":xnnpack_h", "@FP16", ] + xnnpack_if_kleidiai_enabled([ "@KleidiAI//kai/ukernels/matmul", + "@KleidiAI//kai/ukernels/matmul:rhs_pack_kxn_qsi4cxp_qsu4cxs1s0", + "@KleidiAI//kai/ukernels/matmul:rhs_pack_nxk_qsi4cxp_qsu4cxs1s0", ]), ) @@ -2007,6 +2016,7 @@ xnnpack_cc_library( ], deps = [ ":allocator", + ":logging", ":microkernel_configs", ":microparams", ":params", @@ -2040,10 +2050,12 @@ xnnpack_cc_library( gcc_copts = xnnpack_gcc_std_copts(), msvc_copts = xnnpack_msvc_std_copts(), deps = [ - ":cache", + ":allocator", ":common", + ":logging", ":math", ":operator_h", + ":params", ":xnnpack_h", ], ) @@ -2076,6 +2088,7 @@ xnnpack_cc_library( ":microkernel_configs", ":microkernel_type", ":microkernel_utils", + ":microkernels_h", ":microparams_init", ":normalization", ":operator_type", @@ -2083,6 +2096,7 @@ xnnpack_cc_library( ":packing", ":params", ":post_operation", + ":quantization", ":xnnpack_h", "@FP16", "@pthreadpool", @@ -2226,6 +2240,9 @@ xnnpack_cc_library( ":allocator", ":build_identifier", ":common", + ":config_hdrs", + ":hardware_config", + ":init_once", ":logging", ":math", ":microkernels_h", @@ -2234,6 +2251,7 @@ xnnpack_cc_library( ":operators", ":params", ":subgraph", + "@pthreadpool", ], ) @@ -2257,17 +2275,12 @@ xnnpack_cc_library( visibility = xnnpack_visibility(), deps = [ ":allocator", - ":build_identifier", ":common", + ":config_hdrs", + ":init_once", ":logging", - ":math", - ":microkernels_h", - ":microparams", - ":microparams_init", - ":operators", ":params", - ":prod_microkernels", - ":subgraph", + "@pthreadpool", ], ) @@ -2291,16 +2304,12 @@ xnnpack_cc_library( visibility = xnnpack_visibility(), deps = [ ":allocator", - ":build_identifier", ":common", + ":config_hdrs", + ":init_once", ":logging", - ":math", - ":microkernels_h", - ":microparams", - ":microparams_init", - ":operators", ":params", - ":prod_microkernels", + "@pthreadpool", ], ) diff --git a/src/f16-avgpool/f16-avgpool-9p8x-minmax-f16c-c8.c b/src/f16-avgpool/f16-avgpool-9p8x-minmax-f16c-c8.c index 6f0d7328f750..f1e8ac057dae 100644 --- a/src/f16-avgpool/f16-avgpool-9p8x-minmax-f16c-c8.c +++ b/src/f16-avgpool/f16-avgpool-9p8x-minmax-f16c-c8.c @@ -7,8 +7,13 @@ #include +#include +#include + #include "xnnpack/avgpool.h" +#include "xnnpack/common.h" #include "xnnpack/intrinsics-polyfill.h" +#include "xnnpack/microparams.h" void xnn_f16_avgpool_minmax_ukernel_9p8x__f16c_c8( diff --git a/src/f16-avgpool/f16-avgpool-9x-minmax-f16c-c8.c b/src/f16-avgpool/f16-avgpool-9x-minmax-f16c-c8.c index 579b593039cc..95b836fdc28f 100644 --- a/src/f16-avgpool/f16-avgpool-9x-minmax-f16c-c8.c +++ b/src/f16-avgpool/f16-avgpool-9x-minmax-f16c-c8.c @@ -7,8 +7,13 @@ #include +#include +#include + #include "xnnpack/avgpool.h" +#include "xnnpack/common.h" #include "xnnpack/intrinsics-polyfill.h" +#include "xnnpack/microparams.h" void xnn_f16_avgpool_minmax_ukernel_9x__f16c_c8( diff --git a/src/f16-pavgpool/f16-pavgpool-9p8x-minmax-avx2-c8.c b/src/f16-pavgpool/f16-pavgpool-9p8x-minmax-avx2-c8.c index 0f66c40d087a..adf2b9c7217f 100644 --- a/src/f16-pavgpool/f16-pavgpool-9p8x-minmax-avx2-c8.c +++ b/src/f16-pavgpool/f16-pavgpool-9p8x-minmax-avx2-c8.c @@ -7,7 +7,12 @@ #include +#include +#include + +#include "xnnpack/common.h" #include "xnnpack/intrinsics-polyfill.h" +#include "xnnpack/microparams.h" #include "xnnpack/pavgpool.h" diff --git a/src/f16-pavgpool/f16-pavgpool-9x-minmax-avx2-c8.c b/src/f16-pavgpool/f16-pavgpool-9x-minmax-avx2-c8.c index 4b2b8eae2c73..bf9a896fcb9e 100644 --- a/src/f16-pavgpool/f16-pavgpool-9x-minmax-avx2-c8.c +++ b/src/f16-pavgpool/f16-pavgpool-9x-minmax-avx2-c8.c @@ -7,7 +7,12 @@ #include +#include +#include + +#include "xnnpack/common.h" #include "xnnpack/intrinsics-polyfill.h" +#include "xnnpack/microparams.h" #include "xnnpack/pavgpool.h" diff --git a/src/f32-avgpool/f32-avgpool-9p8x-minmax-scalar-c1.c b/src/f32-avgpool/f32-avgpool-9p8x-minmax-scalar-c1.c index 2ee82bd76224..b4d6763952ce 100644 --- a/src/f32-avgpool/f32-avgpool-9p8x-minmax-scalar-c1.c +++ b/src/f32-avgpool/f32-avgpool-9p8x-minmax-scalar-c1.c @@ -5,8 +5,13 @@ #include +#include +#include + #include "xnnpack/avgpool.h" +#include "xnnpack/common.h" #include "xnnpack/math.h" +#include "xnnpack/microparams.h" void xnn_f32_avgpool_minmax_ukernel_9p8x__scalar_c1( diff --git a/src/f32-avgpool/f32-avgpool-9p8x-minmax-sse-c4.c b/src/f32-avgpool/f32-avgpool-9p8x-minmax-sse-c4.c index d9295c4e4595..b0b4047a381f 100644 --- a/src/f32-avgpool/f32-avgpool-9p8x-minmax-sse-c4.c +++ b/src/f32-avgpool/f32-avgpool-9p8x-minmax-sse-c4.c @@ -5,9 +5,13 @@ #include +#include +#include #include #include "xnnpack/avgpool.h" +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" void xnn_f32_avgpool_minmax_ukernel_9p8x__sse_c4( diff --git a/src/f32-avgpool/f32-avgpool-9x-minmax-scalar-c1.c b/src/f32-avgpool/f32-avgpool-9x-minmax-scalar-c1.c index d82716ee27df..f0832345bf0a 100644 --- a/src/f32-avgpool/f32-avgpool-9x-minmax-scalar-c1.c +++ b/src/f32-avgpool/f32-avgpool-9x-minmax-scalar-c1.c @@ -5,8 +5,13 @@ #include +#include +#include + #include "xnnpack/avgpool.h" +#include "xnnpack/common.h" #include "xnnpack/math.h" +#include "xnnpack/microparams.h" void xnn_f32_avgpool_minmax_ukernel_9x__scalar_c1( diff --git a/src/f32-avgpool/f32-avgpool-9x-minmax-sse-c4.c b/src/f32-avgpool/f32-avgpool-9x-minmax-sse-c4.c index 206a8b3e1b61..7054d44c5b21 100644 --- a/src/f32-avgpool/f32-avgpool-9x-minmax-sse-c4.c +++ b/src/f32-avgpool/f32-avgpool-9x-minmax-sse-c4.c @@ -5,9 +5,14 @@ #include +#include +#include #include + #include "xnnpack/avgpool.h" +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" void xnn_f32_avgpool_minmax_ukernel_9x__sse_c4( diff --git a/src/f32-pavgpool/f32-pavgpool-9p8x-minmax-scalar-c1.c b/src/f32-pavgpool/f32-pavgpool-9p8x-minmax-scalar-c1.c index d9ecc2d33195..231af6ca5a4a 100644 --- a/src/f32-pavgpool/f32-pavgpool-9p8x-minmax-scalar-c1.c +++ b/src/f32-pavgpool/f32-pavgpool-9p8x-minmax-scalar-c1.c @@ -5,6 +5,11 @@ #include +#include +#include + +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" #include "xnnpack/pavgpool.h" #include "xnnpack/math.h" diff --git a/src/f32-pavgpool/f32-pavgpool-9p8x-minmax-sse-c4.c b/src/f32-pavgpool/f32-pavgpool-9p8x-minmax-sse-c4.c index b25cdd663d88..7e5973a0c986 100644 --- a/src/f32-pavgpool/f32-pavgpool-9p8x-minmax-sse-c4.c +++ b/src/f32-pavgpool/f32-pavgpool-9p8x-minmax-sse-c4.c @@ -5,8 +5,13 @@ #include +#include +#include #include + +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" #include "xnnpack/pavgpool.h" diff --git a/src/f32-pavgpool/f32-pavgpool-9x-minmax-scalar-c1.c b/src/f32-pavgpool/f32-pavgpool-9x-minmax-scalar-c1.c index a60dc4f9f3fe..fa3992472473 100644 --- a/src/f32-pavgpool/f32-pavgpool-9x-minmax-scalar-c1.c +++ b/src/f32-pavgpool/f32-pavgpool-9x-minmax-scalar-c1.c @@ -5,6 +5,11 @@ #include +#include +#include + +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" #include "xnnpack/pavgpool.h" #include "xnnpack/math.h" diff --git a/src/f32-pavgpool/f32-pavgpool-9x-minmax-sse-c4.c b/src/f32-pavgpool/f32-pavgpool-9x-minmax-sse-c4.c index 8a613d097b27..2743e3c723e2 100644 --- a/src/f32-pavgpool/f32-pavgpool-9x-minmax-sse-c4.c +++ b/src/f32-pavgpool/f32-pavgpool-9x-minmax-sse-c4.c @@ -5,8 +5,12 @@ #include +#include +#include #include +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" #include "xnnpack/pavgpool.h" diff --git a/src/qu8-avgpool/qu8-avgpool-9p8x-minmax-fp32-scalar-imagic-c1.c b/src/qu8-avgpool/qu8-avgpool-9p8x-minmax-fp32-scalar-imagic-c1.c index 3f4a169a453e..a52f68040c6a 100644 --- a/src/qu8-avgpool/qu8-avgpool-9p8x-minmax-fp32-scalar-imagic-c1.c +++ b/src/qu8-avgpool/qu8-avgpool-9p8x-minmax-fp32-scalar-imagic-c1.c @@ -8,8 +8,13 @@ #include +#include +#include + #include "xnnpack/avgpool.h" +#include "xnnpack/common.h" #include "xnnpack/math.h" +#include "xnnpack/microparams.h" void xnn_qu8_avgpool_minmax_fp32_ukernel_9p8x__scalar_imagic_c1( diff --git a/src/qu8-avgpool/qu8-avgpool-9p8x-minmax-fp32-sse2-c8.c b/src/qu8-avgpool/qu8-avgpool-9p8x-minmax-fp32-sse2-c8.c index d4a0e41af921..d70b3aebdda5 100644 --- a/src/qu8-avgpool/qu8-avgpool-9p8x-minmax-fp32-sse2-c8.c +++ b/src/qu8-avgpool/qu8-avgpool-9p8x-minmax-fp32-sse2-c8.c @@ -9,8 +9,12 @@ #include #include +#include +#include #include "xnnpack/avgpool.h" +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" #include "xnnpack/unaligned.h" diff --git a/src/qu8-avgpool/qu8-avgpool-9x-minmax-fp32-scalar-imagic-c1.c b/src/qu8-avgpool/qu8-avgpool-9x-minmax-fp32-scalar-imagic-c1.c index 83018ea27216..b61405fac6b4 100644 --- a/src/qu8-avgpool/qu8-avgpool-9x-minmax-fp32-scalar-imagic-c1.c +++ b/src/qu8-avgpool/qu8-avgpool-9x-minmax-fp32-scalar-imagic-c1.c @@ -8,8 +8,13 @@ #include +#include +#include + #include "xnnpack/avgpool.h" +#include "xnnpack/common.h" #include "xnnpack/math.h" +#include "xnnpack/microparams.h" void xnn_qu8_avgpool_minmax_fp32_ukernel_9x__scalar_imagic_c1( diff --git a/src/qu8-avgpool/qu8-avgpool-9x-minmax-fp32-sse2-c8.c b/src/qu8-avgpool/qu8-avgpool-9x-minmax-fp32-sse2-c8.c index 86566f6235b7..8341f6517d88 100644 --- a/src/qu8-avgpool/qu8-avgpool-9x-minmax-fp32-sse2-c8.c +++ b/src/qu8-avgpool/qu8-avgpool-9x-minmax-fp32-sse2-c8.c @@ -9,8 +9,12 @@ #include #include +#include +#include #include "xnnpack/avgpool.h" +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" #include "xnnpack/unaligned.h" diff --git a/src/s8-vclamp/s8-vclamp-scalar-u4.c b/src/s8-vclamp/s8-vclamp-scalar-u4.c index ff5802cf92d6..8f75582f4ef0 100644 --- a/src/s8-vclamp/s8-vclamp-scalar-u4.c +++ b/src/s8-vclamp/s8-vclamp-scalar-u4.c @@ -4,8 +4,12 @@ // LICENSE file in the root directory of this source tree. #include +#include +#include +#include "xnnpack/common.h" #include "xnnpack/math.h" +#include "xnnpack/microparams.h" #include "xnnpack/vunary.h" diff --git a/src/s8-vclamp/s8-vclamp-sse2-u64.c b/src/s8-vclamp/s8-vclamp-sse2-u64.c index e0b9525fec58..3b7b1e8ddfb9 100644 --- a/src/s8-vclamp/s8-vclamp-sse2-u64.c +++ b/src/s8-vclamp/s8-vclamp-sse2-u64.c @@ -6,7 +6,11 @@ #include #include +#include +#include +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" #include "xnnpack/unaligned.h" #include "xnnpack/vunary.h" diff --git a/src/s8-vclamp/s8-vclamp-sse41-u64.c b/src/s8-vclamp/s8-vclamp-sse41-u64.c index 23bc5fa08785..f7d842b11733 100644 --- a/src/s8-vclamp/s8-vclamp-sse41-u64.c +++ b/src/s8-vclamp/s8-vclamp-sse41-u64.c @@ -6,7 +6,11 @@ #include #include +#include +#include +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" #include "xnnpack/unaligned.h" #include "xnnpack/vunary.h" diff --git a/src/subgraph/multiply2.c b/src/subgraph/multiply2.c index 6c07c31ae7c5..46d2c9b52387 100644 --- a/src/subgraph/multiply2.c +++ b/src/subgraph/multiply2.c @@ -9,6 +9,7 @@ #include #include + #include "xnnpack.h" #include "xnnpack/common.h" #include "xnnpack/log.h" diff --git a/src/u8-rmax/u8-rmax-scalar-u2.c b/src/u8-rmax/u8-rmax-scalar-u2.c index 4d887d82965b..a928ed52e40d 100644 --- a/src/u8-rmax/u8-rmax-scalar-u2.c +++ b/src/u8-rmax/u8-rmax-scalar-u2.c @@ -4,6 +4,8 @@ // LICENSE file in the root directory of this source tree. #include +#include +#include #include "xnnpack/reduce.h" diff --git a/src/u8-rmax/u8-rmax-sse2-u16.c b/src/u8-rmax/u8-rmax-sse2-u16.c index 50054f64e369..7c8e87e133be 100644 --- a/src/u8-rmax/u8-rmax-sse2-u16.c +++ b/src/u8-rmax/u8-rmax-sse2-u16.c @@ -9,7 +9,10 @@ #include #include +#include +#include +#include "xnnpack/common.h" #include "xnnpack/reduce.h" diff --git a/src/u8-vclamp/u8-vclamp-scalar-u4.c b/src/u8-vclamp/u8-vclamp-scalar-u4.c index d70ce16b73b1..32472df6f292 100644 --- a/src/u8-vclamp/u8-vclamp-scalar-u4.c +++ b/src/u8-vclamp/u8-vclamp-scalar-u4.c @@ -4,8 +4,12 @@ // LICENSE file in the root directory of this source tree. #include +#include +#include +#include "xnnpack/common.h" #include "xnnpack/math.h" +#include "xnnpack/microparams.h" #include "xnnpack/vunary.h" diff --git a/src/u8-vclamp/u8-vclamp-sse2-u64.c b/src/u8-vclamp/u8-vclamp-sse2-u64.c index c5ae43eb1a8a..dca2796a4ef6 100644 --- a/src/u8-vclamp/u8-vclamp-sse2-u64.c +++ b/src/u8-vclamp/u8-vclamp-sse2-u64.c @@ -6,7 +6,11 @@ #include #include +#include +#include +#include "xnnpack/common.h" +#include "xnnpack/microparams.h" #include "xnnpack/unaligned.h" #include "xnnpack/vunary.h"