Skip to content

Commit

Permalink
Merge branch 'master' into ibilinear
Browse files Browse the repository at this point in the history
  • Loading branch information
RahulSundarMCW authored Dec 19, 2024
2 parents 8abaad4 + 02764b3 commit 77e8997
Show file tree
Hide file tree
Showing 1,047 changed files with 70,139 additions and 33,520 deletions.
4 changes: 4 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Common settings
common --enable_bzlmod
build --enable_bzlmod

# Basic build settings
build --jobs 128
build --cxxopt='-std=gnu++14'
Expand Down
12 changes: 7 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ jobs:
env:
CC: gcc-9
CXX: g++-9
BAZEL_DEFINES: --define=xnn_enable_avxvnni=false --define=xnn_enable_avxvnniint8=false --define=xnn_enable_avx512amx=false --define=xnn_enable_avx512fp16=false
BAZEL_DEFINES: --define=xnn_enable_avxvnni=false --define=xnn_enable_avx256vnni=false --define=xnn_enable_avxvnniint8=false --define=xnn_enable_avx512amx=false --define=xnn_enable_avx512fp16=false
steps:
- uses: actions/checkout@v4
- name: Update apt
Expand Down Expand Up @@ -474,14 +474,16 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: Install gcc-13
# Pull in gcc-13 from the ubuntu-23.10 repository since it is not available
# for ubuntu-22.04.
- name: Add repository ppa:ubuntu-toolchain-r/test for gcc-13 and g++-13
working-directory: ${{ github.workspace }}
run: |
sudo add-apt-repository ppa:ubuntu-toolchain-r/test
sudo apt update
sudo apt install gcc-13 g++-13
- name: Install gcc-13 (cached)
uses: awalsh128/cache-apt-pkgs-action@latest
with:
packages: gcc-13 g++-13
version: 1.0
- name: Restore bazel cache
uses: actions/cache/restore@v4
with:
Expand Down
3 changes: 2 additions & 1 deletion BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ MICROKERNEL_DEFS = [
"src/f32-pavgpool/f32-pavgpool-minmax.h",
"src/f32-qs8-vcvt/f32-qs8-vcvt.h",
"src/f32-qu8-vcvt/f32-qu8-vcvt.h",
"src/f32-raddextexp/f32-raddextexp.h",
"src/f32-vabs/f32-vabs.h",
"src/f32-vbinary/f32-vadd.h",
"src/f32-vbinary/f32-vaddc.h",
Expand Down Expand Up @@ -192,7 +193,6 @@ MICROKERNEL_DEFS = [
"src/s8-ibilinear/s8-ibilinear.h",
"src/s8-maxpool/s8-maxpool-minmax.h",
"src/s8-vclamp/s8-vclamp.h",
"src/s32-f32-vcvt/s32-f32-vcvt.h",
"src/u8-ibilinear/u8-ibilinear.h",
"src/u8-maxpool/u8-maxpool-minmax.h",
"src/u8-vclamp/u8-vclamp.h",
Expand Down Expand Up @@ -993,6 +993,7 @@ xnnpack_cc_library(
":datatype",
":fp16",
":indirection",
":internal",
":logging",
":math",
":microkernel_configs",
Expand Down
31 changes: 23 additions & 8 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,12 @@ SET(CMAKE_CXX_EXTENSIONS NO)
# ---[ Options.
SET(XNNPACK_LIBRARY_TYPE "default" CACHE STRING "Type of library (shared, static, or default) to build")
SET_PROPERTY(CACHE XNNPACK_LIBRARY_TYPE PROPERTY STRINGS default static shared)
OPTION(XNNPACK_ENABLE_ASSEMBLY "Build XNNPACK with assembly micro-kernels" ON)
IF(CMAKE_C_COMPILER_ID STREQUAL "MSVC")
# Disable assembly when using MSVC until support is added.
OPTION(XNNPACK_ENABLE_ASSEMBLY "Build XNNPACK with assembly micro-kernels" OFF)
ELSE()
OPTION(XNNPACK_ENABLE_ASSEMBLY "Build XNNPACK with assembly micro-kernels" ON)
ENDIF()
OPTION(XNNPACK_ENABLE_MEMOPT "Build XNNPACK with optimized memory allocation scheme" ON)
OPTION(XNNPACK_ENABLE_SPARSE "Build XNNPACK with graph rewriting for sparse inference" ON)
OPTION(XNNPACK_ENABLE_GEMM_M_SPECIALIZATION "Build XNNPACK with support for selecting microkernel with different MR" ON)
Expand Down Expand Up @@ -430,7 +435,6 @@ SET(OPERATOR_SRCS
src/operators/average-pooling-nhwc.c
src/operators/batch-matrix-multiply-nc.c
src/operators/binary-elementwise-nd.c
src/operators/channel-shuffle-nc.c
src/operators/constant-pad-nd.c
src/operators/convolution-nchw.c
src/operators/convolution-nhwc.c
Expand Down Expand Up @@ -523,7 +527,6 @@ SET(XNNPACK_SRCS
src/configs/xx-fill-config.c
src/configs/xx-pad-config.c
src/configs/x8-lut-config.c
src/configs/zip-config.c
src/init.c
src/params.c
"${PROJECT_BINARY_DIR}/build_identifier.c")
Expand Down Expand Up @@ -660,6 +663,9 @@ IF(XNNPACK_TARGET_PROCESSOR MATCHES "^x86(_64)?$")
LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_F16C_MICROKERNEL_SRCS})
LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_FMA3_MICROKERNEL_SRCS})
LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_AVX2_MICROKERNEL_SRCS})
IF(XNNPACK_ENABLE_ASSEMBLY AND XNNPACK_TARGET_PROCESSOR MATCHES "x86_64")
LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_AMD64_ASM_MICROKERNEL_SRCS})
ENDIF()
IF(XNNPACK_ENABLE_AVX512AMX)
LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_AVX512AMX_MICROKERNEL_SRCS})
ENDIF()
Expand Down Expand Up @@ -707,6 +713,9 @@ IF(XNNPACK_TARGET_PROCESSOR MATCHES "^x86(_64)?$")
LIST(APPEND NON_PROD_MICROKERNEL_SRCS ${NON_PROD_F16C_MICROKERNEL_SRCS})
LIST(APPEND NON_PROD_MICROKERNEL_SRCS ${NON_PROD_FMA3_MICROKERNEL_SRCS})
LIST(APPEND NON_PROD_MICROKERNEL_SRCS ${NON_PROD_AVX2_MICROKERNEL_SRCS})
IF(XNNPACK_ENABLE_ASSEMBLY AND XNNPACK_TARGET_PROCESSOR MATCHES "x86_64")
LIST(APPEND NON_PROD_MICROKERNEL_SRCS ${NON_PROD_AMD64_ASM_MICROKERNEL_SRCS})
ENDIF()
IF(XNNPACK_ENABLE_AVX512AMX)
LIST(APPEND NON_PROD_MICROKERNEL_SRCS ${NON_PROD_AVX512AMX_MICROKERNEL_SRCS})
ENDIF()
Expand Down Expand Up @@ -1219,6 +1228,8 @@ IF(XNNPACK_BUILD_TESTS)

# Helper libraries
ADD_LIBRARY(next-prime STATIC test/next_prime.cc)
ADD_LIBRARY(runtime-flags STATIC test/runtime-flags.cc)
TARGET_LINK_LIBRARIES(runtime-flags PRIVATE GTest::gtest)

ADD_LIBRARY(gemm-microkernel-tester STATIC test/gemm-microkernel-tester.cc)
TARGET_INCLUDE_DIRECTORIES(gemm-microkernel-tester PRIVATE include src test)
Expand Down Expand Up @@ -1279,6 +1290,7 @@ IF(XNNPACK_BUILD_TESTS)
microparams-init
next-prime
pthreadpool
runtime-flags
XNNPACK)
ADD_SHARDED_TEST(${TEST}-test 10)
ENDFOREACH()
Expand All @@ -1300,6 +1312,7 @@ IF(XNNPACK_BUILD_TESTS)
GTest::gmock
GTest::gtest
GTest::gtest_main
runtime-flags
XNNPACK)
ADD_SHARDED_TEST(${TEST}-test 10)
ENDFOREACH()
Expand All @@ -1315,6 +1328,7 @@ IF(XNNPACK_BUILD_TESTS)
GTest::gtest
GTest::gtest_main
datatype
runtime-flags
unary-ops
XNNPACK)
ADD_TEST(NAME unary-elementwise-nc-test COMMAND unary-elementwise-nc-test)
Expand All @@ -1335,6 +1349,7 @@ IF(XNNPACK_BUILD_TESTS)
GTest::gmock
GTest::gtest
GTest::gtest_main
runtime-flags
XNNPACK)
ADD_TEST(NAME ${TEST}-test COMMAND ${TEST}-test)
ENDFOREACH()
Expand Down Expand Up @@ -1381,6 +1396,7 @@ IF(XNNPACK_BUILD_TESTS)
datatype
subgraph
logging
runtime-flags
unary-ops
XNNPACK)
ADD_TEST(NAME ${TEST}-test COMMAND ${TEST}-test)
Expand All @@ -1399,6 +1415,7 @@ IF(XNNPACK_BUILD_TESTS)
GTest::gmock
GTest::gtest
GTest::gtest_main
runtime-flags
subgraph
XNNPACK)
ADD_TEST(NAME ${TEST}-test COMMAND ${TEST}-test)
Expand Down Expand Up @@ -1464,12 +1481,10 @@ IF(XNNPACK_BUILD_TESTS)
x32-packw
x32-packx
x32-unpool
x32-zip
x8-lut
x8-packw
qs8-packw
qs8-qc4w-packw
x8-zip
xN-transpose
xx-fill
xx-pad)
Expand Down Expand Up @@ -1557,6 +1572,7 @@ IF(XNNPACK_BUILD_TESTS)
qd8-f32-qc4w-gemm-minmax
qd8-f32-qc8w-igemm-minmax
qp8-f32-qc4w-gemm-minmax
qp8-f32-qc8w-gemm-minmax
qp8-f32-qb4w-gemm-minmax
qs8-qc8w-gemm-minmax-fp32
qs8-qc8w-igemm-minmax-fp32
Expand Down Expand Up @@ -1681,7 +1697,6 @@ IF(XNNPACK_BUILD_TESTS)
f32-f16-vcvt
f32-qs8-vcvt
f32-qu8-vcvt
s32-f32-vcvt
qs8-f16-vcvt
qs8-f32-vcvt
qs8-vcvt
Expand Down Expand Up @@ -1857,7 +1872,7 @@ IF(XNNPACK_BUILD_BENCHMARKS)
TARGET_LINK_LIBRARIES(models PRIVATE XNNPACK)

ADD_EXECUTABLE(bench-models bench/models/benchmark.cc)
TARGET_INCLUDE_DIRECTORIES(bench-models PRIVATE bench)
TARGET_INCLUDE_DIRECTORIES(bench-models PRIVATE bench ${GOOGLEBENCHMARK_SOURCE_DIR})
TARGET_LINK_LIBRARIES(bench-models PRIVATE
bench-utils
benchmark::benchmark
Expand All @@ -1867,7 +1882,6 @@ IF(XNNPACK_BUILD_BENCHMARKS)
# ---[ Build operator-level microbenchmarks
SET(LIBRARY_OPERATOR_BENCHMARKS
average-pooling
channel-shuffle
convolution
deconvolution
max-pooling
Expand Down Expand Up @@ -1936,6 +1950,7 @@ IF(XNNPACK_BUILD_BENCHMARKS)
qd8-f32-qc4w-gemm
qd8-f32-qc8w-gemm
qp8-f32-qc4w-gemm
qp8-f32-qc8w-gemm
qp8-f32-qb4w-gemm
qs8-dwconv
qs8-gemm
Expand Down
82 changes: 34 additions & 48 deletions WORKSPACE → MODULE.bazel
Original file line number Diff line number Diff line change
@@ -1,49 +1,35 @@
workspace(name = "xnnpack")

load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")
## MODULE.bazel
module(
name = "xnnpack",
)

# Bazel rule definitions
http_archive(
name = "rules_cc",
sha256 = "3868eab488bd5be37a6acedbd222a196bea14408a2857916f33cce7b4780897d",
strip_prefix = "rules_cc-5e848c1434d3458018734238dbc4781f43992ea5",
urls = [
"https://github.com/bazelbuild/rules_cc/archive/5e848c1434d3458018734238dbc4781f43992ea5.zip",
],
)
bazel_dep(name = "rules_cc", version = "0.1.0")
bazel_dep(name = "rules_python", version = "1.0.0")

# Bazel Python rule definitions.
http_archive(
name = "rules_python",
sha256 = "4912ced70dc1a2a8e4b86cec233b192ca053e82bc72d877b98e126156e8f228d",
strip_prefix = "rules_python-0.32.2",
urls = [
"https://github.com/bazelbuild/rules_python/releases/download/0.32.2/rules_python-0.32.2.tar.gz",
],
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
pip.parse(
hub_name = "pip",
python_version = "3.11",
requirements_lock = "//:requirements_lock.txt",
)

load("@rules_python//python:repositories.bzl", "py_repositories")

py_repositories()
use_repo(pip, "pip")

# Bazel Skylib.
http_archive(
name = "bazel_skylib",
sha256 = "f7be3474d42aae265405a592bb7da8e171919d74c16f082a5457840f06054728",
urls = [
"https://mirror.bazel.build/github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
"https://github.com/bazelbuild/bazel-skylib/releases/download/1.2.1/bazel-skylib-1.2.1.tar.gz",
],
)
bazel_dep(name = "bazel_skylib", version = "1.7.1")

# Bazel Platforms
http_archive(
name = "platforms",
sha256 = "5308fc1d8865406a49427ba24a9ab53087f17f5266a7aabbfc28823f3916e1ca",
urls = ["https://github.com/bazelbuild/platforms/releases/download/0.0.6/platforms-0.0.6.tar.gz"],
)
bazel_dep(name = "platforms", version = "0.0.10")

# TODO: some (most? all?) of the http_archive() calls below could become bazel_dep() calls,
# but it would require verifying that the semver provided by the Bazel registry matches the hash
# that we expect in CMake; it's not clear that it is a big win to do so given the modest
# complexity of our deps, so I'm leaving it like this for now to ensure that the Bazel and CMake
# builds are using identical dependencies.

http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

# LINT.IfChange
# LINT.IfChange(googletest)
# Google Test framework, used by most unit-tests.
http_archive(
name = "com_google_googletest",
Expand All @@ -53,7 +39,7 @@ http_archive(
)
# LINT.ThenChange(cmake/DownloadGoogleTest.cmake)

# LINT.IfChange
# LINT.IfChange(benchmark)
# Google Benchmark library, used in micro-benchmarks.
http_archive(
name = "com_google_benchmark",
Expand All @@ -63,7 +49,7 @@ http_archive(
)
# LINT.ThenChange(cmake/DownloadGoogleBenchmark.cmake)

# LINT.IfChange
# LINT.IfChange(FXdiv)
# FXdiv library, used for repeated integer division by the same factor
http_archive(
name = "FXdiv",
Expand All @@ -73,17 +59,17 @@ http_archive(
)
# LINT.ThenChange(cmake/DownloadFXdiv.cmake)

# LINT.IfChange
# LINT.IfChange(pthreadpool)
# pthreadpool library, used for parallelization
http_archive(
name = "pthreadpool",
sha256 = "a4cf06de57bfdf8d7b537c61f1c3071bce74e57524fe053e0bbd2332feca7f95",
strip_prefix = "pthreadpool-4fe0e1e183925bf8cfa6aae24237e724a96479b8",
urls = ["https://github.com/Maratyszcza/pthreadpool/archive/4fe0e1e183925bf8cfa6aae24237e724a96479b8.zip"],
sha256 = "9f1baba9e97df8abc792eeaa2a8f0e0d29e507db1b4c1a8210868c889eb449b5",
strip_prefix = "pthreadpool-39df650e19d4f6382e246c29d6819b1ce6ee0b24",
urls = ["https://github.com/google/pthreadpool/archive/39df650e19d4f6382e246c29d6819b1ce6ee0b24.zip"],
)
# LINT.ThenChange(cmake/DownloadPThreadPool.cmake)

# LINT.IfChange
# LINT.IfChange(cpuinfo)
# cpuinfo library, used for detecting processor characteristics
http_archive(
name = "cpuinfo",
Expand All @@ -95,14 +81,14 @@ http_archive(
)
# LINT.ThenChange(cmake/DownloadCpuinfo.cmake)

# LINT.IfChange
# LINT.IfChange(kleidiai)
# KleidiAI library, used for ARM microkernels.
http_archive(
name = "KleidiAI",
sha256 = "ad37707084a6d4ff41be10cbe8540c75bea057ba79d0de6c367c1bfac6ba0852",
strip_prefix = "kleidiai-40a926833857fb64786e02f97703e42b1537cb57",
sha256 = "8ba8cdb9f945941174d34d10eb4ad158ad1cbc1aef259de5ad992b0bbe85861f",
strip_prefix = "kleidiai-7e8c4baf953227fa447a2f345e5d6491a504aa56",
urls = [
"https://gitlab.arm.com/kleidi/kleidiai/-/archive/40a926833857fb64786e02f97703e42b1537cb57/kleidiai-40a926833857fb64786e02f97703e42b1537cb57.zip"
"https://gitlab.arm.com/kleidi/kleidiai/-/archive/7e8c4baf953227fa447a2f345e5d6491a504aa56/kleidiai-7e8c4baf953227fa447a2f345e5d6491a504aa56.zip",
],
)
# LINT.ThenChange(cmake/DownloadKleidiAI.cmake)
Expand Down
23 changes: 16 additions & 7 deletions bench/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,22 @@ xnnpack_benchmark(
)

xnnpack_benchmark(
name = "qp8_f32_qb4w_gemm",
name = "qp8_f32_qc8w_gemm_bench",
srcs = [
"qp8-f32-qc8w-gemm.cc",
],
defines = xnnpack_kleidiai_defines(),
tags = xnnpack_slow_benchmark_tags(),
deps = MICROKERNEL_BENCHMARK_DEPS + [
":gemm_benchmark",
"//:isa_checks",
] + xnnpack_if_kleidiai_enabled([
"@KleidiAI//kai/ukernels/matmul",
]),
)

xnnpack_benchmark(
name = "qp8_f32_qb4w_gemm_bench",
srcs = ["qp8-f32-qb4w-gemm.cc"],
defines = xnnpack_kleidiai_defines(),
tags = xnnpack_slow_benchmark_tags(),
Expand Down Expand Up @@ -593,12 +608,6 @@ xnnpack_benchmark(
],
)

xnnpack_benchmark(
name = "channel_shuffle_bench",
srcs = ["channel-shuffle.cc"],
deps = OPERATOR_BENCHMARK_DEPS,
)

xnnpack_benchmark(
name = "convolution_bench",
srcs = ["convolution.cc"],
Expand Down
Loading

0 comments on commit 77e8997

Please sign in to comment.