Skip to content

Commit

Permalink
Merge branch 'master' into ibilinear
Browse files Browse the repository at this point in the history
  • Loading branch information
RahulSundarMCW authored Dec 20, 2024
2 parents 019f109 + 5aa14b1 commit cffddac
Show file tree
Hide file tree
Showing 1,047 changed files with 70,224 additions and 33,573 deletions.
4 changes: 4 additions & 0 deletions .bazelrc
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Common settings
common --enable_bzlmod
build --enable_bzlmod

# Basic build settings
build --jobs 128
build --cxxopt='-std=gnu++14'
Expand Down
12 changes: 7 additions & 5 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ jobs:
env:
CC: gcc-9
CXX: g++-9
BAZEL_DEFINES: --define=xnn_enable_avxvnni=false --define=xnn_enable_avxvnniint8=false --define=xnn_enable_avx512amx=false --define=xnn_enable_avx512fp16=false
BAZEL_DEFINES: --define=xnn_enable_avxvnni=false --define=xnn_enable_avx256vnni=false --define=xnn_enable_avxvnniint8=false --define=xnn_enable_avx512amx=false --define=xnn_enable_avx512fp16=false
steps:
- uses: actions/checkout@v4
- name: Update apt
Expand Down Expand Up @@ -474,14 +474,16 @@ jobs:
timeout-minutes: 60
steps:
- uses: actions/checkout@v4
- name: Install gcc-13
# Pull in gcc-13 from the ubuntu-23.10 repository since it is not available
# for ubuntu-22.04.
- name: Add repository ppa:ubuntu-toolchain-r/test for gcc-13 and g++-13
working-directory: ${{ github.workspace }}
run: |
sudo add-apt-repository ppa:ubuntu-toolchain-r/test
sudo apt update
sudo apt install gcc-13 g++-13
- name: Install gcc-13 (cached)
uses: awalsh128/cache-apt-pkgs-action@latest
with:
packages: gcc-13 g++-13
version: 1.0
- name: Restore bazel cache
uses: actions/cache/restore@v4
with:
Expand Down
3 changes: 2 additions & 1 deletion BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ MICROKERNEL_DEFS = [
"src/f32-pavgpool/f32-pavgpool-minmax.h",
"src/f32-qs8-vcvt/f32-qs8-vcvt.h",
"src/f32-qu8-vcvt/f32-qu8-vcvt.h",
"src/f32-raddextexp/f32-raddextexp.h",
"src/f32-vabs/f32-vabs.h",
"src/f32-vbinary/f32-vadd.h",
"src/f32-vbinary/f32-vaddc.h",
Expand Down Expand Up @@ -192,7 +193,6 @@ MICROKERNEL_DEFS = [
"src/s8-ibilinear/s8-ibilinear.h",
"src/s8-maxpool/s8-maxpool-minmax.h",
"src/s8-vclamp/s8-vclamp.h",
"src/s32-f32-vcvt/s32-f32-vcvt.h",
"src/u8-ibilinear/u8-ibilinear.h",
"src/u8-maxpool/u8-maxpool-minmax.h",
"src/u8-vclamp/u8-vclamp.h",
Expand Down Expand Up @@ -993,6 +993,7 @@ xnnpack_cc_library(
":datatype",
":fp16",
":indirection",
":internal",
":logging",
":math",
":microkernel_configs",
Expand Down
29 changes: 22 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,12 @@ SET(CMAKE_CXX_EXTENSIONS NO)
# ---[ Options.
SET(XNNPACK_LIBRARY_TYPE "default" CACHE STRING "Type of library (shared, static, or default) to build")
SET_PROPERTY(CACHE XNNPACK_LIBRARY_TYPE PROPERTY STRINGS default static shared)
OPTION(XNNPACK_ENABLE_ASSEMBLY "Build XNNPACK with assembly micro-kernels" ON)
# Assembly micro-kernels are enabled by default, except when the C compiler is
# MSVC, where they stay disabled until support is added.
IF(NOT CMAKE_C_COMPILER_ID STREQUAL "MSVC")
  OPTION(XNNPACK_ENABLE_ASSEMBLY "Build XNNPACK with assembly micro-kernels" ON)
ELSE()
  OPTION(XNNPACK_ENABLE_ASSEMBLY "Build XNNPACK with assembly micro-kernels" OFF)
ENDIF()
OPTION(XNNPACK_ENABLE_MEMOPT "Build XNNPACK with optimized memory allocation scheme" ON)
OPTION(XNNPACK_ENABLE_SPARSE "Build XNNPACK with graph rewriting for sparse inference" ON)
OPTION(XNNPACK_ENABLE_GEMM_M_SPECIALIZATION "Build XNNPACK with support for selecting microkernel with different MR" ON)
Expand Down Expand Up @@ -430,7 +435,6 @@ SET(OPERATOR_SRCS
src/operators/average-pooling-nhwc.c
src/operators/batch-matrix-multiply-nc.c
src/operators/binary-elementwise-nd.c
src/operators/channel-shuffle-nc.c
src/operators/constant-pad-nd.c
src/operators/convolution-nchw.c
src/operators/convolution-nhwc.c
Expand Down Expand Up @@ -523,7 +527,6 @@ SET(XNNPACK_SRCS
src/configs/xx-fill-config.c
src/configs/xx-pad-config.c
src/configs/x8-lut-config.c
src/configs/zip-config.c
src/init.c
src/params.c
"${PROJECT_BINARY_DIR}/build_identifier.c")
Expand Down Expand Up @@ -660,6 +663,9 @@ IF(XNNPACK_TARGET_PROCESSOR MATCHES "^x86(_64)?$")
LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_F16C_MICROKERNEL_SRCS})
LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_FMA3_MICROKERNEL_SRCS})
LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_AVX2_MICROKERNEL_SRCS})
# Add the AMD64 assembly micro-kernels only when assembly is enabled and the
# target processor is exactly x86_64. The pattern is anchored so that it cannot
# match a longer processor string that merely contains "x86_64".
IF(XNNPACK_ENABLE_ASSEMBLY AND XNNPACK_TARGET_PROCESSOR MATCHES "^x86_64$")
  LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_AMD64_ASM_MICROKERNEL_SRCS})
ENDIF()
IF(XNNPACK_ENABLE_AVX512AMX)
LIST(APPEND PROD_MICROKERNEL_SRCS ${PROD_AVX512AMX_MICROKERNEL_SRCS})
ENDIF()
Expand Down Expand Up @@ -707,6 +713,9 @@ IF(XNNPACK_TARGET_PROCESSOR MATCHES "^x86(_64)?$")
LIST(APPEND NON_PROD_MICROKERNEL_SRCS ${NON_PROD_F16C_MICROKERNEL_SRCS})
LIST(APPEND NON_PROD_MICROKERNEL_SRCS ${NON_PROD_FMA3_MICROKERNEL_SRCS})
LIST(APPEND NON_PROD_MICROKERNEL_SRCS ${NON_PROD_AVX2_MICROKERNEL_SRCS})
# Add the non-production AMD64 assembly micro-kernels only when assembly is
# enabled and the target processor is exactly x86_64. The pattern is anchored so
# that it cannot match a longer processor string that merely contains "x86_64".
IF(XNNPACK_ENABLE_ASSEMBLY AND XNNPACK_TARGET_PROCESSOR MATCHES "^x86_64$")
  LIST(APPEND NON_PROD_MICROKERNEL_SRCS ${NON_PROD_AMD64_ASM_MICROKERNEL_SRCS})
ENDIF()
IF(XNNPACK_ENABLE_AVX512AMX)
LIST(APPEND NON_PROD_MICROKERNEL_SRCS ${NON_PROD_AVX512AMX_MICROKERNEL_SRCS})
ENDIF()
Expand Down Expand Up @@ -1219,6 +1228,8 @@ IF(XNNPACK_BUILD_TESTS)

# Helper libraries
ADD_LIBRARY(next-prime STATIC test/next_prime.cc)
# Static helper library built from test/runtime-flags.cc. It links GoogleTest
# with PRIVATE visibility.
ADD_LIBRARY(runtime-flags STATIC test/runtime-flags.cc)
TARGET_LINK_LIBRARIES(runtime-flags PRIVATE GTest::gtest)

ADD_LIBRARY(gemm-microkernel-tester STATIC test/gemm-microkernel-tester.cc)
TARGET_INCLUDE_DIRECTORIES(gemm-microkernel-tester PRIVATE include src test)
Expand Down Expand Up @@ -1279,6 +1290,7 @@ IF(XNNPACK_BUILD_TESTS)
microparams-init
next-prime
pthreadpool
runtime-flags
XNNPACK)
ADD_SHARDED_TEST(${TEST}-test 10)
ENDFOREACH()
Expand All @@ -1300,6 +1312,7 @@ IF(XNNPACK_BUILD_TESTS)
GTest::gmock
GTest::gtest
GTest::gtest_main
runtime-flags
XNNPACK)
ADD_SHARDED_TEST(${TEST}-test 10)
ENDFOREACH()
Expand All @@ -1315,6 +1328,7 @@ IF(XNNPACK_BUILD_TESTS)
GTest::gtest
GTest::gtest_main
datatype
runtime-flags
unary-ops
XNNPACK)
ADD_TEST(NAME unary-elementwise-nc-test COMMAND unary-elementwise-nc-test)
Expand All @@ -1335,6 +1349,7 @@ IF(XNNPACK_BUILD_TESTS)
GTest::gmock
GTest::gtest
GTest::gtest_main
runtime-flags
XNNPACK)
ADD_TEST(NAME ${TEST}-test COMMAND ${TEST}-test)
ENDFOREACH()
Expand Down Expand Up @@ -1381,6 +1396,7 @@ IF(XNNPACK_BUILD_TESTS)
datatype
subgraph
logging
runtime-flags
unary-ops
XNNPACK)
ADD_TEST(NAME ${TEST}-test COMMAND ${TEST}-test)
Expand All @@ -1399,6 +1415,7 @@ IF(XNNPACK_BUILD_TESTS)
GTest::gmock
GTest::gtest
GTest::gtest_main
runtime-flags
subgraph
XNNPACK)
ADD_TEST(NAME ${TEST}-test COMMAND ${TEST}-test)
Expand Down Expand Up @@ -1464,12 +1481,10 @@ IF(XNNPACK_BUILD_TESTS)
x32-packw
x32-packx
x32-unpool
x32-zip
x8-lut
x8-packw
qs8-packw
qs8-qc4w-packw
x8-zip
xN-transpose
xx-fill
xx-pad)
Expand Down Expand Up @@ -1557,6 +1572,7 @@ IF(XNNPACK_BUILD_TESTS)
qd8-f32-qc4w-gemm-minmax
qd8-f32-qc8w-igemm-minmax
qp8-f32-qc4w-gemm-minmax
qp8-f32-qc8w-gemm-minmax
qp8-f32-qb4w-gemm-minmax
qs8-qc8w-gemm-minmax-fp32
qs8-qc8w-igemm-minmax-fp32
Expand Down Expand Up @@ -1681,7 +1697,6 @@ IF(XNNPACK_BUILD_TESTS)
f32-f16-vcvt
f32-qs8-vcvt
f32-qu8-vcvt
s32-f32-vcvt
qs8-f16-vcvt
qs8-f32-vcvt
qs8-vcvt
Expand Down Expand Up @@ -1867,7 +1882,6 @@ IF(XNNPACK_BUILD_BENCHMARKS)
# ---[ Build operator-level microbenchmarks
SET(LIBRARY_OPERATOR_BENCHMARKS
average-pooling
channel-shuffle
convolution
deconvolution
max-pooling
Expand Down Expand Up @@ -1936,6 +1950,7 @@ IF(XNNPACK_BUILD_BENCHMARKS)
qd8-f32-qc4w-gemm
qd8-f32-qc8w-gemm
qp8-f32-qc4w-gemm
qp8-f32-qc8w-gemm
qp8-f32-qb4w-gemm
qs8-dwconv
qs8-gemm
Expand Down
104 changes: 104 additions & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
## MODULE.bazel
module(name = "xnnpack")

# Bazel rule definitions
bazel_dep(name = "rules_cc", version = "0.1.0")
bazel_dep(name = "rules_python", version = "1.0.0")

# Python dependencies, parsed from the requirements lock file and exposed
# through the "pip" hub repository.
pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
pip.parse(
    hub_name = "pip",
    python_version = "3.11",
    requirements_lock = "//:requirements_lock.txt",
)
use_repo(pip, "pip")

# Bazel Skylib.
bazel_dep(name = "bazel_skylib", version = "1.7.1")

# Bazel Platforms
bazel_dep(name = "platforms", version = "0.0.10")

# TODO: some (most? all?) of the http_archive() calls below could become bazel_dep() calls,
# but it would require verifying that the semver provided by the Bazel registry matches the hash
# that we expect in CMake; it's not clear that it is a big win to do so given the modest
# complexity of our deps, so I'm leaving it like this for now to ensure that the Bazel and CMake
# builds are using identical dependencies.

# Bind the http_archive repository rule from @bazel_tools so that it can be
# called directly in this module file.
http_archive = use_repo_rule("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive")

# LINT.IfChange(googletest)
# Google Test framework; most unit tests are written against it.
http_archive(
    name = "com_google_googletest",
    sha256 = "648b9430fca63acc68c59ee98f624dcbcd9c24ea6b278c306ab6b7f49f62034a",
    strip_prefix = "googletest-d144031940543e15423a25ae5a8a74141044862f",
    urls = [
        "https://github.com/google/googletest/archive/d144031940543e15423a25ae5a8a74141044862f.zip",
    ],
)
# LINT.ThenChange(cmake/DownloadGoogleTest.cmake)

# LINT.IfChange(benchmark)
# Google Benchmark library; the micro-benchmarks are built on it.
http_archive(
    name = "com_google_benchmark",
    sha256 = "1ba14374fddcd9623f126b1a60945e4deac4cdc4fb25a5f25e7f779e36f2db52",
    strip_prefix = "benchmark-d2a8a4ee41b923876c034afb939c4fc03598e622",
    urls = [
        "https://github.com/google/benchmark/archive/d2a8a4ee41b923876c034afb939c4fc03598e622.zip",
    ],
)
# LINT.ThenChange(cmake/DownloadGoogleBenchmark.cmake)

# LINT.IfChange(FXdiv)
# FXdiv library; provides repeated integer division by the same factor.
http_archive(
    name = "FXdiv",
    sha256 = "ab7dfb08829bee33dca38405d647868fb214ac685e379ec7ef2bebcd234cd44d",
    strip_prefix = "FXdiv-b408327ac2a15ec3e43352421954f5b1967701d1",
    urls = [
        "https://github.com/Maratyszcza/FXdiv/archive/b408327ac2a15ec3e43352421954f5b1967701d1.zip",
    ],
)
# LINT.ThenChange(cmake/DownloadFXdiv.cmake)

# LINT.IfChange(pthreadpool)
# pthreadpool library; used for parallelization.
http_archive(
    name = "pthreadpool",
    sha256 = "9f1baba9e97df8abc792eeaa2a8f0e0d29e507db1b4c1a8210868c889eb449b5",
    strip_prefix = "pthreadpool-39df650e19d4f6382e246c29d6819b1ce6ee0b24",
    urls = [
        "https://github.com/google/pthreadpool/archive/39df650e19d4f6382e246c29d6819b1ce6ee0b24.zip",
    ],
)
# LINT.ThenChange(cmake/DownloadPThreadPool.cmake)

# LINT.IfChange(cpuinfo)
# cpuinfo library; detects processor characteristics.
http_archive(
    name = "cpuinfo",
    sha256 = "0edef1777a2cc6c43524cbabd7b631c34fcf8b7d9df96ec31785c029e9cec8ff",
    strip_prefix = "cpuinfo-ca156f7bc9109c552973414a63d310f76ef0cbf8",
    urls = ["https://github.com/pytorch/cpuinfo/archive/ca156f7bc9109c552973414a63d310f76ef0cbf8.zip"],
)
# LINT.ThenChange(cmake/DownloadCpuinfo.cmake)

# LINT.IfChange(kleidiai)
# KleidiAI library; supplies ARM microkernels.
http_archive(
    name = "KleidiAI",
    sha256 = "8ba8cdb9f945941174d34d10eb4ad158ad1cbc1aef259de5ad992b0bbe85861f",
    strip_prefix = "kleidiai-7e8c4baf953227fa447a2f345e5d6491a504aa56",
    urls = ["https://gitlab.arm.com/kleidi/kleidiai/-/archive/7e8c4baf953227fa447a2f345e5d6491a504aa56/kleidiai-7e8c4baf953227fa447a2f345e5d6491a504aa56.zip"],
)
# LINT.ThenChange(cmake/DownloadKleidiAI.cmake)

# Ruy library; used as a comparison target in benchmarks.
http_archive(
    name = "ruy",
    sha256 = "fe8345f521bb378745ebdd0f8c5937414849936851d2ec2609774eb2d7098e54",
    strip_prefix = "ruy-9f53ba413e6fc879236dcaa3e008915973d67a4f",
    urls = ["https://github.com/google/ruy/archive/9f53ba413e6fc879236dcaa3e008915973d67a4f.zip"],
)
Loading

0 comments on commit cffddac

Please sign in to comment.