From 398ce661dd3fbfed4d561c7423519ce744f8172f Mon Sep 17 00:00:00 2001 From: Mengwei Liu Date: Fri, 21 Jun 2024 15:46:41 -0700 Subject: [PATCH] Let custom_ops_aot_lib use portable_lib (#4024) Summary: Currently on Mac we can't import portable_lib then import sdpa_with_kv_cache. The reason is that they both statically link `executorch_no_prim_ops.a` and when they are both dlopen'd in python they are holding 2 copies of the static variables in `executorch_no_prim_ops.a` such as [initialized](https://github.com/pytorch/executorch/blob/main/runtime/platform/default/posix.cpp#L70). Pull Request resolved: https://github.com/pytorch/executorch/pull/4024 Test Plan: On Mac, make sure the pybind custom ops is able to import. ``` from executorch.extension.pybindings import portable_lib from executorch.examples.models.llama2.custom_ops import sdpa_with_kv_cache portable_lib._get_operator_names() > ... > llama::sdpa_with_kv_cache.out ``` Reviewed By: dbort Differential Revision: D58883985 Pulled By: larryliu0820 fbshipit-source-id: 0a42d90a53a90da83f8b57504844cbf67688d8f8 --- CMakeLists.txt | 29 ++++++----------- .../models/llama2/custom_ops/CMakeLists.txt | 32 ++++++++++++------- 2 files changed, 29 insertions(+), 32 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a8e4e6ccfd..a83a6ee565 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -519,13 +519,6 @@ target_link_options_shared_lib(executorch) # add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/portable) -if(EXECUTORCH_BUILD_KERNELS_CUSTOM) - # TODO: move all custom kernels to ${CMAKE_CURRENT_SOURCE_DIR}/kernels/custom - add_subdirectory( - ${CMAKE_CURRENT_SOURCE_DIR}/examples/models/llama2/custom_ops - ) -endif() - if(EXECUTORCH_BUILD_KERNELS_OPTIMIZED) add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/kernels/optimized) endif() @@ -688,17 +681,6 @@ if(EXECUTORCH_BUILD_PYBIND) list(APPEND _dep_libs quantized_kernels quantized_ops_lib) endif() - # TODO(larryliu): Fix macOS 2 dylibs having 2 sets of static variables issue - if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT AND NOT APPLE) - list(APPEND _dep_libs custom_ops_aot_lib) - endif() - # TODO(laryliu): Fix linux duplicate registation problem. In GH CI worker - # libcustom_ops.a doesn't dedup with the one indirectly linked from - # libcustom_ops_aot_lib.a - if(EXECUTORCH_BUILD_KERNELS_CUSTOM AND APPLE) - target_link_options_shared_lib(custom_ops) - list(APPEND _dep_libs custom_ops) - endif() # compile options for pybind set(_pybind_compile_options -Wno-deprecated-declarations @@ -723,7 +705,7 @@ if(EXECUTORCH_BUILD_PYBIND) target_link_libraries(util PRIVATE torch c10 executorch) # pybind portable_lib - pybind11_add_module(portable_lib extension/pybindings/pybindings.cpp) + pybind11_add_module(portable_lib SHARED extension/pybindings/pybindings.cpp) # The actual output file needs a leading underscore so it can coexist with # portable_lib.py in the same python package. set_target_properties(portable_lib PROPERTIES OUTPUT_NAME "_portable_lib") @@ -732,7 +714,7 @@ if(EXECUTORCH_BUILD_PYBIND) ) target_include_directories(portable_lib PRIVATE ${TORCH_INCLUDE_DIRS}) target_compile_options(portable_lib PUBLIC ${_pybind_compile_options}) - target_link_libraries(portable_lib PUBLIC ${_dep_libs}) + target_link_libraries(portable_lib PRIVATE ${_dep_libs}) if(APPLE) # pip wheels will need to be able to find the torch libraries. On Linux, the # .so has non-absolute dependencies on libs like "libtorch.so" without @@ -758,5 +740,12 @@ if(EXECUTORCH_BUILD_PYBIND) ) endif() +if(EXECUTORCH_BUILD_KERNELS_CUSTOM) + # TODO: move all custom kernels to ${CMAKE_CURRENT_SOURCE_DIR}/kernels/custom + add_subdirectory( + ${CMAKE_CURRENT_SOURCE_DIR}/examples/models/llama2/custom_ops + ) +endif() + # Print all summary executorch_print_configuration_summary() diff --git a/examples/models/llama2/custom_ops/CMakeLists.txt b/examples/models/llama2/custom_ops/CMakeLists.txt index b71faac04c..0de9f4df88 100644 --- a/examples/models/llama2/custom_ops/CMakeLists.txt +++ b/examples/models/llama2/custom_ops/CMakeLists.txt @@ -40,13 +40,7 @@ include(${EXECUTORCH_SRCS_FILE}) set(_common_include_directories ${EXECUTORCH_ROOT}/..) # Custom op libraries -set(custom_ops_libs) -if(APPLE) - list(APPEND executorch_no_prim_ops_shared) -else() - list(APPEND executorch_no_prim_ops) -endif() -list(APPEND custom_ops_libs pthreadpool) +set(custom_ops_libs pthreadpool) list(APPEND custom_ops_libs cpuinfo) list(APPEND custom_ops_libs cpublas) list(APPEND custom_ops_libs eigen_blas) @@ -72,7 +66,9 @@ target_include_directories(custom_ops PUBLIC "${_common_include_directories}") target_include_directories( custom_ops PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/../../../../include" ) -target_link_libraries(custom_ops PUBLIC ${custom_ops_libs}) +target_link_libraries( + custom_ops PUBLIC ${custom_ops_libs} executorch_no_prim_ops +) target_compile_options( custom_ops PUBLIC ${_common_compile_options} -DET_USE_THREADPOOL @@ -84,7 +80,8 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT) # Add a AOT library find_package(Torch CONFIG REQUIRED) add_library( - custom_ops_aot_lib SHARED ${CMAKE_CURRENT_SOURCE_DIR}/op_sdpa_aot.cpp + custom_ops_aot_lib SHARED ${_custom_ops__srcs} + ${CMAKE_CURRENT_SOURCE_DIR}/op_sdpa_aot.cpp ) target_include_directories( custom_ops_aot_lib PUBLIC "${_common_include_directories}" @@ -93,10 +90,21 @@ if(EXECUTORCH_BUILD_KERNELS_CUSTOM_AOT) custom_ops_aot_lib PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/../../../../include" ) - target_link_libraries(custom_ops_aot_lib PUBLIC custom_ops torch) + if(TARGET portable_lib) + # If we have portable_lib built, custom_ops_aot_lib gives the ability to use + # the ops in PyTorch and ExecuTorch through pybind + target_link_libraries(custom_ops_aot_lib PUBLIC portable_lib) + else() + # If no portable_lib, custom_ops_aot_lib still gives the ability to use the + # ops in PyTorch + target_link_libraries(custom_ops_aot_lib PUBLIC executorch_no_prim_ops) + endif() + + target_link_libraries(custom_ops_aot_lib PUBLIC cpublas torch) target_compile_options( - custom_ops_aot_lib PUBLIC -Wno-deprecated-declarations -fPIC -frtti - -fexceptions + custom_ops_aot_lib + PUBLIC -Wno-deprecated-declarations -fPIC -frtti -fexceptions + ${_common_compile_options} -DET_USE_THREADPOOL ) install(TARGETS custom_ops_aot_lib DESTINATION lib)