Skip to content

Commit

Permalink
[GLUTEN-1306][VL] feat: Link static depends via vcpkg (apache#1384)
Browse files Browse the repository at this point in the history
Added an option to manage static dependencies via vcpkg.

# Install build depends on system (optional)
sudo -E ./dev/vcpkg/setup-build-depends.sh

# Source new gcc (centos only)
source /opt/rh/devtoolset-9/enable

# Setup and enable vcpkg environment
source ./dev/vcpkg/env.sh

# Build gluten package as original
./dev/buildbundle-veloxbe.sh --enable_vcpkg=ON
  • Loading branch information
ccat3z authored May 11, 2023
1 parent ac2b18f commit eaa3a57
Show file tree
Hide file tree
Showing 55 changed files with 1,448 additions and 24 deletions.
69 changes: 69 additions & 0 deletions .github/workflows/velox_be.yml
Original file line number Diff line number Diff line change
Expand Up @@ -353,3 +353,72 @@ jobs:
if: ${{ always() }}
run: |
docker stop velox-backend-centos7-test-$GITHUB_RUN_ID || true
velox-backend-static-build-test:
runs-on: velox-self-hosted
steps:
- uses: actions/checkout@v2
- name: Setup docker container
run: |
EXTRA_DOCKER_OPTIONS="--name velox-backend-static-build-test-$GITHUB_RUN_ID -e NUM_THREADS=30 --volume velox-backend-vcpkg-binary-cache:/var/cache/vcpkg --env VCPKG_BINARY_SOURCES=clear;files,/var/cache/vcpkg,readwrite --detach" \
NON_INTERACTIVE=ON \
MOUNT_MAVEN_CACHE=OFF \
OS_IMAGE=centos:8 \
OS_VERSION=8 \
tools/gluten-te/centos/cbash.sh sleep 14400
- name: Setup maven cache
run: |
docker cp ~/.m2/repository velox-backend-static-build-test-$GITHUB_RUN_ID:/root/.m2/
- name: Build Gluten CPP library
run: |
docker exec -i velox-backend-static-build-test-$GITHUB_RUN_ID bash -c '
source /env.sh && \
cd /opt/gluten && \
sudo -E ./dev/vcpkg/setup-build-depends.sh && \
source ./dev/vcpkg/env.sh && \
./dev/builddeps-veloxbe.sh --build_test=ON --build_benchmarks=ON --enable_s3=ON --enable_hdfs=ON'
- name: Build for Spark 3.2.2
run: |
docker exec velox-backend-static-build-test-$GITHUB_RUN_ID bash -c '
cd /opt/gluten && \
mvn clean install -Pspark-3.2 -Pbackends-velox -DskipTests -Dspark32.version=3.2.2 && \
cd /opt/gluten/tools/gluten-it && \
mvn clean package -Pspark-3.2'
- name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 (centos 8)
run: |
EXTRA_DOCKER_OPTIONS="--name velox-backend-static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30" \
NON_INTERACTIVE=ON \
MOUNT_MAVEN_CACHE=OFF \
OS_IMAGE=centos:8 \
OS_VERSION=8 \
tools/gluten-te/centos/cbash.sh 'cd /opt/gluten/tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh \
--preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=20g -s=1.0 --cpus=16 --iterations=1 \
&& GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh \
--preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --cpus=32 --iterations=1'
- name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 (ubuntu 20.04)
run: |
EXTRA_DOCKER_OPTIONS="--name velox-backend-static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30" \
NON_INTERACTIVE=ON \
MOUNT_MAVEN_CACHE=OFF \
OS_IMAGE=ubuntu:20.04 \
tools/gluten-te/ubuntu/cbash.sh 'cd /opt/gluten/tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh \
--preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=20g -s=1.0 --cpus=16 --iterations=1 \
&& GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh \
--preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --cpus=32 --iterations=1'
- name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 (ubuntu 22.04)
run: |
EXTRA_DOCKER_OPTIONS="--name velox-backend-static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30" \
NON_INTERACTIVE=ON \
MOUNT_MAVEN_CACHE=OFF \
OS_IMAGE=ubuntu:22.04 \
tools/gluten-te/ubuntu/cbash.sh 'cd /opt/gluten/tools/gluten-it \
&& GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh \
--preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=20g -s=1.0 --cpus=16 --iterations=1 \
&& GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh \
--preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --cpus=32 --iterations=1'
- name: Exit docker container
if: ${{ always() }}
run: |
docker stop velox-backend-static-build-test-$GITHUB_RUN_ID || true
Empty file added .gitmodules
Empty file.
158 changes: 158 additions & 0 deletions cpp/CMake/FindThrift.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
# Copyright 2012 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# - Find Thrift (a cross platform RPC lib/tool)
#
# Variables used by this module, they can change the default behaviour and need
# to be set before calling find_package:
#
# Thrift_ROOT - When set, this path is inspected instead of standard library
# locations as the root of the Thrift installation.
# The environment variable THRIFT_HOME overrides this variable.
#
# This module defines
# Thrift_FOUND, whether Thrift is found or not
# Thrift_COMPILER_FOUND, whether Thrift compiler is found or not
#
# thrift::thrift, a library target to use Thrift
# thrift::compiler, an executable target to use Thrift compiler

# extract_thrift_version()
#
# Derives the Thrift version from the PACKAGE_VERSION macro in
# "${THRIFT_INCLUDE_DIR}/thrift/config.h".
#
# Reads:   THRIFT_INCLUDE_DIR (from the caller's scope).
# Sets:    Thrift_VERSION in the caller's scope. It is the dotted version
#          string when it can be extracted, and the empty string when
#          THRIFT_INCLUDE_DIR is unset/NOTFOUND, when config.h is missing, or
#          when config.h carries no PACKAGE_VERSION definition.
function(EXTRACT_THRIFT_VERSION)
  set(thrift_version "")
  set(thrift_config_h "${THRIFT_INCLUDE_DIR}/thrift/config.h")
  # file(READ) aborts the configure step on a missing file, so probe for the
  # header first and fall back to "version unknown" when it is not installed.
  if(THRIFT_INCLUDE_DIR AND EXISTS "${thrift_config_h}")
    file(READ "${thrift_config_h}" thrift_config_h_content)
    # Two-step match: isolate the #define line, then pull the number out of it.
    string(REGEX MATCH "#define PACKAGE_VERSION \"[0-9.]+\"" thrift_version_definition
                 "${thrift_config_h_content}")
    string(REGEX MATCH "[0-9.]+" thrift_version "${thrift_version_definition}")
  endif()
  set(Thrift_VERSION
      "${thrift_version}"
      PARENT_SCOPE)
endfunction()

# Choose the suffix appended to the static Thrift library name on MSVC:
# "mt"/"md" depending on ARROW_USE_STATIC_CRT (static vs. dynamic CRT), plus a
# trailing "d" for debug builds. A caller-defined THRIFT_MSVC_LIB_SUFFIX is
# left untouched, and no suffix is chosen when the shared library is requested.
if(MSVC_TOOLCHAIN AND NOT DEFINED THRIFT_MSVC_LIB_SUFFIX)
  if(NOT ARROW_THRIFT_USE_SHARED)
    if(ARROW_USE_STATIC_CRT)
      set(THRIFT_MSVC_LIB_SUFFIX "mt")
    else()
      set(THRIFT_MSVC_LIB_SUFFIX "md")
    endif()
    # CMAKE_BUILD_TYPE is conventionally spelled "Debug"; normalise the case
    # so that "Debug", "debug" and "DEBUG" all select the debug variant.
    # (Empty under multi-config generators, which keeps the release suffix.)
    string(TOUPPER "${CMAKE_BUILD_TYPE}" _thrift_build_type_upper)
    if("${_thrift_build_type_upper}" STREQUAL "DEBUG")
      string(APPEND THRIFT_MSVC_LIB_SUFFIX "d")
    endif()
    unset(_thrift_build_type_upper)
  endif()
endif()
# Base file name (without prefix/extension) used to build the candidate list.
set(THRIFT_LIB_NAME_BASE "thrift${THRIFT_MSVC_LIB_SUFFIX}")

# Build THRIFT_LIB_NAMES, the candidate file names handed to find_library().
if(NOT ARROW_THRIFT_USE_SHARED)
  # Static linking: only the fully decorated archive name is acceptable.
  set(THRIFT_LIB_NAMES
      "${CMAKE_STATIC_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_STATIC_LIBRARY_SUFFIX}")
else()
  # Shared linking: start with the bare name, then add the import library
  # (only on platforms that define an import-library suffix) and finally the
  # fully decorated shared-library name.
  set(THRIFT_LIB_NAMES thrift)
  if(CMAKE_IMPORT_LIBRARY_SUFFIX)
    list(APPEND THRIFT_LIB_NAMES
         "${CMAKE_IMPORT_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_IMPORT_LIBRARY_SUFFIX}")
  endif()
  list(APPEND THRIFT_LIB_NAMES
       "${CMAKE_SHARED_LIBRARY_PREFIX}${THRIFT_LIB_NAME_BASE}${CMAKE_SHARED_LIBRARY_SUFFIX}")
endif()

# Locate the Thrift library (THRIFT_LIB), headers (THRIFT_INCLUDE_DIR) and
# compiler (THRIFT_COMPILER), and determine Thrift_VERSION.
if(Thrift_ROOT)
  # An explicit installation root was given: search beneath it.
  find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h
            PATHS ${Thrift_ROOT}
            PATH_SUFFIXES "include")
  find_program(THRIFT_COMPILER thrift
               PATHS ${Thrift_ROOT}
               PATH_SUFFIXES "bin")
  find_library(THRIFT_LIB
               NAMES ${THRIFT_LIB_NAMES}
               PATHS ${Thrift_ROOT}
               PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib")
  extract_thrift_version()
else()
  # THRIFT-4760: The pkgconfig files are currently only installed when using autotools.
  # Starting with 0.13, they are also installed for the CMake-based installations of Thrift.
  find_package(PkgConfig QUIET)
  pkg_check_modules(THRIFT_PC thrift)
  if(NOT THRIFT_PC_FOUND)
    # No pkg-config metadata: fall back to CMake's default search locations
    # and read the version from the installed header.
    find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h PATH_SUFFIXES "include")
    find_program(THRIFT_COMPILER thrift PATH_SUFFIXES "bin")
    find_library(THRIFT_LIB
                 NAMES ${THRIFT_LIB_NAMES}
                 PATH_SUFFIXES "lib/${CMAKE_LIBRARY_ARCHITECTURE}" "lib")
    extract_thrift_version()
  else()
    # pkg-config knows the package: take header dir and version from it and
    # restrict the library/compiler lookups to the locations it reported.
    set(Thrift_VERSION ${THRIFT_PC_VERSION})
    set(THRIFT_INCLUDE_DIR "${THRIFT_PC_INCLUDEDIR}")

    list(APPEND THRIFT_PC_LIBRARY_DIRS "${THRIFT_PC_LIBDIR}")

    find_program(THRIFT_COMPILER thrift
                 HINTS ${THRIFT_PC_PREFIX}
                 NO_DEFAULT_PATH
                 PATH_SUFFIXES "bin")
    find_library(THRIFT_LIB
                 NAMES ${THRIFT_LIB_NAMES}
                 PATHS ${THRIFT_PC_LIBRARY_DIRS}
                 NO_DEFAULT_PATH)
  endif()
endif()

# Component flag for the Thrift compiler: TRUE only when find_program()
# resolved THRIFT_COMPILER to an executable.
set(Thrift_COMPILER_FOUND FALSE)
if(THRIFT_COMPILER)
  set(Thrift_COMPILER_FOUND TRUE)
endif()

# Load the helper explicitly. Without this the call below only works when some
# earlier find module happened to include it (e.g. FindPkgConfig on the
# non-Thrift_ROOT path).
include(FindPackageHandleStandardArgs)

# Sets Thrift_FOUND when both the library and the header directory were found,
# validates any requested version against Thrift_VERSION, and checks requested
# components against their Thrift_<COMPONENT>_FOUND variables.
find_package_handle_standard_args(
  Thrift
  REQUIRED_VARS THRIFT_LIB THRIFT_INCLUDE_DIR
  VERSION_VAR Thrift_VERSION
  HANDLE_COMPONENTS)

# Expose the discovered artifacts as imported targets:
#   thrift::thrift   - the library (SHARED or STATIC per ARROW_THRIFT_USE_SHARED)
#   thrift::compiler - the Thrift compiler executable, when it was found
# Each target is created only if it does not exist yet, so running this module
# more than once in the same directory scope does not fail with a
# duplicate-target error.
if(Thrift_FOUND)
  if(NOT TARGET thrift::thrift)
    if(ARROW_THRIFT_USE_SHARED)
      add_library(thrift::thrift SHARED IMPORTED)
    else()
      add_library(thrift::thrift STATIC IMPORTED)
    endif()
    set_target_properties(thrift::thrift
                          PROPERTIES IMPORTED_LOCATION "${THRIFT_LIB}"
                                     INTERFACE_INCLUDE_DIRECTORIES "${THRIFT_INCLUDE_DIR}")
    if(WIN32 AND NOT MSVC_TOOLCHAIN)
      # We don't need this for Visual C++ because Thrift uses
      # "#pragma comment(lib, "Ws2_32.lib")" in
      # thrift/windows/config.h for Visual C++.
      set_target_properties(thrift::thrift PROPERTIES INTERFACE_LINK_LIBRARIES "ws2_32")
    endif()
  endif()

  if(Thrift_COMPILER_FOUND AND NOT TARGET thrift::compiler)
    add_executable(thrift::compiler IMPORTED)
    set_target_properties(thrift::compiler PROPERTIES IMPORTED_LOCATION
                                                      "${THRIFT_COMPILER}")
  endif()
endif()
6 changes: 3 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,8 +114,8 @@ find_package(JNI REQUIRED)

if(BUILD_TESTS)
set(GLUTEN_GTEST_MIN_VERSION "1.13.0")
find_package(gtest ${GLUTEN_GTEST_MIN_VERSION} CONFIG)
if(NOT gtest_FOUND)
find_package(GTest ${GLUTEN_GTEST_MIN_VERSION} CONFIG)
if(NOT GTest_FOUND)
include(BuildGTest)
endif()
include(GoogleTest)
Expand Down Expand Up @@ -145,7 +145,7 @@ function(ADD_TEST_CASE TEST_NAME)
endif()

add_executable(${TEST_NAME} ${SOURCES})
target_link_libraries(${TEST_NAME} gluten gtest gtest_main Threads::Threads glog::glog)
target_link_libraries(${TEST_NAME} gluten GTest::gtest GTest::gtest_main Threads::Threads)
target_include_directories(${TEST_NAME} PRIVATE ${CMAKE_SOURCE_DIR}/core)

if(ARG_EXTRA_LINK_LIBS)
Expand Down
50 changes: 41 additions & 9 deletions cpp/velox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ macro(ADD_VELOX_DEPENDENCIES)
endif()

add_velox_dependency(parquet::reader::thrift "${VELOX_COMPONENTS_PATH}/dwio/parquet/thrift/libvelox_dwio_parquet_thrift.a")
add_velox_dependency(thrift "${VELOX_BUILD_PATH}/third_party/arrow_ep/src/arrow_ep-build/thrift_ep-install/lib/libthrift.a")

add_velox_dependency(dwio::common "${VELOX_COMPONENTS_PATH}/dwio/common/libvelox_dwio_common.a")
add_velox_dependency(functions::prestosql::types "${VELOX_COMPONENTS_PATH}/functions/prestosql/types/libvelox_presto_types.a")
Expand Down Expand Up @@ -154,9 +153,14 @@ macro(ADD_VELOX_DEPENDENCIES)
endmacro()

macro(find_libhdfs3)
# Find the libhdfs3 library
set(CMAKE_FIND_LIBRARY_SUFFIXES ".so")
find_library(LIBHDFS3_LIBRARY NAMES hdfs3 PATHS ${SYSTEM_LIB_PATH} ${SYSTEM_LIB64_PATH} ${SYSTEM_LIB_MULTIARCH_PATH} ${SYSTEM_LOCAL_LIB_PATH} ${SYSTEM_LOCAL_LIB64_PATH} NO_DEFAULT_PATH)
find_package(libhdfs3 CONFIG)
if(libhdfs3_FOUND AND TARGET HDFS::hdfs3)
set(LIBHDFS3_LIBRARY HDFS::hdfs3)
else()
set(CMAKE_FIND_LIBRARY_SUFFIXES ".so")
find_library(LIBHDFS3_LIBRARY NAMES hdfs3 PATHS ${SYSTEM_LIB_PATH} ${SYSTEM_LIB64_PATH} ${SYSTEM_LIB_MULTIARCH_PATH} ${SYSTEM_LOCAL_LIB_PATH} ${SYSTEM_LOCAL_LIB64_PATH} NO_DEFAULT_PATH)
endif()

if (NOT LIBHDFS3_LIBRARY)
message(FATAL_ERROR "LIBHDFS3 Library Not Found")
else()
Expand All @@ -165,7 +169,13 @@ macro(find_libhdfs3)
endmacro()

macro(find_re2)
find_library(RE2_LIBRARY NAMES re2 PATHS ${SYSTEM_LIB_PATH} ${SYSTEM_LIB64_PATH} ${SYSTEM_LIB_MULTIARCH_PATH} ${SYSTEM_LOCAL_LIB_PATH} ${SYSTEM_LOCAL_LIB64_PATH} NO_DEFAULT_PATH)
find_package(re2 CONFIG)
if(re2_FOUND AND TARGET re2::re2)
set(RE2_LIBRARY re2::re2)
else()
find_library(RE2_LIBRARY NAMES re2 PATHS ${SYSTEM_LIB_PATH} ${SYSTEM_LIB64_PATH} ${SYSTEM_LIB_MULTIARCH_PATH} ${SYSTEM_LOCAL_LIB_PATH} ${SYSTEM_LOCAL_LIB64_PATH} NO_DEFAULT_PATH)
endif()

if (NOT RE2_LIBRARY)
message(FATAL_ERROR "RE2 Library Not Found")
else()
Expand Down Expand Up @@ -198,6 +208,11 @@ set(VELOX_SRCS
)
add_library(velox SHARED ${VELOX_SRCS})

if(ENABLE_GLUTEN_VCPKG)
# Hide symbols of static dependencies
target_link_options(velox PRIVATE -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/symbols.map)
endif()

target_include_directories(velox PUBLIC
${CMAKE_SYSTEM_INCLUDE_PATH}
${JNI_INCLUDE_DIRS}
Expand All @@ -217,7 +232,12 @@ set_target_properties(velox PROPERTIES
## `-DCMAKE_PREFIX_PATH="${folly builded directory}" to the parameters of cmake.
## It is also applicable to other dependencies.
find_package(Folly REQUIRED CONFIG)
find_package(gflags REQUIRED COMPONENTS shared CONFIG)

if(ENABLE_GLUTEN_VCPKG)
find_package(gflags REQUIRED COMPONENTS static CONFIG)
else()
find_package(gflags REQUIRED COMPONENTS shared CONFIG)
endif()

target_include_directories(velox PUBLIC
${GTEST_INCLUDE_DIRS}
Expand All @@ -229,16 +249,28 @@ target_link_libraries(velox PUBLIC Folly::folly)
find_re2()
target_link_libraries(velox PUBLIC ${RE2_LIBRARY})

if(ENABLE_GLUTEN_VCPKG)
find_package(Thrift CONFIG)
else()
# Prefer the shared library on system.
set(ARROW_THRIFT_USE_SHARED ON)
find_package(Thrift)
endif()

if(Thrift_FOUND)
target_link_libraries(velox PUBLIC thrift::thrift)
else()
add_velox_dependency(thrift "${VELOX_BUILD_PATH}/third_party/arrow_ep/src/arrow_ep-build/thrift_ep-install/lib/libthrift.a")
endif()

if(BUILD_TESTS)
add_subdirectory(tests)
endif()

if(ENABLE_HDFS)
add_definitions(-DENABLE_HDFS)
find_libhdfs3()
add_library(libhdfs3 SHARED IMPORTED)
set_target_properties(libhdfs3 PROPERTIES IMPORTED_LOCATION "${LIBHDFS3_LIBRARY}")
target_link_libraries(velox PUBLIC libhdfs3)
target_link_libraries(velox PUBLIC "${LIBHDFS3_LIBRARY}")
endif()

if(BUILD_BENCHMARKS)
Expand Down
2 changes: 1 addition & 1 deletion cpp/velox/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ find_arrow_lib(${PARQUET_LIB_NAME})
function(add_velox_benchmark BM_EXEC BM_FILE)
add_executable(${BM_EXEC} ${BM_FILE} BenchmarkUtils.cc)
target_include_directories(${BM_EXEC} PRIVATE ${CMAKE_SOURCE_DIR}/velox ${CMAKE_SOURCE_DIR}/src)
target_link_libraries(${BM_EXEC} gflags_shared Arrow::parquet velox benchmark::benchmark)
target_link_libraries(${BM_EXEC} gflags Arrow::parquet velox benchmark::benchmark)
endfunction()

# Query benchmark
Expand Down
12 changes: 12 additions & 0 deletions cpp/velox/symbols.map
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
# Linker version script for the velox shared library; the build passes it via
# -Wl,--version-script when ENABLE_GLUTEN_VCPKG is enabled.
# Symbols matched under "global:" stay exported, everything else becomes local.
global:
# Patterns inside extern "C++" are matched against demangled C++ names:
# anything in the gluten:: and facebook::velox:: namespaces is exported.
extern "C++" {
*gluten::*;
*facebook::velox::*;
};

# C-linkage symbols whose names start with Java_io_gluten
# (presumably the JNI entry points - confirm against the JNI wrapper sources).
Java_io_gluten*;
local:
# Hide symbols of static dependencies
*;
};
4 changes: 2 additions & 2 deletions cpp/velox/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ function(add_velox_test TEST_EXEC)
endif()
add_executable(${TEST_EXEC} ${SOURCES})
target_include_directories(${TEST_EXEC} PRIVATE ${CMAKE_SOURCE_DIR}/velox ${CMAKE_SOURCE_DIR}/src)
target_link_libraries(${TEST_EXEC} gflags_shared Arrow::parquet velox gtest gtest_main glog::glog protobuf benchmark::benchmark)
target_link_libraries(${TEST_EXEC} gflags Arrow::parquet velox GTest::gtest GTest::gtest_main glog::glog benchmark::benchmark)
gtest_discover_tests(${TEST_EXEC})
endfunction()
endfunction()

# velox test
add_velox_test(velox_shuffle_writer_test SOURCES VeloxShuffleWriterTest.cc)
Expand Down
Loading

0 comments on commit eaa3a57

Please sign in to comment.