Pass Gluten build options through to vcpkg so that only the requested
optional features are installed.

Summary of what this patch does:
  * dev/builddeps-veloxbe.sh sources dev/vcpkg/env.sh with
    --build_tests / --enable_s3 / --enable_gcs / --enable_hdfs / --enable_abfs.
  * dev/vcpkg/env.sh forwards those arguments to init.sh and then exports the
    vcpkg environment itself (previously init.sh printed the exports on fd 3
    and the caller eval'ed them).
  * dev/vcpkg/init.sh maps every option that is ON to a
    `vcpkg install --x-feature=<name>` argument.
  * dev/vcpkg/toolchain.cmake stops setting VCPKG_MANIFEST_DIR, and
    dev/vcpkg/vcpkg.json reduces default-features to ["velox"].

Review notes:
  * NOTE(review): env.sh sets EXPORT_TOOLS_PATH to the protobuf tools directory
    only. init.sh still prepends VCPKG_CMAKE_BIN_DIR when it contains a cmake
    binary, but it runs as a child process, so that value never reaches the
    sourcing shell. Confirm that dropping the vcpkg-provided cmake from PATH
    is intended.
  * NOTE(review): the `shift` calls inside `for arg in "$@"` in init.sh do not
    affect the iteration; they are kept to leave the hunk sizes untouched.
  * The `index` blob ids describe the patch as originally generated; lines
    edited during review are not reflected in them.

diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh
index 7f1ce41a117d..0a07a568e769 100755
--- a/dev/builddeps-veloxbe.sh
+++ b/dev/builddeps-veloxbe.sh
@@ -171,8 +171,9 @@ function concat_velox_param {
 
 if [ "$ENABLE_VCPKG" = "ON" ]; then
     # vcpkg will install static depends and init build environment
-    envs="$("$GLUTEN_DIR/dev/vcpkg/init.sh")"
-    eval "$envs"
+    BUILD_OPTIONS="--build_tests=$BUILD_TESTS --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS \
+                   --enable_hdfs=$ENABLE_HDFS --enable_abfs=$ENABLE_ABFS"
+    source "$GLUTEN_DIR/dev/vcpkg/env.sh" ${BUILD_OPTIONS} # unquoted on purpose: one word per option
 fi
 
 if [ "$SPARK_VERSION" = "3.2" ] || [ "$SPARK_VERSION" = "3.3" ] \
@@ -196,8 +197,8 @@ function build_velox {
   cd $GLUTEN_DIR/ep/build-velox/src
   # When BUILD_TESTS is on for gluten cpp, we need turn on VELOX_BUILD_TEST_UTILS via build_test_utils.
   ./build_velox.sh --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS --build_type=$BUILD_TYPE --enable_hdfs=$ENABLE_HDFS \
-                   --enable_abfs=$ENABLE_ABFS --enable_ep_cache=$ENABLE_EP_CACHE --build_test_utils=$BUILD_TESTS --build_tests=$BUILD_VELOX_TESTS --build_benchmarks=$BUILD_VELOX_BENCHMARKS \
-                   --num_threads=$NUM_THREADS
+                   --enable_abfs=$ENABLE_ABFS --enable_ep_cache=$ENABLE_EP_CACHE --build_test_utils=$BUILD_TESTS \
+                   --build_tests=$BUILD_VELOX_TESTS --build_benchmarks=$BUILD_VELOX_BENCHMARKS --num_threads=$NUM_THREADS
 }
 
 function build_gluten_cpp {
diff --git a/dev/vcpkg/env.sh b/dev/vcpkg/env.sh
index 8b247a907a05..91aaa6a7d267 100755
--- a/dev/vcpkg/env.sh
+++ b/dev/vcpkg/env.sh
@@ -7,5 +7,25 @@ if [ -z "${BASH_SOURCE[0]}" ] || [ "$0" == "${BASH_SOURCE[0]}" ]; then
 fi
 
 SCRIPT_ROOT="$(realpath "$(dirname "${BASH_SOURCE[0]}")")"
-init_vcpkg_env=$("${SCRIPT_ROOT}/init.sh")
+init_vcpkg_env=$("${SCRIPT_ROOT}/init.sh" "$@")
 eval "$init_vcpkg_env"
+
+export VCPKG_ROOT="$SCRIPT_ROOT/.vcpkg"
+export VCPKG="$SCRIPT_ROOT/.vcpkg/vcpkg"
+export VCPKG_TRIPLET=x64-linux-avx
+export VCPKG_TRIPLET_INSTALL_DIR=${SCRIPT_ROOT}/vcpkg_installed/${VCPKG_TRIPLET}
+export EXPORT_TOOLS_PATH="${VCPKG_TRIPLET_INSTALL_DIR}/tools/protobuf"
+
+if [ "${GLUTEN_VCPKG_ENABLED:-}" != "${VCPKG_ROOT}" ]; then
+    export VCPKG_ROOT=${VCPKG_ROOT}
+    export VCPKG_MANIFEST_DIR=${SCRIPT_ROOT}
+    export VCPKG_TRIPLET=${VCPKG_TRIPLET}
+
+    export CMAKE_TOOLCHAIN_FILE=${SCRIPT_ROOT}/toolchain.cmake
+    export PKG_CONFIG_PATH=${VCPKG_TRIPLET_INSTALL_DIR}/lib/pkgconfig:${VCPKG_TRIPLET_INSTALL_DIR}/share/pkgconfig:${PKG_CONFIG_PATH:-}
+    export PATH="${EXPORT_TOOLS_PATH}:$PATH"
+
+    export GLUTEN_VCPKG_ENABLED=${VCPKG_ROOT}
+else
+    echo "Gluten's vcpkg environment is enabled" >&2
+fi
diff --git a/dev/vcpkg/init.sh b/dev/vcpkg/init.sh
index e69aec94ab1f..9beaa19526fe 100755
--- a/dev/vcpkg/init.sh
+++ b/dev/vcpkg/init.sh
@@ -4,9 +4,44 @@ set -e
 
 exec 3>&1 >&2
 
-SCRIPT_ROOT="$(realpath "$(dirname "$0")")"
-VCPKG_ROOT="$SCRIPT_ROOT/.vcpkg"
-VCPKG="$SCRIPT_ROOT/.vcpkg/vcpkg"
+BUILD_TESTS=OFF
+ENABLE_S3=OFF
+ENABLE_GCS=OFF
+ENABLE_HDFS=OFF
+ENABLE_ABFS=OFF
+
+for arg in "$@"; do
+  case $arg in
+    --build_tests=*)
+      BUILD_TESTS="${arg#*=}"
+      shift # Remove argument name from processing
+      ;;
+    --enable_s3=*)
+      ENABLE_S3="${arg#*=}"
+      shift # Remove argument name from processing
+      ;;
+    --enable_gcs=*)
+      ENABLE_GCS="${arg#*=}"
+      shift # Remove argument name from processing
+      ;;
+    --enable_hdfs=*)
+      ENABLE_HDFS="${arg#*=}"
+      shift # Remove argument name from processing
+      ;;
+    --enable_abfs=*)
+      ENABLE_ABFS="${arg#*=}"
+      shift # Remove argument name from processing
+      ;;
+    *)
+      echo "Unrecognized argument: $arg"
+      exit 1
+      ;;
+  esac
+done
+
+export SCRIPT_ROOT="$(realpath "$(dirname "$0")")"
+export VCPKG_ROOT="$SCRIPT_ROOT/.vcpkg"
+export VCPKG="$SCRIPT_ROOT/.vcpkg/vcpkg"
 VCPKG_TRIPLET=x64-linux-avx
 
 cd "$SCRIPT_ROOT"
@@ -19,12 +54,29 @@ fi
 sed -i "s/3.27.1/3.28.3/g" $VCPKG_ROOT/scripts/vcpkgTools.xml
 sed -i "s/192374a68e2971f04974a194645726196d9b8ee7abd650d1e6f65f7aa2ccc9b186c3edb473bb4958c764532edcdd42f4182ee1fcb86b17d78b0bcd6305ce3df1/bd311ca835ef0914952f21d70d1753564d58de2ede02e80ede96e78cd2f40b4189e006007643ebb37792e13edd97eb4a33810bc8aca1eab6dd428eaffe1d2e38/g" $VCPKG_ROOT/scripts/vcpkgTools.xml
 
+EXTRA_FEATURES=""
+if [ "$BUILD_TESTS" = "ON" ]; then
+    EXTRA_FEATURES+="--x-feature=duckdb "
+fi
+if [ "$ENABLE_S3" = "ON" ]; then
+    EXTRA_FEATURES+="--x-feature=velox-s3 "
+fi
+if [ "$ENABLE_GCS" = "ON" ]; then
+    EXTRA_FEATURES+="--x-feature=velox-gcs "
+fi
+if [ "$ENABLE_HDFS" = "ON" ]; then
+    EXTRA_FEATURES+="--x-feature=velox-hdfs "
+fi
+if [ "$ENABLE_ABFS" = "ON" ]; then
+    EXTRA_FEATURES+="--x-feature=velox-abfs"
+fi
+
+
 $VCPKG install --no-print-usage \
-    --triplet="${VCPKG_TRIPLET}" --host-triplet="${VCPKG_TRIPLET}"
+    --triplet="${VCPKG_TRIPLET}" --host-triplet="${VCPKG_TRIPLET}" ${EXTRA_FEATURES}
 
-VCPKG_TRIPLET_INSTALL_DIR=${SCRIPT_ROOT}/vcpkg_installed/${VCPKG_TRIPLET}
-EXPORT_TOOLS_PATH=
-EXPORT_TOOLS_PATH="${VCPKG_TRIPLET_INSTALL_DIR}/tools/protobuf:${EXPORT_TOOLS_PATH}"
+export VCPKG_TRIPLET_INSTALL_DIR=${SCRIPT_ROOT}/vcpkg_installed/${VCPKG_TRIPLET}
+EXPORT_TOOLS_PATH="${VCPKG_TRIPLET_INSTALL_DIR}/tools/protobuf"
 
 # This scripts depends on environment $CMAKE_TOOLCHAIN_FILE, which requires
 # cmake >= 3.21. If system cmake < 3.25, vcpkg will download latest cmake. We
@@ -34,7 +86,7 @@ if [ -f "$VCPKG_CMAKE_BIN_DIR/cmake" ]; then
     EXPORT_TOOLS_PATH="${VCPKG_CMAKE_BIN_DIR}:${EXPORT_TOOLS_PATH}"
 fi
 
-EXPORT_TOOLS_PATH=${EXPORT_TOOLS_PATH/%:/}
+export EXPORT_TOOLS_PATH=${EXPORT_TOOLS_PATH/%:/}
 
 # For fixing a build error like below when gluten's build type is Debug:
 # No rule to make target '/root/gluten/dev/vcpkg/vcpkg_installed/x64-linux-avx/debug/lib/libz.a',
@@ -45,20 +97,6 @@ cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libssl.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib
 cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libcrypto.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib
 cp $VCPKG_TRIPLET_INSTALL_DIR/lib/liblzma.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib
 cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libdwarf.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib
-cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libhdfs3.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib
+# Allow libhdfs3.a is not installed as build option may not enable hdfs.
+cp $VCPKG_TRIPLET_INSTALL_DIR/lib/libhdfs3.a $VCPKG_TRIPLET_INSTALL_DIR/debug/lib || true
 
-cat <<EOF >&3
-if [ "\${GLUTEN_VCPKG_ENABLED:-}" != "${VCPKG_ROOT}" ]; then
-    export VCPKG_ROOT=${VCPKG_ROOT}
-    export VCPKG_MANIFEST_DIR=${SCRIPT_ROOT}
-    export VCPKG_TRIPLET=${VCPKG_TRIPLET}
-
-    export CMAKE_TOOLCHAIN_FILE=${SCRIPT_ROOT}/toolchain.cmake
-    export PKG_CONFIG_PATH=${VCPKG_TRIPLET_INSTALL_DIR}/lib/pkgconfig:${VCPKG_TRIPLET_INSTALL_DIR}/share/pkgconfig:\${PKG_CONFIG_PATH:-}
-    export PATH="${EXPORT_TOOLS_PATH}:\$PATH"
-
-    export GLUTEN_VCPKG_ENABLED=${VCPKG_ROOT}
-else
-    echo "Gluten's vcpkg environment is enabled" >&2
-fi
-EOF
diff --git a/dev/vcpkg/toolchain.cmake b/dev/vcpkg/toolchain.cmake
index 13f556007067..21ff9090fb9e 100644
--- a/dev/vcpkg/toolchain.cmake
+++ b/dev/vcpkg/toolchain.cmake
@@ -1,5 +1,14 @@
+# This file will be used by cmake before cmake function `project(xxx)`
+# is executed, even though it's an external cmake project.
+
 set(ENABLE_GLUTEN_VCPKG ON)
-set(VCPKG_MANIFEST_DIR $ENV{VCPKG_MANIFEST_DIR})
+
+# If this arg is set, `vcpkg install` will be executed according
+# to the manifest file exists in this given path, i.e., vcpkg.json,
+# which will not respect our setting for extra features through
+# `--x-feature`.
+#set(VCPKG_MANIFEST_DIR $ENV{VCPKG_MANIFEST_DIR})
+
 set(VCPKG_TARGET_TRIPLET $ENV{VCPKG_TRIPLET})
 set(VCPKG_HOST_TRIPLET $ENV{VCPKG_TRIPLET})
 set(VCPKG_INSTALLED_DIR $ENV{VCPKG_MANIFEST_DIR}/vcpkg_installed)
@@ -14,4 +23,4 @@ set(CMAKE_EXE_LINKER_FLAGS "-static-libstdc++ -static-libgcc")
 set(CMAKE_SHARED_LINKER_FLAGS "-static-libstdc++ -static-libgcc")
 
 # Disable boost new version warning for FindBoost module
-set(Boost_NO_WARN_NEW_VERSIONS ON)
\ No newline at end of file
+set(Boost_NO_WARN_NEW_VERSIONS ON)
diff --git a/dev/vcpkg/vcpkg.json b/dev/vcpkg/vcpkg.json
index 618293ff63f2..b3d8dc2fcc0c 100644
--- a/dev/vcpkg/vcpkg.json
+++ b/dev/vcpkg/vcpkg.json
@@ -2,7 +2,7 @@
   "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg-tool/main/docs/vcpkg.schema.json",
   "builtin-baseline": "a7b6122f6b6504d16d96117336a0562693579933",
   "dependencies": ["jemalloc"],
-  "default-features": ["velox", "velox-s3", "velox-gcs", "velox-hdfs", "velox-abfs", "duckdb"],
+  "default-features": ["velox"],
   "features": {
     "velox": {
       "description": "Velox backend",