From 620afb8092ad532b20fc65b255719cae0ca93198 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Thu, 30 May 2024 17:08:16 +0800 Subject: [PATCH 1/8] Initial commit --- dev/build_arrow.sh | 52 +++++++++++++++++++++++++++++++++++ dev/build_helper_functions.sh | 25 +++++++++++++++++ dev/builddeps-veloxbe.sh | 10 +++++-- 3 files changed, 84 insertions(+), 3 deletions(-) create mode 100755 dev/build_arrow.sh create mode 100644 dev/build_helper_functions.sh diff --git a/dev/build_arrow.sh b/dev/build_arrow.sh new file mode 100755 index 000000000000..422f41735424 --- /dev/null +++ b/dev/build_arrow.sh @@ -0,0 +1,52 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +CURRENT_DIR=$(cd "$(dirname "$BASH_SOURCE")"; pwd) +source ${CURRENT_DIR}/build_helper_functions.sh +VELOX_ARROW_BUILD_VERSION=15.0.0 +ARROW_PREFIX=$CURRENT_DIR/arrow_ep +# Always uses BUNDLED in case of that thrift is not installed. 
+THRIFT_SOURCE="BUNDLED" +BUILD_TYPE=Release +if [ -n "$1" ]; then + BUILD_TYPE=$1 +fi +wget_and_untar https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz arrow_ep + +git apply $CURRENT_DIR/../ep/build_velox/src/modify_arrow.patch + +pushd $ARROW_PREFIX/cpp +cmake -DARROW_PARQUET=ON \ + -DARROW_FILESYSTEM=ON \ + -DARROW_PROTOBUF_USE_SHARED=OFF \ + -DARROW_WITH_THRIFT=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ + -DARROW_JEMALLOC=OFF \ + -DARROW_SIMD_LEVEL=NONE \ + -DARROW_RUNTIME_SIMD_LEVEL=NONE \ + -DARROW_WITH_UTF8PROC=OFF \ + -DARROW_TESTING=ON \ + -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DARROW_BUILD_STATIC=ON \ + -DThrift_SOURCE=${THRIFT_SOURCE} +make -j +make install +export ARROW_EP_PATH=$ARROW_PREFIX +popd diff --git a/dev/build_helper_functions.sh b/dev/build_helper_functions.sh new file mode 100644 index 000000000000..cd8f72fe8a66 --- /dev/null +++ b/dev/build_helper_functions.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +function wget_and_untar { + local URL=$1 + local DIR=$2 + mkdir -p "${DIR}" + pushd "${DIR}" + curl -L "${URL}" > $2.tar.gz + tar -xz --strip-components=1 -f $2.tar.gz + popd +} \ No newline at end of file diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 35976d37a036..98d44aa9f5ad 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -24,7 +24,6 @@ ENABLE_S3=OFF ENABLE_HDFS=OFF ENABLE_ABFS=OFF ENABLE_EP_CACHE=OFF -ARROW_ENABLE_CUSTOM_CODEC=OFF ENABLE_VCPKG=OFF RUN_SETUP_SCRIPT=ON VELOX_REPO="" @@ -72,12 +71,10 @@ do ;; --enable_qat=*) ENABLE_QAT=("${arg#*=}") - ARROW_ENABLE_CUSTOM_CODEC=("${arg#*=}") shift # Remove argument name from processing ;; --enable_iaa=*) ENABLE_IAA=("${arg#*=}") - ARROW_ENABLE_CUSTOM_CODEC=("${arg#*=}") shift # Remove argument name from processing ;; --enable_hbm=*) @@ -189,6 +186,12 @@ fi concat_velox_param +function build_arrow { + echo "Start to build Arrow" + cd $GLUTEN_DIR/dev + ./build_arrow.sh $BUILD_TYPE +} + function build_velox { echo "Start to build Velox" cd $GLUTEN_DIR/ep/build-velox/src @@ -213,6 +216,7 @@ function build_gluten_cpp { } function build_velox_backend { + build_arrow build_velox build_gluten_cpp } From efb68d08aaa8827e001a0c4cbace7c9c29152dfb Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 26 Jun 2024 11:08:28 +0800 Subject: [PATCH 2/8] Move to a separate file --- dev/build_arrow.sh | 84 +++++++++++++++++++++---------- dev/build_helper_functions.sh | 2 +- dev/builddeps-veloxbe.sh | 17 ++++--- ep/build-velox/src/build_velox.sh | 39 +------------- 4 files changed, 70 insertions(+), 72 deletions(-) diff --git a/dev/build_arrow.sh b/dev/build_arrow.sh index 422f41735424..65d0c6d2fed7 100755 --- a/dev/build_arrow.sh +++ b/dev/build_arrow.sh @@ -21,32 +21,62 @@ ARROW_PREFIX=$CURRENT_DIR/arrow_ep # Always uses BUNDLED in case of that thrift is not installed. 
THRIFT_SOURCE="BUNDLED" BUILD_TYPE=Release -if [ -n "$1" ]; then - BUILD_TYPE=$1 -fi + wget_and_untar https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz arrow_ep +cd arrow_ep/ +patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow.patch + +function build_arrow_cpp() { + if [ -n "$1" ]; then + BUILD_TYPE=$1 + fi + pushd $ARROW_PREFIX/cpp + cmake -DARROW_PARQUET=ON \ + -DARROW_FILESYSTEM=ON \ + -DARROW_PROTOBUF_USE_SHARED=OFF \ + -DARROW_WITH_THRIFT=ON \ + -DARROW_WITH_LZ4=ON \ + -DARROW_WITH_SNAPPY=ON \ + -DARROW_WITH_ZLIB=ON \ + -DARROW_WITH_ZSTD=ON \ + -DARROW_JEMALLOC=OFF \ + -DARROW_SIMD_LEVEL=NONE \ + -DARROW_RUNTIME_SIMD_LEVEL=NONE \ + -DARROW_WITH_UTF8PROC=OFF \ + -DARROW_TESTING=ON \ + -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DARROW_BUILD_STATIC=ON \ + -DThrift_SOURCE=${THRIFT_SOURCE} + make -j + make install + popd +} + +function build_arrow_java() { + ARROW_HOME="${VELOX_HOME}/_build/$COMPILE_TYPE/third_party/arrow_ep/src/arrow_ep" + ARROW_INSTALL_DIR="${ARROW_PREFIX}/install" + + pushd $ARROW_PREFIX/java + # Because arrow-bom module need the -DprocessAllModules + mvn versions:set -DnewVersion=15.0.0-gluten -DprocessAllModules + + mvn clean install -pl bom,maven/module-info-compiler-maven-plugin,vector -am \ + -DskipTests -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -Dassembly.skipAssembly + + # Arrow C Data Interface CPP libraries + mvn generate-resources -P generate-libs-cdata-all-os -Darrow.c.jni.dist.dir=$ARROW_INSTALL_DIR \ + -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -N + + # Arrow JNI Date Interface CPP libraries + export PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}} + mvn generate-resources -Pgenerate-libs-jni-macos-linux -N -Darrow.dataset.jni.dist.dir=$ARROW_INSTALL_DIR \ + -DARROW_GANDIVA=OFF -DARROW_JAVA_JNI_ENABLE_GANDIVA=OFF -DARROW_ORC=OFF 
-DARROW_JAVA_JNI_ENABLE_ORC=OFF \ + -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -N -git apply $CURRENT_DIR/../ep/build_velox/src/modify_arrow.patch - -pushd $ARROW_PREFIX/cpp -cmake -DARROW_PARQUET=ON \ - -DARROW_FILESYSTEM=ON \ - -DARROW_PROTOBUF_USE_SHARED=OFF \ - -DARROW_WITH_THRIFT=ON \ - -DARROW_WITH_LZ4=ON \ - -DARROW_WITH_SNAPPY=ON \ - -DARROW_WITH_ZLIB=ON \ - -DARROW_WITH_ZSTD=ON \ - -DARROW_JEMALLOC=OFF \ - -DARROW_SIMD_LEVEL=NONE \ - -DARROW_RUNTIME_SIMD_LEVEL=NONE \ - -DARROW_WITH_UTF8PROC=OFF \ - -DARROW_TESTING=ON \ - -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ - -DARROW_BUILD_STATIC=ON \ - -DThrift_SOURCE=${THRIFT_SOURCE} -make -j -make install -export ARROW_EP_PATH=$ARROW_PREFIX -popd + # Arrow Java libraries + mvn install -Parrow-jni -P arrow-c-data -pl c,dataset -am \ + -Darrow.c.jni.dist.dir=$ARROW_INSTALL_DIR/lib -Darrow.dataset.jni.dist.dir=$ARROW_INSTALL_DIR/lib -Darrow.cpp.build.dir=$ARROW_INSTALL_DIR/lib \ + -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -Dassembly.skipAssembly + popd +} diff --git a/dev/build_helper_functions.sh b/dev/build_helper_functions.sh index cd8f72fe8a66..57ee545f4543 100644 --- a/dev/build_helper_functions.sh +++ b/dev/build_helper_functions.sh @@ -22,4 +22,4 @@ function wget_and_untar { curl -L "${URL}" > $2.tar.gz tar -xz --strip-components=1 -f $2.tar.gz popd -} \ No newline at end of file +} diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 98d44aa9f5ad..bb45c8211c73 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -30,7 +30,7 @@ VELOX_REPO="" VELOX_BRANCH="" VELOX_HOME="" VELOX_PARAMETER="" -COMPILE_ARROW_JAVA=ON +BUILD_ARROW=ON SPARK_VERSION=ALL # set default number of threads as cpu cores minus 2 @@ -133,8 +133,8 @@ do BUILD_VELOX_BENCHMARKS=("${arg#*=}") shift # Remove argument name from processing ;; - --compile_arrow_java=*) - COMPILE_ARROW_JAVA=("${arg#*=}") + 
--build_arrow=*) + BUILD_ARROW=("${arg#*=}") shift # Remove argument name from processing ;; --num_threads=*) @@ -186,10 +186,13 @@ fi concat_velox_param + + function build_arrow { echo "Start to build Arrow" - cd $GLUTEN_DIR/dev - ./build_arrow.sh $BUILD_TYPE + source $GLUTEN_DIR/dev/build_arrow.sh + build_arrow_cpp $BUILD_TYPE + build_arrow_java } function build_velox { @@ -216,7 +219,9 @@ function build_gluten_cpp { } function build_velox_backend { - build_arrow + if [ $BUILD_ARROW == "ON" ]; then + build_arrow + fi build_velox build_gluten_cpp } diff --git a/ep/build-velox/src/build_velox.sh b/ep/build-velox/src/build_velox.sh index 0224e9546861..97b3db5549bc 100755 --- a/ep/build-velox/src/build_velox.sh +++ b/ep/build-velox/src/build_velox.sh @@ -34,7 +34,6 @@ ENABLE_TESTS=OFF # Set to ON for gluten cpp test build. BUILD_TEST_UTILS=OFF RUN_SETUP_SCRIPT=ON -COMPILE_ARROW_JAVA=ON NUM_THREADS="" OTHER_ARGUMENTS="" @@ -87,10 +86,6 @@ for arg in "$@"; do RUN_SETUP_SCRIPT=("${arg#*=}") shift # Remove argument name from processing ;; - --compile_arrow_java=*) - COMPILE_ARROW_JAVA=("${arg#*=}") - shift # Remove argument name from processing - ;; --num_threads=*) NUM_THREADS=("${arg#*=}") shift # Remove argument name from processing @@ -191,7 +186,7 @@ function get_build_summary { echo "ENABLE_S3=$ENABLE_S3,ENABLE_GCS=$ENABLE_GCS,ENABLE_HDFS=$ENABLE_HDFS,ENABLE_ABFS=$ENABLE_ABFS,\ BUILD_TYPE=$BUILD_TYPE,VELOX_HOME=$VELOX_HOME,ENABLE_BENCHMARK=$ENABLE_BENCHMARK,\ ENABLE_TESTS=$ENABLE_TESTS,BUILD_TEST_UTILS=$BUILD_TEST_UTILS,\ -COMPILE_ARROW_JAVA=$COMPILE_ARROW_JAVA,OTHER_ARGUMENTS=$OTHER_ARGUMENTS,COMMIT_HASH=$COMMIT_HASH" +OTHER_ARGUMENTS=$OTHER_ARGUMENTS,COMMIT_HASH=$COMMIT_HASH" } function check_commit { @@ -277,34 +272,6 @@ function setup_linux { fi } -function compile_arrow_java_module() { - ARROW_HOME="${VELOX_HOME}/_build/$COMPILE_TYPE/third_party/arrow_ep/src/arrow_ep" - ARROW_INSTALL_DIR="${ARROW_HOME}/../../install" - - pushd $ARROW_HOME/java - # Because 
arrow-bom module need the -DprocessAllModules - mvn versions:set -DnewVersion=15.0.0-gluten -DprocessAllModules - - mvn clean install -pl bom,maven/module-info-compiler-maven-plugin,vector -am \ - -DskipTests -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -Dassembly.skipAssembly - - # Arrow C Data Interface CPP libraries - mvn generate-resources -P generate-libs-cdata-all-os -Darrow.c.jni.dist.dir=$ARROW_INSTALL_DIR \ - -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -N - - # Arrow JNI Date Interface CPP libraries - export PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}} - mvn generate-resources -Pgenerate-libs-jni-macos-linux -N -Darrow.dataset.jni.dist.dir=$ARROW_INSTALL_DIR \ - -DARROW_GANDIVA=OFF -DARROW_JAVA_JNI_ENABLE_GANDIVA=OFF -DARROW_ORC=OFF -DARROW_JAVA_JNI_ENABLE_ORC=OFF \ - -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -N - - # Arrow Java libraries - mvn install -Parrow-jni -P arrow-c-data -pl c,dataset -am \ - -Darrow.c.jni.dist.dir=$ARROW_INSTALL_DIR/lib -Darrow.dataset.jni.dist.dir=$ARROW_INSTALL_DIR/lib -Darrow.cpp.build.dir=$ARROW_INSTALL_DIR/lib \ - -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -Dassembly.skipAssembly - popd -} - CURRENT_DIR=$( cd "$(dirname "$BASH_SOURCE")" pwd @@ -334,9 +301,5 @@ echo "Target Velox build: $TARGET_BUILD_SUMMARY" check_commit compile -if [ $COMPILE_ARROW_JAVA == "ON" ]; then - compile_arrow_java_module -fi - echo "Successfully built Velox from Source." 
echo $TARGET_BUILD_SUMMARY >"${VELOX_HOME}/velox-build.cache" From 5896fc28576d5b9d9a66a6e01a2c9e2248f2c9cc Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 26 Jun 2024 12:01:45 +0800 Subject: [PATCH 3/8] Use a helper function --- dev/build_arrow.sh | 6 +-- dev/build_helper_functions.sh | 93 +++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 4 deletions(-) diff --git a/dev/build_arrow.sh b/dev/build_arrow.sh index 65d0c6d2fed7..22388be28e02 100755 --- a/dev/build_arrow.sh +++ b/dev/build_arrow.sh @@ -31,7 +31,8 @@ function build_arrow_cpp() { BUILD_TYPE=$1 fi pushd $ARROW_PREFIX/cpp - cmake -DARROW_PARQUET=ON \ + cmake_install \ + -DARROW_PARQUET=ON \ -DARROW_FILESYSTEM=ON \ -DARROW_PROTOBUF_USE_SHARED=OFF \ -DARROW_WITH_THRIFT=ON \ @@ -48,13 +49,10 @@ function build_arrow_cpp() { -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DARROW_BUILD_STATIC=ON \ -DThrift_SOURCE=${THRIFT_SOURCE} - make -j - make install popd } function build_arrow_java() { - ARROW_HOME="${VELOX_HOME}/_build/$COMPILE_TYPE/third_party/arrow_ep/src/arrow_ep" ARROW_INSTALL_DIR="${ARROW_PREFIX}/install" pushd $ARROW_PREFIX/java diff --git a/dev/build_helper_functions.sh b/dev/build_helper_functions.sh index 57ee545f4543..221f01bf6fb5 100644 --- a/dev/build_helper_functions.sh +++ b/dev/build_helper_functions.sh @@ -14,6 +14,73 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+function get_cxx_flags { + local CPU_ARCH=$1 + + local OS + OS=$(uname) + local MACHINE + MACHINE=$(uname -m) + ADDITIONAL_FLAGS="" + + if [[ -z "$CPU_ARCH" ]] || [[ $CPU_ARCH == "unknown" ]]; then + if [ "$OS" = "Darwin" ]; then + + if [ "$MACHINE" = "x86_64" ]; then + local CPU_CAPABILITIES + CPU_CAPABILITIES=$(sysctl -a | grep machdep.cpu.features | awk '{print tolower($0)}') + + if [[ $CPU_CAPABILITIES =~ "avx" ]]; then + CPU_ARCH="avx" + else + CPU_ARCH="sse" + fi + + elif [[ $(sysctl -a | grep machdep.cpu.brand_string) =~ "Apple" ]]; then + # Apple silicon. + CPU_ARCH="arm64" + fi + + # On MacOs prevent the flood of translation visibility settings warnings. + ADDITIONAL_FLAGS="-fvisibility=hidden -fvisibility-inlines-hidden" + else [ "$OS" = "Linux" ]; + + local CPU_CAPABILITIES + CPU_CAPABILITIES=$(cat /proc/cpuinfo | grep flags | head -n 1| awk '{print tolower($0)}') + + if [[ "$CPU_CAPABILITIES" =~ "avx" ]]; then + CPU_ARCH="avx" + elif [[ "$CPU_CAPABILITIES" =~ "sse" ]]; then + CPU_ARCH="sse" + elif [ "$MACHINE" = "aarch64" ]; then + CPU_ARCH="aarch64" + fi + fi + fi + + case $CPU_ARCH in + + "arm64") + echo -n "-mcpu=apple-m1+crc -std=c++17 -fvisibility=hidden $ADDITIONAL_FLAGS" + ;; + + "avx") + echo -n "-mavx2 -mfma -mavx -mf16c -mlzcnt -std=c++17 -mbmi2 $ADDITIONAL_FLAGS" + ;; + + "sse") + echo -n "-msse4.2 -std=c++17 $ADDITIONAL_FLAGS" + ;; + + "aarch64") + echo -n "-mcpu=neoverse-n1 -std=c++17 $ADDITIONAL_FLAGS" + ;; + *) + echo -n "Architecture not supported!" 
+ esac + +} + function wget_and_untar { local URL=$1 local DIR=$2 @@ -23,3 +90,29 @@ function wget_and_untar { tar -xz --strip-components=1 -f $2.tar.gz popd } + +function cmake_install { + local NAME=$(basename "$(pwd)") + local BINARY_DIR=_build + SUDO="${SUDO:-""}" + if [ -d "${BINARY_DIR}" ] && prompt "Do you want to rebuild ${NAME}?"; then + ${SUDO} rm -rf "${BINARY_DIR}" + fi + mkdir -p "${BINARY_DIR}" + CPU_TARGET="${CPU_TARGET:-unknown}" + COMPILER_FLAGS=$(get_cxx_flags $CPU_TARGET) + + # CMAKE_POSITION_INDEPENDENT_CODE is required so that Velox can be built into dynamic libraries \ + cmake -Wno-dev -B"${BINARY_DIR}" \ + -GNinja \ + -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + -DCMAKE_CXX_STANDARD=17 \ + "${INSTALL_PREFIX+-DCMAKE_PREFIX_PATH=}${INSTALL_PREFIX-}" \ + "${INSTALL_PREFIX+-DCMAKE_INSTALL_PREFIX=}${INSTALL_PREFIX-}" \ + -DCMAKE_CXX_FLAGS="$COMPILER_FLAGS" \ + -DBUILD_TESTING=OFF \ + "$@" + + cmake --build "${BINARY_DIR}" + ${SUDO} cmake --install "${BINARY_DIR}" +} From 1ad36cd2b02a3eb01131af9383a778ea7bfc1e3e Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 26 Jun 2024 13:26:57 +0800 Subject: [PATCH 4/8] Install to system --- dev/build_arrow.sh | 5 ++++- dev/builddeps-veloxbe.sh | 6 +++--- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/dev/build_arrow.sh b/dev/build_arrow.sh index 22388be28e02..4c456ed536c8 100755 --- a/dev/build_arrow.sh +++ b/dev/build_arrow.sh @@ -22,9 +22,11 @@ ARROW_PREFIX=$CURRENT_DIR/arrow_ep THRIFT_SOURCE="BUNDLED" BUILD_TYPE=Release +sudo rm -r arrow_ep/ wget_and_untar https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz arrow_ep cd arrow_ep/ patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow.patch +patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow_dataset_scan_option.patch function build_arrow_cpp() { if [ -n "$1" ]; then @@ -45,7 +47,8 @@ function build_arrow_cpp() { -DARROW_RUNTIME_SIMD_LEVEL=NONE \ 
-DARROW_WITH_UTF8PROC=OFF \ -DARROW_TESTING=ON \ - -DCMAKE_INSTALL_PREFIX=${ARROW_PREFIX}/install \ + # Install to system path + -DCMAKE_INSTALL_PREFIX=/usr/local \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DARROW_BUILD_STATIC=ON \ -DThrift_SOURCE=${THRIFT_SOURCE} diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index bb45c8211c73..818c0247bb19 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -186,13 +186,13 @@ fi concat_velox_param - - function build_arrow { echo "Start to build Arrow" source $GLUTEN_DIR/dev/build_arrow.sh build_arrow_cpp $BUILD_TYPE + echo "Finished building arrow CPP" build_arrow_java + echo "Finished building arrow Java" } function build_velox { @@ -202,7 +202,7 @@ function build_velox { # When BUILD_TESTS is on for gluten cpp, we need turn on VELOX_BUILD_TEST_UTILS via build_test_utils. ./build_velox.sh --run_setup_script=$RUN_SETUP_SCRIPT --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS --build_type=$BUILD_TYPE --enable_hdfs=$ENABLE_HDFS \ --enable_abfs=$ENABLE_ABFS --enable_ep_cache=$ENABLE_EP_CACHE --build_test_utils=$BUILD_TESTS --build_tests=$BUILD_VELOX_TESTS --build_benchmarks=$BUILD_VELOX_BENCHMARKS \ - --compile_arrow_java=$COMPILE_ARROW_JAVA --num_threads=$NUM_THREADS + --num_threads=$NUM_THREADS } ## compile gluten cpp From 58df82bc77c7f64ad1ae2830067bbbc147a6f73f Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 26 Jun 2024 13:46:20 +0800 Subject: [PATCH 5/8] Fix build --- dev/build_arrow.sh | 4 ++-- dev/builddeps-veloxbe.sh | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dev/build_arrow.sh b/dev/build_arrow.sh index 4c456ed536c8..dff0d087d976 100755 --- a/dev/build_arrow.sh +++ b/dev/build_arrow.sh @@ -22,7 +22,7 @@ ARROW_PREFIX=$CURRENT_DIR/arrow_ep THRIFT_SOURCE="BUNDLED" BUILD_TYPE=Release -sudo rm -r arrow_ep/ +sudo rm -rf arrow_ep/ wget_and_untar https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz 
arrow_ep cd arrow_ep/ patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow.patch @@ -33,6 +33,7 @@ function build_arrow_cpp() { BUILD_TYPE=$1 fi pushd $ARROW_PREFIX/cpp + cmake_install \ -DARROW_PARQUET=ON \ -DARROW_FILESYSTEM=ON \ @@ -47,7 +48,6 @@ function build_arrow_cpp() { -DARROW_RUNTIME_SIMD_LEVEL=NONE \ -DARROW_WITH_UTF8PROC=OFF \ -DARROW_TESTING=ON \ - # Install to system path -DCMAKE_INSTALL_PREFIX=/usr/local \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DARROW_BUILD_STATIC=ON \ diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 818c0247bb19..61e6dc1c62f9 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -188,7 +188,9 @@ concat_velox_param function build_arrow { echo "Start to build Arrow" - source $GLUTEN_DIR/dev/build_arrow.sh + export SUDO=sudo + cd $GLUTEN_DIR/dev + source build_arrow.sh build_arrow_cpp $BUILD_TYPE echo "Finished building arrow CPP" build_arrow_java From cb15f48fcbba1d10018e84dcf567bc0a5c41211b Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 26 Jun 2024 20:48:37 +0800 Subject: [PATCH 6/8] Fix CI --- .github/workflows/velox_docker.yml | 9 ++-- dev/build_arrow.sh | 21 ++++++--- dev/builddeps-veloxbe.sh | 1 + .../src/build_arrow_deps_centos8.sh | 45 ------------------- 4 files changed, 19 insertions(+), 57 deletions(-) delete mode 100755 ep/build-velox/src/build_arrow_deps_centos8.sh diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml index 31796c15bdd5..a8d2cf63f289 100644 --- a/.github/workflows/velox_docker.yml +++ b/.github/workflows/velox_docker.yml @@ -600,13 +600,10 @@ jobs: - name: Build Gluten Velox third party if: ${{ steps.cache.outputs.cache-hit != 'true' }} run: | - cd ep/build-velox/src - ./get_velox.sh source /opt/rh/gcc-toolset-9/enable - ./build_arrow_deps_centos8.sh - ./build_velox.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_test_utils=ON - cd $GITHUB_WORKSPACE/cpp - ./compile.sh --build_velox_backend=ON --build_protobuf=ON 
--build_tests=ON --build_examples=ON --build_benchmarks=ON + source ./dev/build_arrow.sh + install_arrow_deps + ./dev/builddeps-veloxbe.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_tests=ON --build_examples=ON --build_benchmarks=ON - uses: actions/upload-artifact@v2 with: name: velox-native-lib-centos-8-${{github.sha}} diff --git a/dev/build_arrow.sh b/dev/build_arrow.sh index dff0d087d976..a822c4119ea0 100755 --- a/dev/build_arrow.sh +++ b/dev/build_arrow.sh @@ -22,11 +22,20 @@ ARROW_PREFIX=$CURRENT_DIR/arrow_ep THRIFT_SOURCE="BUNDLED" BUILD_TYPE=Release -sudo rm -rf arrow_ep/ -wget_and_untar https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz arrow_ep -cd arrow_ep/ -patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow.patch -patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow_dataset_scan_option.patch +function prepare_arrow_build() { + sudo rm -rf arrow_ep/ + wget_and_untar https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz arrow_ep + cd arrow_ep/ + patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow.patch + patch -p1 < $CURRENT_DIR/../ep/build-velox/src/modify_arrow_dataset_scan_option.patch +} + +function install_arrow_deps { + wget_and_untar https://github.com/openssl/openssl/archive/refs/tags/OpenSSL_1_1_1s.tar.gz openssl + pushd openssl + ./config no-shared && make depend && make && sudo make install + popd +} function build_arrow_cpp() { if [ -n "$1" ]; then @@ -76,7 +85,7 @@ function build_arrow_java() { -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -N # Arrow Java libraries - mvn install -Parrow-jni -P arrow-c-data -pl c,dataset -am \ + mvn install -Parrow-jni -P arrow-c-data -pl c,dataset -am \ -Darrow.c.jni.dist.dir=$ARROW_INSTALL_DIR/lib -Darrow.dataset.jni.dist.dir=$ARROW_INSTALL_DIR/lib -Darrow.cpp.build.dir=$ARROW_INSTALL_DIR/lib \ -Dmaven.test.skip 
-Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -Dassembly.skipAssembly popd diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 61e6dc1c62f9..4bd07ab24a5e 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -191,6 +191,7 @@ function build_arrow { export SUDO=sudo cd $GLUTEN_DIR/dev source build_arrow.sh + prepare_arrow_build build_arrow_cpp $BUILD_TYPE echo "Finished building arrow CPP" build_arrow_java diff --git a/ep/build-velox/src/build_arrow_deps_centos8.sh b/ep/build-velox/src/build_arrow_deps_centos8.sh deleted file mode 100755 index 8dfc2af9375e..000000000000 --- a/ep/build-velox/src/build_arrow_deps_centos8.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -exu - -NPROC=$(getconf _NPROCESSORS_ONLN) - -function wget_and_untar { - local URL=$1 - local DIR=$2 - mkdir -p "${DIR}" - pushd "${DIR}" - curl -L "${URL}" > $2.tar.gz - tar -xz --strip-components=1 -f $2.tar.gz - popd -} - -function install_openssl { - wget_and_untar https://github.com/openssl/openssl/archive/refs/tags/OpenSSL_1_1_1s.tar.gz openssl - cd openssl - ./config no-shared && make depend && make && sudo make install - cd .. 
-} - -function install_arrow_deps { - install_openssl -} - -# Activate gcc9; enable errors on unset variables afterwards. -# source /opt/rh/gcc-toolset-9/enable || exit 1 -install_arrow_deps -echo "All dependencies for Arrow installed!" From 05598b910d497fe033f8fa302b127db648714258 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Wed, 26 Jun 2024 22:32:57 +0800 Subject: [PATCH 7/8] Fix protobuf --- .github/workflows/velox_docker.yml | 3 ++- dev/builddeps-veloxbe.sh | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml index a8d2cf63f289..7bb01f6821f1 100644 --- a/.github/workflows/velox_docker.yml +++ b/.github/workflows/velox_docker.yml @@ -603,7 +603,8 @@ jobs: source /opt/rh/gcc-toolset-9/enable source ./dev/build_arrow.sh install_arrow_deps - ./dev/builddeps-veloxbe.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_tests=ON --build_examples=ON --build_benchmarks=ON + ./dev/builddeps-veloxbe.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_tests=ON \ + --build_examples=ON --build_benchmarks=ON --build_protobuf=ON - uses: actions/upload-artifact@v2 with: name: velox-native-lib-centos-8-${{github.sha}} diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 4bd07ab24a5e..d5e33e926d82 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -217,7 +217,8 @@ function build_gluten_cpp { cd build cmake -DBUILD_VELOX_BACKEND=ON -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ -DBUILD_TESTS=$BUILD_TESTS -DBUILD_EXAMPLES=$BUILD_EXAMPLES -DBUILD_BENCHMARKS=$BUILD_BENCHMARKS -DBUILD_JEMALLOC=$BUILD_JEMALLOC \ - -DENABLE_HBM=$ENABLE_HBM -DENABLE_QAT=$ENABLE_QAT -DENABLE_IAA=$ENABLE_IAA -DENABLE_GCS=$ENABLE_GCS -DENABLE_S3=$ENABLE_S3 -DENABLE_HDFS=$ENABLE_HDFS -DENABLE_ABFS=$ENABLE_ABFS .. 
+ -DENABLE_HBM=$ENABLE_HBM -DENABLE_QAT=$ENABLE_QAT -DENABLE_IAA=$ENABLE_IAA -DBUILD_PROTOBUF=$BUILD_PROTOBUF -DENABLE_GCS=$ENABLE_GCS \ + -DENABLE_S3=$ENABLE_S3 -DENABLE_HDFS=$ENABLE_HDFS -DENABLE_ABFS=$ENABLE_ABFS .. make -j $NUM_THREADS } From e836e03ed51f855ab77f8628131a9dca006ac287 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Thu, 27 Jun 2024 18:03:49 +0800 Subject: [PATCH 8/8] Allow finding from system --- cpp/CMake/ConfigArrow.cmake | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/CMake/ConfigArrow.cmake b/cpp/CMake/ConfigArrow.cmake index 8f036be53411..110836347cac 100644 --- a/cpp/CMake/ConfigArrow.cmake +++ b/cpp/CMake/ConfigArrow.cmake @@ -33,11 +33,12 @@ function(FIND_ARROW_LIB LIB_NAME) set(ARROW_LIB_FULL_NAME ${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX}) add_library(Arrow::${LIB_NAME} SHARED IMPORTED) + # Prefer the lib from velox's arrow build path: HINTS are searched before the + # system locations. If it is not found there, fall back to the system. find_library( ARROW_LIB_${LIB_NAME} NAMES ${ARROW_LIB_FULL_NAME} - PATHS ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR} - NO_DEFAULT_PATH) + HINTS ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR}) if(NOT ARROW_LIB_${LIB_NAME}) message(FATAL_ERROR "Arrow library Not Found: ${ARROW_LIB_FULL_NAME}") else()