Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[VL] Support building arrow CPP and finding installed arrow libs from system #6229

Merged
merged 8 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 4 additions & 6 deletions .github/workflows/velox_docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -600,13 +600,11 @@ jobs:
- name: Build Gluten Velox third party
if: ${{ steps.cache.outputs.cache-hit != 'true' }}
run: |
cd ep/build-velox/src
./get_velox.sh
source /opt/rh/gcc-toolset-9/enable
./build_arrow_deps_centos8.sh
./build_velox.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_test_utils=ON
cd $GITHUB_WORKSPACE/cpp
./compile.sh --build_velox_backend=ON --build_protobuf=ON --build_tests=ON --build_examples=ON --build_benchmarks=ON
source ./dev/build_arrow.sh
install_arrow_deps
./dev/builddeps-veloxbe.sh --run_setup_script=OFF --enable_ep_cache=OFF --build_tests=ON \
--build_examples=ON --build_benchmarks=ON --build_protobuf=ON
- uses: actions/upload-artifact@v2
with:
name: velox-native-lib-centos-8-${{github.sha}}
Expand Down
5 changes: 3 additions & 2 deletions cpp/CMake/ConfigArrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@ function(FIND_ARROW_LIB LIB_NAME)
set(ARROW_LIB_FULL_NAME
${CMAKE_SHARED_LIBRARY_PREFIX}${LIB_NAME}${ARROW_SHARED_LIBRARY_SUFFIX})
add_library(Arrow::${LIB_NAME} SHARED IMPORTED)
# First look for the library in Velox's Arrow build path. If it is not found
# there, fall back to the system library paths.
find_library(
ARROW_LIB_${LIB_NAME}
NAMES ${ARROW_LIB_FULL_NAME}
PATHS ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR}
NO_DEFAULT_PATH)
PATHS ${ARROW_LIB_DIR} ${ARROW_LIB64_DIR})
if(NOT ARROW_LIB_${LIB_NAME})
message(FATAL_ERROR "Arrow library Not Found: ${ARROW_LIB_FULL_NAME}")
else()
Expand Down
92 changes: 92 additions & 0 deletions dev/build_arrow.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Absolute path of the directory holding this script, so the script can be
# sourced from any working directory. `&&` keeps a failed cd from silently
# resolving to the caller's current directory.
CURRENT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Shared helpers (wget_and_untar, cmake_install) used by the functions below.
source "${CURRENT_DIR}/build_helper_functions.sh"
# Arrow release that is downloaded and built.
VELOX_ARROW_BUILD_VERSION=15.0.0
# Root of the Arrow source tree used by build_arrow_cpp and build_arrow_java.
ARROW_PREFIX=$CURRENT_DIR/arrow_ep
# Always use the BUNDLED Thrift in case Thrift is not installed on the system.
THRIFT_SOURCE="BUNDLED"
# Default CMake build type; build_arrow_cpp overrides it when given an argument.
BUILD_TYPE=Release

# Downloads the Arrow ${VELOX_ARROW_BUILD_VERSION} source release into
# ./arrow_ep (relative to the current working directory) and applies Gluten's
# two Arrow patches to it.
#
# Any existing ./arrow_ep is removed first (with sudo). On success the working
# directory is left inside arrow_ep/. Returns non-zero if the tree cannot be
# entered or a patch fails to apply.
function prepare_arrow_build() {
  local patch_dir="$CURRENT_DIR/../ep/build-velox/src"

  sudo rm -rf arrow_ep/
  wget_and_untar "https://archive.apache.org/dist/arrow/arrow-${VELOX_ARROW_BUILD_VERSION}/apache-arrow-${VELOX_ARROW_BUILD_VERSION}.tar.gz" arrow_ep

  # Never run patch outside the Arrow tree: bail out if it cannot be entered.
  cd arrow_ep/ || return 1
  # Stop at the first patch that does not apply instead of stacking the second
  # patch on a half-patched tree.
  patch -p1 < "$patch_dir/modify_arrow.patch" || return 1
  patch -p1 < "$patch_dir/modify_arrow_dataset_scan_option.patch"
}

# Downloads OpenSSL 1.1.1s into ./openssl, builds it as static libraries
# (no-shared) and installs it with sudo.
#
# Returns the status of the configure/build/install chain, so a failed build
# is reported to the caller instead of being hidden by the final popd.
function install_arrow_deps {
  wget_and_untar https://github.com/openssl/openssl/archive/refs/tags/OpenSSL_1_1_1s.tar.gz openssl
  pushd openssl || return 1
  ./config no-shared && make depend && make && sudo make install
  local status=$?
  popd
  return $status
}

# Configures, builds and installs Arrow C++ from $ARROW_PREFIX/cpp through
# cmake_install, installing into /usr/local.
#
# Arguments:
#   $1 - optional CMake build type; when non-empty it replaces the global
#        BUILD_TYPE.
# Returns the status of cmake_install, or 1 if the source directory cannot be
# entered.
function build_arrow_cpp() {
  # ${1:-} keeps the check safe when the caller runs with `set -u` and passes
  # no argument.
  if [ -n "${1:-}" ]; then
    BUILD_TYPE=$1
  fi
  # Do not run cmake_install in whatever directory we happen to be in.
  pushd "$ARROW_PREFIX/cpp" || return 1

  cmake_install \
    -DARROW_PARQUET=ON \
    -DARROW_FILESYSTEM=ON \
    -DARROW_PROTOBUF_USE_SHARED=OFF \
    -DARROW_WITH_THRIFT=ON \
    -DARROW_WITH_LZ4=ON \
    -DARROW_WITH_SNAPPY=ON \
    -DARROW_WITH_ZLIB=ON \
    -DARROW_WITH_ZSTD=ON \
    -DARROW_JEMALLOC=OFF \
    -DARROW_SIMD_LEVEL=NONE \
    -DARROW_RUNTIME_SIMD_LEVEL=NONE \
    -DARROW_WITH_UTF8PROC=OFF \
    -DARROW_TESTING=ON \
    -DCMAKE_INSTALL_PREFIX=/usr/local \
    -DCMAKE_BUILD_TYPE="${BUILD_TYPE}" \
    -DARROW_BUILD_STATIC=ON \
    -DThrift_SOURCE="${THRIFT_SOURCE}"
  # Remember the build result; popd would otherwise overwrite it.
  local status=$?
  popd
  return $status
}

# Builds and installs the Arrow Java artifacts (re-versioned with a "-gluten"
# suffix) together with their native C Data and Dataset JNI libraries, from
# $ARROW_PREFIX/java.
#
# Sets the global ARROW_INSTALL_DIR and exports PKG_CONFIG_PATH. The Maven
# steps run as one chain: the first failure stops the sequence and its status
# is returned after the directory stack is restored.
function build_arrow_java() {
  ARROW_INSTALL_DIR="${ARROW_PREFIX}/install"

  pushd "$ARROW_PREFIX/java" || return 1
  local status=0
  {
    # -DprocessAllModules is needed so that the arrow-bom module is covered.
    # The version is derived from VELOX_ARROW_BUILD_VERSION so it cannot drift
    # from the Arrow release that was downloaded.
    mvn versions:set -DnewVersion="${VELOX_ARROW_BUILD_VERSION}-gluten" -DprocessAllModules &&

    mvn clean install -pl bom,maven/module-info-compiler-maven-plugin,vector -am \
      -DskipTests -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -Dassembly.skipAssembly &&

    # Arrow C Data Interface CPP libraries
    mvn generate-resources -P generate-libs-cdata-all-os -Darrow.c.jni.dist.dir="$ARROW_INSTALL_DIR" \
      -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -N &&

    # Arrow Dataset JNI CPP libraries. /usr/local/lib64/pkgconfig is put first
    # on PKG_CONFIG_PATH, keeping any existing entries after it.
    export PKG_CONFIG_PATH=/usr/local/lib64/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}} &&
    mvn generate-resources -Pgenerate-libs-jni-macos-linux -N -Darrow.dataset.jni.dist.dir="$ARROW_INSTALL_DIR" \
      -DARROW_GANDIVA=OFF -DARROW_JAVA_JNI_ENABLE_GANDIVA=OFF -DARROW_ORC=OFF -DARROW_JAVA_JNI_ENABLE_ORC=OFF \
      -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip &&

    # Arrow Java libraries
    mvn install -Parrow-jni -P arrow-c-data -pl c,dataset -am \
      -Darrow.c.jni.dist.dir="$ARROW_INSTALL_DIR/lib" -Darrow.dataset.jni.dist.dir="$ARROW_INSTALL_DIR/lib" -Darrow.cpp.build.dir="$ARROW_INSTALL_DIR/lib" \
      -Dmaven.test.skip -Drat.skip -Dmaven.gitcommitid.skip -Dcheckstyle.skip -Dassembly.skipAssembly
  } || status=$?
  popd
  return $status
}
118 changes: 118 additions & 0 deletions dev/build_helper_functions.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Prints (without a trailing newline) the C++ compiler flags for a CPU
# architecture.
#
# Arguments:
#   $1 - architecture: "arm64", "avx", "sse" or "aarch64". When empty or
#        "unknown" the architecture is detected from the host (sysctl on
#        Darwin, /proc/cpuinfo and `uname -m` on Linux).
# Output:
#   stdout - the flag string for a supported architecture.
#   stderr - "Architecture not supported!" otherwise, with nothing on stdout,
#            so the message can never be captured and used as compiler flags.
# Returns 0 for a supported architecture, 1 otherwise.
# Side effect: sets the global ADDITIONAL_FLAGS.
function get_cxx_flags {
  local CPU_ARCH=${1:-}

  local OS
  OS=$(uname)
  local MACHINE
  MACHINE=$(uname -m)
  ADDITIONAL_FLAGS=""

  if [[ -z "$CPU_ARCH" ]] || [[ $CPU_ARCH == "unknown" ]]; then
    if [ "$OS" = "Darwin" ]; then

      if [ "$MACHINE" = "x86_64" ]; then
        local CPU_CAPABILITIES
        CPU_CAPABILITIES=$(sysctl -a | grep machdep.cpu.features | awk '{print tolower($0)}')

        if [[ $CPU_CAPABILITIES =~ "avx" ]]; then
          CPU_ARCH="avx"
        else
          CPU_ARCH="sse"
        fi

      elif [[ $(sysctl -a | grep machdep.cpu.brand_string) =~ "Apple" ]]; then
        # Apple silicon.
        CPU_ARCH="arm64"
      fi

      # On macOS, prevent the flood of translation visibility settings warnings.
      ADDITIONAL_FLAGS="-fvisibility=hidden -fvisibility-inlines-hidden"
    elif [ "$OS" = "Linux" ]; then
      # Was `else [ ... ];`, which evaluated the test and threw the result
      # away, so this branch ran on every non-Darwin system.

      local CPU_CAPABILITIES
      CPU_CAPABILITIES=$(grep flags /proc/cpuinfo | head -n 1 | awk '{print tolower($0)}')

      if [[ "$CPU_CAPABILITIES" =~ "avx" ]]; then
        CPU_ARCH="avx"
      elif [[ "$CPU_CAPABILITIES" =~ "sse" ]]; then
        CPU_ARCH="sse"
      elif [ "$MACHINE" = "aarch64" ]; then
        CPU_ARCH="aarch64"
      fi
    fi
  fi

  case $CPU_ARCH in

    "arm64")
      echo -n "-mcpu=apple-m1+crc -std=c++17 -fvisibility=hidden $ADDITIONAL_FLAGS"
      ;;

    "avx")
      echo -n "-mavx2 -mfma -mavx -mf16c -mlzcnt -std=c++17 -mbmi2 $ADDITIONAL_FLAGS"
      ;;

    "sse")
      echo -n "-msse4.2 -std=c++17 $ADDITIONAL_FLAGS"
      ;;

    "aarch64")
      echo -n "-mcpu=neoverse-n1 -std=c++17 $ADDITIONAL_FLAGS"
      ;;

    *)
      # Callers capture stdout as the flag string, so the diagnostic has to go
      # to stderr.
      echo -n "Architecture not supported!" >&2
      return 1
      ;;
  esac

}

# Downloads a gzip-compressed tarball and unpacks it into a directory, dropping
# the archive's top-level path component.
#
# Arguments:
#   $1 - URL of the .tar.gz archive.
#   $2 - target directory (created if missing). The downloaded archive is kept
#        inside it as <basename of $2>.tar.gz.
# Returns non-zero if the directory cannot be entered, the download fails
# (including HTTP errors, because of curl -f) or extraction fails.
function wget_and_untar {
  local URL=$1
  local DIR=$2
  # Name the archive after the last path component only: the file is written
  # from inside ${DIR}, so reusing the whole "$2" breaks for nested paths and
  # for paths with a trailing slash.
  local ARCHIVE
  ARCHIVE="$(basename "${DIR}").tar.gz"

  mkdir -p "${DIR}"
  pushd "${DIR}" || return 1
  curl -fL "${URL}" > "${ARCHIVE}" && tar -xz --strip-components=1 -f "${ARCHIVE}"
  # Remember the download/extract result; popd would otherwise overwrite it.
  local status=$?
  popd
  return $status
}

# Configures, builds and installs the CMake project in the current directory,
# using ./_build as the binary directory and Ninja as the generator.
#
# Arguments:
#   $@ - extra arguments appended to the cmake configure command line.
# Environment:
#   SUDO           - optional command prefix (e.g. "sudo") for removing the
#                    build directory and for the install step.
#   CPU_TARGET     - architecture passed to get_cxx_flags; defaults to "unknown".
#   INSTALL_PREFIX - when set, used for CMAKE_PREFIX_PATH and
#                    CMAKE_INSTALL_PREFIX.
# Sets the globals SUDO, CPU_TARGET and COMPILER_FLAGS.
# Returns non-zero as soon as flag detection, configure, build or install fails.
function cmake_install {
  local NAME
  NAME=$(basename "$(pwd)")
  local BINARY_DIR=_build
  SUDO="${SUDO:-""}"
  # `prompt` is not defined alongside these helpers; it is only consulted when
  # the sourcing environment provides it. Without it an existing build
  # directory is reused, which is what the failed lookup amounted to before,
  # minus the "command not found" noise.
  if [ -d "${BINARY_DIR}" ] && command -v prompt > /dev/null 2>&1 \
    && prompt "Do you want to rebuild ${NAME}?"; then
    ${SUDO} rm -rf "${BINARY_DIR}"
  fi
  mkdir -p "${BINARY_DIR}"
  CPU_TARGET="${CPU_TARGET:-unknown}"
  COMPILER_FLAGS=$(get_cxx_flags "$CPU_TARGET") || return 1

  # CMAKE_POSITION_INDEPENDENT_CODE is required so that Velox can be built into
  # dynamic libraries.
  cmake -Wno-dev -B"${BINARY_DIR}" \
    -GNinja \
    -DCMAKE_POSITION_INDEPENDENT_CODE=ON \
    -DCMAKE_CXX_STANDARD=17 \
    "${INSTALL_PREFIX+-DCMAKE_PREFIX_PATH=}${INSTALL_PREFIX-}" \
    "${INSTALL_PREFIX+-DCMAKE_INSTALL_PREFIX=}${INSTALL_PREFIX-}" \
    -DCMAKE_CXX_FLAGS="$COMPILER_FLAGS" \
    -DBUILD_TESTING=OFF \
    "$@" || return 1

  # Never install the output of a failed build.
  cmake --build "${BINARY_DIR}" || return 1
  ${SUDO} cmake --install "${BINARY_DIR}"
}
29 changes: 21 additions & 8 deletions dev/builddeps-veloxbe.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,13 @@ ENABLE_S3=OFF
ENABLE_HDFS=OFF
ENABLE_ABFS=OFF
ENABLE_EP_CACHE=OFF
ARROW_ENABLE_CUSTOM_CODEC=OFF
ENABLE_VCPKG=OFF
RUN_SETUP_SCRIPT=ON
VELOX_REPO=""
VELOX_BRANCH=""
VELOX_HOME=""
VELOX_PARAMETER=""
COMPILE_ARROW_JAVA=ON
BUILD_ARROW=ON
SPARK_VERSION=ALL

# set default number of threads as cpu cores minus 2
Expand Down Expand Up @@ -72,12 +71,10 @@ do
;;
--enable_qat=*)
ENABLE_QAT=("${arg#*=}")
ARROW_ENABLE_CUSTOM_CODEC=("${arg#*=}")
shift # Remove argument name from processing
;;
--enable_iaa=*)
ENABLE_IAA=("${arg#*=}")
ARROW_ENABLE_CUSTOM_CODEC=("${arg#*=}")
shift # Remove argument name from processing
;;
--enable_hbm=*)
Expand Down Expand Up @@ -136,8 +133,8 @@ do
BUILD_VELOX_BENCHMARKS=("${arg#*=}")
shift # Remove argument name from processing
;;
--compile_arrow_java=*)
COMPILE_ARROW_JAVA=("${arg#*=}")
--build_arrow=*)
BUILD_ARROW=("${arg#*=}")
shift # Remove argument name from processing
;;
--num_threads=*)
Expand Down Expand Up @@ -189,14 +186,26 @@ fi

concat_velox_param

# Builds Arrow C++ and then Arrow Java using the functions from
# $GLUTEN_DIR/dev/build_arrow.sh.
#
# Exports SUDO=sudo for the helper functions and changes the working directory
# to $GLUTEN_DIR/dev. Each step's status is checked right after it runs, so a
# "Finished" message is only printed for a step that succeeded; the first
# failing status is returned.
function build_arrow {
  echo "Start to build Arrow"
  export SUDO=sudo
  cd "$GLUTEN_DIR/dev" || return 1
  source build_arrow.sh
  local status=$?
  [ $status -eq 0 ] || return $status

  # The helpers are called as plain commands (not on the left of `||`) so that
  # an enclosing `set -e`, if any, stays in effect inside them.
  prepare_arrow_build
  status=$?
  [ $status -eq 0 ] || return $status

  build_arrow_cpp "$BUILD_TYPE"
  status=$?
  [ $status -eq 0 ] || return $status
  echo "Finished building arrow CPP"

  build_arrow_java
  status=$?
  [ $status -eq 0 ] || return $status
  echo "Finished building arrow Java"
}

# Fetches the Velox sources with get_velox.sh and builds them with
# build_velox.sh, both run from $GLUTEN_DIR/ep/build-velox/src.
#
# All options come from the script-level settings (ENABLE_*, BUILD_*,
# RUN_SETUP_SCRIPT, NUM_THREADS, VELOX_PARAMETER). Returns non-zero if the
# directory cannot be entered or either script fails.
function build_velox {
  echo "Start to build Velox"
  cd "$GLUTEN_DIR/ep/build-velox/src" || return 1
  # VELOX_PARAMETER is intentionally left unquoted so that it can expand to
  # several arguments, or to none.
  ./get_velox.sh --enable_hdfs=$ENABLE_HDFS --build_protobuf=$BUILD_PROTOBUF --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS --enable_abfs=$ENABLE_ABFS $VELOX_PARAMETER || return 1
  # When BUILD_TESTS is on for gluten cpp, we need to turn on VELOX_BUILD_TEST_UTILS via build_test_utils.
  # --compile_arrow_java is no longer passed: Arrow is built by build_arrow.
  ./build_velox.sh --run_setup_script=$RUN_SETUP_SCRIPT --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS --build_type=$BUILD_TYPE --enable_hdfs=$ENABLE_HDFS \
    --enable_abfs=$ENABLE_ABFS --enable_ep_cache=$ENABLE_EP_CACHE --build_test_utils=$BUILD_TESTS --build_tests=$BUILD_VELOX_TESTS --build_benchmarks=$BUILD_VELOX_BENCHMARKS \
    --num_threads=$NUM_THREADS
}

## compile gluten cpp
Expand All @@ -208,11 +217,15 @@ function build_gluten_cpp {
cd build
cmake -DBUILD_VELOX_BACKEND=ON -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DBUILD_TESTS=$BUILD_TESTS -DBUILD_EXAMPLES=$BUILD_EXAMPLES -DBUILD_BENCHMARKS=$BUILD_BENCHMARKS -DBUILD_JEMALLOC=$BUILD_JEMALLOC \
-DENABLE_HBM=$ENABLE_HBM -DENABLE_QAT=$ENABLE_QAT -DENABLE_IAA=$ENABLE_IAA -DENABLE_GCS=$ENABLE_GCS -DENABLE_S3=$ENABLE_S3 -DENABLE_HDFS=$ENABLE_HDFS -DENABLE_ABFS=$ENABLE_ABFS ..
-DENABLE_HBM=$ENABLE_HBM -DENABLE_QAT=$ENABLE_QAT -DENABLE_IAA=$ENABLE_IAA -DBUILD_PROTOBUF=$BUILD_PROTOBUF -DENABLE_GCS=$ENABLE_GCS \
-DENABLE_S3=$ENABLE_S3 -DENABLE_HDFS=$ENABLE_HDFS -DENABLE_ABFS=$ENABLE_ABFS ..
make -j $NUM_THREADS
}

# Builds the Velox backend: Arrow first (only when BUILD_ARROW is "ON"), then
# Velox, then the Gluten C++ library.
function build_velox_backend {
  # Quoted so that an empty BUILD_ARROW compares as "not ON" instead of making
  # the test fail with a syntax error.
  if [ "${BUILD_ARROW}" = "ON" ]; then
    build_arrow
  fi
  build_velox
  build_gluten_cpp
}
Expand Down
45 changes: 0 additions & 45 deletions ep/build-velox/src/build_arrow_deps_centos8.sh

This file was deleted.

Loading
Loading