diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index ff934a2a8e14e..d8def2a8e8020 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -55,6 +55,7 @@ option(ENABLE_IAA "Enable IAA for de/compression" OFF) option(ENABLE_GCS "Enable GCS" OFF) option(ENABLE_S3 "Enable S3" OFF) option(ENABLE_HDFS "Enable HDFS" OFF) +option(ENABLE_HDFS3 "Enable HDFS3" OFF) option(ENABLE_ORC "Enable ORC" OFF) option(ENABLE_ABFS "Enable ABFS" OFF) diff --git a/cpp/compile.sh b/cpp/compile.sh index 7006f69a3a725..f458f5909cfd0 100755 --- a/cpp/compile.sh +++ b/cpp/compile.sh @@ -27,6 +27,7 @@ ENABLE_HBM=OFF ENABLE_GCS=OFF ENABLE_S3=OFF ENABLE_HDFS=OFF +ENABLE_HDFS3=OFF ENABLE_ABFS=OFF VELOX_HOME= # set default number of threads as cpu cores minus 2 @@ -97,6 +98,10 @@ for arg in "$@"; do ENABLE_HDFS=("${arg#*=}") shift # Remove argument name from processing ;; + --enable_hdfs3=*) + ENABLE_HDFS3=("${arg#*=}") + shift # Remove argument name from processing + ;; *) OTHER_ARGUMENTS+=("$1") shift # Remove generic argument from processing @@ -127,6 +132,7 @@ echo "ENABLE_HBM=${ENABLE_HBM}" echo "ENABLE_GCS=${ENABLE_GCS}" echo "ENABLE_S3=${ENABLE_S3}" echo "ENABLE_HDFS=${ENABLE_HDFS}" +echo "ENABLE_HDFS3=${ENABLE_HDFS3}" echo "ENABLE_ABFS=${ENABLE_ABFS}" if [ -d build ]; then @@ -147,5 +153,6 @@ cmake .. 
\ -DENABLE_GCS=${ENABLE_GCS} \ -DENABLE_S3=${ENABLE_S3} \ -DENABLE_HDFS=${ENABLE_HDFS} \ + -DENABLE_HDFS3=${ENABLE_HDFS3} \ -DENABLE_ABFS=${ENABLE_ABFS} make -j$NPROC diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt index 47e4ff8f13824..679540471dbf9 100644 --- a/cpp/velox/CMakeLists.txt +++ b/cpp/velox/CMakeLists.txt @@ -312,6 +312,39 @@ if(ENABLE_HDFS) add_definitions(-DENABLE_HDFS) endif() + +# Locate libhdfs3 and make the imported target HDFS::hdfs3 available; configuration aborts when it cannot be found. +macro(find_libhdfs3) + find_package(libhdfs3 CONFIG) + if(libhdfs3_FOUND AND TARGET HDFS::hdfs3) + set(LIBHDFS3_LIBRARY HDFS::hdfs3) + else() + find_path(libhdfs3_INCLUDE_DIR hdfs/hdfs.h) + # A macro runs in the caller's scope, so restore the suffix list after the lookup. + set(libhdfs3_SAVED_SUFFIXES "${CMAKE_FIND_LIBRARY_SUFFIXES}") + set(CMAKE_FIND_LIBRARY_SUFFIXES ".so") + find_library(libhdfs3_LIBRARY NAMES hdfs3) + set(CMAKE_FIND_LIBRARY_SUFFIXES "${libhdfs3_SAVED_SUFFIXES}") + find_package_handle_standard_args(libhdfs3 DEFAULT_MSG libhdfs3_INCLUDE_DIR + libhdfs3_LIBRARY) + add_library(HDFS::hdfs3 SHARED IMPORTED) + set_target_properties( + HDFS::hdfs3 + PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${libhdfs3_INCLUDE_DIR}" + IMPORTED_LOCATION "${libhdfs3_LIBRARY}") + endif() + + if(NOT libhdfs3_FOUND) + message(FATAL_ERROR "LIBHDFS3 Library Not Found") + endif() +endmacro() + +if(ENABLE_HDFS3) + find_libhdfs3() + target_link_libraries(velox PUBLIC HDFS::hdfs3) + add_definitions(-DENABLE_HDFS3) +endif() + if(ENABLE_S3) add_definitions(-DENABLE_S3) find_awssdk() diff --git a/cpp/velox/compute/VeloxBackend.cc b/cpp/velox/compute/VeloxBackend.cc index d39b0902c2502..d2514b336c586 100644 --- a/cpp/velox/compute/VeloxBackend.cc +++ b/cpp/velox/compute/VeloxBackend.cc @@ -134,7 +134,7 @@ void VeloxBackend::init(const std::unordered_map& conf // Setup and register. 
velox::filesystems::registerLocalFileSystem(); -#ifdef ENABLE_HDFS +#if (defined(ENABLE_HDFS) || defined(ENABLE_HDFS3)) velox::filesystems::registerHdfsFileSystem(); #endif #ifdef ENABLE_S3 diff --git a/dev/builddeps-veloxbe.sh b/dev/builddeps-veloxbe.sh index 1ed6e62e29d33..35bb6403adb41 100755 --- a/dev/builddeps-veloxbe.sh +++ b/dev/builddeps-veloxbe.sh @@ -21,6 +21,7 @@ ENABLE_HBM=OFF ENABLE_GCS=OFF ENABLE_S3=OFF ENABLE_HDFS=OFF +ENABLE_HDFS3=OFF ENABLE_ABFS=OFF ENABLE_EP_CACHE=OFF ENABLE_VCPKG=OFF @@ -91,6 +92,10 @@ do --enable_hdfs=*) ENABLE_HDFS=("${arg#*=}") shift # Remove argument name from processing + ;; + --enable_hdfs3=*) + ENABLE_HDFS3=("${arg#*=}") + shift # Remove argument name from processing ;; --enable_abfs=*) ENABLE_ABFS=("${arg#*=}") @@ -167,7 +172,7 @@ function concat_velox_param { if [ "$ENABLE_VCPKG" = "ON" ]; then # vcpkg will install static depends and init build environment BUILD_OPTIONS="--build_tests=$BUILD_TESTS --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS \ - --enable_hdfs=$ENABLE_HDFS --enable_abfs=$ENABLE_ABFS" + --enable_hdfs=$ENABLE_HDFS --enable_hdfs3=$ENABLE_HDFS3 --enable_abfs=$ENABLE_ABFS" source ./dev/vcpkg/env.sh ${BUILD_OPTIONS} fi @@ -191,7 +196,7 @@ function build_velox { echo "Start to build Velox" cd $GLUTEN_DIR/ep/build-velox/src # When BUILD_TESTS is on for gluten cpp, we need turn on VELOX_BUILD_TEST_UTILS via build_test_utils. 
- ./build_velox.sh --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS --build_type=$BUILD_TYPE --enable_hdfs=$ENABLE_HDFS \ + ./build_velox.sh --enable_s3=$ENABLE_S3 --enable_gcs=$ENABLE_GCS --build_type=$BUILD_TYPE --enable_hdfs=$ENABLE_HDFS --enable_hdfs3=$ENABLE_HDFS3 \ --enable_abfs=$ENABLE_ABFS --enable_ep_cache=$ENABLE_EP_CACHE --build_test_utils=$BUILD_TESTS \ --build_tests=$BUILD_VELOX_TESTS --build_benchmarks=$BUILD_VELOX_BENCHMARKS --num_threads=$NUM_THREADS \ --velox_home=$VELOX_HOME @@ -207,7 +212,7 @@ function build_gluten_cpp { -DVELOX_HOME=${VELOX_HOME} \ -DBUILD_TESTS=$BUILD_TESTS -DBUILD_EXAMPLES=$BUILD_EXAMPLES -DBUILD_BENCHMARKS=$BUILD_BENCHMARKS -DENABLE_JEMALLOC_STATS=$ENABLE_JEMALLOC_STATS \ -DENABLE_HBM=$ENABLE_HBM -DENABLE_QAT=$ENABLE_QAT -DENABLE_IAA=$ENABLE_IAA -DENABLE_GCS=$ENABLE_GCS \ - -DENABLE_S3=$ENABLE_S3 -DENABLE_HDFS=$ENABLE_HDFS -DENABLE_ABFS=$ENABLE_ABFS .. + -DENABLE_S3=$ENABLE_S3 -DENABLE_HDFS=$ENABLE_HDFS -DENABLE_HDFS3=$ENABLE_HDFS3 -DENABLE_ABFS=$ENABLE_ABFS .. make -j $NUM_THREADS } diff --git a/dev/vcpkg/init.sh b/dev/vcpkg/init.sh index bae1a8ad32eb0..ed6b4afbbb930 100755 --- a/dev/vcpkg/init.sh +++ b/dev/vcpkg/init.sh @@ -6,6 +6,7 @@ BUILD_TESTS=OFF ENABLE_S3=OFF ENABLE_GCS=OFF ENABLE_HDFS=OFF +ENABLE_HDFS3=OFF ENABLE_ABFS=OFF for arg in "$@"; do @@ -26,6 +27,10 @@ for arg in "$@"; do ENABLE_HDFS=("${arg#*=}") shift # Remove argument name from processing ;; + --enable_hdfs3=*) + ENABLE_HDFS3=("${arg#*=}") + shift # Remove argument name from processing + ;; --enable_abfs=*) ENABLE_ABFS=("${arg#*=}") shift # Remove argument name from processing diff --git a/ep/build-velox/src/build_velox.sh b/ep/build-velox/src/build_velox.sh index b48f28a374b2f..472377b0914c6 100755 --- a/ep/build-velox/src/build_velox.sh +++ b/ep/build-velox/src/build_velox.sh @@ -22,6 +22,8 @@ ENABLE_S3=OFF ENABLE_GCS=OFF # Enable HDFS connector. ENABLE_HDFS=OFF +# Enable HDFS3 connector. +ENABLE_HDFS3=OFF # Enable ABFS connector. 
ENABLE_ABFS=OFF BUILD_TYPE=release @@ -57,6 +59,10 @@ for arg in "$@"; do ENABLE_HDFS=("${arg#*=}") shift # Remove argument name from processing ;; + --enable_hdfs3=*) + ENABLE_HDFS3=("${arg#*=}") + shift # Remove argument name from processing + ;; --enable_abfs=*) ENABLE_ABFS=("${arg#*=}") shift # Remove argument name from processing @@ -104,6 +110,9 @@ function compile { if [ $ENABLE_HDFS == "ON" ]; then COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_HDFS=ON" fi + if [ $ENABLE_HDFS3 == "ON" ]; then + COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_HDFS3=ON" + fi if [ $ENABLE_S3 == "ON" ]; then COMPILE_OPTION="$COMPILE_OPTION -DVELOX_ENABLE_S3=ON" fi diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 815d318583b84..7ee8df7a87629 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -16,8 +16,8 @@ set -exu -VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_10_28 +VELOX_REPO=https://github.com/JkSelf/velox.git +VELOX_BRANCH=libhdfs3-support VELOX_HOME="" OS=`uname -s`