From a14f08197398b1cb56e3527f451d925ccc6c3d45 Mon Sep 17 00:00:00 2001 From: Jia Ke Date: Tue, 16 Apr 2024 04:20:37 +0800 Subject: [PATCH] Support jvm libhdfs in velox --- .github/workflows/velox_docker.yml | 10 +++ cpp/velox/CMakeLists.txt | 6 +- cpp/velox/compute/WholeStageResultIterator.cc | 10 +-- cpp/velox/utils/HdfsUtils.cc | 66 ------------------- cpp/velox/utils/HdfsUtils.h | 22 ------- ep/build-velox/src/get_velox.sh | 2 +- ep/build-velox/src/modify_velox.patch | 20 ------ 7 files changed, 17 insertions(+), 119 deletions(-) delete mode 100644 cpp/velox/utils/HdfsUtils.cc delete mode 100644 cpp/velox/utils/HdfsUtils.h diff --git a/.github/workflows/velox_docker.yml b/.github/workflows/velox_docker.yml index 271daf679ae5b..48701aab1dbe3 100644 --- a/.github/workflows/velox_docker.yml +++ b/.github/workflows/velox_docker.yml @@ -64,6 +64,16 @@ jobs: - name: Build Gluten Velox third party if: ${{ steps.cache.outputs.cache-hit != 'true' }} run: | + yum update -y && yum install -y java-1.8.0-openjdk-devel wget + export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk + + echo "JAVA_HOME: $JAVA_HOME" + + wget https://archive.apache.org/dist/hadoop/core/hadoop-2.10.1/hadoop-2.10.1.tar.gz + tar xf hadoop-2.10.1.tar.gz -C /usr/local/ + export HADOOP_HOME='/usr/local/hadoop-2.10.1' + echo "HADOOP_HOME: $HADOOP_HOME" + source dev/ci-velox-buildstatic.sh - uses: actions/upload-artifact@v2 with: diff --git a/cpp/velox/CMakeLists.txt b/cpp/velox/CMakeLists.txt index 3b865209a8c7a..770ecb6f80fbf 100644 --- a/cpp/velox/CMakeLists.txt +++ b/cpp/velox/CMakeLists.txt @@ -330,9 +330,9 @@ set(VELOX_SRCS utils/Common.cc ) -if (ENABLE_HDFS) - list(APPEND VELOX_SRCS utils/HdfsUtils.cc) -endif () +# if (ENABLE_HDFS) +# list(APPEND VELOX_SRCS utils/HdfsUtils.cc) +# endif () if(ENABLE_S3) find_package(ZLIB) diff --git a/cpp/velox/compute/WholeStageResultIterator.cc b/cpp/velox/compute/WholeStageResultIterator.cc index 9f43251e2fccf..9aba405f607f9 100644 --- a/cpp/velox/compute/WholeStageResultIterator.cc +++ b/cpp/velox/compute/WholeStageResultIterator.cc @@ -24,10 +24,6 @@ #include "utils/ConfigExtractor.h" -#ifdef ENABLE_HDFS -#include "utils/HdfsUtils.h" -#endif - using namespace facebook; namespace gluten { @@ -113,9 +109,9 @@ WholeStageResultIterator::WholeStageResultIterator( scanNodeIds_(scanNodeIds), scanInfos_(scanInfos), streamIds_(streamIds) { -#ifdef ENABLE_HDFS - gluten::updateHdfsTokens(veloxCfg_.get()); -#endif + // #ifdef ENABLE_HDFS + // gluten::updateHdfsTokens(veloxCfg_.get()); + // #endif spillStrategy_ = veloxCfg_->get(kSpillStrategy, kSpillStrategyDefaultValue); getOrderedNodeIds(veloxPlan_, orderedNodeIds_); diff --git a/cpp/velox/utils/HdfsUtils.cc b/cpp/velox/utils/HdfsUtils.cc deleted file mode 100644 index a912c04eee7e7..0000000000000 --- a/cpp/velox/utils/HdfsUtils.cc +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "HdfsUtils.h" -#include -#include "config/GlutenConfig.h" -#include "utils/exception.h" - -namespace gluten { - -namespace { -struct Credential { - const std::string userName; - const std::string allTokens; - - bool operator==(const Credential& rhs) const { - return userName == rhs.userName && allTokens == rhs.allTokens; - } - bool operator!=(const Credential& rhs) const { - return !(rhs == *this); - } -}; -} // namespace - -void updateHdfsTokens(const facebook::velox::Config* veloxCfg) { - static std::mutex mtx; - std::lock_guard lock{mtx}; - - static std::optional activeCredential{std::nullopt}; - - const auto& newUserName = veloxCfg->get(gluten::kUGIUserName); - const auto& newAllTokens = veloxCfg->get(gluten::kUGITokens); - - if (!newUserName.hasValue() || !newAllTokens.hasValue()) { - return; - } - - Credential newCredential{newUserName.value(), newAllTokens.value()}; - - if (activeCredential.has_value() && activeCredential.value() == newCredential) { - // Do nothing if the credential is the same with before. - return; - } - - hdfsSetDefautUserName(newCredential.userName.c_str()); - std::vector tokens; - folly::split('\0', newCredential.allTokens, tokens); - for (auto& token : tokens) - hdfsSetTokenForDefaultUser(token.data()); - activeCredential.emplace(newCredential); -} -} // namespace gluten diff --git a/cpp/velox/utils/HdfsUtils.h b/cpp/velox/utils/HdfsUtils.h deleted file mode 100644 index cd017f250ad22..0000000000000 --- a/cpp/velox/utils/HdfsUtils.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -namespace gluten { -void updateHdfsTokens(const facebook::velox::Config* veloxCfg); -} diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index ba4ef44fefbbd..0bc369e199743 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_04_19 +VELOX_BRANCH=libhdfs VELOX_HOME="" #Set on run gluten on HDFS diff --git a/ep/build-velox/src/modify_velox.patch b/ep/build-velox/src/modify_velox.patch index 4bcb228455b1f..55826c4e8c2d0 100644 --- a/ep/build-velox/src/modify_velox.patch +++ b/ep/build-velox/src/modify_velox.patch @@ -39,26 +39,6 @@ diff --git a/CMakeLists.txt b/CMakeLists.txt index 53aaf4391..90aba6916 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt -@@ -243,10 +243,15 @@ if(VELOX_ENABLE_ABFS) - endif() - - if(VELOX_ENABLE_HDFS) -- find_library( -- LIBHDFS3 -- NAMES libhdfs3.so libhdfs3.dylib -- HINTS "${CMAKE_SOURCE_DIR}/hawq/depends/libhdfs3/_build/src/" REQUIRED) -+ find_package(libhdfs3) -+ if(libhdfs3_FOUND AND TARGET HDFS::hdfs3) -+ set(LIBHDFS3 HDFS::hdfs3) -+ else() -+ find_library( -+ LIBHDFS3 -+ NAMES libhdfs3.so libhdfs3.dylib -+ HINTS "${CMAKE_SOURCE_DIR}/hawq/depends/libhdfs3/_build/src/" REQUIRED) -+ endif() - add_definitions(-DVELOX_ENABLE_HDFS3) - endif() - @@ -386,7 +391,7 @@ resolve_dependency(Boost 1.77.0 COMPONENTS ${BOOST_INCLUDE_LIBRARIES}) # for reference. find_package(range-v3)