From 16abf39665320f122b516de3f3a0c796bd605f52 Mon Sep 17 00:00:00 2001 From: Yuan Date: Wed, 22 Nov 2023 20:58:33 +0800 Subject: [PATCH] [GLUTEN-3715][VL] followup to add GCS support for CentOS7 (#3789) Add GCS support on CentOS7 and remove the unnecessary Arrow components from the static build job. Signed-off-by: Yuan Zhou --- .github/workflows/velox_be.yml | 29 ++++++++++------------ dev/vcpkg/init.sh | 2 +- dev/vcpkg/vcpkg.json | 19 +------------- ep/build-velox/src/get_velox.sh | 3 +++ tools/gluten-te/centos/dockerfile-buildenv | 12 ++++----- 5 files changed, 24 insertions(+), 41 deletions(-) diff --git a/.github/workflows/velox_be.yml b/.github/workflows/velox_be.yml index 7dbb164a0b57..959b79d900b0 100644 --- a/.github/workflows/velox_be.yml +++ b/.github/workflows/velox_be.yml @@ -42,6 +42,7 @@ concurrency: cancel-in-progress: true jobs: + ubuntu2004-test-spark32: runs-on: velox-self-hosted steps: @@ -87,8 +88,7 @@ jobs: if: ${{ always() }} run: | docker stop ubuntu2004-test-$GITHUB_RUN_ID || true - - ubuntu2004-test-slow-spark32: + ubuntu2004-test-spark32-slow: runs-on: velox-self-hosted steps: - uses: actions/checkout@v4 @@ -125,7 +125,6 @@ jobs: if: ${{ always() }} run: | docker stop ubuntu2004-test-slow-$GITHUB_RUN_ID || true - ubuntu2004-test-spark33-slow: runs-on: velox-self-hosted steps: @@ -162,7 +161,6 @@ jobs: if: ${{ always() }} run: | docker stop ubuntu2004-test-spark33-slow-$GITHUB_RUN_ID || true - ubuntu2004-test-spark33: runs-on: velox-self-hosted steps: @@ -229,7 +227,6 @@ jobs: if: ${{ always() }} run: | docker stop ubuntu2004-test-spark34-slow-$GITHUB_RUN_ID || true - ubuntu2004-test-spark34: runs-on: velox-self-hosted steps: @@ -259,7 +256,6 @@ jobs: if: ${{ always() }} run: | docker stop ubuntu2004-test-spark34-$GITHUB_RUN_ID || true - ubuntu2204-test: runs-on: velox-self-hosted steps: @@ -337,7 +333,6 @@ jobs: if: ${{ always() }} run: | docker stop ubuntu2204-test-$GITHUB_RUN_ID || true - centos8-test: runs-on: velox-self-hosted steps: @@ 
-393,23 +388,25 @@ jobs: - uses: actions/checkout@v4 - name: Setup docker container run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name centos7-test-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 10.0.2.4:5000/gluten-dev/centos:7 \ - bash -c 'cd /opt/gluten && sleep 14400' + EXTRA_DOCKER_OPTIONS="--name centos7-test-$GITHUB_RUN_ID -e NUM_THREADS=30 --detach" \ + NON_INTERACTIVE=ON \ + MOUNT_MAVEN_CACHE=OFF \ + OS_IMAGE=centos:7 \ + OS_VERSION=7 \ + tools/gluten-te/centos/cbash.sh sleep 14400 - name: Build Gluten velox third party run: | docker exec centos7-test-$GITHUB_RUN_ID bash -c ' - source /env.sh && \ - sudo yum -y install patch && \ + yum -y install epel-release centos-release-scl patch sudo && \ cd /opt/gluten/ep/build-velox/src && \ - ./get_velox.sh --velox_home=/opt/velox && \ - ./build_velox.sh --run_setup_script=OFF --velox_home=/opt/velox --enable_ep_cache=ON' + ./get_velox.sh --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON && \ + ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON' - name: Build Gluten CPP library run: | docker exec centos7-test-$GITHUB_RUN_ID bash -c ' - source /env.sh && \ cd /opt/gluten/cpp && \ - ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox' + source /opt/rh/devtoolset-9/enable && \ + ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON' - name: Build for Spark 3.2.2 run: | docker exec centos7-test-$GITHUB_RUN_ID bash -c ' diff --git a/dev/vcpkg/init.sh b/dev/vcpkg/init.sh index 1b481ccb0883..d5f828e2022c 100755 --- a/dev/vcpkg/init.sh +++ b/dev/vcpkg/init.sh @@ -12,7 +12,7 @@ VCPKG_TRIPLET=x64-linux-avx cd "$SCRIPT_ROOT" if [ ! 
-d "$VCPKG_ROOT" ] || [ -z "$(ls "$VCPKG_ROOT")" ]; then - git clone https://github.com/microsoft/vcpkg.git --branch master "$VCPKG_ROOT" + git clone https://github.com/microsoft/vcpkg.git --branch 2023.10.19 "$VCPKG_ROOT" fi [ -f "$VCPKG" ] || "$VCPKG_ROOT/bootstrap-vcpkg.sh" -disableMetrics diff --git a/dev/vcpkg/vcpkg.json b/dev/vcpkg/vcpkg.json index 09feb4041618..56b924f81df1 100644 --- a/dev/vcpkg/vcpkg.json +++ b/dev/vcpkg/vcpkg.json @@ -2,25 +2,8 @@ "$schema": "https://raw.githubusercontent.com/microsoft/vcpkg-tool/main/docs/vcpkg.schema.json", "builtin-baseline": "a7b6122f6b6504d16d96117336a0562693579933", "dependencies": ["jemalloc"], - "default-features": ["arrow", "velox", "velox-s3", "velox-gcs", "velox-hdfs"], + "default-features": ["velox", "velox-s3", "velox-gcs", "velox-hdfs"], "features": { - "arrow": { - "description": "Apache Arrow", - "dependencies": [ - "libevent", - "libdwarf", - "openssl", - "orc", - "protobuf", - "rapidjson", - "snappy", - "xsimd", - "zlib", - "zstd", - "thrift", - "jemalloc" - ] - }, "velox": { "description": "Velox backend", "dependencies": [ diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 73bc546b2c18..e3425c4875c1 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -151,6 +151,9 @@ function process_setup_centos7 { if [ $ENABLE_S3 == "ON" ]; then sed -i '/^ run_and_time install_fmt/a \ \ run_and_time install_awssdk' scripts/setup-centos7.sh fi + if [ $ENABLE_GCS == "ON" ]; then + sed -i '/^ run_and_time install_fmt/a \ \ '${VELOX_HOME}/scripts'/setup-adapters.sh gcs' scripts/setup-centos7.sh + fi } function process_setup_alinux3 { diff --git a/tools/gluten-te/centos/dockerfile-buildenv b/tools/gluten-te/centos/dockerfile-buildenv index 67fab78942d3..3f36f8c91933 100755 --- a/tools/gluten-te/centos/dockerfile-buildenv +++ b/tools/gluten-te/centos/dockerfile-buildenv @@ -56,13 +56,13 @@ RUN wget 
https://downloads.apache.org/maven/maven-3/3.8.8/binaries/apache-maven- rm apache-maven-3.8.8-bin.tar.gz && \ mv apache-maven-3.8.8 /usr/lib/maven -# Build & install Spark 3.2.2 -RUN cd /opt && git clone --depth 1 --branch v3.2.2 https://github.com/apache/spark.git spark322 -RUN cd /opt/spark322 && ./build/mvn -Pyarn -DskipTests clean install +# # Build & install Spark 3.2.2 +# RUN cd /opt && git clone --depth 1 --branch v3.2.2 https://github.com/apache/spark.git spark322 +# RUN cd /opt/spark322 && ./build/mvn -Pyarn -DskipTests clean install -# Build & install Spark 3.3.1 -RUN cd /opt && git clone --depth 1 --branch v3.3.1 https://github.com/apache/spark.git spark331 -RUN cd /opt/spark331 && ./build/mvn -Pyarn -DskipTests clean install +# # Build & install Spark 3.3.1 +# RUN cd /opt && git clone --depth 1 --branch v3.3.1 https://github.com/apache/spark.git spark331 +# RUN cd /opt/spark331 && ./build/mvn -Pyarn -DskipTests clean install ENV PATH="$PATH:/usr/lib/maven/bin" ENV LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib:/lib64:/lib