# [GLUTEN-3582][CORE][VL][CH] Refactor filter pushdown logic #8595
# Workflow file for this run
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
name: Velox backend

# Run on pull requests that touch any path that can affect the Velox backend.
on:
  pull_request:
    paths:
      - '.github/**'
      - 'pom.xml'
      - 'backends-velox/**'
      - 'gluten-celeborn/**'
      - 'gluten-core/**'
      - 'gluten-data/**'
      - 'gluten-delta/**'
      - 'gluten-ut/**'
      - 'shims/**'
      - 'tools/gluten-it/**'
      - 'tools/gluten-te/**'
      - 'ep/build-velox/**'
      - 'cpp/*'
      - 'cpp/CMake/**'
      - 'cpp/velox/**'
      - 'cpp/core/**'
      - 'dev/**'
      - 'substrait/substrait-spark/**'

# Workflow-wide environment shared by every job.
# Values that look like numbers or contain ':' are quoted so that they are
# always handled as strings regardless of the YAML parser's implicit typing.
env:
  # NOTE(review): the proxy settings are presumably consumed by the gluten-te
  # helper scripts; no step in this workflow reads them directly - confirm.
  HTTP_PROXY_HOST: proxy-shz.intel.com
  HTTP_PROXY_PORT: "911"
  # Root of the helper scripts invoked by the job steps
  # ($PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/*.sh).
  PATH_TO_GLUTEN_TE: ./tools/gluten-te
  # Registry the static-build job pulls its build-environment image from.
  DOCKER_PULL_REGISTRY: "10.1.0.25:5000"
  MAVEN_OPTS: -Dmaven.wagon.http.retryHandler.count=3

# One active run per repository / PR branch (or commit SHA) / workflow;
# a newer run cancels the one still in progress for the same group.
concurrency:
  group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
  cancel-in-progress: true
jobs:
  # Ubuntu 20.04 / Spark 3.2: builds Velox and the Gluten C++ library with
  # tests, examples and benchmarks enabled, runs the C++ unit tests (including
  # the HBM allocator test), then the Spark 3.2 unit tests that are not tagged
  # ExtendedSQLTest / UDFTest / SkipTestTags, the UDFTest-tagged tests, and
  # finally the C++ micro benchmarks.
  ubuntu2004-test-spark32:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: ubuntu
      # Quoted: an unquoted 20.04 is a YAML float, not a version string.
      # NOTE(review): presumably read by the gha-checkout scripts - confirm.
      OS_IMAGE_TAG: "20.04"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/ep/build-velox/src && \
          ./get_velox.sh --velox_home=/opt/velox && \
          ./build_velox.sh --run_setup_script=OFF --velox_home=/opt/velox --enable_ep_cache=ON --build_test_utils=ON'
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/cpp && \
          ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --build_tests=ON --build_examples=ON --build_benchmarks=ON'
      - name: Run CPP unit test
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build && \
          ctest -V'
      - name: Run HBM CPP unit test
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build && \
          cmake -DBUILD_TESTS=ON -DENABLE_HBM=ON .. && \
          cmake --build . --target hbw_allocator_test -- -j && \
          ctest -V -R TestHbw'
      - name: Build and run unit test for Spark 3.2.2 (other tests)
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten && \
          mvn clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -DargLine="-Dspark.test.home=/opt/spark322" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \
          mvn test -Pspark-3.2 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest'
      # Cpp micro benchmarks will use generated files from unit test in backends-velox module.
      - name: Run micro benchmarks
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build/velox/benchmarks && \
          ./generic_benchmark --with-shuffle --threads 1 --iterations 1'
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh
# Job (nested under `jobs:`): Ubuntu 20.04 / Spark 3.2 slow tests.
  # Runs the ExtendedSQLTest-tagged unit tests, then the TPC-H and TPC-DS
  # SF1.0 query comparisons through gluten-it.
  ubuntu2004-test-spark32-slow:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: ubuntu
      # Quoted: an unquoted 20.04 is a YAML float, not a version string.
      OS_IMAGE_TAG: "20.04"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/ep/build-velox/src && \
          ./get_velox.sh --velox_home=/opt/velox && \
          ./build_velox.sh --run_setup_script=OFF --velox_home=/opt/velox --enable_ep_cache=ON'
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/cpp && \
          ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox'
      - name: Build and run unit test for Spark 3.2.2 (slow tests)
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten && \
          mvn clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -Piceberg -Pdelta -DargLine="-Dspark.test.home=/opt/spark322" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest'
      - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.2 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1'
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh
# Job (nested under `jobs:`): Ubuntu 20.04 / Spark 3.3 slow tests.
  # Runs the ExtendedSQLTest-tagged unit tests, the TPC-H and TPC-DS SF1.0
  # query comparisons, and a TPC-DS q38 run with broadcast hash join disabled
  # and tightened partial-aggregation settings.
  ubuntu2004-test-spark33-slow:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: ubuntu
      # Quoted: an unquoted 20.04 is a YAML float, not a version string.
      OS_IMAGE_TAG: "20.04"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/ep/build-velox/src && \
          ./get_velox.sh --velox_home=/opt/velox && \
          ./build_velox.sh --run_setup_script=OFF --velox_home=/opt/velox --enable_ep_cache=ON'
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/cpp && \
          ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox'
      - name: Build and Run unit test for Spark 3.3.1 (slow tests)
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \
          mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark331" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest'
      - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.3
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.3 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1'
      # Reuses the gluten-it build from the previous step (no mvn invocation
      # here) and only runs TPC-DS q38.
      - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.3 Q38 flush
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 --queries=q38 \
          --disable-bhj \
          --extra-conf=spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.1 \
          --extra-conf=spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.2 \
          --extra-conf=spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100 \
          --extra-conf=spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0'
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh
# Job (nested under `jobs:`): Ubuntu 20.04 / Spark 3.3 "other" unit tests.
  # Runs every unit test not tagged ExtendedSQLTest / UDFTest / SkipTestTags,
  # followed by the UDFTest-tagged tests.
  ubuntu2004-test-spark33:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: ubuntu
      # Quoted: an unquoted 20.04 is a YAML float, not a version string.
      OS_IMAGE_TAG: "20.04"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/ep/build-velox/src && \
          ./get_velox.sh --velox_home=/opt/velox && \
          ./build_velox.sh --run_setup_script=OFF --velox_home=/opt/velox --enable_ep_cache=ON'
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/cpp && \
          ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --build_examples=ON'
      - name: Build and Run unit test for Spark 3.3.1 (other tests)
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \
          mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark331" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \
          mvn test -Pspark-3.3 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest'
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh
# Job (nested under `jobs:`): Ubuntu 20.04 / Spark 3.4 slow tests.
  # Runs the ExtendedSQLTest-tagged unit tests, then the TPC-H and TPC-DS
  # SF1.0 query comparisons through gluten-it.
  ubuntu2004-test-spark34-slow:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: ubuntu
      # Quoted: an unquoted 20.04 is a YAML float, not a version string.
      OS_IMAGE_TAG: "20.04"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/ep/build-velox/src && \
          ./get_velox.sh --velox_home=/opt/velox && \
          ./build_velox.sh --run_setup_script=OFF --velox_home=/opt/velox --enable_ep_cache=ON'
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/cpp && \
          ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox'
      - name: Build and Run unit test for Spark 3.4.2 (slow tests)
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \
          mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark342" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest'
      - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.4
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.4 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1'
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh
# Job (nested under `jobs:`): Ubuntu 20.04 / Spark 3.4 "other" unit tests.
  # Runs every unit test not tagged ExtendedSQLTest / UDFTest / SkipTestTags,
  # followed by the UDFTest-tagged tests.
  ubuntu2004-test-spark34:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: ubuntu
      # Quoted: an unquoted 20.04 is a YAML float, not a version string.
      OS_IMAGE_TAG: "20.04"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/ep/build-velox/src && \
          ./get_velox.sh --velox_home=/opt/velox && \
          ./build_velox.sh --run_setup_script=OFF --velox_home=/opt/velox --enable_ep_cache=ON'
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/cpp && \
          ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --build_examples=ON'
      - name: Build and Run unit test for Spark 3.4.2 (other tests)
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten && \
          mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark342" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \
          mvn test -Pspark-3.4 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest'
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh
# Job (nested under `jobs:`): Ubuntu 22.04 / Spark 3.3 and 3.4 benchmarks.
  # Builds Velox and Gluten once with HDFS/S3/GCS/ABFS enabled, then for each
  # Spark version packages Gluten (tests skipped) and runs the TPC-H SF1.0 and
  # TPC-DS SF10.0 query comparisons.
  ubuntu2204-test-spark33-spark34:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: ubuntu
      # Quoted: an unquoted 22.04 is a YAML float, not a version string.
      OS_IMAGE_TAG: "22.04"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/ep/build-velox/src && \
          ./get_velox.sh --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \
          ./build_velox.sh --run_setup_script=OFF --velox_home=/opt/velox --enable_ep_cache=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON'
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/cpp && \
          ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON'
      - name: Build for Spark 3.3.1
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten && \
          mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests'
      - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.3
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.3 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx20G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=30g -s=10.0 --threads=32 --iterations=1'
      - name: Build for Spark 3.4.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten && \
          mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests'
      - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.4
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.4 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx20G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=30g -s=10.0 --threads=32 --iterations=1'
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh
# Job (nested under `jobs:`): Ubuntu 22.04 / Spark 3.2 benchmarks.
  # Runs the TPC-H SF1.0 and TPC-DS SF10.0 query comparisons, then TPC-H and
  # TPC-DS SF1.0 again with the velox-with-celeborn preset against a locally
  # started Celeborn 0.3.0 master and worker.
  ubuntu2204-test:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: ubuntu
      # Quoted: an unquoted 22.04 is a YAML float, not a version string.
      OS_IMAGE_TAG: "22.04"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/ep/build-velox/src && \
          ./get_velox.sh --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \
          ./build_velox.sh --run_setup_script=OFF --velox_home=/opt/velox --enable_ep_cache=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON'
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/cpp && \
          ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON'
      - name: Build for Spark 3.2.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten && \
          mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests'
      - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.2 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx20G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=40g -s=10.0 --threads=32 --iterations=1'
      # Downloads and unpacks Celeborn into the working directory of exec.sh,
      # writes its env/defaults config, starts master + worker, runs the
      # comparisons, then stops the worker and master.
      # NOTE(review): the stop commands use the absolute path
      # /opt/apache-celeborn-0.3.0-incubating-bin, so this assumes exec.sh
      # starts in /opt - confirm.
      - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 with Celeborn
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh \
          'wget https://dlcdn.apache.org/incubator/celeborn/celeborn-0.3.0-incubating/apache-celeborn-0.3.0-incubating-bin.tgz && \
          tar xzf apache-celeborn-0.3.0-incubating-bin.tgz && cd apache-celeborn-0.3.0-incubating-bin && \
          mv ./conf/celeborn-env.sh.template ./conf/celeborn-env.sh && \
          echo -e "CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g" > ./conf/celeborn-env.sh && \
          echo -e "celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64" > ./conf/celeborn-defaults.conf \
          && bash ./sbin/start-master.sh && bash ./sbin/start-worker.sh && \
          cd /opt/gluten/tools/gluten-it && mvn clean install -Pspark-3.2,rss \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox-with-celeborn --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox-with-celeborn --benchmark-type=ds --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 && \
          bash /opt/apache-celeborn-0.3.0-incubating-bin/sbin/stop-worker.sh \
          && bash /opt/apache-celeborn-0.3.0-incubating-bin/sbin/stop-master.sh'
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh
# Job (nested under `jobs:`): CentOS 8 / Spark 3.2 benchmarks.
  # Runs the TPC-H SF1.0 and TPC-DS SF30.0 query comparisons, then the same
  # workloads again with --random-kill-tasks on the already generated data.
  centos8-test:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: centos
      # Quoted so the image tag is always a string rather than a YAML integer.
      OS_IMAGE_TAG: "8"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          source /env.sh && \
          sudo yum -y install patch && \
          cd /opt/gluten/ep/build-velox/src && \
          ./get_velox.sh --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \
          ./build_velox.sh --run_setup_script=OFF --velox_home=/opt/velox --enable_ep_cache=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON'
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          source /env.sh && \
          cd /opt/gluten/cpp && \
          ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON'
      - name: Build for Spark 3.2.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten && \
          mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests'
      - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.2 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1'
      # --skip-data-gen: relies on the data generated by the previous step.
      - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 random kill tasks
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.2 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries \
          --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 --skip-data-gen --random-kill-tasks \
          && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh queries \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1 --skip-data-gen --random-kill-tasks'
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh
# Job (nested under `jobs:`): CentOS 7 / Spark 3.2 benchmarks and low-memory runs.
  # Builds with devtoolset-9, runs the TPC-H SF1.0 and TPC-DS SF30.0 query
  # comparisons, then several parameterized low-memory TPC-DS runs
  # (q67/q95, q23a/q23b, q97). Steps named "(To be fixed)" end in `|| true`,
  # so their failures do not fail the job.
  centos7-test:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: centos
      # Quoted so the image tag is always a string rather than a YAML integer.
      OS_IMAGE_TAG: "7"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Gluten velox third party
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          yum -y install epel-release centos-release-scl patch sudo && \
          cd /opt/gluten/ep/build-velox/src && \
          source /opt/rh/devtoolset-9/enable && \
          ./get_velox.sh --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON && \
          ./build_velox.sh --run_setup_script=ON --velox_home=/opt/velox --enable_ep_cache=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON --enable_hdfs=ON'
      - name: Build Gluten CPP library
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten/cpp && \
          source /opt/rh/devtoolset-9/enable && \
          ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=ON'
      - name: Build for Spark 3.2.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          cd /opt/gluten && \
          mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests'
      - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.2 \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1'
      # The parameterized runs below pass --skip-data-gen and therefore rely
      # on the SF30.0 data generated by the step above.
      - name: TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation off
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.2 \
          && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
          --skip-data-gen -m=OffHeapExecutionMemory \
          -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
          -d=OFFHEAP_SIZE:5g,spark.memory.offHeap.size=5g \
          -d=OFFHEAP_SIZE:3g,spark.memory.offHeap.size=3g \
          -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
          -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5'
      # Known failing case: `|| true` keeps the job green.
      - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation on
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.2 \
          && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
          --skip-data-gen -m=OffHeapExecutionMemory \
          -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
          -d=OFFHEAP_SIZE:5g,spark.memory.offHeap.size=5g \
          -d=OFFHEAP_SIZE:3g,spark.memory.offHeap.size=3g \
          -d=OVER_ACQUIRE:0.3,spark.gluten.memory.overAcquiredMemoryRatio=0.3 \
          -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5' || true
      - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
          --skip-data-gen -m=OffHeapExecutionMemory \
          -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
          -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
          -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
          -d=FLUSH_MODE:DISABLED,spark.gluten.sql.columnar.backend.velox.flushablePartialAggregation=false,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
          -d=FLUSH_MODE:ABANDONED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=1.0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=0,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0 \
          -d=FLUSH_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0'
      # The case currently causes crash with "free: invalid size".
      # `|| true` keeps the job green until it is fixed.
      - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \
          GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \
          --skip-data-gen -m=OffHeapExecutionMemory \
          -d=ISOLATION:OFF,spark.gluten.memory.isolation=false \
          -d=ISOLATION:ON,spark.gluten.memory.isolation=true,spark.memory.storageFraction=0.1 \
          -d=OFFHEAP_SIZE:2g,spark.memory.offHeap.size=2g \
          -d=OFFHEAP_SIZE:1g,spark.memory.offHeap.size=1g' || true
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh
# Job (nested under `jobs:`): static (vcpkg) build on CentOS 7, tested on Ubuntu.
  # Builds Gluten with vcpkg inside a long-lived CentOS 7 build container,
  # packages it for Spark 3.2, then runs the TPC-H / TPC-DS SF1.0 comparisons
  # in fresh ubuntu:20.04 and ubuntu:22.04 containers that only get a JRE.
  static-build-centos7-test:
    runs-on: velox-self-hosted
    steps:
      - uses: actions/checkout@v4
      # Starts the build container in the background; `sleep 14400` keeps it
      # alive for up to 4 hours so later steps can `docker exec` into it.
      # `--detach` is spelled with two dashes: a single-dash `-detach` is not
      # the long option and is read as a bundle of short flags instead.
      - name: Setup docker container
        run: |
          docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \
          -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID -e NUM_THREADS=30 --detach $DOCKER_PULL_REGISTRY/gluten-te/gluten-buildenv-centos:7 \
          bash -c 'cd /opt/gluten && sleep 14400'
      - name: Build Gluten CPP library
        run: |
          docker exec -i static-build-test-$GITHUB_RUN_ID bash -c '
          source /env.sh && \
          sudo yum -y install patch && \
          cd /opt/gluten && \
          sudo -E ./dev/vcpkg/setup-build-depends.sh && \
          source ./dev/vcpkg/env.sh && \
          ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON'
      - name: Build for Spark 3.2.2
        run: |
          docker exec static-build-test-$GITHUB_RUN_ID bash -c '
          cd /opt/gluten && \
          mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests && \
          cd /opt/gluten/tools/gluten-it && \
          mvn clean install -Pspark-3.2'
      - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (ubuntu 20.04)
        run: |
          docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \
          -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 ubuntu:20.04 \
          bash -c 'apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install openjdk-8-jre -y \
          && cd /opt/gluten/tools/gluten-it \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=20g -s=1.0 --threads=32 --iterations=1'
      - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (ubuntu 22.04)
        run: |
          docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \
          -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 ubuntu:22.04 \
          bash -c 'apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install openjdk-8-jre -y \
          && cd /opt/gluten/tools/gluten-it \
          && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \
          && GLUTEN_IT_JVM_ARGS=-Xmx10G sbin/gluten-it.sh queries-compare \
          --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=20g -s=1.0 --threads=32 --iterations=1'
      # Always runs. Stops the build container and, best-effort, the TPC
      # container as well: it shares a fixed name across both TPC steps, so a
      # leftover instance (e.g. after a cancelled run) would block the next
      # `docker run --name ...-tpc`. `|| true` tolerates "no such container".
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          docker stop static-build-test-$GITHUB_RUN_ID || true
          docker stop static-build-test-$GITHUB_RUN_ID-tpc || true
# Job (nested under `jobs:`): CentOS 8 packaging smoke test.
  # Runs ./dev/package.sh inside the build container.
  build-script-test:
    runs-on: velox-self-hosted
    env:
      OS_IMAGE_NAME: centos
      # Quoted so the image tag is always a string rather than a YAML integer.
      OS_IMAGE_TAG: "8"
    steps:
      - uses: actions/checkout@v4
      - name: Setup docker container
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/checkout.sh
      - name: Build Script Test
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/exec.sh '
          source /env.sh && \
          sudo yum -y install patch && \
          cd /opt/gluten/ && \
          ./dev/package.sh'
      # Always runs, so the container is cleaned up even when a step fails.
      - name: Exit docker container
        if: ${{ always() }}
        run: |
          $PATH_TO_GLUTEN_TE/$OS_IMAGE_NAME/gha/gha-checkout/clean.sh