diff --git a/.github/workflows/util/install_spark_resources.sh b/.github/workflows/util/install_spark_resources.sh
index e1645b170dd5..0afa69958217 100755
--- a/.github/workflows/util/install_spark_resources.sh
+++ b/.github/workflows/util/install_spark_resources.sh
@@ -50,13 +50,13 @@ case "$1" in
   3.4)
     # Spark-3.4
     cd ${INSTALL_DIR} && \
-    wget -nv https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3.tgz && \
-    tar --strip-components=1 -xf spark-3.4.2-bin-hadoop3.tgz spark-3.4.2-bin-hadoop3/jars/ && \
-    rm -rf spark-3.4.2-bin-hadoop3.tgz && \
+    wget -nv https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz && \
+    tar --strip-components=1 -xf spark-3.4.3-bin-hadoop3.tgz spark-3.4.3-bin-hadoop3/jars/ && \
+    rm -rf spark-3.4.3-bin-hadoop3.tgz && \
     mkdir -p ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
     mv jars ${INSTALL_DIR}/shims/spark34/spark_home/assembly/target/scala-2.12 && \
-    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.2.tar.gz && \
-    tar --strip-components=1 -xf v3.4.2.tar.gz spark-3.4.2/sql/core/src/test/resources/ && \
+    wget -nv https://github.com/apache/spark/archive/refs/tags/v3.4.3.tar.gz && \
+    tar --strip-components=1 -xf v3.4.3.tar.gz spark-3.4.3/sql/core/src/test/resources/ && \
     mkdir -p shims/spark34/spark_home/ && \
     mv sql shims/spark34/spark_home/
     ;;
diff --git a/.github/workflows/velox_backend.yml b/.github/workflows/velox_backend.yml
index a4fd3e440365..b70932092829 100644
--- a/.github/workflows/velox_backend.yml
+++ b/.github/workflows/velox_backend.yml
@@ -760,15 +760,15 @@ jobs:
         working-directory: ${{ github.workspace }}
         run: |
           mkdir -p '${{ env.CCACHE_DIR }}'
-      - name: Prepare spark.test.home for Spark 3.4.2 (other tests)
+      - name: Prepare spark.test.home for Spark 3.4.3 (other tests)
         run: |
           bash .github/workflows/util/install_spark_resources.sh 3.4
           dnf module -y install python39 && \
           alternatives --set python3 /usr/bin/python3.9 && \
           pip3 install setuptools && \
-          pip3 install pyspark==3.4.2 cython && \
+          pip3 install pyspark==3.4.3 cython && \
           pip3 install pandas pyarrow
-      - name: Build and Run unit test for Spark 3.4.2 (other tests)
+      - name: Build and Run unit test for Spark 3.4.3 (other tests)
         run: |
           cd $GITHUB_WORKSPACE/
           export SPARK_SCALA_VERSION=2.12
@@ -818,10 +818,10 @@ jobs:
         working-directory: ${{ github.workspace }}
         run: |
           mkdir -p '${{ env.CCACHE_DIR }}'
-      - name: Prepare spark.test.home for Spark 3.4.2 (slow tests)
+      - name: Prepare spark.test.home for Spark 3.4.3 (slow tests)
         run: |
           bash .github/workflows/util/install_spark_resources.sh 3.4
-      - name: Build and Run unit test for Spark 3.4.2 (slow tests)
+      - name: Build and Run unit test for Spark 3.4.3 (slow tests)
         run: |
           cd $GITHUB_WORKSPACE/
           $MVN_CMD clean test -Pspark-3.4 -Pbackends-velox -Pceleborn -Piceberg -Pdelta -Pspark-ut -Phudi \
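The CI steps above stage the Spark jars where the `spark.test.home` steps expect to find them. A minimal sketch for replaying that staging locally and confirming the staged jars really are 3.4.3 (`INSTALL_DIR` here is just a scratch directory standing in for the workflow's value, which is an assumption):

```bash
# Replay the 3.4 branch of install_spark_resources.sh in a scratch directory.
INSTALL_DIR=$(mktemp -d)
cd "${INSTALL_DIR}"
wget -nv https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz
tar --strip-components=1 -xf spark-3.4.3-bin-hadoop3.tgz spark-3.4.3-bin-hadoop3/jars/
mkdir -p shims/spark34/spark_home/assembly/target/scala-2.12
mv jars shims/spark34/spark_home/assembly/target/scala-2.12
# The staged Spark core jar should now carry the bumped version.
ls shims/spark34/spark_home/assembly/target/scala-2.12/jars | grep 'spark-core_2.12-3\.4\.3'
```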
diff --git a/docs/get-started/Velox.md b/docs/get-started/Velox.md
index 2f7dae4fb3dd..a0589fd408ea 100644
--- a/docs/get-started/Velox.md
+++ b/docs/get-started/Velox.md
@@ -7,19 +7,19 @@ parent: Getting-Started
 
 # Supported Version
 
-| Type  | Version                         |
-|-------|---------------------------------|
-| Spark | 3.2.2, 3.3.1, 3.4.2, 3.5.1      |
-| OS    | Ubuntu20.04/22.04, Centos7/8    |
-| jdk   | openjdk8/jdk17                  |
-| scala | 2.12                            |
+| Type  | Version                      |
+|-------|------------------------------|
+| Spark | 3.2.2, 3.3.1, 3.4.3, 3.5.1   |
+| OS    | Ubuntu20.04/22.04, Centos7/8 |
+| jdk   | openjdk8/jdk17               |
+| scala | 2.12                         |
 
 # Prerequisite
 
 Currently, the Gluten+Velox backend is only tested on **Ubuntu20.04/Ubuntu22.04/Centos7/Centos8**. Support for other OSes is still in progress. The long-term goal is to support several common OSes and conda env deployment.
-Currently, the officially supported Spark versions are 3.2.2, 3.3.1, 3.4.2 and 3.5.1.
+Currently, the officially supported Spark versions are 3.2.2, 3.3.1, 3.4.3 and 3.5.1.
 
 We need to set up the `JAVA_HOME` env. Currently, Gluten supports **java 8** and **java 17**.
@@ -428,7 +428,7 @@ After the set-up, you can now build Gluten with HBM. Below command is used to en
 
 ```bash
 cd /path/to/gluten
-## The script builds four jars for spark 3.2.2, 3.3.1, 3.4.2 and 3.5.1.
+## The script builds four jars for spark 3.2.2, 3.3.1, 3.4.3 and 3.5.1.
 ./dev/buildbundle-veloxbe.sh --enable_hbm=ON
 ```
@@ -514,7 +514,7 @@ exit
 
 ```bash
 cd /path/to/gluten
-## The script builds four jars for spark 3.2.2, 3.3.1, 3.4.2 and 3.5.1.
+## The script builds four jars for spark 3.2.2, 3.3.1, 3.4.3 and 3.5.1.
 ./dev/buildbundle-veloxbe.sh --enable_qat=ON
 ```
@@ -610,7 +610,7 @@ After the set-up, you can now build Gluten with IAA. Below command is used to en
 
 ```bash
 cd /path/to/gluten
-## The script builds four jars for spark 3.2.2, 3.3.1, 3.4.2 and 3.5.1.
+## The script builds four jars for spark 3.2.2, 3.3.1, 3.4.3 and 3.5.1.
 ./dev/buildbundle-veloxbe.sh --enable_iaa=ON
 ```
diff --git a/docs/get-started/build-guide.md b/docs/get-started/build-guide.md
index d1b1533ad1e3..f281ede14098 100644
--- a/docs/get-started/build-guide.md
+++ b/docs/get-started/build-guide.md
@@ -76,5 +76,5 @@ Its name pattern is `gluten--bundle-spark_<
 |---------------|----------------------|----------------------|
 | 3.2.2         | 3.2                  | 2.12                 |
 | 3.3.1         | 3.3                  | 2.12                 |
-| 3.4.2         | 3.4                  | 2.12                 |
+| 3.4.3         | 3.4                  | 2.12                 |
 | 3.5.1         | 3.5                  | 2.12                 |
diff --git a/pom.xml b/pom.xml
index f5fd01d12232..52ff8daaf12d 100644
--- a/pom.xml
+++ b/pom.xml
@@ -59,7 +59,7 @@
     2.12.15
     3
     3.4
-    3.4.2
+    3.4.3
     spark34
     spark-sql-columnar-shims-spark34
     1.5.0
@@ -306,7 +306,7 @@
       3.4
       spark34
       spark-sql-columnar-shims-spark34
-      3.4.2
+      3.4.3
       1.5.0
       delta-core
       2.4.0
diff --git a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala
index 1cd5fbdf6cb7..2e642366e011 100644
--- a/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala
+++ b/shims/spark34/src/main/scala/org/apache/gluten/sql/shims/spark34/SparkShimProvider.scala
@@ -20,7 +20,7 @@ import org.apache.gluten.sql.shims.{SparkShimDescriptor, SparkShims}
 import org.apache.gluten.sql.shims.spark34.SparkShimProvider.DESCRIPTOR
 
 object SparkShimProvider {
-  val DESCRIPTOR = SparkShimDescriptor(3, 4, 2)
+  val DESCRIPTOR = SparkShimDescriptor(3, 4, 3)
 }
 
 class SparkShimProvider extends org.apache.gluten.sql.shims.SparkShimProvider {
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index cff7a78eb097..b8930dd4a4f1 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -18,7 +18,7 @@
     ${java.version}
     ${java.version}
     2.12.17
-    3.4.2
+    3.4.3
     2.12
     3
     0.3.2-incubating
@@ -157,7 +157,7 @@
       spark-3.4
-      3.4.2
+      3.4.3
       2.12.17
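The same version string is pinned in several independent places (the root `pom.xml`, `tools/gluten-it/pom.xml`, the shim descriptor, plus the docs and CI files above), so a repo-wide search is a cheap way to confirm nothing was missed. A minimal sketch, run from the repository root:

```bash
# Any remaining hit outside changelogs or release notes suggests a missed pin.
git grep -n '3\.4\.2' || echo "no stale 3.4.2 references"
```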
diff --git a/tools/gluten-te/centos/dockerfile-buildenv b/tools/gluten-te/centos/dockerfile-buildenv
index 12e75a30c65f..5bd2ed74a14b 100755
--- a/tools/gluten-te/centos/dockerfile-buildenv
+++ b/tools/gluten-te/centos/dockerfile-buildenv
@@ -66,9 +66,9 @@ RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C spark
 RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz
 RUN cd /opt && mkdir spark331 && tar -xvf spark-3.3.1-bin-hadoop3.tgz -C spark331 --strip-components=1
 
-# Build & install Spark 3.4.2
-RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark342 && tar -xvf spark-3.4.2-bin-hadoop3.tgz -C spark342 --strip-components=1
+# Build & install Spark 3.4.3
+RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz
+RUN cd /opt && mkdir spark343 && tar -xvf spark-3.4.3-bin-hadoop3.tgz -C spark343 --strip-components=1
 
 # Build & install Spark 3.5.1
 RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz
diff --git a/tools/gluten-te/ubuntu/dockerfile-buildenv b/tools/gluten-te/ubuntu/dockerfile-buildenv
index e520fd295118..1449ed46a036 100644
--- a/tools/gluten-te/ubuntu/dockerfile-buildenv
+++ b/tools/gluten-te/ubuntu/dockerfile-buildenv
@@ -119,9 +119,9 @@ RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C spark
 RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.3.1/spark-3.3.1-bin-hadoop3.tgz
 RUN cd /opt && mkdir spark331 && tar -xvf spark-3.3.1-bin-hadoop3.tgz -C spark331 --strip-components=1
 
-# Build & install Spark 3.4.2
-RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.4.2/spark-3.4.2-bin-hadoop3.tgz
-RUN cd /opt && mkdir spark342 && tar -xvf spark-3.4.2-bin-hadoop3.tgz -C spark342 --strip-components=1
+# Build & install Spark 3.4.3
+RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz
+RUN cd /opt && mkdir spark343 && tar -xvf spark-3.4.3-bin-hadoop3.tgz -C spark343 --strip-components=1
 
 # Build & install Spark 3.5.1
 RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.5.1/spark-3.5.1-bin-hadoop3.tgz
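When bumping the tarball URLs in the Dockerfiles above, it is also worth checking the new artifact against the digest Apache publishes next to it. A minimal sketch; the `.sha512` suffix follows the usual Apache dist layout (an assumption here), and the digests are compared by eye because the published file's format varies:

```bash
cd /tmp
wget -nv https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz
wget -nv https://archive.apache.org/dist/spark/spark-3.4.3/spark-3.4.3-bin-hadoop3.tgz.sha512
sha512sum spark-3.4.3-bin-hadoop3.tgz   # should match the published digest
cat spark-3.4.3-bin-hadoop3.tgz.sha512
```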