diff --git a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
index cfd3848d8158..56ef68db9057 100644
--- a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
+++ b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
@@ -129,6 +129,9 @@ public String getSparkMasterUrl() {
if (!System.getenv().containsKey("SPARK_HOME")) {
throw new IllegalArgumentException("SPARK_HOME not set! Please use --local if there is no local Spark build");
}
+ if (!System.getenv().containsKey("SPARK_SCALA_VERSION")) {
+ throw new IllegalArgumentException("SPARK_SCALA_VERSION not set! Please set it first or use --local instead. Example: export SPARK_SCALA_VERSION=2.12");
+ }
return String.format("local-cluster[%d,%d,%d]", lcWorkers, lcWorkerCores, Utils.byteStringAsMb(lcWorkerMem));
}
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index c092a0ebb0e6..a65324f2728d 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -17,7 +17,7 @@
1.8
${java.version}
${java.version}
- 2.12.15
+ 2.12.17
3.4.2
2.12
3
@@ -144,24 +144,28 @@
3.2.2
+ 2.12.15
spark-3.3
3.3.1
+ 2.12.15
spark-3.4
3.4.2
+ 2.12.17
spark-3.5
3.5.1
+ 2.12.18
diff --git a/tools/gluten-it/sbin/gluten-it.sh b/tools/gluten-it/sbin/gluten-it.sh
index fda117417936..b21038ccdef6 100755
--- a/tools/gluten-it/sbin/gluten-it.sh
+++ b/tools/gluten-it/sbin/gluten-it.sh
@@ -28,6 +28,14 @@ fi
JAR_PATH=$LIB_DIR/*
+EMBEDDED_SPARK_HOME="$BASEDIR/../spark-home"
+# Keep a non-empty SPARK_HOME / SPARK_SCALA_VERSION from the environment; otherwise use the defaults below.
+export SPARK_HOME="${SPARK_HOME:-$EMBEDDED_SPARK_HOME}"
+export SPARK_SCALA_VERSION="${SPARK_SCALA_VERSION:-2.12}"
+# Print the effective values.
+echo "SPARK_HOME set at [$SPARK_HOME]."
+echo "SPARK_SCALA_VERSION set at [$SPARK_SCALA_VERSION]."
+
$JAVA_HOME/bin/java $GLUTEN_IT_JVM_ARGS \
-XX:+IgnoreUnrecognizedVMOptions \
--add-opens=java.base/java.lang=ALL-UNNAMED \
diff --git a/tools/gluten-it/spark-home/jars b/tools/gluten-it/spark-home/jars
new file mode 120000
index 000000000000..2939305caa54
--- /dev/null
+++ b/tools/gluten-it/spark-home/jars
@@ -0,0 +1 @@
+../package/target/lib
\ No newline at end of file
diff --git a/tools/gluten-te/centos/shared.sh b/tools/gluten-te/centos/shared.sh
index d14b35bf9d0f..0253c16cef5f 100755
--- a/tools/gluten-te/centos/shared.sh
+++ b/tools/gluten-te/centos/shared.sh
@@ -24,6 +24,13 @@ source "$SHARED_BASEDIR/defaults.conf"
export DOCKER_BUILDKIT=1
export BUILDKIT_PROGRESS=plain
+# Validate envs. The message is single-quoted on purpose so the literal name $HOME is printed; it goes to stderr because it is a diagnostic.
+if [ -z "$HOME" ]
+then
+ echo 'Environment variable $HOME not found. Aborting.' >&2
+ exit 1
+fi
+
# Set operating system
OS_IMAGE_NAME=${OS_IMAGE_NAME:-$DEFAULT_OS_IMAGE_NAME}
diff --git a/tools/gluten-te/ubuntu/dockerfile-buildenv b/tools/gluten-te/ubuntu/dockerfile-buildenv
index 41fc202395e8..e520fd295118 100644
--- a/tools/gluten-te/ubuntu/dockerfile-buildenv
+++ b/tools/gluten-te/ubuntu/dockerfile-buildenv
@@ -65,7 +65,7 @@ RUN cat /root/.m2/settings.xml
## APT dependencies
# Update, then install essentials
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo locales wget tar tzdata git ccache ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev curl zip unzip pkg-config autoconf-archive bison flex
# install HBM dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y autoconf automake g++ libnuma-dev libtool numactl unzip libdaxctl-dev
@@ -103,6 +103,14 @@ RUN set-login-env "LANG=en_US.UTF-8"
RUN set-login-env "LANGUAGE=en_US:en"
RUN set-login-env "LC_ALL=en_US.UTF-8"
+# Install CMake 3.28.3 from the upstream Kitware installer (cmake is no longer in the apt package list).
+RUN cd /opt && wget https://github.com/Kitware/CMake/releases/download/v3.28.3/cmake-3.28.3-linux-x86_64.sh \
+ && mkdir cmake \
+ && bash cmake-3.28.3-linux-x86_64.sh --skip-license --prefix=/opt/cmake && rm cmake-3.28.3-linux-x86_64.sh \
+ && ln -s /opt/cmake/bin/cmake /usr/bin/cmake
+# Fail the image build early if the cmake symlink is not usable.
+RUN cmake --version
+
# Build & install Spark 3.2.2
RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz
RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C spark322 --strip-components=1
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
index 27e97467d6c0..cd76e74e7767 100644
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
@@ -15,7 +15,10 @@ The folder contains script code to build `libvelox.so` and `libgluten.so` in doc
export HTTP_PROXY_HOST=myproxy.example.com
export HTTP_PROXY_PORT=55555
-# 2. Build the C++ libs in a ubuntu 20.04 docker container.
+# 2. Set the following environment variable so that Gluten's modified Arrow JARs get installed on the host.
+export MOUNT_MAVEN_CACHE=ON
+
+# 3. Build the C++ libs in an Ubuntu 20.04 Docker container.
# Note, this command could take much longer time to finish if it's never run before.
# After the first run, the essential build environment will be cached in docker builder.
#
@@ -24,10 +27,10 @@ export HTTP_PROXY_PORT=55555
cd gluten/
tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
-# 3. Check the built libs.
+# 4. Check the built libs.
ls -l cpp/build/releases/
-# 4. If you intend to build Gluten's bundled jar, continue running subsequent Maven commands.
+# 5. If you intend to build Gluten's bundled jar, continue running subsequent Maven commands.
# For example:
mvn clean install -P spark-3.4,backends-velox -DskipTests
```
\ No newline at end of file
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
new file mode 100755
index 000000000000..2648725ce0b5
--- /dev/null
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -ex
+# Resolve this script's directory; the expansions are quoted so the path is not word-split or glob-expanded.
+BASEDIR=$(readlink -f "$(dirname "$0")")
+# Invoke the sibling run.sh with the default set of build flags.
+"$BASEDIR"/run.sh --enable_vcpkg=ON --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
index 8a0f71bbcb08..4d28d45211c7 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
@@ -22,4 +22,6 @@ TIMESTAMP=$(date +%s)
export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP -v $BASEDIR/scripts:/opt/scripts"
-$BASEDIR/../../cbash-mount.sh '/opt/scripts/all.sh'
+BASH_ARGS="$*"
+# "$*" joins all arguments into one space-separated string; it is appended to the command string given to cbash-mount.sh.
+$BASEDIR/../../cbash-mount.sh "/opt/scripts/all.sh $BASH_ARGS"
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
index 26742355232b..18dd92a343f9 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
@@ -44,6 +44,8 @@ function retry {
cd /opt/gluten
retry apt-get update
-retry apt-get install -y curl zip unzip tar pkg-config autoconf-archive bison flex
-retry source ./dev/vcpkg/env.sh
-retry dev/builddeps-veloxbe.sh --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
+retry apt-get install -y --dry-run # We now have all essentials installed in image.
+
+# Forward the caller's arguments as-is; "$@" avoids the re-splitting and glob
+# expansion that an unquoted scalar copy of the arguments would be subject to.
+retry dev/builddeps-veloxbe.sh "$@"
diff --git a/tools/gluten-te/ubuntu/shared.sh b/tools/gluten-te/ubuntu/shared.sh
index d14b35bf9d0f..0253c16cef5f 100755
--- a/tools/gluten-te/ubuntu/shared.sh
+++ b/tools/gluten-te/ubuntu/shared.sh
@@ -24,6 +24,13 @@ source "$SHARED_BASEDIR/defaults.conf"
export DOCKER_BUILDKIT=1
export BUILDKIT_PROGRESS=plain
+# Validate envs. The message is single-quoted on purpose so the literal name $HOME is printed; it goes to stderr because it is a diagnostic.
+if [ -z "$HOME" ]
+then
+ echo 'Environment variable $HOME not found. Aborting.' >&2
+ exit 1
+fi
+
# Set operating system
OS_IMAGE_NAME=${OS_IMAGE_NAME:-$DEFAULT_OS_IMAGE_NAME}