diff --git a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
index cfd3848d8158..56ef68db9057 100644
--- a/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
+++ b/tools/gluten-it/common/src/main/java/org/apache/gluten/integration/command/SparkRunModes.java
@@ -129,6 +129,9 @@ public String getSparkMasterUrl() {
       if (!System.getenv().containsKey("SPARK_HOME")) {
         throw new IllegalArgumentException("SPARK_HOME not set! Please use --local if there is no local Spark build");
       }
+      if (!System.getenv().containsKey("SPARK_SCALA_VERSION")) {
+        throw new IllegalArgumentException("SPARK_SCALA_VERSION not set! Please set it first or use --local instead. Example: export SPARK_SCALA_VERSION=2.12");
+      }
       return String.format("local-cluster[%d,%d,%d]", lcWorkers, lcWorkerCores, Utils.byteStringAsMb(lcWorkerMem));
     }
 
diff --git a/tools/gluten-it/pom.xml b/tools/gluten-it/pom.xml
index c092a0ebb0e6..a65324f2728d 100644
--- a/tools/gluten-it/pom.xml
+++ b/tools/gluten-it/pom.xml
@@ -17,7 +17,7 @@
 1.8
 ${java.version}
 ${java.version}
- 2.12.15
+ 2.12.17
 3.4.2
 2.12
 3
@@ -144,24 +144,28 @@
 3.2.2
+ 2.12.15
 spark-3.3
 3.3.1
+ 2.12.15
 spark-3.4
 3.4.2
+ 2.12.17
 spark-3.5
 3.5.1
+ 2.12.18
diff --git a/tools/gluten-it/sbin/gluten-it.sh b/tools/gluten-it/sbin/gluten-it.sh
index fda117417936..b21038ccdef6 100755
--- a/tools/gluten-it/sbin/gluten-it.sh
+++ b/tools/gluten-it/sbin/gluten-it.sh
@@ -28,6 +28,14 @@ fi
 
 JAR_PATH=$LIB_DIR/*
 
+EMBEDDED_SPARK_HOME="$BASEDIR/../spark-home"
+
+export SPARK_HOME="${SPARK_HOME:-$EMBEDDED_SPARK_HOME}"
+export SPARK_SCALA_VERSION="${SPARK_SCALA_VERSION:-2.12}"
+
+echo "SPARK_HOME set at [$SPARK_HOME]."
+echo "SPARK_SCALA_VERSION set at [$SPARK_SCALA_VERSION]."
+
 $JAVA_HOME/bin/java $GLUTEN_IT_JVM_ARGS \
     -XX:+IgnoreUnrecognizedVMOptions \
     --add-opens=java.base/java.lang=ALL-UNNAMED \
diff --git a/tools/gluten-it/spark-home/jars b/tools/gluten-it/spark-home/jars
new file mode 120000
index 000000000000..2939305caa54
--- /dev/null
+++ b/tools/gluten-it/spark-home/jars
@@ -0,0 +1 @@
+../package/target/lib
\ No newline at end of file
diff --git a/tools/gluten-te/centos/shared.sh b/tools/gluten-te/centos/shared.sh
index d14b35bf9d0f..0253c16cef5f 100755
--- a/tools/gluten-te/centos/shared.sh
+++ b/tools/gluten-te/centos/shared.sh
@@ -24,6 +24,13 @@ source "$SHARED_BASEDIR/defaults.conf"
 export DOCKER_BUILDKIT=1
 export BUILDKIT_PROGRESS=plain
 
+# Validate envs
+if [ -z "$HOME" ]
+then
+  echo 'Environment variable $HOME not found. Aborting.'
+  exit 1
+fi
+
 # Set operating system
 OS_IMAGE_NAME=${OS_IMAGE_NAME:-$DEFAULT_OS_IMAGE_NAME}
 
diff --git a/tools/gluten-te/ubuntu/dockerfile-buildenv b/tools/gluten-te/ubuntu/dockerfile-buildenv
index 41fc202395e8..e520fd295118 100644
--- a/tools/gluten-te/ubuntu/dockerfile-buildenv
+++ b/tools/gluten-te/ubuntu/dockerfile-buildenv
@@ -65,7 +65,7 @@ RUN cat /root/.m2/settings.xml
 ## APT dependencies
 
 # Update, then install essentials
-RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo locales wget tar tzdata git ccache ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev curl zip unzip pkg-config autoconf-archive bison flex
 
 # install HBM dependencies
 RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y autoconf automake g++ libnuma-dev libtool numactl unzip libdaxctl-dev
@@ -103,6 +103,14 @@ RUN
set-login-env "LANG=en_US.UTF-8"
 RUN set-login-env "LANGUAGE=en_US:en"
 RUN set-login-env "LC_ALL=en_US.UTF-8"
 
+# Install CMake
+RUN cd /opt && wget https://github.com/Kitware/CMake/releases/download/v3.28.3/cmake-3.28.3-linux-x86_64.sh \
+    && mkdir cmake \
+    && bash cmake-3.28.3-linux-x86_64.sh --skip-license --prefix=/opt/cmake \
+    && rm cmake-3.28.3-linux-x86_64.sh && ln -s /opt/cmake/bin/cmake /usr/bin/cmake
+
+RUN cmake --version
+
 # Build & install Spark 3.2.2
 RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz
 RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C spark322 --strip-components=1
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
index 27e97467d6c0..cd76e74e7767 100644
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/README.md
@@ -15,7 +15,10 @@ The folder contains script code to build `libvelox.so` and `libgluten.so` in doc
 export HTTP_PROXY_HOST=myproxy.example.com
 export HTTP_PROXY_PORT=55555
 
-# 2. Build the C++ libs in a ubuntu 20.04 docker container.
+# 2. Set the following environment variable to install Gluten's modified Arrow JARs on the host.
+export MOUNT_MAVEN_CACHE=ON
+
+# 3. Build the C++ libs in a ubuntu 20.04 docker container.
 # Note, this command could take much longer time to finish if it's never run before.
 # After the first run, the essential build environment will be cached in docker builder.
 #
@@ -24,10 +27,10 @@ export HTTP_PROXY_PORT=55555
 cd gluten/
 tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
 
-# 3. Check the built libs.
+# 4. Check the built libs.
 ls -l cpp/build/releases/
 
-# 4. If you intend to build Gluten's bundled jar, continue running subsequent Maven commands.
+# 5. If you intend to build Gluten's bundled jar, continue running subsequent Maven commands.
# For example: mvn clean install -P spark-3.4,backends-velox -DskipTests ``` \ No newline at end of file diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh new file mode 100755 index 000000000000..2648725ce0b5 --- /dev/null +++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run-default.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+set -ex
+
+BASEDIR=$(readlink -f "$(dirname "$0")")
+
+"$BASEDIR/run.sh" --enable_vcpkg=ON --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
index 8a0f71bbcb08..4d28d45211c7 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh
@@ -22,4 +22,6 @@ TIMESTAMP=$(date +%s)
 
 export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP -v $BASEDIR/scripts:/opt/scripts"
 
-$BASEDIR/../../cbash-mount.sh '/opt/scripts/all.sh'
+BASH_ARGS="$*"
+
+$BASEDIR/../../cbash-mount.sh "/opt/scripts/all.sh $BASH_ARGS"
diff --git a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
index 26742355232b..18dd92a343f9 100755
--- a/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
+++ b/tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/scripts/all.sh
@@ -44,6 +44,8 @@ function retry {
 
 cd /opt/gluten
 retry apt-get update
-retry apt-get install -y curl zip unzip tar pkg-config autoconf-archive bison flex
-retry source ./dev/vcpkg/env.sh
-retry dev/builddeps-veloxbe.sh --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
+retry apt-get install -y --dry-run # We now have all essentials installed in image.
+
+# Forward the caller's arguments verbatim; "$@" keeps each argument as a single word.
+
+retry dev/builddeps-veloxbe.sh "$@"
diff --git a/tools/gluten-te/ubuntu/shared.sh b/tools/gluten-te/ubuntu/shared.sh
index d14b35bf9d0f..0253c16cef5f 100755
--- a/tools/gluten-te/ubuntu/shared.sh
+++ b/tools/gluten-te/ubuntu/shared.sh
@@ -24,6 +24,13 @@ source "$SHARED_BASEDIR/defaults.conf"
 export DOCKER_BUILDKIT=1
 export BUILDKIT_PROGRESS=plain
 
+# Validate envs
+if [ -z "$HOME" ]
+then
+  echo 'Environment variable $HOME not found. Aborting.'
+  exit 1
+fi
+
 # Set operating system
 OS_IMAGE_NAME=${OS_IMAGE_NAME:-$DEFAULT_OS_IMAGE_NAME}
 