Skip to content

Commit

Permalink
[VL] Minor improvements on gluten-it / gluten-te toolchains (#6476)
Browse files Browse the repository at this point in the history
  • Loading branch information
zhztheplayer authored Jul 18, 2024
1 parent 2c67843 commit a71e609
Show file tree
Hide file tree
Showing 11 changed files with 75 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,9 @@ public String getSparkMasterUrl() {
if (!System.getenv().containsKey("SPARK_HOME")) {
throw new IllegalArgumentException("SPARK_HOME not set! Please use --local if there is no local Spark build");
}
if (!System.getenv().containsKey("SPARK_SCALA_VERSION")) {
throw new IllegalArgumentException("SPARK_SCALA_VERSION not set! Please set it first or use --local instead. Example: export SPARK_SCALA_VERSION=2.12");
}
return String.format("local-cluster[%d,%d,%d]", lcWorkers, lcWorkerCores, Utils.byteStringAsMb(lcWorkerMem));
}

Expand Down
6 changes: 5 additions & 1 deletion tools/gluten-it/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
<java.version>1.8</java.version>
<maven.compiler.source>${java.version}</maven.compiler.source>
<maven.compiler.target>${java.version}</maven.compiler.target>
<scala.library.version>2.12.15</scala.library.version>
<scala.library.version>2.12.17</scala.library.version>
<spark.version>3.4.2</spark.version>
<scala.binary.version>2.12</scala.binary.version>
<spark.major.version>3</spark.major.version>
Expand Down Expand Up @@ -144,24 +144,28 @@
</activation>
<properties>
<spark.version>3.2.2</spark.version>
<scala.library.version>2.12.15</scala.library.version>
</properties>
</profile>
<profile>
<id>spark-3.3</id>
<properties>
<spark.version>3.3.1</spark.version>
<scala.library.version>2.12.15</scala.library.version>
</properties>
</profile>
<profile>
<id>spark-3.4</id>
<properties>
<spark.version>3.4.2</spark.version>
<scala.library.version>2.12.17</scala.library.version>
</properties>
</profile>
<profile>
<id>spark-3.5</id>
<properties>
<spark.version>3.5.1</spark.version>
<scala.library.version>2.12.18</scala.library.version>
</properties>
</profile>
<profile>
Expand Down
8 changes: 8 additions & 0 deletions tools/gluten-it/sbin/gluten-it.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ fi

# Path pattern for everything under the library directory. The `*` is stored
# literally here, because assignments are not subject to pathname expansion.
JAR_PATH="${LIB_DIR}/*"

# Fallback Spark home located next to this tool's base directory.
EMBEDDED_SPARK_HOME="${BASEDIR}/../spark-home"

# Keep any non-empty value supplied by the caller; otherwise apply the defaults,
# then publish both variables to child processes.
: "${SPARK_HOME:=${EMBEDDED_SPARK_HOME}}"
: "${SPARK_SCALA_VERSION:=2.12}"
export SPARK_HOME SPARK_SCALA_VERSION

# Report the effective settings.
printf '%s\n' \
  "SPARK_HOME set at [${SPARK_HOME}]." \
  "SPARK_SCALA_VERSION set at [${SPARK_SCALA_VERSION}]."

$JAVA_HOME/bin/java $GLUTEN_IT_JVM_ARGS \
-XX:+IgnoreUnrecognizedVMOptions \
--add-opens=java.base/java.lang=ALL-UNNAMED \
Expand Down
1 change: 1 addition & 0 deletions tools/gluten-it/spark-home/jars
7 changes: 7 additions & 0 deletions tools/gluten-te/centos/shared.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ source "$SHARED_BASEDIR/defaults.conf"
export DOCKER_BUILDKIT=1
export BUILDKIT_PROGRESS=plain

# Validate envs
# Abort when HOME is unset or empty. The `${HOME:-}` form keeps the check
# working even if the calling shell runs with `set -u`.
if [ -z "${HOME:-}" ]
then
  # Diagnostics belong on stderr so they are not mixed into captured stdout.
  echo 'Environment variable $HOME not found. Aborting.' >&2
  exit 1
fi

# Set operating system
OS_IMAGE_NAME=${OS_IMAGE_NAME:-$DEFAULT_OS_IMAGE_NAME}

Expand Down
10 changes: 9 additions & 1 deletion tools/gluten-te/ubuntu/dockerfile-buildenv
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ RUN cat /root/.m2/settings.xml
## APT dependencies

# Update, then install essentials
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo locales wget tar tzdata git ccache ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev curl zip unzip tar pkg-config autoconf-archive bison flex

# install HBM dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y autoconf automake g++ libnuma-dev libtool numactl unzip libdaxctl-dev
Expand Down Expand Up @@ -103,6 +103,14 @@ RUN set-login-env "LANG=en_US.UTF-8"
RUN set-login-env "LANGUAGE=en_US:en"
RUN set-login-env "LC_ALL=en_US.UTF-8"

# Install CMake
# Downloads the Kitware self-extracting installer for CMake 3.28.3, unpacks it
# into /opt/cmake and links the binary into /usr/bin. The installer is removed
# in the same layer so it does not stay in the image; `mkdir -p` and `ln -sf`
# keep the step from failing if the directory or link already exists.
RUN cd /opt && wget https://github.com/Kitware/CMake/releases/download/v3.28.3/cmake-3.28.3-linux-x86_64.sh \
    && mkdir -p cmake \
    && bash cmake-3.28.3-linux-x86_64.sh --skip-license --prefix=/opt/cmake \
    && rm -f cmake-3.28.3-linux-x86_64.sh \
    && ln -sf /opt/cmake/bin/cmake /usr/bin/cmake

# Fail the image build early if the cmake found on PATH cannot run.
RUN cmake --version

# Build & install Spark 3.2.2
RUN cd /opt && wget https://archive.apache.org/dist/spark/spark-3.2.2/spark-3.2.2-bin-hadoop3.2.tgz
RUN cd /opt && mkdir spark322 && tar -xvf spark-3.2.2-bin-hadoop3.2.tgz -C spark322 --strip-components=1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,10 @@ The folder contains script code to build `libvelox.so` and `libgluten.so` in doc
export HTTP_PROXY_HOST=myproxy.example.com
export HTTP_PROXY_PORT=55555

# 2. Build the C++ libs in a ubuntu 20.04 docker container.
# 2. Set the following environment variable to install Gluten's modified Arrow JARs on the host.
export MOUNT_MAVEN_CACHE=ON

# 3. Build the C++ libs in an Ubuntu 20.04 Docker container.
# Note: this command can take much longer to finish the first time it is run.
# After the first run, the essential build environment is cached by the Docker builder.
#
Expand All @@ -24,10 +27,10 @@ export HTTP_PROXY_PORT=55555
cd gluten/
tools/gluten-te/ubuntu/examples/buildhere-veloxbe-portable-libs/run.sh

# 3. Check the built libs.
# 4. Check the built libs.
ls -l cpp/build/releases/

# 4. If you intend to build Gluten's bundled jar, continue running subsequent Maven commands.
# 5. If you intend to build Gluten's bundled jar, continue running subsequent Maven commands.
# For example:
mvn clean install -P spark-3.4,backends-velox -DskipTests
```
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Trace every command and stop at the first failure.
set -ex

# Absolute path of the directory containing this script. Both expansions are
# quoted so a checkout path with whitespace or glob characters stays one word.
BASEDIR=$(readlink -f "$(dirname "$0")")

# Delegate to the sibling run.sh with a fixed option set: vcpkg enabled, tests
# and benchmarks disabled, S3/GCS/HDFS/ABFS support enabled.
"$BASEDIR/run.sh" --enable_vcpkg=ON --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,6 @@ TIMESTAMP=$(date +%s)

export EXTRA_DOCKER_OPTIONS="--name buildhere-veloxbe-portable-libs-$TIMESTAMP -v $BASEDIR/scripts:/opt/scripts"

$BASEDIR/../../cbash-mount.sh '/opt/scripts/all.sh'
# Join all arguments into one space-separated string. cbash-mount.sh is handed
# a single command string, so "$*" (not "$@") is intentional here.
# NOTE(review): an argument containing whitespace loses its grouping once
# joined; confirm how cbash-mount.sh interprets the string if that matters.
BASH_ARGS="$*"

# Quote the script path so a checkout directory with spaces still resolves.
"$BASEDIR/../../cbash-mount.sh" "/opt/scripts/all.sh $BASH_ARGS"
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ function retry {

cd /opt/gluten
retry apt-get update
retry apt-get install -y curl zip unzip tar pkg-config autoconf-archive bison flex
retry source ./dev/vcpkg/env.sh
retry dev/builddeps-veloxbe.sh --build_tests=OFF --build_benchmarks=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON
retry apt-get install -y --dry-run # We now have all essentials installed in image.

# Flattened copy of the arguments, kept in case later code reads BASH_ARGS.
# "$*" makes the joining explicit instead of relying on `VAR=$@`.
BASH_ARGS="$*"

# Forward the arguments as received. "$@" preserves each argument as one word
# and avoids the re-splitting and glob expansion an unquoted $BASH_ARGS causes.
retry dev/builddeps-veloxbe.sh "$@"
7 changes: 7 additions & 0 deletions tools/gluten-te/ubuntu/shared.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@ source "$SHARED_BASEDIR/defaults.conf"
export DOCKER_BUILDKIT=1
export BUILDKIT_PROGRESS=plain

# Validate envs
# Abort when HOME is unset or empty. The `${HOME:-}` form keeps the check
# working even if the calling shell runs with `set -u`.
if [ -z "${HOME:-}" ]
then
  # Diagnostics belong on stderr so they are not mixed into captured stdout.
  echo 'Environment variable $HOME not found. Aborting.' >&2
  exit 1
fi

# Set operating system
OS_IMAGE_NAME=${OS_IMAGE_NAME:-$DEFAULT_OS_IMAGE_NAME}

Expand Down

0 comments on commit a71e609

Please sign in to comment.