Skip to content

Commit

Permalink
[VL][CI] update docker build script (apache#3904)
Browse files Browse the repository at this point in the history
This patch updates the Docker build scripts for Ubuntu and CentOS.
It also skips the setup scripts on CentOS 7 for the static build, where they are not necessary.

Signed-off-by: Yuan Zhou <[email protected]>
  • Loading branch information
zhouyuan authored Dec 6, 2023
1 parent d7d8e28 commit 144fc45
Show file tree
Hide file tree
Showing 7 changed files with 213 additions and 3 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/velox_be.yml
Original file line number Diff line number Diff line change
Expand Up @@ -488,7 +488,7 @@ jobs:
cd /opt/gluten && \
sudo -E ./dev/vcpkg/setup-build-depends.sh && \
source ./dev/vcpkg/env.sh && \
./dev/builddeps-veloxbe.sh --build_tests=ON --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON'
./dev/builddeps-veloxbe.sh --run_setup_script=OFF --build_tests=ON --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON'
- name: Build for Spark 3.2.2
run: |
docker exec static-build-test-$GITHUB_RUN_ID bash -c '
Expand Down
2 changes: 2 additions & 0 deletions ep/build-velox/src/get_velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ function process_setup_centos8 {
sed -i '/^function dnf_install/i\DEPENDENCY_DIR=${DEPENDENCY_DIR:-$(pwd)}' scripts/setup-centos8.sh
sed -i '/^dnf_install autoconf/a\dnf_install libxml2-devel libgsasl-devel libuuid-devel' scripts/setup-centos8.sh
sed -i '/^function cmake_install_deps.*/i FB_OS_VERSION=v2022.11.14.00\n function install_folly {\n github_checkout facebook/folly "${FB_OS_VERSION}"\n cmake_install -DBUILD_TESTS=OFF -DFOLLY_HAVE_INT128_T=ON\n}\n' scripts/setup-centos8.sh
sed -i '/^function cmake_install_deps.*/i function install_openssl {\n wget_and_untar https://github.com/openssl/openssl/archive/refs/tags/OpenSSL_1_1_1s.tar.gz openssl \n cd openssl \n ./config no-shared && make depend && make && sudo make install \n}\n' scripts/setup-centos8.sh
sed -i '/^cmake_install_deps fmt/a \install_folly' scripts/setup-centos8.sh
sed -i '/^cmake_install_deps fmt/a \install_openssl' scripts/setup-centos8.sh

if [ $ENABLE_HDFS == "ON" ]; then
sed -i '/^function cmake_install_deps.*/i function install_libhdfs3 {\n cd "\${DEPENDENCY_DIR}"\n github_checkout oap-project/libhdfs3 master \n cmake_install\n}\n' scripts/setup-centos8.sh
Expand Down
99 changes: 99 additions & 0 deletions tools/gluten-te/centos/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds the Gluten "build" Docker image for this OS flavour.
#
# Inputs (all optional environment variables; each falls back to a DEFAULT_*
# value that is not defined in this script and is expected to come from
# buildenv.sh or the caller's environment):
#   DEBUG_BUILD                ON selects a Gluten debug build
#   TARGET_GLUTEN_REPO/BRANCH  repository and branch that is actually built
#   CACHE_GLUTEN_REPO/BRANCH   repository and branch used to pre-warm dependencies
#   BUILD_BACKEND_TYPE         backend passed through to the Dockerfile
#   DOCKER_TARGET_IMAGE_BUILD  name prefix of the resulting image
#   DOCKER_CACHE_IMAGE         if set, passed to docker as --cache-from

set -ex

# Directory containing this script; buildenv.sh and dockerfile-build are
# looked up relative to it.
BASEDIR=$(dirname -- "$0")

source "$BASEDIR/buildenv.sh"

## Debug build flags

# Create debug build
DEBUG_BUILD=${DEBUG_BUILD:-$DEFAULT_DEBUG_BUILD}

# JDK_DEBUG_BUILD and GLUTEN_DEBUG_BUILD are derived from DEBUG_BUILD below and
# any inherited value is overwritten, so warn when the caller supplied one.
# The expansions must be quoted: with an empty value an unquoted `[ -n $VAR ]`
# degenerates to the one-argument test `[ -n ]`, which is always true.
if [[ -n "${JDK_DEBUG_BUILD:-}" ]]; then
  echo "Do not set JDK_DEBUG_BUILD manually!"
fi

if [[ -n "${GLUTEN_DEBUG_BUILD:-}" ]]; then
  echo "Do not set GLUTEN_DEBUG_BUILD manually!"
fi

# The JDK debug build stays OFF in both cases; only the Gluten build type
# follows DEBUG_BUILD.
if [[ "$DEBUG_BUILD" == "ON" ]]; then
  JDK_DEBUG_BUILD=OFF
  GLUTEN_DEBUG_BUILD=ON
else
  JDK_DEBUG_BUILD=OFF
  GLUTEN_DEBUG_BUILD=OFF
fi

# The target branches
TARGET_GLUTEN_REPO=${TARGET_GLUTEN_REPO:-$DEFAULT_GLUTEN_REPO}
TARGET_GLUTEN_BRANCH=${TARGET_GLUTEN_BRANCH:-$DEFAULT_GLUTEN_BRANCH}

# The branches used to prepare dependencies
CACHE_GLUTEN_REPO=${CACHE_GLUTEN_REPO:-$DEFAULT_GLUTEN_REPO}
CACHE_GLUTEN_BRANCH=${CACHE_GLUTEN_BRANCH:-$DEFAULT_GLUTEN_BRANCH}

# Backend type
BUILD_BACKEND_TYPE=${BUILD_BACKEND_TYPE:-$DEFAULT_BUILD_BACKEND_TYPE}

# Build will result in this image
DOCKER_TARGET_IMAGE_BUILD=${DOCKER_TARGET_IMAGE_BUILD:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILD}

DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILD-$OS_IMAGE"

## Fetch target commit

# Resolve the branch to a commit hash so the image is built from a fixed
# revision; the hash is the first column of `git ls-remote` output.
TARGET_GLUTEN_COMMIT="$(git ls-remote "$TARGET_GLUTEN_REPO" "$TARGET_GLUTEN_BRANCH" | awk '{print $1;}')"

# The pipeline's status is awk's, so a failed or empty lookup is detected here.
if [[ -z "$TARGET_GLUTEN_COMMIT" ]]; then
  echo "Unable to parse TARGET_GLUTEN_COMMIT."
  exit 1
fi

##

# Collect the docker arguments in an array so that every value reaches docker
# as exactly one word, even if it contains whitespace or glob characters.
BUILD_DOCKER_BUILD_ARGS=(
  --ulimit nofile=8192:8192
  --build-arg "DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE=$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"
  --build-arg "JDK_DEBUG_BUILD=$JDK_DEBUG_BUILD"
  --build-arg "GLUTEN_DEBUG_BUILD=$GLUTEN_DEBUG_BUILD"
  --build-arg "TARGET_GLUTEN_REPO=$TARGET_GLUTEN_REPO"
  --build-arg "TARGET_GLUTEN_COMMIT=$TARGET_GLUTEN_COMMIT"
  --build-arg "CACHE_GLUTEN_REPO=$CACHE_GLUTEN_REPO"
  --build-arg "CACHE_GLUTEN_BRANCH=$CACHE_GLUTEN_BRANCH"
  --build-arg "BUILD_BACKEND_TYPE=$BUILD_BACKEND_TYPE"
  -f "$BASEDIR/dockerfile-build"
  --target gluten-build
  -t "$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE"
)

if [[ -n "${DOCKER_CACHE_IMAGE:-}" ]]; then
  BUILD_DOCKER_BUILD_ARGS+=(--cache-from "$DOCKER_CACHE_IMAGE")
fi

# The build context (this script's directory) goes last.
BUILD_DOCKER_BUILD_ARGS+=("$BASEDIR")

docker build "${BUILD_DOCKER_BUILD_ARGS[@]}"

# EOF
1 change: 1 addition & 0 deletions tools/gluten-te/centos/centos-7-deps.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ yum -y install \
git \
dnf \
cmake3 \
ccache \
devtoolset-9 \
java-1.8.0-openjdk \
java-1.8.0-openjdk-devel \
Expand Down
101 changes: 101 additions & 0 deletions tools/gluten-te/centos/dockerfile-build
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Builds Gluten on top of an existing build-environment image.
# The base image name is supplied by the caller; it is declared before FROM so
# that it can be used in the FROM instruction.
ARG DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE

# Single stage, named gluten-build.
FROM $DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE AS gluten-build
# NOTE(review): MAINTAINER is deprecated in the Dockerfile reference in favour
# of a LABEL; consider migrating.
MAINTAINER Hongze Zhang<[email protected]>

# Whether debug build is enabled. Both values are echoed so they show up in
# the build log.
ARG JDK_DEBUG_BUILD
ARG GLUTEN_DEBUG_BUILD
RUN echo "JDK debug build is [$JDK_DEBUG_BUILD]!"
RUN echo "Gluten debug build is [$GLUTEN_DEBUG_BUILD]!"

# If JDK debug is on: download a fastdebug OpenJDK 8 build into /opt/jdk and
# Apache Maven 3.6.3 into /opt/maven, symlink their binaries into
# /usr/local/bin (cp -rs), and point Maven at the new JDK through ~/.mavenrc.
# The three apt-get commands are terminated with ';', so their failure does not
# stop the step; the mkdir/wget/tar/cp chain is joined with '&&' and does.
# NOTE(review): this file lives under centos/ but the package removal uses
# apt-get, and "uninstall" is not an apt-get operation (apt-get documents
# "remove"/"purge"). These commands presumably fail and are skipped on the
# CentOS base image -- confirm and port to yum if this path is ever enabled.
# NOTE(review): the JDK tarball name mentions glibc2.28; verify it runs on the
# base image's glibc.
RUN if [ "$JDK_DEBUG_BUILD" == "ON" ]; \
then \
apt-get update; \
DEBIAN_FRONTEND=noninteractive apt-get uninstall -y openjdk-8-jdk; \
DEBIAN_FRONTEND=noninteractive apt-get uninstall -y maven; \
mkdir -p /opt/jdk/ \
&& mkdir -p /opt/maven/ \
&& cd /opt/jdk/ \
&& wget https://builds.shipilev.net/openjdk-jdk8/openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \
&& tar -xvf openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \
&& rm -f openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \
&& cd /opt/maven/ \
&& wget https://dlcdn.apache.org/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz \
&& tar -xvf apache-maven-3.6.3-bin.tar.gz \
&& rm -f apache-maven-3.6.3-bin.tar.gz \
&& cp -rs /opt/jdk/j2sdk-image/bin/* /usr/local/bin/ \
&& cp -rs /opt/maven/apache-maven-3.6.3/bin/mvn /usr/local/bin/ \
&& echo "JAVA_HOME=/opt/jdk/j2sdk-image" > ~/.mavenrc; \
fi

# These branches are mainly for pre-downloading dependencies to speed up builds.
# Thus it should not be required to change these values every time the build
# branch is changed.
ARG CACHE_GLUTEN_REPO
ARG CACHE_GLUTEN_BRANCH

# Fail the build early, with a message, when either argument is missing.
RUN test -n "$CACHE_GLUTEN_REPO" || (echo "CACHE_GLUTEN_REPO not set" && false)
RUN test -n "$CACHE_GLUTEN_BRANCH" || (echo "CACHE_GLUTEN_BRANCH not set" && false)

# Clone the cache branch into /opt/gluten; later steps build from this
# checkout.
RUN cd /opt/ \
&& git clone $CACHE_GLUTEN_REPO -b $CACHE_GLUTEN_BRANCH gluten

# Set the maximum ccache size (-M), then print the cache statistics (-s) into
# the build log.
RUN ccache -M 128G
RUN ccache -s

# Default Gluten Maven build options (empty as of now). Declared with ENV so
# that the later `mvn clean install $GLUTEN_MAVEN_OPTIONS ...` steps can
# expand it.
ENV GLUTEN_MAVEN_OPTIONS=
#RUN set-login-env "GLUTEN_MAVEN_OPTIONS="

# Backend selector; only "velox" is handled by the step below.
ARG BUILD_BACKEND_TYPE

# Fail the build early, with a message, when the argument is missing.
RUN test -n "$BUILD_BACKEND_TYPE" || (echo "BUILD_BACKEND_TYPE not set" && false)

# Compose the dependency-install command and the extra Maven options for the
# selected backend, and persist them in files under the home directory.
# Shell variables do not survive from one RUN instruction to the next, so the
# later build steps read these two files back.
# - GLUTEN_DEBUG_BUILD=ON selects build type Debug, anything else Release.
# - Any backend other than "velox" aborts the build with exit status 1.
# - The echo arguments are unquoted, so the shell collapses the runs of
#   whitespace left by the line continuations into single spaces.
# NOTE(review): /env.sh is not created in this file; presumably it is provided
# by the base image -- confirm.
RUN if [ "$BUILD_BACKEND_TYPE" == "velox" ]; \
then \
if [ "$GLUTEN_DEBUG_BUILD" == "ON" ]; then GLUTEN_BUILD_TYPE="Debug"; else GLUTEN_BUILD_TYPE="Release"; fi; \
DEPS_INSTALL_SCRIPT="source /env.sh && bash /opt/gluten/dev/builddeps-veloxbe.sh \
--enable_hdfs=ON --enable_s3=ON --enable_gcs=ON \
--build_type=$GLUTEN_BUILD_TYPE --enable_ep_cache=ON"; \
EXTRA_MAVEN_OPTIONS="-Pspark-3.2 \
-Pbackends-velox \
-Prss \
-DskipTests \
-Dscalastyle.skip=true \
-Dcheckstyle.skip=true"; \
else \
echo "Unrecognizable backend type: $BUILD_BACKEND_TYPE"; \
exit 1; \
fi \
&& echo $EXTRA_MAVEN_OPTIONS > ~/.gluten-mvn-options \
&& echo $DEPS_INSTALL_SCRIPT > ~/.gluten-deps-install-script

# Prebuild Gluten from the cache-branch checkout in /opt/gluten.
# Reads back the two option files written under the home directory, runs the
# dependency-install command, then runs a full `mvn clean install` with
# $GLUTEN_MAVEN_OPTIONS followed by the stored extra options.
RUN EXTRA_MAVEN_OPTIONS=$(cat ~/.gluten-mvn-options) \
DEPS_INSTALL_SCRIPT=$(cat ~/.gluten-deps-install-script) \
&& cd /opt/gluten \
&& bash -c "$DEPS_INSTALL_SCRIPT" \
&& bash -c "mvn clean install $GLUTEN_MAVEN_OPTIONS $EXTRA_MAVEN_OPTIONS"

# Build Gluten at the requested target commit.
ARG TARGET_GLUTEN_REPO
ARG TARGET_GLUTEN_COMMIT

# Fail the build early, with a message, when either argument is missing.
RUN test -n "$TARGET_GLUTEN_REPO" || (echo "TARGET_GLUTEN_REPO not set" && false)
RUN test -n "$TARGET_GLUTEN_COMMIT" || (echo "TARGET_GLUTEN_COMMIT not set" && false)

# Fetch the target commit from the target repository into a local branch named
# build_<commit> inside the existing /opt/gluten checkout, and switch to it.
RUN cd /opt/gluten \
&& git fetch $TARGET_GLUTEN_REPO $TARGET_GLUTEN_COMMIT:build_$TARGET_GLUTEN_COMMIT \
&& git checkout build_$TARGET_GLUTEN_COMMIT

# Re-run the dependency install and the Maven build on the target commit, then
# move the Velox tree from ep/build-velox/build/velox_ep to /opt/velox.
RUN EXTRA_MAVEN_OPTIONS=$(cat ~/.gluten-mvn-options) \
DEPS_INSTALL_SCRIPT=$(cat ~/.gluten-deps-install-script) \
&& cd /opt/gluten \
&& bash -c "$DEPS_INSTALL_SCRIPT" \
&& bash -c "mvn clean install $GLUTEN_MAVEN_OPTIONS $EXTRA_MAVEN_OPTIONS" \
&& bash -c "mv ep/build-velox/build/velox_ep /opt/velox"

# EOF
7 changes: 5 additions & 2 deletions tools/gluten-te/ubuntu/dockerfile-build
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,9 @@ RUN test -n "$BUILD_BACKEND_TYPE" || (echo "BUILD_BACKEND_TYPE not set" && false
RUN if [ "$BUILD_BACKEND_TYPE" == "velox" ]; \
then \
if [ "$GLUTEN_DEBUG_BUILD" == "ON" ]; then GLUTEN_BUILD_TYPE="Debug"; else GLUTEN_BUILD_TYPE="Release"; fi; \
DEPS_INSTALL_SCRIPT="bash /opt/gluten/dev/builddeps-veloxbe.sh --build_type=$GLUTEN_BUILD_TYPE --enable_ep_cache=ON"; \
DEPS_INSTALL_SCRIPT="bash /opt/gluten/dev/builddeps-veloxbe.sh \
--enable_hdfs=ON --enable_s3=ON --enable_gcs=ON \
--build_type=$GLUTEN_BUILD_TYPE --enable_ep_cache=ON"; \
EXTRA_MAVEN_OPTIONS="-Pspark-3.2 \
-Pbackends-velox \
-Prss \
Expand Down Expand Up @@ -93,6 +95,7 @@ RUN EXTRA_MAVEN_OPTIONS=$(cat ~/.gluten-mvn-options) \
DEPS_INSTALL_SCRIPT=$(cat ~/.gluten-deps-install-script) \
&& cd /opt/gluten \
&& bash -c "$DEPS_INSTALL_SCRIPT" \
&& bash -c "mvn clean install $GLUTEN_MAVEN_OPTIONS $EXTRA_MAVEN_OPTIONS"
&& bash -c "mvn clean install $GLUTEN_MAVEN_OPTIONS $EXTRA_MAVEN_OPTIONS" \
&& bash -c "mv ep/build-velox/build/velox_ep /opt/velox"

# EOF
4 changes: 4 additions & 0 deletions tools/gluten-te/ubuntu/dockerfile-buildenv
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,10 @@ RUN cd /opt/spark322 && ./build/mvn -Pyarn -DskipTests clean install
RUN cd /opt && git clone --depth 1 --branch v3.3.1 https://github.com/apache/spark.git spark331
RUN cd /opt/spark331 && ./build/mvn -Pyarn -DskipTests clean install

# Build & install Spark 3.4.1
RUN cd /opt && git clone --depth 1 --branch v3.4.1 https://github.com/apache/spark.git spark341
RUN cd /opt/spark341 && ./build/mvn -Pyarn -DskipTests clean install

# Prepare entry command
COPY scripts/cmd.sh /root/.cmd.sh
CMD ["/root/.cmd.sh"]

0 comments on commit 144fc45

Please sign in to comment.