Skip to content

Commit

Permalink
[GLUTEN-3944][CH]Fix gluten.jar with delta20 when use spark 3.3 (apac…
Browse files Browse the repository at this point in the history
  • Loading branch information
lwz9103 authored and loneylee committed Dec 7, 2023
1 parent f98ebeb commit 1ae2c26
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 64 deletions.
9 changes: 4 additions & 5 deletions backends-clickhouse/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -207,11 +207,10 @@
<artifactId>maven-assembly-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<finalName>${jar.assembly.name.prefix}-${project.version}-spark-${sparkbundle.version}
</finalName>
<descriptors>
<descriptor>src/main/resources/gluten-source-exclude-sparkshims.xml</descriptor>
</descriptors>
<finalName>${jar.assembly.name.prefix}-${project.version}-spark-${sparkbundle.version}</finalName>
<descriptorRefs>
<descriptorRef>jar-with-dependencies</descriptorRef>
</descriptorRefs>
</configuration>
<executions>
<execution>
Expand Down

This file was deleted.

51 changes: 26 additions & 25 deletions ep/build-clickhouse/src/package.sh
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,8 @@ mkdir -p "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"
mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/bin
mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/conf
mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/jars
mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/extraJars
mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/extraJars/spark32
mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/extraJars/spark33
mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/jars/spark32
mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/jars/spark33
mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/libs
mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/logs

Expand All @@ -78,37 +77,39 @@ mkdir "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/logs
cp "${GLUTEN_SOURCE}"/LICENSE "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"
cp "${GLUTEN_SOURCE}"/README.md "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"

# build gluten jar
cd "${GLUTEN_SOURCE}"
mvn clean package -Pbackends-clickhouse -Pspark-3.2 -Prss -DskipTests -Dcheckstyle.skip
mvn clean package -Pspark-3.3 -am -pl shims/spark33 -DskipTests -Dcheckstyle.skip
# build gluten with spark32
mvn clean install -Pbackends-clickhouse -Pspark-3.2 -Prss -DskipTests -Dcheckstyle.skip
cp "${GLUTEN_SOURCE}"/backends-clickhouse/target/gluten-*-spark-3.2-jar-with-dependencies.jar "${PACKAGE_DIR_PATH}"/jars/spark32/gluten.jar
cp "${GLUTEN_SOURCE}"/gluten-celeborn/clickhouse/target/gluten-celeborn-clickhouse-${PROJECT_VERSION}-jar-with-dependencies.jar "${PACKAGE_DIR_PATH}"/jars/spark32
delta_version_32=$(mvn -q -Dexec.executable="echo" -Dexec.args='${delta.version}' -Pspark-3.2 --non-recursive exec:exec)
wget https://repo1.maven.org/maven2/io/delta/delta-core_2.12/${delta_version_32}/delta-core_2.12-${delta_version_32}.jar -P "${PACKAGE_DIR_PATH}"/jars/spark32
wget https://repo1.maven.org/maven2/io/delta/delta-storage/${delta_version_32}/delta-storage-${delta_version_32}.jar -P "${PACKAGE_DIR_PATH}"/jars/spark32

# build gluten with spark33
mvn clean install -Pbackends-clickhouse -Pspark-3.3 -Prss -DskipTests -Dcheckstyle.skip
cp "${GLUTEN_SOURCE}"/backends-clickhouse/target/gluten-*-spark-3.3-jar-with-dependencies.jar "${PACKAGE_DIR_PATH}"/jars/spark33/gluten.jar
cp "${GLUTEN_SOURCE}"/gluten-celeborn/clickhouse/target/gluten-celeborn-clickhouse-${PROJECT_VERSION}-jar-with-dependencies.jar "${PACKAGE_DIR_PATH}"/jars/spark33
delta_version_33=$(mvn -q -Dexec.executable="echo" -Dexec.args='${delta.version}' -Pspark-3.3 --non-recursive exec:exec)
wget https://repo1.maven.org/maven2/io/delta/delta-core_2.12/${delta_version_33}/delta-core_2.12-${delta_version_33}.jar -P "${PACKAGE_DIR_PATH}"/jars/spark33
wget https://repo1.maven.org/maven2/io/delta/delta-storage/${delta_version_33}/delta-storage-${delta_version_33}.jar -P "${PACKAGE_DIR_PATH}"/jars/spark33

# download common 3rd party jars
protobuf_version=$(mvn -q -Dexec.executable="echo" -Dexec.args='${protobuf.version}' --non-recursive exec:exec)
wget https://repo1.maven.org/maven2/com/google/protobuf/protobuf-java/${protobuf_version}/protobuf-java-${protobuf_version}.jar -P "${PACKAGE_DIR_PATH}"/jars/spark32
cp "${PACKAGE_DIR_PATH}"/jars/spark32/protobuf-java-${protobuf_version}.jar "${PACKAGE_DIR_PATH}"/jars/spark33

celeborn_version=$(mvn -q -Dexec.executable="echo" -Dexec.args='${celeborn.version}' --non-recursive exec:exec)
wget https://repo1.maven.org/maven2/org/apache/celeborn/celeborn-client-spark-3-shaded_2.12/${celeborn_version}/celeborn-client-spark-3-shaded_2.12-${celeborn_version}.jar -P "${PACKAGE_DIR_PATH}"/jars/spark32
cp "${PACKAGE_DIR_PATH}"/jars/spark32/celeborn-client-spark-3-shaded_2.12-${celeborn_version}.jar "${PACKAGE_DIR_PATH}"/jars/spark33

# build libch.so
bash "${GLUTEN_SOURCE}"/ep/build-clickhouse/src/build_clickhouse.sh

# copy gluten jar and libch.so
cp "${GLUTEN_SOURCE}"/backends-clickhouse/target/gluten-*-jar-with-dependencies-exclude-sparkshims.jar "${PACKAGE_DIR_PATH}"/jars/gluten.jar
cp "${GLUTEN_SOURCE}"/gluten-celeborn/clickhouse/target/gluten-celeborn-clickhouse-${PROJECT_VERSION}-jar-with-dependencies.jar "${PACKAGE_DIR_PATH}"/jars
cp "$GLUTEN_SOURCE"/cpp-ch/build/utils/extern-local-engine/libch.so "${PACKAGE_DIR_PATH}"/libs/libch.so
cp "${GLUTEN_SOURCE}"/shims/spark32/target/spark-*-${PROJECT_VERSION}.jar "${PACKAGE_DIR_PATH}"/extraJars/spark32/gluten-spark32-shims.jar
cp "${GLUTEN_SOURCE}"/shims/spark33/target/spark-*-${PROJECT_VERSION}.jar "${PACKAGE_DIR_PATH}"/extraJars/spark33/gluten-spark33-shims.jar

# copy bin and conf
cp "${GLUTEN_SOURCE}"/ep/build-clickhouse/src/resources/bin/* "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/bin
cp "${GLUTEN_SOURCE}"/ep/build-clickhouse/src/resources/conf/* "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/conf

# download 3rd party jars
protobuf_version=$(mvn -q -Dexec.executable="echo" -Dexec.args='${protobuf.version}' --non-recursive exec:exec)
wget https://repo1.maven.org/maven2/com/google/protobuf/protobuf-java/${protobuf_version}/protobuf-java-${protobuf_version}.jar -P "${PACKAGE_DIR_PATH}"/jars
celeborn_version=$(mvn -q -Dexec.executable="echo" -Dexec.args='${celeborn.version}' --non-recursive exec:exec)
wget https://repo1.maven.org/maven2/org/apache/celeborn/celeborn-client-spark-3-shaded_2.12/${celeborn_version}/celeborn-client-spark-3-shaded_2.12-${celeborn_version}.jar -P "${PACKAGE_DIR_PATH}"/jars
delta_version_32=$(mvn -q -Dexec.executable="echo" -Dexec.args='${delta.version}' -Pspark-3.2 --non-recursive exec:exec)
wget https://repo1.maven.org/maven2/io/delta/delta-core_2.12/${delta_version_32}/delta-core_2.12-${delta_version_32}.jar -P "${PACKAGE_DIR_PATH}"/extraJars/spark32
wget https://repo1.maven.org/maven2/io/delta/delta-storage/${delta_version_32}/delta-storage-${delta_version_32}.jar -P "${PACKAGE_DIR_PATH}"/extraJars/spark32
delta_version_33=$(mvn -q -Dexec.executable="echo" -Dexec.args='${delta.version}' -Pspark-3.3 --non-recursive exec:exec)
wget https://repo1.maven.org/maven2/io/delta/delta-core_2.12/${delta_version_33}/delta-core_2.12-${delta_version_33}.jar -P "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/extraJars/spark33
wget https://repo1.maven.org/maven2/io/delta/delta-storage/${delta_version_33}/delta-storage-${delta_version_33}.jar -P "${GLUTEN_SOURCE}"/dist/"${PACKAGE_NAME}"/extraJars/spark33

# build tar.gz
cd "${GLUTEN_SOURCE}"/dist
tar -czf "${PACKAGE_NAME}".tar.gz "${PACKAGE_NAME}"
Expand Down
14 changes: 6 additions & 8 deletions ep/build-clickhouse/src/resources/bin/gluten.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,27 +35,25 @@ function start() {
DRIVER_OPTIONS=${DRIVER_OPTIONS:-"-Dlog4j.configuration=file:${GLUTEN_HOME}/conf/log4j.properties"}
DRIVER_OPTIONS="${DRIVER_OPTIONS} $(cat ${GLUTEN_HOME}/conf/gluten.properties | grep "^spark.driver.extraJavaOptions" | cut -d "=" -f 2)"

GLUTEN_JARS=${GLUTEN_HOME}/jars/*
echo "GLUTEN_JARS: ${GLUTEN_JARS} will be loaded."

GLUTEN_JARS=
if [ "${SPARK_MAJOR_MINOR_VERSION}" == "3.2" ]; then
EXTRA_JARS=${GLUTEN_HOME}/extraJars/spark33/*
GLUTEN_JARS=${GLUTEN_HOME}/jars/spark32/*
elif [ "${SPARK_MAJOR_MINOR_VERSION}" == "3.3" ]; then
EXTRA_JARS=${GLUTEN_HOME}/extraJars/spark33/*
GLUTEN_JARS=${GLUTEN_HOME}/jars/spark33/*
else
echo "Unsupported spark version: ${SPARK_MAJOR_MINOR_VERSION}"
exit 1
fi
echo "EXTRA_JARS: ${EXTRA_JARS} will be loaded."
echo "GLUTEN_JARS: ${GLUTEN_JARS} will be loaded."

export LD_PRELOAD=${GLUTEN_HOME}/libs/libch.so
export SPARK_LOG_DIR=${GLUTEN_HOME}/logs

rm -f ${GLUTEN_HOME}/logs/spark-*.out*
nohup ${SPARK_HOME}/sbin/start-thriftserver.sh \
--properties-file ${GLUTEN_HOME}/conf/spark-default.conf \
--conf spark.driver.extraClassPath=${GLUTEN_JARS}:${EXTRA_JARS} \
--conf spark.executor.extraClassPath=${GLUTEN_JARS}:${EXTRA_JARS} \
--conf spark.driver.extraClassPath=${GLUTEN_JARS} \
--conf spark.executor.extraClassPath=${GLUTEN_JARS} \
--conf spark.driver.extraJavaOptions=${DRIVER_OPTIONS} \
--conf spark.gluten.sql.columnar.libpath=${GLUTEN_HOME}/libs/libch.so \
--verbose \
Expand Down

0 comments on commit 1ae2c26

Please sign in to comment.