diff --git a/backends-clickhouse/pom.xml b/backends-clickhouse/pom.xml
index 9a606c48e694..e34f571961d6 100644
--- a/backends-clickhouse/pom.xml
+++ b/backends-clickhouse/pom.xml
@@ -14,6 +14,35 @@
Gluten Backends ClickHouse
+
+ celeborn
+
+ false
+
+
+
+ org.apache.gluten
+ gluten-celeborn
+ ${project.version}
+
+
+ org.apache.celeborn
+ celeborn-client-spark-${spark.major.version}-shaded_${scala.binary.version}
+ ${celeborn.version}
+ provided
+
+
+ org.apache.celeborn
+ celeborn-client-spark-${spark.major.version}_${scala.binary.version}
+
+
+ org.apache.celeborn
+ celeborn-spark-${spark.major.version}-columnar-shuffle_${scala.binary.version}
+
+
+
+
+
iceberg
diff --git a/gluten-celeborn/clickhouse/src/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory b/backends-clickhouse/src-celeborn/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory
similarity index 100%
rename from gluten-celeborn/clickhouse/src/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory
rename to backends-clickhouse/src-celeborn/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory
diff --git a/gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarBatchSerializer.scala b/backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarBatchSerializer.scala
similarity index 100%
rename from gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarBatchSerializer.scala
rename to backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarBatchSerializer.scala
diff --git a/gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriter.scala b/backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriter.scala
similarity index 100%
rename from gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriter.scala
rename to backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriter.scala
diff --git a/gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriterFactory.scala b/backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriterFactory.scala
similarity index 100%
rename from gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriterFactory.scala
rename to backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriterFactory.scala
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q01.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q01.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q01.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q01.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q02.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q02.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q02.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q02.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q03.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q03.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q03.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q03.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q04.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q04.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q04.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q04.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q05.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q05.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q05.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q05.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q06.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q06.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q06.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q06.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q07.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q07.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q07.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q07.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q08.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q08.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q08.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q08.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q09.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q09.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q09.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q09.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q10.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q10.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q10.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q10.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q11.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q11.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q11.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q11.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q12.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q12.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q12.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q12.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q13.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q13.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q13.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q13.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q14.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q14.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q14.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q14.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q15.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q15.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q15.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q15.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q16.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q16.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q16.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q16.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q17.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q17.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q17.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q17.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q18.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q18.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q18.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q18.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q19.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q19.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q19.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q19.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q20.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q20.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q20.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q20.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q21.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q21.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q21.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q21.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q22.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q22.out
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q22.out
rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q22.out
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q1.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q1.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q1.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q1.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q10.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q10.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q10.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q10.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q11.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q11.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q11.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q11.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q12.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q12.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q12.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q12.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q13.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q13.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q13.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q13.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q14.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q14.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q14.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q14.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q15.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q15.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q15.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q15.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q16.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q16.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q16.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q16.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q17.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q17.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q17.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q17.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q18.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q18.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q18.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q18.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q19.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q19.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q19.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q19.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q2.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q2.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q2.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q2.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q20.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q20.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q20.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q20.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q21.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q21.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q21.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q21.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q22.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q22.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q22.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q22.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q3.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q3.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q3.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q3.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q4.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q4.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q4.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q4.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q5.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q5.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q5.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q5.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q6.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q6.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q6.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q6.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q7.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q7.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q7.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q7.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q8.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q8.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q8.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q8.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q9.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q9.sql
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q9.sql
rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q9.sql
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/checksums.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/checksums.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/checksums.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/columns.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/columns.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/columns.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/count.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/count.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/count.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/data.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/data.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/data.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/data.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/data.mrk3 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/data.mrk3
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/data.mrk3
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/data.mrk3
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/default_compression_codec.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/default_compression_codec.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/default_compression_codec.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/checksums.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/checksums.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/checksums.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/columns.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/columns.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/columns.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/count.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/count.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/count.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/default_compression_codec.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/default_compression_codec.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/default_compression_codec.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/checksums.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/checksums.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/checksums.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/columns.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/columns.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/columns.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/count.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/count.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/count.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/data.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/data.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/data.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/data.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/data.mrk3 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/data.mrk3
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/data.mrk3
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/data.mrk3
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/default_compression_codec.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/default_compression_codec.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/default_compression_codec.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/checksums.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/checksums.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/checksums.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/columns.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/columns.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/columns.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/count.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/count.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/count.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/default_compression_codec.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/default_compression_codec.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/default_compression_codec.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/checksums.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/checksums.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/checksums.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/columns.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/columns.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/columns.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/count.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/count.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/count.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/data.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/data.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/data.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/data.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/data.mrk3 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/data.mrk3
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/data.mrk3
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/data.mrk3
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/default_compression_codec.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/default_compression_codec.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/default_compression_codec.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/checksums.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/checksums.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/checksums.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/columns.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/columns.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/columns.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/count.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/count.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/count.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/default_compression_codec.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/default_compression_codec.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/default_compression_codec.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.mrk2
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.mrk2
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.mrk2
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/checksums.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/checksums.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/checksums.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/columns.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/columns.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/columns.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/count.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/count.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/count.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/data.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/data.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/data.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/data.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/data.mrk3 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/data.mrk3
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/data.mrk3
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/data.mrk3
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/default_compression_codec.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/default_compression_codec.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/default_compression_codec.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/checksums.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/checksums.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/checksums.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/columns.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/columns.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/columns.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/count.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/count.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/count.txt
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/data.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/data.bin
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/data.bin
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/data.bin
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/data.mrk3 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/data.mrk3
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/data.mrk3
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/data.mrk3
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/default_compression_codec.txt
similarity index 100%
rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/default_compression_codec.txt
rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/default_compression_codec.txt
diff --git a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala b/backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala
similarity index 95%
rename from gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala
rename to backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala
index ee7657c505ac..10350898cf88 100644
--- a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala
+++ b/backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala
@@ -26,10 +26,10 @@ class GlutenClickHouseRSSColumnarMemorySortShuffleSuite
override protected val tablesPath: String = basePath + "/tpch-data-ch"
override protected val tpchQueries: String = rootPath + "queries/tpch-queries-ch"
override protected val queriesResults: String =
- rootPath + "../../../../../backends-clickhouse/src/test/resources/mergetree-queries-output"
+ rootPath + "../../../../backends-clickhouse/src/test/resources/mergetree-queries-output"
override protected val parquetTableDataPath: String =
- "../../../../../gluten-core/src/test/resources/tpch-data"
+ "../../../../gluten-core/src/test/resources/tpch-data"
/** Run Gluten + ClickHouse Backend with ColumnarShuffleManager */
override protected def sparkConf: SparkConf = {
diff --git a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala b/backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala
similarity index 97%
rename from gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala
rename to backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala
index e62dbdd2a5fe..4c62ee73f0f7 100644
--- a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala
+++ b/backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala
@@ -30,10 +30,10 @@ class GlutenClickHouseRSSColumnarShuffleAQESuite
override protected val tablesPath: String = basePath + "/tpch-data-ch"
override protected val tpchQueries: String = rootPath + "queries/tpch-queries-ch"
override protected val queriesResults: String =
- rootPath + "../../../../../backends-clickhouse/src/test/resources/mergetree-queries-output"
+ rootPath + "../../../../backends-clickhouse/src/test/resources/mergetree-queries-output"
override protected val parquetTableDataPath: String =
- "../../../../../gluten-core/src/test/resources/tpch-data"
+ "../../../../gluten-core/src/test/resources/tpch-data"
/** Run Gluten + ClickHouse Backend with ColumnarShuffleManager */
override protected def sparkConf: SparkConf = {
diff --git a/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
index 05f7fdbfa423..cd3ce793747c 100644
--- a/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
+++ b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala
@@ -29,9 +29,8 @@ import org.apache.spark.sql.delta.files._
import org.apache.spark.sql.delta.hooks.AutoCompact
import org.apache.spark.sql.delta.schema.{InnerInvariantViolationException, InvariantViolationException}
import org.apache.spark.sql.delta.sources.DeltaSQLConf
-import org.apache.spark.sql.execution.{QueryExecution, SparkPlan, SQLExecution}
-import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec
-import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, FileFormatWriter, GlutenWriterColumnarRules, WriteFiles, WriteJobStatsTracker}
+import org.apache.spark.sql.execution.SQLExecution
+import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, DeltaV1Writes, FileFormatWriter, GlutenWriterColumnarRules, WriteJobStatsTracker}
import org.apache.spark.sql.execution.datasources.v1.MergeTreeWriterInjects
import org.apache.spark.sql.execution.datasources.v1.clickhouse.MergeTreeFileFormatWriter
import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseConfig
@@ -229,31 +228,12 @@ class ClickhouseOptimisticTransaction(
val (data, partitionSchema) = performCDCPartition(inputData)
val outputPath = deltaLog.dataPath
- val fileFormat = deltaLog.fileFormat(protocol, metadata) // TODO support changing formats.
-
- // Iceberg spec requires partition columns in data files
- val writePartitionColumns = IcebergCompat.isAnyEnabled(metadata)
- // Retain only a minimal selection of Spark writer options to avoid any potential
- // compatibility issues
- val options = (writeOptions match {
- case None => Map.empty[String, String]
- case Some(writeOptions) =>
- writeOptions.options.filterKeys {
- key =>
- key.equalsIgnoreCase(DeltaOptions.MAX_RECORDS_PER_FILE) ||
- key.equalsIgnoreCase(DeltaOptions.COMPRESSION)
- }.toMap
- }) + (DeltaOptions.WRITE_PARTITION_COLUMNS -> writePartitionColumns.toString)
-
- val (normalQueryExecution, output, generatedColumnConstraints, _) =
+ val (queryExecution, output, generatedColumnConstraints, _) =
normalizeData(deltaLog, writeOptions, data)
val partitioningColumns = getPartitioningColumns(partitionSchema, output)
- val logicalPlan = normalQueryExecution.optimizedPlan
- val write =
- WriteFiles(logicalPlan, fileFormat, partitioningColumns, None, options, Map.empty)
+ val fileFormat = deltaLog.fileFormat(protocol, metadata) // TODO support changing formats.
- val queryExecution = new QueryExecution(spark, write)
val (committer, collectStats) = fileFormat.toString match {
case "MergeTree" => (getCommitter2(outputPath), false)
case _ => (getCommitter(outputPath), true)
@@ -274,20 +254,24 @@ class ClickhouseOptimisticTransaction(
SQLExecution.withNewExecutionId(queryExecution, Option("deltaTransactionalWrite")) {
val outputSpec = FileFormatWriter.OutputSpec(outputPath.toString, Map.empty, output)
- val physicalPlan = materializeAdaptiveSparkPlan(queryExecution.executedPlan)
- // convertEmptyToNullIfNeeded(queryExecution.executedPlan, partitioningColumns, constraints)
- /* val checkInvariants = DeltaInvariantCheckerExec(empty2NullPlan, constraints)
+ val empty2NullPlan =
+ convertEmptyToNullIfNeeded(queryExecution.sparkPlan, partitioningColumns, constraints)
+ // TODO: val checkInvariants = DeltaInvariantCheckerExec(empty2NullPlan, constraints)
+ val checkInvariants = empty2NullPlan
+
// No need to plan optimized write if the write command is OPTIMIZE, which aims to produce
// evenly-balanced data files already.
- val physicalPlan =
- if (
- !isOptimize &&
- shouldOptimizeWrite(writeOptions, spark.sessionState.conf)
- ) {
- DeltaOptimizedWriterExec(checkInvariants, metadata.partitionColumns, deltaLog)
- } else {
- checkInvariants
- } */
+ // TODO: val physicalPlan =
+ // if (
+ // !isOptimize &&
+ // shouldOptimizeWrite(writeOptions, spark.sessionState.conf)
+ // ) {
+ // DeltaOptimizedWriterExec(checkInvariants, metadata.partitionColumns, deltaLog)
+ // } else {
+ // checkInvariants
+ // }
+ val physicalPlan = checkInvariants
+
val statsTrackers: ListBuffer[WriteJobStatsTracker] = ListBuffer()
if (spark.conf.get(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED)) {
@@ -298,10 +282,33 @@ class ClickhouseOptimisticTransaction(
statsTrackers.append(basicWriteJobStatsTracker)
}
+ // Iceberg spec requires partition columns in data files
+ val writePartitionColumns = IcebergCompat.isAnyEnabled(metadata)
+ // Retain only a minimal selection of Spark writer options to avoid any potential
+ // compatibility issues
+ val options = (writeOptions match {
+ case None => Map.empty[String, String]
+ case Some(writeOptions) =>
+ writeOptions.options.filterKeys {
+ key =>
+ key.equalsIgnoreCase(DeltaOptions.MAX_RECORDS_PER_FILE) ||
+ key.equalsIgnoreCase(DeltaOptions.COMPRESSION)
+ }.toMap
+ }) + (DeltaOptions.WRITE_PARTITION_COLUMNS -> writePartitionColumns.toString)
+
+ val executedPlan = DeltaV1Writes(
+ spark,
+ physicalPlan,
+ fileFormat,
+ partitioningColumns,
+ None,
+ options
+ ).executedPlan
+
try {
DeltaFileFormatWriter.write(
sparkSession = spark,
- plan = physicalPlan,
+ plan = executedPlan,
fileFormat = fileFormat,
committer = committer,
outputSpec = outputSpec,
@@ -358,8 +365,4 @@ class ClickhouseOptimisticTransaction(
resultFiles.toSeq ++ committer.changeFiles
}
- private def materializeAdaptiveSparkPlan(plan: SparkPlan): SparkPlan = plan match {
- case a: AdaptiveSparkPlanExec => a.finalPhysicalPlan
- case p: SparkPlan => p
- }
}
diff --git a/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/FileDeltaColumnarWrite.scala b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/FileDeltaColumnarWrite.scala
index bf6b0c0074dc..df7ef7e23409 100644
--- a/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/FileDeltaColumnarWrite.scala
+++ b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/FileDeltaColumnarWrite.scala
@@ -137,7 +137,8 @@ case class FileDeltaColumnarWrite(
// stats.map(row => x.apply(row).getString(0)).foreach(println)
// process stats
val commitInfo = DeltaFileCommitInfo(committer)
- val basicNativeStat = NativeBasicWriteTaskStatsTracker(description, basicWriteJobStatsTracker)
+ val basicNativeStat =
+ NativeBasicWriteTaskStatsTracker(description.path, basicWriteJobStatsTracker)
val basicNativeStats = Seq(commitInfo, basicNativeStat)
NativeStatCompute(stats)(basicNativeStats, nativeDeltaStats)
diff --git a/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/datasources/DeltaV1Writes.scala b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/datasources/DeltaV1Writes.scala
new file mode 100644
index 000000000000..8ae99cc0d59f
--- /dev/null
+++ b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/datasources/DeltaV1Writes.scala
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources
+import org.apache.gluten.backendsapi.BackendsApiManager
+
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.catalyst.catalog.BucketSpec
+import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec
+import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder}
+import org.apache.spark.sql.catalyst.plans.logical.LocalRelation
+import org.apache.spark.sql.execution.{QueryExecution, SortExec, SparkPlan}
+import org.apache.spark.sql.execution.datasources.V1WritesUtils.isOrderingMatched
+
+case class DeltaV1Writes(
+ spark: SparkSession,
+ query: SparkPlan,
+ fileFormat: FileFormat,
+ partitionColumns: Seq[Attribute],
+ bucketSpec: Option[BucketSpec],
+ options: Map[String, String],
+ staticPartitions: TablePartitionSpec = Map.empty) {
+
+ require(fileFormat != null, "FileFormat is required to write files.")
+ require(BackendsApiManager.getSettings.enableNativeWriteFiles())
+
+ private lazy val requiredOrdering: Seq[SortOrder] =
+ V1WritesUtils.getSortOrder(
+ query.output,
+ partitionColumns,
+ bucketSpec,
+ options,
+ staticPartitions.size)
+
+ lazy val sortPlan: SparkPlan = {
+ val outputOrdering = query.outputOrdering
+ val orderingMatched = isOrderingMatched(requiredOrdering.map(_.child), outputOrdering)
+ if (orderingMatched) {
+ query
+ } else {
+ SortExec(requiredOrdering, global = false, query)
+ }
+ }
+
+ lazy val writePlan: SparkPlan =
+ WriteFilesExec(
+ sortPlan,
+ fileFormat = fileFormat,
+ partitionColumns = partitionColumns,
+ bucketSpec = bucketSpec,
+ options = options,
+ staticPartitions = staticPartitions)
+
+ lazy val executedPlan: SparkPlan =
+ CallTransformer(spark, writePlan).executedPlan
+}
+
+case class CallTransformer(spark: SparkSession, physicalPlan: SparkPlan)
+ extends QueryExecution(spark, LocalRelation()) {
+ override lazy val sparkPlan: SparkPlan = physicalPlan
+}
diff --git a/backends-clickhouse/src-delta-32/test/scala/org/apache/spark/sql/execution/datasources/DeltaV1WritesSuite.scala b/backends-clickhouse/src-delta-32/test/scala/org/apache/spark/sql/execution/datasources/DeltaV1WritesSuite.scala
new file mode 100644
index 000000000000..1a90148df29e
--- /dev/null
+++ b/backends-clickhouse/src-delta-32/test/scala/org/apache/spark/sql/execution/datasources/DeltaV1WritesSuite.scala
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution.datasources
+
+import org.apache.gluten.GlutenConfig
+import org.apache.gluten.execution.{GlutenClickHouseWholeStageTransformerSuite, GlutenPlan, SortExecTransformer}
+import org.apache.spark.SparkConf
+import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
+import org.apache.spark.sql.execution.{SortExec, SparkPlan}
+
+class DeltaV1WritesSuite extends GlutenClickHouseWholeStageTransformerSuite {
+
+ import testImplicits._
+
+ override protected def sparkConf: SparkConf = {
+ super.sparkConf
+ .set(GlutenConfig.NATIVE_WRITER_ENABLED.key, "true")
+ }
+
+ override def beforeAll(): Unit = {
+ super.beforeAll()
+ (0 to 20)
+ .map(i => (i, i % 5, (i % 10).toString))
+ .toDF("i", "j", "k")
+ .write
+ .saveAsTable("t0")
+ }
+
+ override def afterAll(): Unit = {
+ sql("drop table if exists t0")
+ super.afterAll()
+ }
+
+ val format = new ParquetFileFormat
+ def getSort(child: SparkPlan): Option[SortExecTransformer] = {
+ child.collectFirst { case w: SortExecTransformer => w }
+ }
+ test("don't add sort when the required ordering is empty") {
+ val df = sql("select * from t0")
+ val plan = df.queryExecution.sparkPlan
+ val writes = DeltaV1Writes(spark, plan, format, Nil, None, Map.empty)
+ assert(writes.sortPlan === plan)
+ assert(writes.writePlan != null)
+ assert(writes.executedPlan.isInstanceOf[GlutenPlan])
+ val writeFilesOpt = V1WritesUtils.getWriteFilesOpt(writes.executedPlan)
+ assert(writeFilesOpt.isDefined)
+ val sortExec = getSort(writes.executedPlan)
+ assert(sortExec.isEmpty)
+ }
+
+ test("don't add sort when the required ordering is already satisfied") {
+ val df = sql("select * from t0")
+ def check(plan: SparkPlan): Unit = {
+ val partitionColumns = plan.output.find(_.name == "k").toSeq
+ val writes = DeltaV1Writes(spark, plan, format, partitionColumns, None, Map.empty)
+ assert(writes.sortPlan === plan)
+ assert(writes.writePlan != null)
+ assert(writes.executedPlan.isInstanceOf[GlutenPlan])
+ val writeFilesOpt = V1WritesUtils.getWriteFilesOpt(writes.executedPlan)
+ assert(writeFilesOpt.isDefined)
+ val sortExec = getSort(writes.executedPlan)
+ assert(sortExec.isDefined)
+ }
+ check(df.orderBy("k").queryExecution.sparkPlan)
+ check(df.orderBy("k", "j").queryExecution.sparkPlan)
+ }
+
+ test("add sort when the required ordering is not satisfied") {
+ val df = sql("select * from t0")
+ def check(plan: SparkPlan): Unit = {
+ val partitionColumns = plan.output.find(_.name == "k").toSeq
+ val writes = DeltaV1Writes(spark, plan, format, partitionColumns, None, Map.empty)
+ val sort = writes.sortPlan.asInstanceOf[SortExec]
+ assert(sort.child === plan)
+ assert(writes.writePlan != null)
+ assert(writes.executedPlan.isInstanceOf[GlutenPlan])
+ val writeFilesOpt = V1WritesUtils.getWriteFilesOpt(writes.executedPlan)
+ assert(writeFilesOpt.isDefined)
+ val sortExec = getSort(writes.executedPlan)
+ assert(sortExec.isDefined, s"writes.executedPlan: ${writes.executedPlan}")
+ }
+ check(df.queryExecution.sparkPlan)
+ check(df.orderBy("j", "k").queryExecution.sparkPlan)
+ }
+
+}
diff --git a/backends-clickhouse/src/main/resources/org/apache/spark/sql/execution/datasources/v1/write_optimization.proto b/backends-clickhouse/src/main/resources/org/apache/spark/sql/execution/datasources/v1/write_optimization.proto
index 89f606e4ffd3..fdf34f1a0a75 100644
--- a/backends-clickhouse/src/main/resources/org/apache/spark/sql/execution/datasources/v1/write_optimization.proto
+++ b/backends-clickhouse/src/main/resources/org/apache/spark/sql/execution/datasources/v1/write_optimization.proto
@@ -12,6 +12,9 @@ message Write {
message Common {
string format = 1;
string job_task_attempt_id = 2; // currently used in mergetree format
+
+ // Describes the partition index in the WriteRel.table_schema.
+ repeated int32 partition_col_index = 3;
}
message ParquetWrite{}
message OrcWrite{}
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHRuleApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHRuleApi.scala
index 40e53536184c..32961c21a266 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHRuleApi.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHRuleApi.scala
@@ -93,7 +93,6 @@ object CHRuleApi {
// Legacy: Post-transform rules.
injector.injectPostTransform(_ => PruneNestedColumnsInHiveTableScan)
- injector.injectPostTransform(_ => RemoveNativeWriteFilesSortAndProject())
injector.injectPostTransform(c => intercept(RewriteTransformer.apply(c.session)))
injector.injectPostTransform(_ => PushDownFilterToScan)
injector.injectPostTransform(_ => PushDownInputFileExpression.PostOffload)
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala
index 0be8cf2c25bf..ef5a4eff6fca 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala
@@ -17,7 +17,7 @@
package org.apache.gluten.backendsapi.clickhouse
import org.apache.gluten.backendsapi.TransformerApi
-import org.apache.gluten.execution.CHHashAggregateExecTransformer
+import org.apache.gluten.execution.{CHHashAggregateExecTransformer, WriteFilesExecTransformer}
import org.apache.gluten.expression.ConverterUtils
import org.apache.gluten.substrait.expression.{BooleanLiteralNode, ExpressionBuilder, ExpressionNode}
import org.apache.gluten.utils.{CHInputPartitionsUtil, ExpressionDocUtil}
@@ -31,7 +31,7 @@ import org.apache.spark.sql.delta.catalog.ClickHouseTableV2
import org.apache.spark.sql.delta.files.TahoeFileIndex
import org.apache.spark.sql.execution.SparkPlan
import org.apache.spark.sql.execution.aggregate.HashAggregateExec
-import org.apache.spark.sql.execution.datasources.{FileFormat, HadoopFsRelation, PartitionDirectory}
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory}
import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat
import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
import org.apache.spark.sql.execution.datasources.v1.Write
@@ -243,24 +243,31 @@ class CHTransformerApi extends TransformerApi with Logging {
GlutenDriverEndpoint.invalidateResourceRelation(executionId)
}
- override def genWriteParameters(
- fileFormat: FileFormat,
- writeOptions: Map[String, String]): Any = {
- val fileFormatStr = fileFormat match {
+ override def genWriteParameters(writeExec: WriteFilesExecTransformer): Any = {
+ val fileFormatStr = writeExec.fileFormat match {
case register: DataSourceRegister =>
register.shortName
case _ => "UnknownFileFormat"
}
- val write = Write
+ val childOutput = writeExec.child.output
+
+ val partitionIndexes =
+ writeExec.partitionColumns.map(p => childOutput.indexWhere(_.exprId == p.exprId))
+ require(partitionIndexes.forall(_ >= 0))
+
+ val common = Write.Common
.newBuilder()
- .setCommon(
- Write.Common
- .newBuilder()
- .setFormat(fileFormatStr)
- .setJobTaskAttemptId("") // we can get job and task id at the driver side
- .build())
+ .setFormat(s"$fileFormatStr")
+ .setJobTaskAttemptId("") // we cannot get job and task id at the driver side)
+ partitionIndexes.foreach {
+ idx =>
+ require(idx >= 0)
+ common.addPartitionColIndex(idx)
+ }
+
+ val write = Write.newBuilder().setCommon(common.build())
- fileFormat match {
+ writeExec.fileFormat match {
case d: MergeTreeFileFormat =>
write.setMergetree(MergeTreeFileFormat.createWrite(d.metadata))
case _: ParquetFileFormat =>
@@ -273,5 +280,5 @@ class CHTransformerApi extends TransformerApi with Logging {
/** use Hadoop Path class to encode the file path */
override def encodeFilePathIfNeed(filePath: String): String =
- (new Path(filePath)).toUri.toASCIIString
+ new Path(filePath).toUri.toASCIIString
}
diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeConfig.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeConfig.scala
index 12bb8d05d953..055c3b9d87b8 100644
--- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeConfig.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeConfig.scala
@@ -22,6 +22,7 @@ object RuntimeConfig {
import CHConf._
import SQLConf._
+ /** Clickhouse Configuration */
val PATH =
buildConf(runtimeConfig("path"))
.doc(
@@ -37,9 +38,25 @@ object RuntimeConfig {
.createWithDefault("/tmp/libch")
// scalastyle:on line.size.limit
+ // scalastyle:off line.size.limit
+ val LOGGER_LEVEL =
+ buildConf(runtimeConfig("logger.level"))
+ .doc(
+ "https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#logger")
+ .stringConf
+ .createWithDefault("warning")
+ // scalastyle:on line.size.limit
+
+ /** Gluten Configuration */
val USE_CURRENT_DIRECTORY_AS_TMP =
buildConf(runtimeConfig("use_current_directory_as_tmp"))
.doc("Use the current directory as the temporary directory.")
.booleanConf
.createWithDefault(false)
+
+ val DUMP_PIPELINE =
+ buildConf(runtimeConfig("dump_pipeline"))
+ .doc("Dump pipeline to file after execution")
+ .booleanConf
+ .createWithDefault(false)
}
diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/CHColumnarWrite.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/CHColumnarWrite.scala
index 1342e250430e..427db0aad2b5 100644
--- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/CHColumnarWrite.scala
+++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/CHColumnarWrite.scala
@@ -198,12 +198,12 @@ case class NativeStatCompute(rows: Seq[InternalRow]) {
}
case class NativeBasicWriteTaskStatsTracker(
- description: WriteJobDescription,
+ writeDir: String,
basicWriteJobStatsTracker: WriteTaskStatsTracker)
extends (NativeFileWriteResult => Unit) {
private var numWrittenRows: Long = 0
override def apply(stat: NativeFileWriteResult): Unit = {
- val absolutePath = s"${description.path}/${stat.relativePath}"
+ val absolutePath = s"$writeDir/${stat.relativePath}"
if (stat.partition_id != "__NO_PARTITION_ID__") {
basicWriteJobStatsTracker.newPartition(new GenericInternalRow(Array[Any](stat.partition_id)))
}
@@ -248,6 +248,8 @@ case class HadoopMapReduceCommitProtocolWrite(
extends CHColumnarWrite[HadoopMapReduceCommitProtocol]
with Logging {
+ private var stageDir: String = _
+
private lazy val adapter: HadoopMapReduceAdapter = HadoopMapReduceAdapter(committer)
/**
@@ -257,11 +259,12 @@ case class HadoopMapReduceCommitProtocolWrite(
override def doSetupNativeTask(): Unit = {
val (writePath, writeFilePattern) =
adapter.getTaskAttemptTempPathAndFilePattern(taskAttemptContext, description)
- logDebug(s"Native staging write path: $writePath and file pattern: $writeFilePattern")
+ stageDir = writePath
+ logDebug(s"Native staging write path: $stageDir and file pattern: $writeFilePattern")
val settings =
Map(
- RuntimeSettings.TASK_WRITE_TMP_DIR.key -> writePath,
+ RuntimeSettings.TASK_WRITE_TMP_DIR.key -> stageDir,
RuntimeSettings.TASK_WRITE_FILENAME_PATTERN.key -> writeFilePattern)
NativeExpressionEvaluator.updateQueryRuntimeSettings(settings)
}
@@ -272,7 +275,7 @@ case class HadoopMapReduceCommitProtocolWrite(
None
} else {
val commitInfo = FileCommitInfo(description)
- val basicNativeStat = NativeBasicWriteTaskStatsTracker(description, basicWriteJobStatsTracker)
+ val basicNativeStat = NativeBasicWriteTaskStatsTracker(stageDir, basicWriteJobStatsTracker)
val basicNativeStats = Seq(commitInfo, basicNativeStat)
NativeStatCompute(stats)(basicNativeStats)
val (partitions, addedAbsPathFiles) = commitInfo.result
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala
index 2f55510a7b1f..3736f0f14415 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala
@@ -1025,6 +1025,7 @@ class GlutenClickHouseDeltaParquetWriteSuite
}
}
+ // FIXME: optimize
testSparkVersionLE33("test parquet optimize with the path based table") {
val dataPath = s"$basePath/lineitem_delta_parquet_optimize_path_based"
clearDataPath(dataPath)
diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteSuite.scala
index cc577609656b..60ca58d9fc29 100644
--- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteSuite.scala
+++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteSuite.scala
@@ -57,6 +57,7 @@ class GlutenClickHouseMergeTreeWriteSuite
.set("spark.sql.adaptive.enabled", "true")
.set("spark.sql.files.maxPartitionBytes", "20000000")
.set(GlutenConfig.NATIVE_WRITER_ENABLED.key, "true")
+ .set(CHConf.ENABLE_ONEPIPELINE_MERGETREE_WRITE.key, spark35.toString)
.setCHSettings("min_insert_block_size_rows", 100000)
.setCHSettings("mergetree.merge_after_insert", false)
.setCHSettings("input_format_parquet_max_block_size", 8192)
@@ -67,178 +68,172 @@ class GlutenClickHouseMergeTreeWriteSuite
}
test("test mergetree table write") {
- withSQLConf((CHConf.ENABLE_ONEPIPELINE_MERGETREE_WRITE.key, spark35.toString)) {
- spark.sql(s"""
- |DROP TABLE IF EXISTS lineitem_mergetree;
- |""".stripMargin)
+ spark.sql(s"""
+ |DROP TABLE IF EXISTS lineitem_mergetree;
+ |""".stripMargin)
- // write.format.default = mergetree
- spark.sql(s"""
- |CREATE TABLE IF NOT EXISTS lineitem_mergetree
- |(
- | l_orderkey bigint,
- | l_partkey bigint,
- | l_suppkey bigint,
- | l_linenumber bigint,
- | l_quantity double,
- | l_extendedprice double,
- | l_discount double,
- | l_tax double,
- | l_returnflag string,
- | l_linestatus string,
- | l_shipdate date,
- | l_commitdate date,
- | l_receiptdate date,
- | l_shipinstruct string,
- | l_shipmode string,
- | l_comment string
- |)
- |USING clickhouse
- |TBLPROPERTIES (write.format.default = 'mergetree')
- |LOCATION '$basePath/lineitem_mergetree'
- |""".stripMargin)
+ // write.format.default = mergetree
+ spark.sql(s"""
+ |CREATE TABLE IF NOT EXISTS lineitem_mergetree
+ |(
+ | l_orderkey bigint,
+ | l_partkey bigint,
+ | l_suppkey bigint,
+ | l_linenumber bigint,
+ | l_quantity double,
+ | l_extendedprice double,
+ | l_discount double,
+ | l_tax double,
+ | l_returnflag string,
+ | l_linestatus string,
+ | l_shipdate date,
+ | l_commitdate date,
+ | l_receiptdate date,
+ | l_shipinstruct string,
+ | l_shipmode string,
+ | l_comment string
+ |)
+ |USING clickhouse
+ |TBLPROPERTIES (write.format.default = 'mergetree')
+ |LOCATION '$basePath/lineitem_mergetree'
+ |""".stripMargin)
- spark.sql(s"""
- | insert into table lineitem_mergetree
- | select * from lineitem
- |""".stripMargin)
+ spark.sql(s"""
+ | insert into table lineitem_mergetree
+ | select * from lineitem
+ |""".stripMargin)
- runTPCHQueryBySQL(1, q1("lineitem_mergetree")) {
- df =>
- val plans = collect(df.queryExecution.executedPlan) {
- case f: FileSourceScanExecTransformer => f
- case w: WholeStageTransformer => w
- }
- assertResult(4)(plans.size)
+ runTPCHQueryBySQL(1, q1("lineitem_mergetree")) {
+ df =>
+ val plans = collect(df.queryExecution.executedPlan) {
+ case f: FileSourceScanExecTransformer => f
+ case w: WholeStageTransformer => w
+ }
+ assertResult(4)(plans.size)
- val mergetreeScan = plans(3).asInstanceOf[FileSourceScanExecTransformer]
- assert(mergetreeScan.nodeName.startsWith("ScanTransformer mergetree"))
+ val mergetreeScan = plans(3).asInstanceOf[FileSourceScanExecTransformer]
+ assert(mergetreeScan.nodeName.startsWith("ScanTransformer mergetree"))
- val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex]
- assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty)
- assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty)
- assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).orderByKeyOption.isEmpty)
- assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.isEmpty)
- assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty)
- val addFiles =
- fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts])
- assertResult(6)(addFiles.size)
- assertResult(600572)(addFiles.map(_.rows).sum)
+ val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex]
+ assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty)
+ assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty)
+ assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).orderByKeyOption.isEmpty)
+ assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.isEmpty)
+ assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty)
+ val addFiles =
+ fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts])
+ assertResult(6)(addFiles.size)
+ assertResult(600572)(addFiles.map(_.rows).sum)
- // GLUTEN-5060: check the unnecessary FilterExec
- val wholeStageTransformer = plans(2).asInstanceOf[WholeStageTransformer]
- val planNodeJson = wholeStageTransformer.substraitPlanJson
- assert(
- !planNodeJson
- .replaceAll("\n", "")
- .replaceAll(" ", "")
- .contains("\"input\":{\"filter\":{"))
- }
+ // GLUTEN-5060: check the unnecessary FilterExec
+ val wholeStageTransformer = plans(2).asInstanceOf[WholeStageTransformer]
+ val planNodeJson = wholeStageTransformer.substraitPlanJson
+ assert(
+ !planNodeJson
+ .replaceAll("\n", "")
+ .replaceAll(" ", "")
+ .contains("\"input\":{\"filter\":{"))
}
}
test("test mergetree insert overwrite") {
- withSQLConf((CHConf.ENABLE_ONEPIPELINE_MERGETREE_WRITE.key, spark35.toString)) {
- spark.sql(s"""
- |DROP TABLE IF EXISTS lineitem_mergetree_insertoverwrite;
- |""".stripMargin)
+ spark.sql(s"""
+ |DROP TABLE IF EXISTS lineitem_mergetree_insertoverwrite;
+ |""".stripMargin)
- spark.sql(s"""
- |CREATE TABLE IF NOT EXISTS lineitem_mergetree_insertoverwrite
- |(
- | l_orderkey bigint,
- | l_partkey bigint,
- | l_suppkey bigint,
- | l_linenumber bigint,
- | l_quantity double,
- | l_extendedprice double,
- | l_discount double,
- | l_tax double,
- | l_returnflag string,
- | l_linestatus string,
- | l_shipdate date,
- | l_commitdate date,
- | l_receiptdate date,
- | l_shipinstruct string,
- | l_shipmode string,
- | l_comment string
- |)
- |USING clickhouse
- |LOCATION '$basePath/lineitem_mergetree_insertoverwrite'
- |""".stripMargin)
+ spark.sql(s"""
+ |CREATE TABLE IF NOT EXISTS lineitem_mergetree_insertoverwrite
+ |(
+ | l_orderkey bigint,
+ | l_partkey bigint,
+ | l_suppkey bigint,
+ | l_linenumber bigint,
+ | l_quantity double,
+ | l_extendedprice double,
+ | l_discount double,
+ | l_tax double,
+ | l_returnflag string,
+ | l_linestatus string,
+ | l_shipdate date,
+ | l_commitdate date,
+ | l_receiptdate date,
+ | l_shipinstruct string,
+ | l_shipmode string,
+ | l_comment string
+ |)
+ |USING clickhouse
+ |LOCATION '$basePath/lineitem_mergetree_insertoverwrite'
+ |""".stripMargin)
- spark.sql(s"""
- | insert into table lineitem_mergetree_insertoverwrite
- | select * from lineitem
- |""".stripMargin)
+ spark.sql(s"""
+ | insert into table lineitem_mergetree_insertoverwrite
+ | select * from lineitem
+ |""".stripMargin)
- spark.sql(s"""
- | insert overwrite table lineitem_mergetree_insertoverwrite
- | select * from lineitem where mod(l_orderkey,2) = 1
- |""".stripMargin)
- val sql2 =
- s"""
- | select count(*) from lineitem_mergetree_insertoverwrite
- |
- |""".stripMargin
- assertResult(300001)(
- // total rows should remain unchanged
- spark.sql(sql2).collect().apply(0).get(0)
- )
- }
+ spark.sql(s"""
+ | insert overwrite table lineitem_mergetree_insertoverwrite
+ | select * from lineitem where mod(l_orderkey,2) = 1
+ |""".stripMargin)
+ val sql2 =
+ s"""
+ | select count(*) from lineitem_mergetree_insertoverwrite
+ |
+ |""".stripMargin
+ assertResult(300001)(
+ // total rows should remain unchanged
+ spark.sql(sql2).collect().apply(0).get(0)
+ )
}
test("test mergetree insert overwrite partitioned table with small table, static") {
- withSQLConf((CHConf.ENABLE_ONEPIPELINE_MERGETREE_WRITE.key, spark35.toString)) {
- spark.sql(s"""
- |DROP TABLE IF EXISTS lineitem_mergetree_insertoverwrite2;
- |""".stripMargin)
+ spark.sql(s"""
+ |DROP TABLE IF EXISTS lineitem_mergetree_insertoverwrite2;
+ |""".stripMargin)
- spark.sql(s"""
- |CREATE TABLE IF NOT EXISTS lineitem_mergetree_insertoverwrite2
- |(
- | l_orderkey bigint,
- | l_partkey bigint,
- | l_suppkey bigint,
- | l_linenumber bigint,
- | l_quantity double,
- | l_extendedprice double,
- | l_discount double,
- | l_tax double,
- | l_returnflag string,
- | l_linestatus string,
- | l_shipdate date,
- | l_commitdate date,
- | l_receiptdate date,
- | l_shipinstruct string,
- | l_shipmode string,
- | l_comment string
- |)
- |USING clickhouse
- |PARTITIONED BY (l_shipdate)
- |LOCATION '$basePath/lineitem_mergetree_insertoverwrite2'
- |""".stripMargin)
+ spark.sql(s"""
+ |CREATE TABLE IF NOT EXISTS lineitem_mergetree_insertoverwrite2
+ |(
+ | l_orderkey bigint,
+ | l_partkey bigint,
+ | l_suppkey bigint,
+ | l_linenumber bigint,
+ | l_quantity double,
+ | l_extendedprice double,
+ | l_discount double,
+ | l_tax double,
+ | l_returnflag string,
+ | l_linestatus string,
+ | l_shipdate date,
+ | l_commitdate date,
+ | l_receiptdate date,
+ | l_shipinstruct string,
+ | l_shipmode string,
+ | l_comment string
+ |)
+ |USING clickhouse
+ |PARTITIONED BY (l_shipdate)
+ |LOCATION '$basePath/lineitem_mergetree_insertoverwrite2'
+ |""".stripMargin)
- spark.sql(s"""
- | insert into table lineitem_mergetree_insertoverwrite2
- | select * from lineitem
- |""".stripMargin)
+ spark.sql(s"""
+ | insert into table lineitem_mergetree_insertoverwrite2
+ | select * from lineitem
+ |""".stripMargin)
- spark.sql(
- s"""
- | insert overwrite table lineitem_mergetree_insertoverwrite2
- | select * from lineitem where l_shipdate BETWEEN date'1993-02-01' AND date'1993-02-10'
- |""".stripMargin)
- val sql2 =
- s"""
- | select count(*) from lineitem_mergetree_insertoverwrite2
- |
- |""".stripMargin
- assertResult(2418)(
- // total rows should remain unchanged
- spark.sql(sql2).collect().apply(0).get(0)
- )
- }
+ spark.sql(
+ s"""
+ | insert overwrite table lineitem_mergetree_insertoverwrite2
+ | select * from lineitem where l_shipdate BETWEEN date'1993-02-01' AND date'1993-02-10'
+ |""".stripMargin)
+ val sql2 =
+ s"""
+ | select count(*) from lineitem_mergetree_insertoverwrite2
+ |
+ |""".stripMargin
+ assertResult(2418)(
+ // total rows should remain unchanged
+ spark.sql(sql2).collect().apply(0).get(0)
+ )
}
test("test mergetree insert overwrite partitioned table with small table, dynamic") {
@@ -650,8 +645,8 @@ class GlutenClickHouseMergeTreeWriteSuite
// static partition
spark.sql(s"""
- | insert into lineitem_mergetree_partition PARTITION (l_shipdate=date'1995-01-21',
- | l_returnflag = 'A')
+ | insert into lineitem_mergetree_partition
+ | PARTITION (l_shipdate=date'1995-01-21', l_returnflag = 'A')
| (l_orderkey,
| l_partkey,
| l_suppkey,
@@ -729,7 +724,8 @@ class GlutenClickHouseMergeTreeWriteSuite
ClickHouseTableV2
.getTable(fileIndex.deltaLog)
.partitionColumns(1))
- val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts])
+ val addFiles =
+ fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts])
assertResult(3836)(addFiles.size)
assertResult(605363)(addFiles.map(_.rows).sum)
@@ -739,7 +735,7 @@ class GlutenClickHouseMergeTreeWriteSuite
}
}
- test("test mergetree write with bucket table") {
+ testSparkVersionLE33("test mergetree write with bucket table") {
spark.sql(s"""
|DROP TABLE IF EXISTS lineitem_mergetree_bucket;
|""".stripMargin)
@@ -979,7 +975,7 @@ class GlutenClickHouseMergeTreeWriteSuite
}
}
- test("test mergetree CTAS complex") {
+ test("test mergetree CTAS partition") {
spark.sql(s"""
|DROP TABLE IF EXISTS lineitem_mergetree_ctas2;
|""".stripMargin)
@@ -988,8 +984,6 @@ class GlutenClickHouseMergeTreeWriteSuite
|CREATE TABLE IF NOT EXISTS lineitem_mergetree_ctas2
|USING clickhouse
|PARTITIONED BY (l_shipdate)
- |CLUSTERED BY (l_orderkey)
- |${if (spark32) "" else "SORTED BY (l_partkey, l_returnflag)"} INTO 4 BUCKETS
|LOCATION '$basePath/lineitem_mergetree_ctas2'
| as select * from lineitem
|""".stripMargin)
@@ -1598,7 +1592,7 @@ class GlutenClickHouseMergeTreeWriteSuite
case scanExec: BasicScanExecTransformer => scanExec
}
assertResult(1)(plans.size)
- assertResult(conf._2)(plans.head.getSplitInfos.size)
+ assertResult(conf._2)(plans.head.getSplitInfos().size)
}
}
})
@@ -1622,12 +1616,12 @@ class GlutenClickHouseMergeTreeWriteSuite
case scanExec: BasicScanExecTransformer => scanExec
}
assertResult(1)(plans.size)
- assertResult(1)(plans.head.getSplitInfos.size)
+ assertResult(1)(plans.head.getSplitInfos().size)
}
}
}
- test("test mergetree with primary keys filter pruning by driver with bucket") {
+ testSparkVersionLE33("test mergetree with primary keys filter pruning by driver with bucket") {
spark.sql(s"""
|DROP TABLE IF EXISTS lineitem_mergetree_pk_pruning_by_driver_bucket;
|""".stripMargin)
@@ -1730,7 +1724,7 @@ class GlutenClickHouseMergeTreeWriteSuite
case f: BasicScanExecTransformer => f
}
assertResult(2)(scanExec.size)
- assertResult(conf._2)(scanExec(1).getSplitInfos.size)
+ assertResult(conf._2)(scanExec(1).getSplitInfos().size)
}
}
})
@@ -1776,7 +1770,7 @@ class GlutenClickHouseMergeTreeWriteSuite
Seq("true", "false").foreach {
skip =>
- withSQLConf("spark.databricks.delta.stats.skipping" -> skip.toString) {
+ withSQLConf("spark.databricks.delta.stats.skipping" -> skip) {
val sqlStr =
s"""
|SELECT
@@ -1799,7 +1793,7 @@ class GlutenClickHouseMergeTreeWriteSuite
}
}
- test("test mergetree with column case sensitive") {
+ testSparkVersionLE33("test mergetree with column case sensitive") {
spark.sql(s"""
|DROP TABLE IF EXISTS LINEITEM_MERGETREE_CASE_SENSITIVE;
|""".stripMargin)
@@ -1838,7 +1832,7 @@ class GlutenClickHouseMergeTreeWriteSuite
runTPCHQueryBySQL(6, q6("lineitem_mergetree_case_sensitive")) { _ => }
}
- test("test mergetree with partition with whitespace") {
+ testSparkVersionLE33("test mergetree with partition with whitespace") {
spark.sql(s"""
|DROP TABLE IF EXISTS lineitem_mergetree_partition_with_whitespace;
|""".stripMargin)
@@ -1900,7 +1894,7 @@ class GlutenClickHouseMergeTreeWriteSuite
Seq(("-1", 3), ("3", 3), ("6", 1)).foreach(
conf => {
withSQLConf(
- ("spark.gluten.sql.columnar.backend.ch.files.per.partition.threshold" -> conf._1)) {
+ "spark.gluten.sql.columnar.backend.ch.files.per.partition.threshold" -> conf._1) {
val sql =
s"""
|select count(1), min(l_returnflag) from lineitem_split
@@ -1913,7 +1907,7 @@ class GlutenClickHouseMergeTreeWriteSuite
val scanExec = collect(df.queryExecution.executedPlan) {
case f: FileSourceScanExecTransformer => f
}
- assert(scanExec(0).getPartitions.size == conf._2)
+ assert(scanExec.head.getPartitions.size == conf._2)
}
}
})
diff --git a/backends-velox/pom.xml b/backends-velox/pom.xml
index 48a044a17f2f..240b2218641c 100755
--- a/backends-velox/pom.xml
+++ b/backends-velox/pom.xml
@@ -28,6 +28,54 @@
org.apache.gluten.tags.UDFTest
+
+ celeborn
+
+ false
+
+
+
+ org.apache.gluten
+ gluten-celeborn
+ ${project.version}
+
+
+ org.apache.celeborn
+ celeborn-client-spark-${spark.major.version}-shaded_${scala.binary.version}
+ ${celeborn.version}
+ provided
+
+
+ org.apache.celeborn
+ celeborn-client-spark-${spark.major.version}_${scala.binary.version}
+
+
+ org.apache.celeborn
+ celeborn-spark-${spark.major.version}-columnar-shuffle_${scala.binary.version}
+
+
+
+
+
+
+ uniffle
+
+ false
+
+
+
+ org.apache.gluten
+ gluten-uniffle
+ ${project.version}
+
+
+ org.apache.uniffle
+ rss-client-spark${spark.major.version}-shaded
+ ${uniffle.version}
+ provided
+
+
+
iceberg
@@ -82,6 +130,29 @@
+
+ hudi
+
+
+ org.apache.gluten
+ gluten-hudi
+ ${project.version}
+
+
+ org.apache.gluten
+ gluten-hudi
+ ${project.version}
+ test-jar
+ test
+
+
+ org.apache.hudi
+ hudi-spark${sparkbundle.version}-bundle_${scala.binary.version}
+ ${hudi.version}
+ provided
+
+
+
diff --git a/gluten-celeborn/velox/src/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory b/backends-velox/src-celeborn/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory
similarity index 100%
rename from gluten-celeborn/velox/src/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory
rename to backends-velox/src-celeborn/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory
diff --git a/gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarBatchSerializer.scala b/backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarBatchSerializer.scala
similarity index 100%
rename from gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarBatchSerializer.scala
rename to backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarBatchSerializer.scala
diff --git a/gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriter.scala b/backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriter.scala
similarity index 100%
rename from gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriter.scala
rename to backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriter.scala
diff --git a/gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriterFactory.scala b/backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriterFactory.scala
similarity index 100%
rename from gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriterFactory.scala
rename to backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriterFactory.scala
diff --git a/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala
new file mode 100644
index 000000000000..00498f87411a
--- /dev/null
+++ b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.execution
+
+import org.apache.gluten.execution.HudiSuite
+
+class VeloxHudiSuite extends HudiSuite {}
diff --git a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxTPCHHudiSuite.scala b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxTPCHHudiSuite.scala
similarity index 91%
rename from gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxTPCHHudiSuite.scala
rename to backends-velox/src-hudi/test/scala/org/apache/execution/VeloxTPCHHudiSuite.scala
index a4e10269c286..cdb3b2918080 100644
--- a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxTPCHHudiSuite.scala
+++ b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxTPCHHudiSuite.scala
@@ -14,16 +14,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-package org.apache.gluten.execution
+package org.apache.execution
+import org.apache.gluten.execution.VeloxTPCHSuite
import org.apache.spark.SparkConf
import java.io.File
class VeloxTPCHHudiSuite extends VeloxTPCHSuite {
-
- protected val tpchBasePath: String = new File(
- "../backends-velox/src/test/resources").getAbsolutePath
+ protected val tpchBasePath: String =
+ getClass.getResource("/").getPath + "../../../src/test/resources"
override protected val resourcePath: String =
new File(tpchBasePath, "tpch-data-parquet").getCanonicalPath
diff --git a/gluten-uniffle/velox/src/main/java/org/apache/spark/shuffle/gluten/uniffle/UniffleShuffleManager.java b/backends-velox/src-uniffle/main/java/org/apache/spark/shuffle/gluten/uniffle/UniffleShuffleManager.java
similarity index 100%
rename from gluten-uniffle/velox/src/main/java/org/apache/spark/shuffle/gluten/uniffle/UniffleShuffleManager.java
rename to backends-velox/src-uniffle/main/java/org/apache/spark/shuffle/gluten/uniffle/UniffleShuffleManager.java
diff --git a/gluten-uniffle/velox/src/main/java/org/apache/spark/shuffle/writer/VeloxUniffleColumnarShuffleWriter.java b/backends-velox/src-uniffle/main/java/org/apache/spark/shuffle/writer/VeloxUniffleColumnarShuffleWriter.java
similarity index 100%
rename from gluten-uniffle/velox/src/main/java/org/apache/spark/shuffle/writer/VeloxUniffleColumnarShuffleWriter.java
rename to backends-velox/src-uniffle/main/java/org/apache/spark/shuffle/writer/VeloxUniffleColumnarShuffleWriter.java
diff --git a/gluten-uniffle/velox/src/main/scala/org/apache/spark/shuffle/writer/PartitionPusher.scala b/backends-velox/src-uniffle/main/scala/org/apache/spark/shuffle/writer/PartitionPusher.scala
similarity index 100%
rename from gluten-uniffle/velox/src/main/scala/org/apache/spark/shuffle/writer/PartitionPusher.scala
rename to backends-velox/src-uniffle/main/scala/org/apache/spark/shuffle/writer/PartitionPusher.scala
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
index c6d2bc065879..d156fffa8b21 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala
@@ -27,7 +27,7 @@ import org.apache.gluten.vectorized.PlanEvaluatorJniWrapper
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.connector.read.InputPartition
-import org.apache.spark.sql.execution.datasources.{FileFormat, HadoopFsRelation, PartitionDirectory}
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory}
import org.apache.spark.sql.sources.DataSourceRegister
import org.apache.spark.sql.types._
import org.apache.spark.task.TaskResources
@@ -96,16 +96,14 @@ class VeloxTransformerApi extends TransformerApi with Logging {
override def packPBMessage(message: Message): Any = Any.pack(message, "")
- override def genWriteParameters(
- fileFormat: FileFormat,
- writeOptions: Map[String, String]): Any = {
- val fileFormatStr = fileFormat match {
+ override def genWriteParameters(write: WriteFilesExecTransformer): Any = {
+ val fileFormatStr = write.fileFormat match {
case register: DataSourceRegister =>
register.shortName
case _ => "UnknownFileFormat"
}
val compressionCodec =
- WriteFilesExecTransformer.getCompressionCodec(writeOptions).capitalize
+ WriteFilesExecTransformer.getCompressionCodec(write.caseInsensitiveOptions).capitalize
val writeParametersStr = new StringBuffer("WriteParameters:")
writeParametersStr.append("is").append(compressionCodec).append("=1")
writeParametersStr.append(";format=").append(fileFormatStr).append("\n")
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
index 8063a5d12207..989def88e70c 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala
@@ -1791,6 +1791,13 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
assert(plan2.find(_.isInstanceOf[ProjectExecTransformer]).isDefined)
}
+ test("cast timestamp to date") {
+ val query = "select cast(ts as date) from values (timestamp'2024-01-01 00:00:00') as tab(ts)"
+ runQueryAndCompare(query) {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
+
test("timestamp broadcast join") {
spark.range(0, 5).createOrReplaceTempView("right")
spark.sql("SELECT id, timestamp_micros(id) as ts from right").createOrReplaceTempView("left")
diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
index 46d6870b04c9..94ea8be5200d 100644
--- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
+++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala
@@ -1357,6 +1357,26 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
}
}
+ test("concat_ws") {
+ runQueryAndCompare("SELECT concat_ws('~~', c_comment, c_address) FROM customer LIMIT 50") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+
+ withTempPath {
+ path =>
+ Seq[Seq[String]](Seq("ab", null, "cd", "", "ef"), Seq(null, "x", "", "y"), Seq.empty, null)
+ .toDF("col")
+ .write
+ .parquet(path.getCanonicalPath)
+
+ spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("array_tbl")
+
+ runQueryAndCompare("SELECT concat_ws('~~', col, 'end') AS res from array_tbl;") {
+ checkGlutenOperatorMatch[ProjectExecTransformer]
+ }
+ }
+ }
+
test("Test input_file_name function") {
runQueryAndCompare("""SELECT input_file_name(), l_orderkey
| from lineitem limit 100""".stripMargin) {
diff --git a/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.cpp b/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.cpp
index a76b4d398d97..0d57d53ff640 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.cpp
+++ b/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.cpp
@@ -21,9 +21,8 @@
#include
#include
#include
-#include
#include
-#include
+#include
#include
#include
#include
@@ -103,7 +102,7 @@ void adjust_output(const DB::QueryPipelineBuilderPtr & builder, const DB::Block
{
throw DB::Exception(
DB::ErrorCodes::LOGICAL_ERROR,
- "Missmatch result columns size, input size is {}, but output size is {}",
+ "Mismatch result columns size, input size is {}, but output size is {}",
input.columns(),
output.columns());
}
@@ -164,12 +163,6 @@ void addMergeTreeSinkTransform(
: std::make_shared(header, partition_by, merge_tree_table, write_settings, context, stats);
chain.addSource(sink);
- const DB::Settings & settings = context->getSettingsRef();
- chain.addSource(std::make_shared(
- header, settings[Setting::min_insert_block_size_rows], settings[Setting::min_insert_block_size_bytes]));
- chain.addSource(std::make_shared(
- header, settings[Setting::min_insert_block_size_rows], settings[Setting::min_insert_block_size_bytes]));
-
builder->addChain(std::move(chain));
}
@@ -212,6 +205,7 @@ void addNormalFileWriterSinkTransform(
namespace local_engine
{
+
IMPLEMENT_GLUTEN_SETTINGS(GlutenWriteSettings, WRITE_RELATED_SETTINGS)
void addSinkTransform(const DB::ContextPtr & context, const substrait::WriteRel & write_rel, const DB::QueryPipelineBuilderPtr & builder)
@@ -224,12 +218,18 @@ void addSinkTransform(const DB::ContextPtr & context, const substrait::WriteRel
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to unpack write optimization with local_engine::Write.");
assert(write.has_common());
const substrait::NamedStruct & table_schema = write_rel.table_schema();
- auto output = TypeParser::buildBlockFromNamedStruct(table_schema);
- adjust_output(builder, output);
- const auto partitionCols = collect_partition_cols(output, table_schema);
+ auto partition_indexes = write.common().partition_col_index();
if (write.has_mergetree())
{
- local_engine::MergeTreeTable merge_tree_table(write, table_schema);
+ MergeTreeTable merge_tree_table(write, table_schema);
+ auto output = TypeParser::buildBlockFromNamedStruct(table_schema, merge_tree_table.low_card_key);
+ adjust_output(builder, output);
+
+ builder->addSimpleTransform(
+ [&](const Block & in_header) -> ProcessorPtr { return std::make_shared(in_header, false); });
+
+ const auto partition_by = collect_partition_cols(output, table_schema, partition_indexes);
+
GlutenWriteSettings write_settings = GlutenWriteSettings::get(context);
if (write_settings.task_write_tmp_dir.empty())
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "MergeTree Write Pipeline need inject relative path.");
@@ -237,23 +237,35 @@ void addSinkTransform(const DB::ContextPtr & context, const substrait::WriteRel
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Non empty relative path for MergeTree table in pipeline mode.");
merge_tree_table.relative_path = write_settings.task_write_tmp_dir;
- addMergeTreeSinkTransform(context, builder, merge_tree_table, output, partitionCols);
+ addMergeTreeSinkTransform(context, builder, merge_tree_table, output, partition_by);
}
else
- addNormalFileWriterSinkTransform(context, builder, write.common().format(), output, partitionCols);
+ {
+ auto output = TypeParser::buildBlockFromNamedStruct(table_schema);
+ adjust_output(builder, output);
+ const auto partition_by = collect_partition_cols(output, table_schema, partition_indexes);
+ addNormalFileWriterSinkTransform(context, builder, write.common().format(), output, partition_by);
+ }
}
-
-DB::Names collect_partition_cols(const DB::Block & header, const substrait::NamedStruct & struct_)
+DB::Names collect_partition_cols(const DB::Block & header, const substrait::NamedStruct & struct_, const PartitionIndexes & partition_by)
{
- DB::Names result;
+ if (partition_by.empty())
+ {
+ assert(std::ranges::all_of(
+ struct_.column_types(), [](const int32_t type) { return type != ::substrait::NamedStruct::PARTITION_COL; }));
+ return {};
+ }
assert(struct_.column_types_size() == header.columns());
assert(struct_.column_types_size() == struct_.struct_().types_size());
- auto name_iter = header.begin();
- auto type_iter = struct_.column_types().begin();
- for (; name_iter != header.end(); ++name_iter, ++type_iter)
- if (*type_iter == ::substrait::NamedStruct::PARTITION_COL)
- result.push_back(name_iter->name);
+ DB::Names result;
+ result.reserve(partition_by.size());
+ for (auto idx : partition_by)
+ {
+ assert(idx >= 0 && idx < header.columns());
+ assert(struct_.column_types(idx) == ::substrait::NamedStruct::PARTITION_COL);
+ result.emplace_back(header.getByPosition(idx).name);
+ }
return result;
}
diff --git a/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.h b/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.h
index 01e0dabaaa7d..bb8c15c07d87 100644
--- a/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.h
+++ b/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.h
@@ -21,6 +21,7 @@
#include
#include
#include
+#include
#include
namespace substrait
@@ -38,9 +39,11 @@ using QueryPipelineBuilderPtr = std::unique_ptr;
namespace local_engine
{
+using PartitionIndexes = google::protobuf::RepeatedField<::int32_t>;
+
void addSinkTransform(const DB::ContextPtr & context, const substrait::WriteRel & write_rel, const DB::QueryPipelineBuilderPtr & builder);
-DB::Names collect_partition_cols(const DB::Block & header, const substrait::NamedStruct & struct_);
+DB::Names collect_partition_cols(const DB::Block & header, const substrait::NamedStruct & struct_, const PartitionIndexes & partition_by);
#define WRITE_RELATED_SETTINGS(M, ALIAS) \
M(String, task_write_tmp_dir, , "The temporary directory for writing data") \
diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.cpp b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.cpp
index 6c9dd890d851..d41e71fb848d 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.cpp
@@ -31,27 +31,37 @@ extern const Metric GlobalThreadActive;
extern const Metric GlobalThreadScheduled;
}
+namespace DB::Setting
+{
+extern const SettingsUInt64 min_insert_block_size_rows;
+extern const SettingsUInt64 min_insert_block_size_bytes;
+}
namespace local_engine
{
-void SparkMergeTreeSink::consume(Chunk & chunk)
+void SparkMergeTreeSink::write(const Chunk & chunk)
{
- assert(!sink_helper->metadata_snapshot->hasPartitionKey());
+ CurrentThread::flushUntrackedMemory();
+ /// Reset earlier, so put it in the scope
BlockWithPartition item{getHeader().cloneWithColumns(chunk.getColumns()), Row{}};
- size_t before_write_memory = 0;
- if (auto * memory_tracker = CurrentThread::getMemoryTracker())
- {
- CurrentThread::flushUntrackedMemory();
- before_write_memory = memory_tracker->get();
- }
+
sink_helper->writeTempPart(item, context, part_num);
part_num++;
- /// Reset earlier to free memory
- item.block.clear();
- item.partition.clear();
+}
- sink_helper->checkAndMerge();
+void SparkMergeTreeSink::consume(Chunk & chunk)
+{
+ Chunk tmp;
+ tmp.swap(chunk);
+ squashed_chunk = squashing.add(std::move(tmp));
+ if (static_cast(squashed_chunk))
+ {
+ write(Squashing::squash(std::move(squashed_chunk)));
+ sink_helper->checkAndMerge();
+ }
+ assert(squashed_chunk.getNumRows() == 0);
+ assert(chunk.getNumRows() == 0);
}
void SparkMergeTreeSink::onStart()
@@ -61,6 +71,11 @@ void SparkMergeTreeSink::onStart()
void SparkMergeTreeSink::onFinish()
{
+ assert(squashed_chunk.getNumRows() == 0);
+ squashed_chunk = squashing.flush();
+ if (static_cast(squashed_chunk))
+ write(Squashing::squash(std::move(squashed_chunk)));
+ assert(squashed_chunk.getNumRows() == 0);
sink_helper->finish(context);
if (stats_.has_value())
(*stats_)->collectStats(sink_helper->unsafeGet(), sink_helper->write_settings.partition_settings.partition_dir);
@@ -91,7 +106,9 @@ SinkToStoragePtr SparkMergeTreeSink::create(
}
else
sink_helper = std::make_shared(dest_storage, write_settings_, isRemoteStorage);
- return std::make_shared(sink_helper, context, stats);
+ const DB::Settings & settings = context->getSettingsRef();
+ return std::make_shared(
+ sink_helper, context, stats, settings[Setting::min_insert_block_size_rows], settings[Setting::min_insert_block_size_bytes]);
}
SinkHelper::SinkHelper(const SparkStorageMergeTreePtr & data_, const SparkMergeTreeWriteSettings & write_settings_, bool isRemoteStorage_)
diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.h b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.h
index b551d86d1d0c..828332d2d6c9 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.h
+++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.h
@@ -227,8 +227,17 @@ class SparkMergeTreeSink : public DB::SinkToStorage
const DB::ContextMutablePtr & context,
const SinkStatsOption & stats = {});
- explicit SparkMergeTreeSink(const SinkHelperPtr & sink_helper_, const ContextPtr & context_, const SinkStatsOption & stats)
- : SinkToStorage(sink_helper_->metadata_snapshot->getSampleBlock()), context(context_), sink_helper(sink_helper_), stats_(stats)
+ explicit SparkMergeTreeSink(
+ const SinkHelperPtr & sink_helper_,
+ const ContextPtr & context_,
+ const SinkStatsOption & stats,
+ size_t min_block_size_rows,
+ size_t min_block_size_bytes)
+ : SinkToStorage(sink_helper_->metadata_snapshot->getSampleBlock())
+ , context(context_)
+ , sink_helper(sink_helper_)
+ , stats_(stats)
+ , squashing(sink_helper_->metadata_snapshot->getSampleBlock(), min_block_size_rows, min_block_size_bytes)
{
}
~SparkMergeTreeSink() override = default;
@@ -241,9 +250,13 @@ class SparkMergeTreeSink : public DB::SinkToStorage
const SinkHelper & sinkHelper() const { return *sink_helper; }
private:
+ void write(const Chunk & chunk);
+
ContextPtr context;
SinkHelperPtr sink_helper;
std::optional> stats_;
+ Squashing squashing;
+ Chunk squashed_chunk;
int part_num = 1;
};
diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp
index a8fdfff6ff75..95145d43fab9 100644
--- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp
+++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp
@@ -18,8 +18,6 @@
#include
#include
-#include
-#include
#include
#include
#include
@@ -28,11 +26,6 @@
#include
#include
-namespace DB::Setting
-{
-extern const SettingsUInt64 min_insert_block_size_rows;
-extern const SettingsUInt64 min_insert_block_size_bytes;
-}
using namespace DB;
namespace
{
@@ -125,12 +118,6 @@ std::unique_ptr SparkMergeTreeWriter::create(
//
// auto stats = std::make_shared(header, sink_helper);
// chain.addSink(stats);
- //
- chain.addSource(std::make_shared(
- header, settings[Setting::min_insert_block_size_rows], settings[Setting::min_insert_block_size_bytes]));
- chain.addSource(std::make_shared(
- header, settings[Setting::min_insert_block_size_rows], settings[Setting::min_insert_block_size_bytes]));
-
return std::make_unique(header, sink_helper, QueryPipeline{std::move(chain)}, spark_job_id);
}
diff --git a/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp b/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
index a01dd363c56c..a36601d6afa5 100644
--- a/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
+++ b/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp
@@ -146,7 +146,7 @@ TEST(WritePipeline, SubstraitFileSink)
DB::Names expected{"s_suppkey", "s_name", "s_address", "s_nationkey", "s_phone", "s_acctbal", "s_comment111"};
EXPECT_EQ(expected, names);
- auto partitionCols = collect_partition_cols(block, table_schema);
+ auto partitionCols = collect_partition_cols(block, table_schema, {});
DB::Names expected_partition_cols;
EXPECT_EQ(expected_partition_cols, partitionCols);
@@ -164,7 +164,7 @@ TEST(WritePipeline, SubstraitFileSink)
INCBIN(native_write_one_partition, SOURCE_DIR "/utils/extern-local-engine/tests/json/native_write_one_partition.json");
-TEST(WritePipeline, SubstraitPartitionedFileSink)
+/*TEST(WritePipeline, SubstraitPartitionedFileSink)
{
const auto context = DB::Context::createCopy(QueryContext::globalContext());
GlutenWriteSettings settings{
@@ -193,7 +193,7 @@ TEST(WritePipeline, SubstraitPartitionedFileSink)
DB::Names expected{"s_suppkey", "s_name", "s_address", "s_phone", "s_acctbal", "s_comment", "s_nationkey"};
EXPECT_EQ(expected, names);
- auto partitionCols = local_engine::collect_partition_cols(block, table_schema);
+ auto partitionCols = local_engine::collect_partition_cols(block, table_schema, {});
DB::Names expected_partition_cols{"s_nationkey"};
EXPECT_EQ(expected_partition_cols, partitionCols);
@@ -201,12 +201,12 @@ TEST(WritePipeline, SubstraitPartitionedFileSink)
const Block & x = *local_executor->nextColumnar();
debug::headBlock(x, 25);
EXPECT_EQ(25, x.rows());
-}
+}*/
TEST(WritePipeline, ComputePartitionedExpression)
{
const auto context = DB::Context::createCopy(QueryContext::globalContext());
-
+
Block sample_block{{STRING(), "name"}, {UINT(), "s_nationkey"}};
auto partition_by = SubstraitPartitionedFileSink::make_partition_expression({"s_nationkey", "name"}, sample_block);
// auto partition_by = printColumn("s_nationkey");
diff --git a/cpp-ch/local-engine/tests/gtest_write_pipeline_mergetree.cpp b/cpp-ch/local-engine/tests/gtest_write_pipeline_mergetree.cpp
index 1ad90060f475..a5cd3fd7f39c 100644
--- a/cpp-ch/local-engine/tests/gtest_write_pipeline_mergetree.cpp
+++ b/cpp-ch/local-engine/tests/gtest_write_pipeline_mergetree.cpp
@@ -258,11 +258,18 @@ TEST(MergeTree, SparkMergeTree)
INCBIN(_3_mergetree_plan_input_, SOURCE_DIR "/utils/extern-local-engine/tests/json/mergetree/lineitem_parquet_input.json");
namespace
{
-void writeMerge(std::string_view json_plan,
- const std::string & outputPath ,
- const std::function & callback, std::optional input = std::nullopt)
+void writeMerge(
+ std::string_view json_plan,
+ const std::string & outputPath,
+ const std::function & callback,
+ std::optional input = std::nullopt)
{
const auto context = DB::Context::createCopy(QueryContext::globalContext());
+
+ auto queryid = QueryContext::instance().initializeQuery("gtest_mergetree");
+ SCOPE_EXIT({ QueryContext::instance().finalizeQuery(queryid); });
+
+
GlutenWriteSettings settings{.task_write_tmp_dir = outputPath};
settings.set(context);
SparkMergeTreeWritePartitionSettings partition_settings{.part_name_prefix = "pipline_prefix"};
@@ -279,18 +286,24 @@ INCBIN(_3_mergetree_plan_, SOURCE_DIR "/utils/extern-local-engine/tests/json/mer
INCBIN(_4_mergetree_plan_, SOURCE_DIR "/utils/extern-local-engine/tests/json/mergetree/4_one_pipeline.json");
TEST(MergeTree, Pipeline)
{
- writeMerge(EMBEDDED_PLAN(_3_mergetree_plan_),"tmp/lineitem_mergetree",[&](const DB::Block & block)
- {
- EXPECT_EQ(1, block.rows());
- debug::headBlock(block);
- });
+ writeMerge(
+ EMBEDDED_PLAN(_3_mergetree_plan_),
+ "tmp/lineitem_mergetree",
+ [&](const DB::Block & block)
+ {
+ EXPECT_EQ(1, block.rows());
+ debug::headBlock(block);
+ });
}
TEST(MergeTree, PipelineWithPartition)
{
- writeMerge(EMBEDDED_PLAN(_4_mergetree_plan_),"tmp/lineitem_mergetree_p",[&](const DB::Block & block)
- {
- EXPECT_EQ(2525, block.rows());
- debug::headBlock(block);
- });
+ writeMerge(
+ EMBEDDED_PLAN(_4_mergetree_plan_),
+ "tmp/lineitem_mergetree_p",
+ [&](const DB::Block & block)
+ {
+ EXPECT_EQ(3815, block.rows());
+ debug::headBlock(block);
+ });
}
\ No newline at end of file
diff --git a/cpp-ch/local-engine/tests/json/mergetree/4_one_pipeline.json b/cpp-ch/local-engine/tests/json/mergetree/4_one_pipeline.json
index 14a9b3dda2ad..513f54a707d4 100644
--- a/cpp-ch/local-engine/tests/json/mergetree/4_one_pipeline.json
+++ b/cpp-ch/local-engine/tests/json/mergetree/4_one_pipeline.json
@@ -9,13 +9,18 @@
"optimization": {
"@type": "type.googleapis.com/local_engine.Write",
"common": {
- "format": "mergetree"
+ "format": "mergetree",
+ "partitionColIndex": [
+ 10,
+ 8
+ ]
},
"mergetree": {
"database": "default",
- "table": "lineitem_mergetree_insertoverwrite2",
- "snapshotId": "1731309448915_0",
- "orderByKey": "tuple()",
+ "table": "lineitem_mergetree_partition",
+ "snapshotId": "1734145864855_0",
+ "orderByKey": "l_orderkey",
+ "primaryKey": "l_orderkey",
"storagePolicy": "default"
}
},
@@ -221,7 +226,7 @@
"NORMAL_COL",
"NORMAL_COL",
"NORMAL_COL",
- "NORMAL_COL",
+ "PARTITION_COL",
"NORMAL_COL",
"PARTITION_COL",
"NORMAL_COL",
@@ -232,138 +237,171 @@
]
},
"input": {
- "read": {
+ "sort": {
"common": {
"direct": {}
},
- "baseSchema": {
- "names": [
- "l_orderkey",
- "l_partkey",
- "l_suppkey",
- "l_linenumber",
- "l_quantity",
- "l_extendedprice",
- "l_discount",
- "l_tax",
- "l_returnflag",
- "l_linestatus",
- "l_shipdate",
- "l_commitdate",
- "l_receiptdate",
- "l_shipinstruct",
- "l_shipmode",
- "l_comment"
- ],
- "struct": {
- "types": [
- {
- "i64": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "i64": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "i64": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "i64": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "fp64": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "fp64": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "fp64": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "fp64": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "string": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "string": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "date": {
- "nullability": "NULLABILITY_NULLABLE"
- }
+ "input": {
+ "read": {
+ "common": {
+ "direct": {}
+ },
+ "baseSchema": {
+ "names": [
+ "l_orderkey",
+ "l_partkey",
+ "l_suppkey",
+ "l_linenumber",
+ "l_quantity",
+ "l_extendedprice",
+ "l_discount",
+ "l_tax",
+ "l_returnflag",
+ "l_linestatus",
+ "l_shipdate",
+ "l_commitdate",
+ "l_receiptdate",
+ "l_shipinstruct",
+ "l_shipmode",
+ "l_comment"
+ ],
+ "struct": {
+ "types": [
+ {
+ "i64": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "i64": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "i64": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "i64": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "fp64": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "fp64": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "fp64": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "fp64": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "string": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "string": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "date": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "date": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "date": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "string": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "string": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ },
+ {
+ "string": {
+ "nullability": "NULLABILITY_NULLABLE"
+ }
+ }
+ ]
},
- {
- "date": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "date": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "string": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "string": {
- "nullability": "NULLABILITY_NULLABLE"
- }
- },
- {
- "string": {
- "nullability": "NULLABILITY_NULLABLE"
+ "columnTypes": [
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL",
+ "NORMAL_COL"
+ ]
+ },
+ "advancedExtension": {
+ "optimization": {
+ "@type": "type.googleapis.com/google.protobuf.StringValue",
+ "value": "isMergeTree=0\n"
+ }
+ }
+ }
+ },
+ "sorts": [
+ {
+ "expr": {
+ "selection": {
+ "directReference": {
+ "structField": {
+ "field": 10
+ }
}
}
- ]
+ },
+ "direction": "SORT_DIRECTION_ASC_NULLS_FIRST"
},
- "columnTypes": [
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL",
- "NORMAL_COL"
- ]
- },
- "advancedExtension": {
- "optimization": {
- "@type": "type.googleapis.com/google.protobuf.StringValue",
- "value": "isMergeTree=0\n"
+ {
+ "expr": {
+ "selection": {
+ "directReference": {
+ "structField": {
+ "field": 8
+ }
+ }
+ }
+ },
+ "direction": "SORT_DIRECTION_ASC_NULLS_FIRST"
}
- }
+ ]
}
}
}
diff --git a/cpp/core/utils/Timer.h b/cpp/core/utils/Timer.h
index b6dec29b1a6a..4fe39068bb77 100644
--- a/cpp/core/utils/Timer.h
+++ b/cpp/core/utils/Timer.h
@@ -19,11 +19,11 @@
#include
-using TimePoint = std::chrono::time_point;
-
namespace gluten {
+template
class Timer {
public:
+ using TimePoint = std::chrono::time_point;
explicit Timer() = default;
void start() {
@@ -36,8 +36,7 @@ class Timer {
return;
}
running_ = false;
- realTimeUsed_ +=
- std::chrono::duration_cast(std::chrono::steady_clock::now() - startTime_).count();
+ realTimeUsed_ += std::chrono::duration_cast(std::chrono::steady_clock::now() - startTime_).count();
}
void reset() {
@@ -62,13 +61,14 @@ class Timer {
int64_t realTimeUsed_ = 0;
};
-class ScopedTimer {
+template
+class ScopedTimerImpl {
public:
- explicit ScopedTimer(int64_t* toAdd) : toAdd_(toAdd) {
+ explicit ScopedTimerImpl(int64_t* toAdd) : toAdd_(toAdd) {
startInternal();
}
- ~ScopedTimer() {
+ ~ScopedTimerImpl() {
stopInternal();
}
@@ -79,7 +79,7 @@ class ScopedTimer {
}
private:
- Timer timer_{};
+ Timer timer_{};
int64_t* toAdd_;
void stopInternal() {
@@ -92,4 +92,10 @@ class ScopedTimer {
timer_.start();
}
};
+
+using ScopedTimer = ScopedTimerImpl;
+using ScopedSecondsTimer = ScopedTimerImpl;
+using ScopedMillisecondsTimer = ScopedTimerImpl;
+using ScopedMicrosecondsTimer = ScopedTimerImpl;
+
} // namespace gluten
diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
index 682bf0fcd5d6..996b3bdce033 100644
--- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
@@ -61,7 +61,6 @@ const std::unordered_set kRegexFunctions = {
const std::unordered_set kBlackList = {
"split_part",
"factorial",
- "concat_ws",
"from_json",
"json_array_length",
"trunc",
@@ -300,10 +299,13 @@ bool SubstraitToVeloxPlanValidator::validateCast(
case TypeKind::VARBINARY:
LOG_VALIDATION_MSG("Invalid input type in casting: ARRAY/MAP/ROW/VARBINARY.");
return false;
- case TypeKind::TIMESTAMP: {
- LOG_VALIDATION_MSG("Casting from TIMESTAMP is not supported or has incorrect result.");
- return false;
- }
+ case TypeKind::TIMESTAMP:
+ // Only support cast timestamp to date
+ if (!toType->isDate()) {
+ LOG_VALIDATION_MSG(
+ "Casting from TIMESTAMP to " + toType->toString() + " is not supported or has incorrect result.");
+ return false;
+ }
default: {
}
}
diff --git a/docs/developers/HowTo.md b/docs/developers/HowTo.md
index 22ad3e30efc7..dce32d55c02d 100644
--- a/docs/developers/HowTo.md
+++ b/docs/developers/HowTo.md
@@ -134,16 +134,16 @@ to let it override the corresponding C standard functions entirely. It may help
Now, both Parquet and DWRF format files are supported, related scripts and files are under the directory of `${GLUTEN_HOME}/backends-velox/workload/tpch`.
The file `README.md` under `${GLUTEN_HOME}/backends-velox/workload/tpch` offers some useful help, but it's still not enough and exact.
-One way of run TPC-H test is to run velox-be by workflow, you can refer to [velox_be.yml](https://github.com/apache/incubator-gluten/blob/main/.github/workflows/velox_be.yml#L90)
+One way of run TPC-H test is to run velox-be by workflow, you can refer to [velox_backend.yml](https://github.com/apache/incubator-gluten/blob/main/.github/workflows/velox_backend.yml#L280)
Here we will explain how to run TPC-H on Velox backend with the Parquet file format.
1. First, prepare the datasets, you have two choices.
- - One way, generate Parquet datasets using the script under `${GLUTEN_HOME}/backends-velox/workload/tpch/gen_data/parquet_dataset`, you can get help from the above
+ - One way, generate Parquet datasets using the script under `${GLUTEN_HOME}/tools/workload/tpch/gen_data/parquet_dataset`, you can get help from the above
-mentioned `README.md`.
- The other way, using the small dataset under `${GLUTEN_HOME}/backends-velox/src/test/resources/tpch-data-parquet` directly, if you just want to make simple
TPC-H testing, this dataset is a good choice.
2. Second, run TPC-H on Velox backend testing.
- - Modify `${GLUTEN_HOME}/backends-velox/workload/tpch/run_tpch/tpch_parquet.scala`.
+ - Modify `${GLUTEN_HOME}/tools/workload/tpch/run_tpch/tpch_parquet.scala`.
- Set `var parquet_file_path` to correct directory. If using the small dataset directly in the step one, then modify it as below:
```scala
@@ -156,12 +156,12 @@ Here we will explain how to run TPC-H on Velox backend with the Parquet file for
var gluten_root = "/home/gluten"
```
- - Modify `${GLUTEN_HOME}/backends-velox/workload/tpch/run_tpch/tpch_parquet.sh`.
+ - Modify `${GLUTEN_HOME}/tools/workload/tpch/run_tpch/tpch_parquet.sh`.
- Set `GLUTEN_JAR` correctly. Please refer to the section of [Build Gluten with Velox Backend](../get-started/Velox.md/#2-build-gluten-with-velox-backend)
- Set `SPARK_HOME` correctly.
- Set the memory configurations appropriately.
- Execute `tpch_parquet.sh` using the below command.
- - `cd ${GLUTEN_HOME}/backends-velox/workload/tpch/run_tpch/`
+ - `cd ${GLUTEN_HOME}/tools/workload/tpch/run_tpch/`
- `./tpch_parquet.sh`
# How to run TPC-DS
diff --git a/ep/build-clickhouse/src/package.sh b/ep/build-clickhouse/src/package.sh
index 2583727b212e..522e073fc3f5 100755
--- a/ep/build-clickhouse/src/package.sh
+++ b/ep/build-clickhouse/src/package.sh
@@ -90,7 +90,7 @@ function build_gluten_by_spark_version() {
sv=$(echo "$spark_profile" | tr -d '.')
echo "build gluten with spark ${spark_profile}, scala ${scala_version}"
- mvn clean install -Pbackends-clickhouse -Pspark-"${spark_profile}" -Pscala-"${scala_version}" -Pceleborn -Piceberg -DskipTests -Dcheckstyle.skip
+ mvn clean install -Pbackends-clickhouse -Pspark-"${spark_profile}" -Pscala-"${scala_version}" -Pceleborn -Piceberg -Pdelta -DskipTests -Dcheckstyle.skip
cp "${GLUTEN_SOURCE}"/backends-clickhouse/target/gluten-*-spark-"${spark_profile}"-jar-with-dependencies.jar "${PACKAGE_DIR_PATH}"/jars/spark"${sv}"/gluten.jar
cp "${GLUTEN_SOURCE}"/gluten-celeborn/clickhouse/target/gluten-celeborn-clickhouse-"${PROJECT_VERSION}"-jar-with-dependencies.jar "${PACKAGE_DIR_PATH}"/jars/spark"${sv}"
delta_version=$(mvn -q -Dexec.executable="echo" -Dexec.args='${delta.version}' -Pspark-"${spark_profile}" --non-recursive exec:exec)
diff --git a/gluten-arrow/src/main/java/org/apache/gluten/memory/listener/ReservationListeners.java b/gluten-arrow/src/main/java/org/apache/gluten/memory/listener/ReservationListeners.java
index b221db13e375..9d63a8601b4d 100644
--- a/gluten-arrow/src/main/java/org/apache/gluten/memory/listener/ReservationListeners.java
+++ b/gluten-arrow/src/main/java/org/apache/gluten/memory/listener/ReservationListeners.java
@@ -52,18 +52,7 @@ private static ReservationListener create0(
tmm, name, Spillers.withMinSpillSize(spiller, reservationBlockSize), mutableStats);
final MemoryTarget overConsumer =
MemoryTargets.newConsumer(
- tmm,
- consumer.name() + ".OverAcquire",
- new Spiller() {
- @Override
- public long spill(MemoryTarget self, Phase phase, long size) {
- if (!Spillers.PHASE_SET_ALL.contains(phase)) {
- return 0L;
- }
- return self.repay(size);
- }
- },
- Collections.emptyMap());
+ tmm, consumer.name() + ".OverAcquire", Spillers.NOOP, Collections.emptyMap());
final MemoryTarget target =
MemoryTargets.throwOnOom(
MemoryTargets.overAcquire(
diff --git a/gluten-celeborn/clickhouse/pom.xml b/gluten-celeborn/clickhouse/pom.xml
deleted file mode 100755
index 21263443d735..000000000000
--- a/gluten-celeborn/clickhouse/pom.xml
+++ /dev/null
@@ -1,260 +0,0 @@
-
-
-
- gluten-celeborn
- org.apache.gluten
- 1.3.0-SNAPSHOT
- ../pom.xml
-
- 4.0.0
-
- gluten-celeborn-clickhouse
- jar
- Gluten Celeborn Clickhouse
-
-
-
- org.apache.gluten
- backends-clickhouse
- ${project.version}
- provided
-
-
- org.apache.gluten
- backends-clickhouse
- ${project.version}
- test-jar
- test
-
-
- org.apache.gluten
- gluten-substrait
- ${project.version}
- test-jar
- test
-
-
- org.apache.gluten
- gluten-celeborn-common
- ${project.version}
- compile
-
-
- org.apache.spark
- spark-core_${scala.binary.version}
- test-jar
- test
-
-
- org.apache.spark
- spark-sql_${scala.binary.version}
- test-jar
- test
-
-
- org.apache.spark
- spark-catalyst_${scala.binary.version}
- test-jar
- test
-
-
- org.apache.spark
- spark-yarn_${scala.binary.version}
- ${spark.version}
- test-jar
- test
-
-
- org.apache.spark
- spark-hive_${scala.binary.version}
- ${spark.version}
- test-jar
- test
-
-
- org.apache.spark
- spark-hive_${scala.binary.version}
- test
-
-
- org.apache.hive.hcatalog
- hive-hcatalog-core
- 2.3.9
- test
-
-
- org.pentaho
- pentaho-aggdesigner-algorithm
-
-
- net.minidev
- json-smart
-
-
- org.apache.hive
- hive-exec
-
-
- guava
- com.google.guava
-
-
- hadoop-common
- org.apache.hadoop
-
-
- hadoop-hdfs
- org.apache.hadoop
-
-
-
-
- io.delta
- ${delta.package.name}_${scala.binary.version}
- test
-
-
- junit
- junit
-
-
- org.mockito
- mockito-core
- 2.23.4
- test
-
-
- org.scalatestplus
- scalatestplus-mockito_${scala.binary.version}
- 1.0.0-M2
- test
-
-
- org.scalatest
- scalatest_${scala.binary.version}
- test
-
-
- org.scalatestplus
- scalatestplus-scalacheck_${scala.binary.version}
- 3.1.0.0-RC2
- test
-
-
- org.apache.hadoop
- hadoop-client
- ${hadoop.version}
- test
-
-
- org.apache.arrow
- arrow-memory-core
- ${arrow.version}
- provided
-
-
- io.netty
- netty-common
-
-
- io.netty
- netty-buffer
-
-
-
-
- org.apache.arrow
- arrow-vector
- ${arrow.version}
- provided
-
-
- io.netty
- netty-common
-
-
- io.netty
- netty-buffer
-
-
-
-
-
-
- target/scala-${scala.binary.version}/classes
- target/scala-${scala.binary.version}/test-classes
-
-
- org.apache.maven.plugins
- maven-resources-plugin
-
-
- net.alchim31.maven
- scala-maven-plugin
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
-
-
- org.scalastyle
- scalastyle-maven-plugin
-
-
- org.apache.maven.plugins
- maven-checkstyle-plugin
-
-
- maven-assembly-plugin
- 3.3.0
-
-
- jar-with-dependencies
-
-
-
-
- make-assembly
- package
-
- single
-
-
-
-
-
- org.scalatest
- scalatest-maven-plugin
-
-
- test
-
- test
-
-
-
- ${clickhouse.lib.path}
- ${tpcds.data.path}
-
-
-
-
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
- prepare-test-jar
- test-compile
-
- test-jar
-
-
-
-
-
-
-
diff --git a/gluten-celeborn/common/pom.xml b/gluten-celeborn/common/pom.xml
deleted file mode 100755
index da7e68987659..000000000000
--- a/gluten-celeborn/common/pom.xml
+++ /dev/null
@@ -1,47 +0,0 @@
-
-
-
- gluten-celeborn
- org.apache.gluten
- 1.3.0-SNAPSHOT
- ../pom.xml
-
- 4.0.0
-
- gluten-celeborn-common
- jar
- Gluten Celeborn Common
-
-
- target/scala-${scala.binary.version}/classes
- target/scala-${scala.binary.version}/test-classes
-
-
- net.alchim31.maven
- scala-maven-plugin
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
-
-
- org.scalastyle
- scalastyle-maven-plugin
-
-
- org.apache.maven.plugins
- maven-checkstyle-plugin
-
-
- com.diffplug.spotless
- spotless-maven-plugin
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
-
-
diff --git a/gluten-celeborn/package/pom.xml b/gluten-celeborn/package/pom.xml
deleted file mode 100644
index 7b18787b4e16..000000000000
--- a/gluten-celeborn/package/pom.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-
- 4.0.0
-
- gluten-celeborn
- org.apache.gluten
- 1.3.0-SNAPSHOT
- ../pom.xml
-
-
- gluten-celeborn-package
- jar
- Gluten Celeborn Package
-
-
-
- backends-velox
-
-
- org.apache.gluten
- gluten-celeborn-velox
- ${project.version}
-
-
-
-
- backends-clickhouse
-
-
- org.apache.gluten
- gluten-celeborn-clickhouse
- ${project.version}
-
-
-
-
-
diff --git a/gluten-celeborn/pom.xml b/gluten-celeborn/pom.xml
index de19132b38f8..0eca5da979e1 100755
--- a/gluten-celeborn/pom.xml
+++ b/gluten-celeborn/pom.xml
@@ -11,7 +11,7 @@
4.0.0
gluten-celeborn
- pom
+ jar
Gluten Celeborn
@@ -56,50 +56,19 @@
-
-
-
- net.alchim31.maven
- scala-maven-plugin
-
- true
-
- -Xss128m
-
-
-
-
- org.scalastyle
- scalastyle-maven-plugin
-
-
- com.diffplug.spotless
- spotless-maven-plugin
-
-
-
+
+
+ net.alchim31.maven
+ scala-maven-plugin
+
+
+ org.scalastyle
+ scalastyle-maven-plugin
+
+
+ com.diffplug.spotless
+ spotless-maven-plugin
+
+
-
-
-
- backends-velox
-
-
-
- velox
- common
- package
-
-
-
- backends-clickhouse
-
-
-
- clickhouse
- common
- package
-
-
-
diff --git a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java b/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
similarity index 100%
rename from gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
rename to gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java
diff --git a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleWriterFactory.java b/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleWriterFactory.java
similarity index 100%
rename from gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleWriterFactory.java
rename to gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleWriterFactory.java
diff --git a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java b/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java
similarity index 100%
rename from gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java
rename to gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java
diff --git a/gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala b/gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala
similarity index 99%
rename from gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala
rename to gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala
index 10cf06a3ce59..42e939e4420d 100644
--- a/gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala
+++ b/gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala
@@ -16,8 +16,9 @@
*/
package org.apache.spark.shuffle
+import org.apache.celeborn.client.ShuffleClient
+import org.apache.celeborn.common.CelebornConf
import org.apache.gluten.GlutenConfig
-
import org.apache.spark._
import org.apache.spark.internal.Logging
import org.apache.spark.internal.config.SHUFFLE_COMPRESS
@@ -26,9 +27,6 @@ import org.apache.spark.shuffle.celeborn.CelebornShuffleHandle
import org.apache.spark.sql.vectorized.ColumnarBatch
import org.apache.spark.storage.BlockManager
-import org.apache.celeborn.client.ShuffleClient
-import org.apache.celeborn.common.CelebornConf
-
import java.io.IOException
import java.util.Locale
diff --git a/gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala b/gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala
similarity index 99%
rename from gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala
rename to gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala
index 2f59307230a0..545a4c113936 100644
--- a/gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala
+++ b/gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala
@@ -16,11 +16,10 @@
*/
package org.apache.spark.shuffle
+import org.apache.celeborn.client.ShuffleClient
import org.apache.spark._
import org.apache.spark.internal.Logging
-import org.apache.celeborn.client.ShuffleClient
-
import java.io.IOException
class CelebornPartitionPusher(
diff --git a/gluten-celeborn/velox/pom.xml b/gluten-celeborn/velox/pom.xml
deleted file mode 100755
index 55aa8f3c9b5f..000000000000
--- a/gluten-celeborn/velox/pom.xml
+++ /dev/null
@@ -1,68 +0,0 @@
-
-
-
- gluten-celeborn
- org.apache.gluten
- 1.3.0-SNAPSHOT
- ../pom.xml
-
- 4.0.0
-
- gluten-celeborn-velox
- jar
- Gluten Celeborn Velox
-
-
-
- org.apache.gluten
- backends-velox
- ${project.version}
- provided
-
-
- org.apache.gluten
- gluten-arrow
- ${project.version}
- provided
-
-
- org.apache.gluten
- gluten-celeborn-common
- ${project.version}
- compile
-
-
-
-
- target/scala-${scala.binary.version}/classes
- target/scala-${scala.binary.version}/test-classes
-
-
- net.alchim31.maven
- scala-maven-plugin
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
-
-
- org.scalastyle
- scalastyle-maven-plugin
-
-
- org.apache.maven.plugins
- maven-checkstyle-plugin
-
-
- com.diffplug.spotless
- spotless-maven-plugin
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
-
-
diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargetVisitor.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargetVisitor.java
index e58dbb295b08..a42a51e0ce4e 100644
--- a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargetVisitor.java
+++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargetVisitor.java
@@ -35,4 +35,6 @@ public interface MemoryTargetVisitor {
T visit(NoopMemoryTarget noopMemoryTarget);
T visit(DynamicOffHeapSizingMemoryTarget dynamicOffHeapSizingMemoryTarget);
+
+ T visit(RetryOnOomMemoryTarget retryOnOomMemoryTarget);
}
diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargets.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargets.java
index 6f7cc9bd9c9c..c0f74c7990d1 100644
--- a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargets.java
+++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargets.java
@@ -20,8 +20,10 @@
import org.apache.gluten.memory.MemoryUsageStatsBuilder;
import org.apache.gluten.memory.memtarget.spark.TreeMemoryConsumers;
+import org.apache.spark.SparkEnv;
import org.apache.spark.annotation.Experimental;
import org.apache.spark.memory.TaskMemoryManager;
+import org.apache.spark.util.SparkResourceUtil;
import java.util.Map;
@@ -43,6 +45,14 @@ public static MemoryTarget overAcquire(
return new OverAcquire(target, overTarget, overAcquiredRatio);
}
+ public static TreeMemoryTarget retrySpillOnOom(TreeMemoryTarget target) {
+ SparkEnv env = SparkEnv.get();
+ if (env != null && env.conf() != null && SparkResourceUtil.getTaskSlots(env.conf()) > 1) {
+ return new RetryOnOomMemoryTarget(target);
+ }
+ return target;
+ }
+
@Experimental
public static MemoryTarget dynamicOffHeapSizingIfEnabled(MemoryTarget memoryTarget) {
if (GlutenConfig.getConf().dynamicOffHeapSizingEnabled()) {
@@ -59,11 +69,12 @@ public static TreeMemoryTarget newConsumer(
Map virtualChildren) {
final TreeMemoryConsumers.Factory factory;
if (GlutenConfig.getConf().memoryIsolation()) {
- factory = TreeMemoryConsumers.isolated();
+ return TreeMemoryConsumers.isolated().newConsumer(tmm, name, spiller, virtualChildren);
} else {
- factory = TreeMemoryConsumers.shared();
+ // Retry of spilling is needed in shared mode because the maxMemoryPerTask of Vanilla Spark
+ // ExecutionMemoryPool is dynamic when with multi-slot config.
+ return MemoryTargets.retrySpillOnOom(
+ TreeMemoryConsumers.shared().newConsumer(tmm, name, spiller, virtualChildren));
}
-
- return factory.newConsumer(tmm, name, spiller, virtualChildren);
}
}
diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/OverAcquire.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/OverAcquire.java
index e7321b4b7e0e..7724083d6852 100644
--- a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/OverAcquire.java
+++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/OverAcquire.java
@@ -57,13 +57,15 @@ public long borrow(long size) {
}
Preconditions.checkState(overTarget.usedBytes() == 0);
long granted = target.borrow(size);
- long majorSize = target.usedBytes();
- long overSize = (long) (ratio * majorSize);
- long overAcquired = overTarget.borrow(overSize);
- Preconditions.checkState(overAcquired == overTarget.usedBytes());
- long releasedOverSize = overTarget.repay(overAcquired);
- Preconditions.checkState(releasedOverSize == overAcquired);
- Preconditions.checkState(overTarget.usedBytes() == 0);
+ if (granted >= size) {
+ long majorSize = target.usedBytes();
+ long overSize = (long) (ratio * majorSize);
+ long overAcquired = overTarget.borrow(overSize);
+ Preconditions.checkState(overAcquired == overTarget.usedBytes());
+ long releasedOverSize = overTarget.repay(overAcquired);
+ Preconditions.checkState(releasedOverSize == overAcquired);
+ Preconditions.checkState(overTarget.usedBytes() == 0);
+ }
return granted;
}
diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java
new file mode 100644
index 000000000000..1a5388d0d187
--- /dev/null
+++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.gluten.memory.memtarget;
+
+import org.apache.gluten.memory.MemoryUsageStatsBuilder;
+import org.apache.gluten.proto.MemoryUsageStats;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Map;
+
+public class RetryOnOomMemoryTarget implements TreeMemoryTarget {
+ private static final Logger LOGGER = LoggerFactory.getLogger(RetryOnOomMemoryTarget.class);
+ private final TreeMemoryTarget target;
+
+ RetryOnOomMemoryTarget(TreeMemoryTarget target) {
+ this.target = target;
+ }
+
+ @Override
+ public long borrow(long size) {
+ long granted = target.borrow(size);
+ if (granted < size) {
+ LOGGER.info("Retrying spill require:{} got:{}", size, granted);
+ final long spilled = retryingSpill(Long.MAX_VALUE);
+ final long remaining = size - granted;
+ if (spilled >= remaining) {
+ granted += target.borrow(remaining);
+ }
+ LOGGER.info("Retrying spill spilled:{} final granted:{}", spilled, granted);
+ }
+ return granted;
+ }
+
+ private long retryingSpill(long size) {
+ TreeMemoryTarget rootTarget = target;
+ while (true) {
+ try {
+ rootTarget = rootTarget.parent();
+ } catch (IllegalStateException e) {
+ // Reached the root node
+ break;
+ }
+ }
+ return TreeMemoryTargets.spillTree(rootTarget, size);
+ }
+
+ @Override
+ public long repay(long size) {
+ return target.repay(size);
+ }
+
+ @Override
+ public long usedBytes() {
+ return target.usedBytes();
+ }
+
+ @Override
+ public T accept(MemoryTargetVisitor visitor) {
+ return visitor.visit(this);
+ }
+
+ @Override
+ public String name() {
+ return target.name();
+ }
+
+ @Override
+ public MemoryUsageStats stats() {
+ return target.stats();
+ }
+
+ @Override
+ public TreeMemoryTarget newChild(
+ String name,
+ long capacity,
+ Spiller spiller,
+ Map virtualChildren) {
+ return target.newChild(name, capacity, spiller, virtualChildren);
+ }
+
+ @Override
+ public Map children() {
+ return target.children();
+ }
+
+ @Override
+ public TreeMemoryTarget parent() {
+ return target.parent();
+ }
+
+ @Override
+ public Spiller getNodeSpiller() {
+ return target.getNodeSpiller();
+ }
+
+ public TreeMemoryTarget target() {
+ return target;
+ }
+}
diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java
index 44c725798c75..1289a01c349e 100644
--- a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java
+++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java
@@ -138,7 +138,8 @@ public Map children() {
@Override
public TreeMemoryTarget parent() {
// we are root
- throw new IllegalStateException("Unreachable code");
+ throw new IllegalStateException(
+ "Unreachable code org.apache.gluten.memory.memtarget.spark.TreeMemoryConsumer.parent");
}
@Override
diff --git a/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala b/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala
index d221fafce418..637ef8b22fd4 100644
--- a/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala
+++ b/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala
@@ -131,6 +131,10 @@ object SparkMemoryUtil {
dynamicOffHeapSizingMemoryTarget: DynamicOffHeapSizingMemoryTarget): String = {
dynamicOffHeapSizingMemoryTarget.delegated().accept(this)
}
+
+ override def visit(retryOnOomMemoryTarget: RetryOnOomMemoryTarget): String = {
+ retryOnOomMemoryTarget.target().accept(this)
+ }
})
}
diff --git a/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala b/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala
index b061aa332c74..df5917125b64 100644
--- a/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala
+++ b/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala
@@ -298,9 +298,14 @@ class TaskResourceRegistry extends Logging {
o1: util.Map.Entry[Int, util.LinkedHashSet[TaskResource]],
o2: util.Map.Entry[Int, util.LinkedHashSet[TaskResource]]) => {
val diff = o2.getKey - o1.getKey // descending by priority
- if (diff > 0) 1
- else if (diff < 0) -1
- else throw new IllegalStateException("Unreachable code")
+ if (diff > 0) {
+ 1
+ } else if (diff < 0) {
+ -1
+ } else {
+ throw new IllegalStateException(
+ "Unreachable code from org.apache.spark.task.TaskResourceRegistry.releaseAll")
+ }
}
)
table.forEach {
diff --git a/gluten-hudi/pom.xml b/gluten-hudi/pom.xml
index 7900182f853a..5865f1f6ece8 100755
--- a/gluten-hudi/pom.xml
+++ b/gluten-hudi/pom.xml
@@ -46,19 +46,6 @@
test-jar
test
-
- org.apache.gluten
- backends-velox
- ${project.version}
- test
-
-
- org.apache.gluten
- backends-velox
- ${project.version}
- test-jar
- test
-
org.apache.spark
spark-core_${scala.binary.version}
diff --git a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala b/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala
similarity index 98%
rename from gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala
rename to gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala
index b760ec556535..97633fa064cc 100644
--- a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala
+++ b/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala
@@ -19,7 +19,7 @@ package org.apache.gluten.execution
import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
-class VeloxHudiSuite extends WholeStageTransformerSuite {
+abstract class HudiSuite extends WholeStageTransformerSuite {
protected val rootPath: String = getClass.getResource("/").getPath
override protected val resourcePath: String = "/tpch-data-parquet"
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala
index 69cea9c5470d..984450bf164e 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala
@@ -16,12 +16,13 @@
*/
package org.apache.gluten.backendsapi
+import org.apache.gluten.execution.WriteFilesExecTransformer
import org.apache.gluten.substrait.expression.ExpressionNode
import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression}
import org.apache.spark.sql.connector.read.InputPartition
import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.execution.datasources.{FileFormat, HadoopFsRelation, PartitionDirectory}
+import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory}
import org.apache.spark.sql.types.{DataType, DecimalType, StructType}
import org.apache.spark.util.collection.BitSet
@@ -75,7 +76,7 @@ trait TransformerApi {
/** This method is only used for CH backend tests */
def invalidateSQLExecutionResource(executionId: String): Unit = {}
- def genWriteParameters(fileFormat: FileFormat, writeOptions: Map[String, String]): Any
+ def genWriteParameters(write: WriteFilesExecTransformer): Any
/** use Hadoop Path class to encode the file path */
def encodeFilePathIfNeed(filePath: String): String = filePath
diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala
index a9d3a6282ae1..726dbdc3ef30 100644
--- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala
+++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala
@@ -67,7 +67,7 @@ case class WriteFilesExecTransformer(
override def output: Seq[Attribute] = Seq.empty
- private val caseInsensitiveOptions = CaseInsensitiveMap(options)
+ val caseInsensitiveOptions: CaseInsensitiveMap[String] = CaseInsensitiveMap(options)
def getRelNode(
context: SubstraitContext,
@@ -99,8 +99,7 @@ case class WriteFilesExecTransformer(
ConverterUtils.collectAttributeNames(inputAttributes.toSeq)
val extensionNode = if (!validation) {
ExtensionBuilder.makeAdvancedExtension(
- BackendsApiManager.getTransformerApiInstance
- .genWriteParameters(fileFormat, caseInsensitiveOptions),
+ BackendsApiManager.getTransformerApiInstance.genWriteParameters(this),
SubstraitUtil.createEnhancement(originalInputAttributes)
)
} else {
diff --git a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/ApplyResourceProfileExec.scala b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/ApplyResourceProfileExec.scala
new file mode 100644
index 000000000000..17640f461213
--- /dev/null
+++ b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/ApplyResourceProfileExec.scala
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.spark.sql.execution
+
+import org.apache.gluten.execution.GlutenPlan
+import org.apache.gluten.extension.columnar.transition.Convention
+
+import org.apache.spark.annotation.Experimental
+import org.apache.spark.rdd.RDD
+import org.apache.spark.resource.ResourceProfile
+import org.apache.spark.sql.catalyst.InternalRow
+import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder}
+import org.apache.spark.sql.catalyst.plans.physical.{Distribution, Partitioning}
+import org.apache.spark.sql.vectorized.ColumnarBatch
+
+/**
+ * Used to apply specified resource profile for the whole stage.
+ * @param child
+ * @param resourceProfile
+ * resource profile specified for child belong stage.
+ */
+@Experimental
+case class ApplyResourceProfileExec(child: SparkPlan, resourceProfile: ResourceProfile)
+ extends UnaryExecNode
+ with GlutenPlan {
+
+ override def batchType(): Convention.BatchType = {
+ Convention.get(child).batchType
+ }
+
+ override def rowType0(): Convention.RowType = {
+ Convention.get(child).rowType
+ }
+
+ override def outputPartitioning: Partitioning = {
+ child.outputPartitioning
+ }
+
+ override def requiredChildDistribution: scala.Seq[Distribution] = {
+ child.requiredChildDistribution
+ }
+
+ override def outputOrdering: scala.Seq[SortOrder] = {
+ child.outputOrdering
+ }
+
+ override def requiredChildOrdering: scala.Seq[scala.Seq[SortOrder]] = {
+ child.requiredChildOrdering
+ }
+
+ override protected def doExecute(): RDD[InternalRow] = {
+ log.info(s"Apply $resourceProfile for plan ${child.nodeName}")
+ child.execute.withResources(resourceProfile)
+ }
+
+ override protected def doExecuteColumnar(): RDD[ColumnarBatch] = {
+ log.info(s"Apply $resourceProfile for columnar plan ${child.nodeName}")
+ child.executeColumnar.withResources(resourceProfile)
+ }
+
+ override def output: scala.Seq[Attribute] = child.output
+
+ override protected def withNewChildInternal(newChild: SparkPlan): ApplyResourceProfileExec =
+ copy(child = newChild)
+}
diff --git a/gluten-uniffle/.gitkeep b/gluten-uniffle/.gitkeep
new file mode 100644
index 000000000000..f2d1254d2735
--- /dev/null
+++ b/gluten-uniffle/.gitkeep
@@ -0,0 +1 @@
+The module is kept for adding common code shared by backends for Uniffle support in Gluten.
diff --git a/gluten-uniffle/package/pom.xml b/gluten-uniffle/package/pom.xml
deleted file mode 100644
index e49748e7c8e9..000000000000
--- a/gluten-uniffle/package/pom.xml
+++ /dev/null
@@ -1,29 +0,0 @@
-
-
-
- gluten-uniffle
- org.apache.gluten
- 1.3.0-SNAPSHOT
- ../pom.xml
-
- 4.0.0
-
- gluten-uniffle-package
- jar
- Gluten Uniffle Package
-
-
-
- backends-velox
-
-
- org.apache.gluten
- gluten-uniffle-velox
- ${project.version}
-
-
-
-
-
diff --git a/gluten-uniffle/pom.xml b/gluten-uniffle/pom.xml
index b7fe4c2e4268..efc8ce6555c5 100644
--- a/gluten-uniffle/pom.xml
+++ b/gluten-uniffle/pom.xml
@@ -11,7 +11,7 @@
4.0.0
gluten-uniffle
- pom
+ jar
Gluten Uniffle
@@ -75,15 +75,4 @@
-
-
- backends-velox
-
-
-
- velox
- package
-
-
-
diff --git a/gluten-uniffle/velox/pom.xml b/gluten-uniffle/velox/pom.xml
deleted file mode 100755
index ab730674fbb3..000000000000
--- a/gluten-uniffle/velox/pom.xml
+++ /dev/null
@@ -1,62 +0,0 @@
-
-
-
- gluten-uniffle
- org.apache.gluten
- 1.3.0-SNAPSHOT
- ../pom.xml
-
- 4.0.0
-
- gluten-uniffle-velox
- jar
- Gluten Uniffle Velox
-
-
-
- org.apache.gluten
- backends-velox
- ${project.version}
- provided
-
-
- org.apache.gluten
- gluten-arrow
- ${project.version}
- provided
-
-
-
-
- target/scala-${scala.binary.version}/classes
- target/scala-${scala.binary.version}/test-classes
-
-
- net.alchim31.maven
- scala-maven-plugin
-
-
- org.apache.maven.plugins
- maven-compiler-plugin
-
-
- org.scalastyle
- scalastyle-maven-plugin
-
-
- org.apache.maven.plugins
- maven-checkstyle-plugin
-
-
- com.diffplug.spotless
- spotless-maven-plugin
-
-
- org.apache.maven.plugins
- maven-jar-plugin
-
-
-
-
diff --git a/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala b/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala
index dce8ac83710c..51e8174da7fb 100644
--- a/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala
+++ b/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala
@@ -80,7 +80,8 @@ abstract class BackendTestSettings {
return !isExcluded
}
- throw new IllegalStateException("Unreachable code")
+ throw new IllegalStateException(
+ "Unreachable code from org.apache.gluten.utils.BackendTestSettings.shouldRun")
}
final protected class SuiteSettings {
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
index 36d5b5177c6b..16879489d29e 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala
@@ -286,6 +286,7 @@ class ClickHouseTestSettings extends BackendTestSettings {
.exclude("groupBy.as")
enableSuite[GlutenDateFunctionsSuite]
.exclude("function to_date")
+ .excludeGlutenTest("function to_date")
.exclude("unix_timestamp")
.exclude("to_unix_timestamp")
.exclude("to_timestamp")
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 15495270a189..2c6b882850c4 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -265,6 +265,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDataFrameFunctionsSuite]
// blocked by Velox-5768
.exclude("aggregate function - array for primitive type containing null")
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index 8d1f7320dd42..5ddfe6fc1ff3 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -248,4 +248,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 407b9c8b95cc..f83b91ede1cc 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1084,6 +1084,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de4345..ae86c9d06e81 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index dbb01fbe7067..b0446d3ca7b6 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1101,6 +1101,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de4345..ae86c9d06e81 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
index b8ac906d8076..f2a83bf234a9 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala
@@ -40,15 +40,12 @@ class GlutenCastSuite extends CastSuiteBase with GlutenTestsTrait {
testGluten("missing cases - from boolean") {
(DataTypeTestUtils.numericTypeWithoutDecimal + BooleanType).foreach {
- t =>
- t match {
- case BooleanType =>
- checkEvaluation(cast(cast(true, BooleanType), t), true)
- checkEvaluation(cast(cast(false, BooleanType), t), false)
- case _ =>
- checkEvaluation(cast(cast(true, BooleanType), t), 1)
- checkEvaluation(cast(cast(false, BooleanType), t), 0)
- }
+ case t @ BooleanType =>
+ checkEvaluation(cast(cast(true, BooleanType), t), true)
+ checkEvaluation(cast(cast(false, BooleanType), t), false)
+ case t =>
+ checkEvaluation(cast(cast(true, BooleanType), t), 1)
+ checkEvaluation(cast(cast(false, BooleanType), t), 0)
}
}
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index f5a1a076956e..a01d0cb4b331 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -1123,6 +1123,9 @@ class VeloxTestSettings extends BackendTestSettings {
.exclude("to_timestamp")
// Legacy mode is not supported, assuming this mode is not commonly used.
.exclude("SPARK-30668: use legacy timestamp parser in to_timestamp")
+ // Legacy mode is not supported and velox getTimestamp function does not throw
+ // exception when format is "yyyy-dd-aa".
+ .exclude("function to_date")
enableSuite[GlutenDeprecatedAPISuite]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff]
enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn]
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
index a946e6de4345..ae86c9d06e81 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala
@@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra
}
}
}
+
+ testGluten("function to_date") {
+ val d1 = Date.valueOf("2015-07-22")
+ val d2 = Date.valueOf("2015-07-01")
+ val d3 = Date.valueOf("2014-12-31")
+ val t1 = Timestamp.valueOf("2015-07-22 10:00:00")
+ val t2 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val t3 = Timestamp.valueOf("2014-12-31 23:59:59")
+ val s1 = "2015-07-22 10:00:00"
+ val s2 = "2014-12-31"
+ val s3 = "2014-31-12"
+ val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s")
+
+ checkAnswer(
+ df.select(to_date(col("t"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"))),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("s"))),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ checkAnswer(
+ df.selectExpr("to_date(t)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(d)"),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.selectExpr("to_date(s)"),
+ Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null)))
+
+ // now with format
+ checkAnswer(
+ df.select(to_date(col("t"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2014-12-31")),
+ Row(Date.valueOf("2014-12-31"))))
+ checkAnswer(
+ df.select(to_date(col("d"), "yyyy-MM-dd")),
+ Seq(
+ Row(Date.valueOf("2015-07-22")),
+ Row(Date.valueOf("2015-07-01")),
+ Row(Date.valueOf("2014-12-31"))))
+ val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key
+ withSQLConf(confKey -> "corrected") {
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-MM-dd")),
+ Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null)))
+ }
+ // legacyParserPolicy is not respected by Gluten.
+ // withSQLConf(confKey -> "exception") {
+ // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd")))
+ // }
+
+ // now switch format
+ checkAnswer(
+ df.select(to_date(col("s"), "yyyy-dd-MM")),
+ Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31"))))
+
+ // invalid format
+ checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null)))
+ // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa".
+ // val e =
+ // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect())
+ // assert(e.getCause.isInstanceOf[IllegalArgumentException])
+ // assert(
+ // e.getMessage.contains("You may get a different result due to the upgrading to Spark"))
+
+ // February
+ val x1 = "2016-02-29"
+ val x2 = "2017-02-29"
+ val df1 = Seq(x1, x2).toDF("x")
+ checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil)
+ }
}
diff --git a/package/pom.xml b/package/pom.xml
index e0620e5cf5e1..b9c114181bcd 100644
--- a/package/pom.xml
+++ b/package/pom.xml
@@ -68,7 +68,7 @@
org.apache.gluten
- gluten-celeborn-package
+ gluten-celeborn
${project.version}
@@ -78,7 +78,7 @@
org.apache.gluten
- gluten-uniffle-package
+ gluten-uniffle
${project.version}
diff --git a/pom.xml b/pom.xml
index 3c59b4f19e11..4d704dc9b448 100644
--- a/pom.xml
+++ b/pom.xml
@@ -422,6 +422,70 @@
gluten-celeborn
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-celeborn-sources
+ generate-sources
+
+ add-source
+
+
+
+
+
+
+
+
+
+ add-celeborn-resources
+ generate-resources
+
+ add-resource
+
+
+
+
+ ${project.basedir}/src-celeborn/main/resources
+
+
+
+
+
+ add-celeborn-test-sources
+ generate-test-sources
+
+ add-test-source
+
+
+
+
+
+
+
+
+
+ add-celeborn-test-resources
+ generate-test-resources
+
+ add-test-resource
+
+
+
+
+ ${project.basedir}/src-celeborn/test/resources
+
+
+
+
+
+
+
+
uniffle
@@ -431,6 +495,70 @@
gluten-uniffle
+
+
+
+ org.codehaus.mojo
+ build-helper-maven-plugin
+
+
+ add-uniffle-sources
+ generate-sources
+
+ add-source
+
+
+
+
+
+
+
+
+
+ add-uniffle-resources
+ generate-resources
+
+ add-resource
+
+
+
+
+ ${project.basedir}/src-uniffle/main/resources
+
+
+
+
+
+ add-uniffle-test-sources
+ generate-test-sources
+
+ add-test-source
+
+
+
+
+
+
+
+
+
+ add-uniffle-test-resources
+ generate-test-resources
+
+ add-test-resource
+
+
+
+
+ ${project.basedir}/src-uniffle/test/resources
+
+
+
+
+
+
+
+
delta
diff --git a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
index a3bd5079b016..fcdd3c3c8b4b 100644
--- a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
+++ b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
@@ -73,7 +73,9 @@ abstract class AbstractFileSourceScanExec(
override def supportsColumnar: Boolean = {
// The value should be defined in GlutenPlan.
- throw new UnsupportedOperationException("Unreachable code")
+ throw new UnsupportedOperationException(
+ "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" +
+ ".supportsColumnar")
}
private lazy val needsUnsafeRowConversion: Boolean = {
diff --git a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
index c885f0cf44b3..01df5ba62167 100644
--- a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
+++ b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
@@ -77,7 +77,9 @@ abstract class AbstractFileSourceScanExec(
override def supportsColumnar: Boolean = {
// The value should be defined in GlutenPlan.
- throw new UnsupportedOperationException("Unreachable code")
+ throw new UnsupportedOperationException(
+ "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" +
+ ".supportsColumnar")
}
private lazy val needsUnsafeRowConversion: Boolean = {
diff --git a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
index 53ea6f543a95..15e54ddb71f2 100644
--- a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
+++ b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
@@ -69,7 +69,9 @@ abstract class AbstractFileSourceScanExec(
override def supportsColumnar: Boolean = {
// The value should be defined in GlutenPlan.
- throw new UnsupportedOperationException("Unreachable code")
+ throw new UnsupportedOperationException(
+ "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" +
+ ".supportsColumnar")
}
private lazy val needsUnsafeRowConversion: Boolean = {
diff --git a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
index c8dbcc2fed4f..a83c763c4566 100644
--- a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
+++ b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala
@@ -69,7 +69,9 @@ abstract class AbstractFileSourceScanExec(
override def supportsColumnar: Boolean = {
// The value should be defined in GlutenPlan.
- throw new UnsupportedOperationException("Unreachable code")
+ throw new UnsupportedOperationException(
+ "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" +
+ ".supportsColumnar")
}
private lazy val needsUnsafeRowConversion: Boolean = {