From 059854575bafd34a03bf422b4c93c31b2a814f7a Mon Sep 17 00:00:00 2001
From: zhaokuo
Date: Tue, 17 Dec 2024 09:14:28 +0800
Subject: [PATCH 01/14] [MINOR] [VL] Enhance the gluten timer to support seconds, milliseconds, and microseconds (#8231)

---
 cpp/core/utils/Timer.h | 22 ++++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/cpp/core/utils/Timer.h b/cpp/core/utils/Timer.h
index b6dec29b1a6a..4fe39068bb77 100644
--- a/cpp/core/utils/Timer.h
+++ b/cpp/core/utils/Timer.h
@@ -19,11 +19,11 @@
 
 #include <chrono>
 
-using TimePoint = std::chrono::time_point<std::chrono::steady_clock>;
-
 namespace gluten {
+template <typename T>
 class Timer {
  public:
+  using TimePoint = std::chrono::time_point<std::chrono::steady_clock>;
   explicit Timer() = default;
 
   void start() {
@@ -36,8 +36,7 @@ class Timer {
       return;
     }
     running_ = false;
-    realTimeUsed_ +=
-        std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::steady_clock::now() - startTime_).count();
+    realTimeUsed_ += std::chrono::duration_cast<T>(std::chrono::steady_clock::now() - startTime_).count();
   }
 
   void reset() {
@@ -62,13 +61,14 @@ class Timer {
   int64_t realTimeUsed_ = 0;
 };
 
-class ScopedTimer {
+template <typename T>
+class ScopedTimerImpl {
  public:
-  explicit ScopedTimer(int64_t* toAdd) : toAdd_(toAdd) {
+  explicit ScopedTimerImpl(int64_t* toAdd) : toAdd_(toAdd) {
     startInternal();
   }
 
-  ~ScopedTimer() {
+  ~ScopedTimerImpl() {
     stopInternal();
   }
 
@@ -79,7 +79,7 @@ class ScopedTimer {
   }
 
  private:
-  Timer timer_{};
+  Timer<T> timer_{};
   int64_t* toAdd_;
 
   void stopInternal() {
@@ -92,4 +92,10 @@ class ScopedTimer {
     timer_.start();
   }
 };
+
+using ScopedTimer = ScopedTimerImpl<std::chrono::nanoseconds>;
+using ScopedSecondsTimer = ScopedTimerImpl<std::chrono::seconds>;
+using ScopedMillisecondsTimer = ScopedTimerImpl<std::chrono::milliseconds>;
+using ScopedMicrosecondsTimer = ScopedTimerImpl<std::chrono::microseconds>;
+
 } // namespace gluten

From 6b0445a2129ce7e59821b47e28f7ff53ac24185f Mon Sep 17 00:00:00 2001
From: Hongze Zhang
Date: Tue, 17 Dec 2024 09:27:08 +0800
Subject: [PATCH 02/14] [GLUTEN-7914][CORE] Flip dependency direction for gluten-celeborn (#8241)

Closes #7914
---
 backends-clickhouse/pom.xml | 29 ++
 ...uten.celeborn.CelebornShuffleWriterFactory | 0
 .../CHCelebornColumnarBatchSerializer.scala | 0
 .../CHCelebornColumnarShuffleWriter.scala | 0
 ...CelebornColumnarShuffleWriterFactory.scala | 0
 .../test/resources/queries-output/q01.out | 0
 .../test/resources/queries-output/q02.out | 0
 .../test/resources/queries-output/q03.out | 0
 .../test/resources/queries-output/q04.out | 0
 .../test/resources/queries-output/q05.out | 0
 .../test/resources/queries-output/q06.out | 0
 .../test/resources/queries-output/q07.out | 0
 .../test/resources/queries-output/q08.out | 0
 .../test/resources/queries-output/q09.out | 0
 .../test/resources/queries-output/q10.out | 0
 .../test/resources/queries-output/q11.out | 0
 .../test/resources/queries-output/q12.out | 0
 .../test/resources/queries-output/q13.out | 0
 .../test/resources/queries-output/q14.out | 0
 .../test/resources/queries-output/q15.out | 0
 .../test/resources/queries-output/q16.out | 0
 .../test/resources/queries-output/q17.out | 0
 .../test/resources/queries-output/q18.out | 0
 .../test/resources/queries-output/q19.out | 0
 .../test/resources/queries-output/q20.out | 0
 .../test/resources/queries-output/q21.out | 0
 .../test/resources/queries-output/q22.out | 0
 .../resources/queries/tpch-queries-ch/q1.sql | 0
 .../resources/queries/tpch-queries-ch/q10.sql | 0
 .../resources/queries/tpch-queries-ch/q11.sql | 0
 .../resources/queries/tpch-queries-ch/q12.sql | 0
 .../resources/queries/tpch-queries-ch/q13.sql | 0
 .../resources/queries/tpch-queries-ch/q14.sql | 0
 .../resources/queries/tpch-queries-ch/q15.sql | 0
.../resources/queries/tpch-queries-ch/q16.sql | 0 .../resources/queries/tpch-queries-ch/q17.sql | 0 .../resources/queries/tpch-queries-ch/q18.sql | 0 .../resources/queries/tpch-queries-ch/q19.sql | 0 .../resources/queries/tpch-queries-ch/q2.sql | 0 .../resources/queries/tpch-queries-ch/q20.sql | 0 .../resources/queries/tpch-queries-ch/q21.sql | 0 .../resources/queries/tpch-queries-ch/q22.sql | 0 .../resources/queries/tpch-queries-ch/q3.sql | 0 .../resources/queries/tpch-queries-ch/q4.sql | 0 .../resources/queries/tpch-queries-ch/q5.sql | 0 .../resources/queries/tpch-queries-ch/q6.sql | 0 .../resources/queries/tpch-queries-ch/q7.sql | 0 .../resources/queries/tpch-queries-ch/q8.sql | 0 .../resources/queries/tpch-queries-ch/q9.sql | 0 .../customer/all_1_1_0/checksums.txt | Bin .../customer/all_1_1_0/columns.txt | 0 .../tpch-data-ch/customer/all_1_1_0/count.txt | 0 .../tpch-data-ch/customer/all_1_1_0/data.bin | Bin .../tpch-data-ch/customer/all_1_1_0/data.mrk3 | Bin .../all_1_1_0/default_compression_codec.txt | 0 .../lineitem/all_1_1_0/checksums.txt | Bin .../lineitem/all_1_1_0/columns.txt | 0 .../tpch-data-ch/lineitem/all_1_1_0/count.txt | 0 .../all_1_1_0/default_compression_codec.txt | 0 .../lineitem/all_1_1_0/l_comment.bin | Bin .../lineitem/all_1_1_0/l_comment.mrk2 | Bin .../lineitem/all_1_1_0/l_commitdate.bin | Bin .../lineitem/all_1_1_0/l_commitdate.mrk2 | Bin .../lineitem/all_1_1_0/l_discount.bin | Bin .../lineitem/all_1_1_0/l_discount.mrk2 | Bin .../lineitem/all_1_1_0/l_extendedprice.bin | Bin .../lineitem/all_1_1_0/l_extendedprice.mrk2 | Bin .../lineitem/all_1_1_0/l_linenumber.bin | Bin .../lineitem/all_1_1_0/l_linenumber.mrk2 | Bin .../lineitem/all_1_1_0/l_linestatus.bin | Bin .../lineitem/all_1_1_0/l_linestatus.mrk2 | Bin .../lineitem/all_1_1_0/l_orderkey.bin | Bin .../lineitem/all_1_1_0/l_orderkey.mrk2 | Bin .../lineitem/all_1_1_0/l_partkey.bin | Bin .../lineitem/all_1_1_0/l_partkey.mrk2 | Bin .../lineitem/all_1_1_0/l_quantity.bin | Bin .../lineitem/all_1_1_0/l_quantity.mrk2 | Bin .../lineitem/all_1_1_0/l_receiptdate.bin | Bin .../lineitem/all_1_1_0/l_receiptdate.mrk2 | Bin .../lineitem/all_1_1_0/l_returnflag.bin | Bin .../lineitem/all_1_1_0/l_returnflag.mrk2 | Bin .../lineitem/all_1_1_0/l_shipdate.bin | Bin .../lineitem/all_1_1_0/l_shipdate.mrk2 | Bin .../lineitem/all_1_1_0/l_shipinstruct.bin | Bin .../lineitem/all_1_1_0/l_shipinstruct.mrk2 | Bin .../lineitem/all_1_1_0/l_shipmode.bin | Bin .../lineitem/all_1_1_0/l_shipmode.mrk2 | Bin .../lineitem/all_1_1_0/l_suppkey.bin | Bin .../lineitem/all_1_1_0/l_suppkey.mrk2 | Bin .../tpch-data-ch/lineitem/all_1_1_0/l_tax.bin | Bin .../lineitem/all_1_1_0/l_tax.mrk2 | Bin .../nation/all_1_1_0/checksums.txt | Bin .../tpch-data-ch/nation/all_1_1_0/columns.txt | 0 .../tpch-data-ch/nation/all_1_1_0/count.txt | 0 .../tpch-data-ch/nation/all_1_1_0/data.bin | Bin .../tpch-data-ch/nation/all_1_1_0/data.mrk3 | Bin .../all_1_1_0/default_compression_codec.txt | 0 .../orders/all_1_1_0/checksums.txt | Bin .../tpch-data-ch/orders/all_1_1_0/columns.txt | 0 .../tpch-data-ch/orders/all_1_1_0/count.txt | 0 .../all_1_1_0/default_compression_codec.txt | 0 .../tpch-data-ch/orders/all_1_1_0/o_clerk.bin | Bin .../orders/all_1_1_0/o_clerk.mrk2 | Bin .../orders/all_1_1_0/o_comment.bin | Bin .../orders/all_1_1_0/o_comment.mrk2 | Bin .../orders/all_1_1_0/o_custkey.bin | Bin .../orders/all_1_1_0/o_custkey.mrk2 | Bin .../orders/all_1_1_0/o_orderdate.bin | Bin .../orders/all_1_1_0/o_orderdate.mrk2 | Bin .../orders/all_1_1_0/o_orderkey.bin | Bin 
.../orders/all_1_1_0/o_orderkey.mrk2 | Bin .../orders/all_1_1_0/o_orderpriority.bin | Bin .../orders/all_1_1_0/o_orderpriority.mrk2 | Bin .../orders/all_1_1_0/o_orderstatus.bin | Bin .../orders/all_1_1_0/o_orderstatus.mrk2 | Bin .../orders/all_1_1_0/o_shippriority.bin | Bin .../orders/all_1_1_0/o_shippriority.mrk2 | Bin .../orders/all_1_1_0/o_totalprice.bin | Bin .../orders/all_1_1_0/o_totalprice.mrk2 | Bin .../tpch-data-ch/part/all_1_1_0/checksums.txt | Bin .../tpch-data-ch/part/all_1_1_0/columns.txt | 0 .../tpch-data-ch/part/all_1_1_0/count.txt | 0 .../tpch-data-ch/part/all_1_1_0/data.bin | Bin .../tpch-data-ch/part/all_1_1_0/data.mrk3 | Bin .../all_1_1_0/default_compression_codec.txt | 0 .../partsupp/all_1_1_0/checksums.txt | Bin .../partsupp/all_1_1_0/columns.txt | 0 .../tpch-data-ch/partsupp/all_1_1_0/count.txt | 0 .../all_1_1_0/default_compression_codec.txt | 0 .../partsupp/all_1_1_0/ps_availqty.bin | Bin .../partsupp/all_1_1_0/ps_availqty.mrk2 | Bin .../partsupp/all_1_1_0/ps_comment.bin | Bin .../partsupp/all_1_1_0/ps_comment.mrk2 | Bin .../partsupp/all_1_1_0/ps_partkey.bin | Bin .../partsupp/all_1_1_0/ps_partkey.mrk2 | Bin .../partsupp/all_1_1_0/ps_suppkey.bin | Bin .../partsupp/all_1_1_0/ps_suppkey.mrk2 | Bin .../partsupp/all_1_1_0/ps_supplycost.bin | Bin .../partsupp/all_1_1_0/ps_supplycost.mrk2 | Bin .../region/all_1_1_0/checksums.txt | Bin .../tpch-data-ch/region/all_1_1_0/columns.txt | 0 .../tpch-data-ch/region/all_1_1_0/count.txt | 0 .../tpch-data-ch/region/all_1_1_0/data.bin | Bin .../tpch-data-ch/region/all_1_1_0/data.mrk3 | Bin .../all_1_1_0/default_compression_codec.txt | 0 .../supplier/all_1_1_0/checksums.txt | Bin .../supplier/all_1_1_0/columns.txt | 0 .../tpch-data-ch/supplier/all_1_1_0/count.txt | 0 .../tpch-data-ch/supplier/all_1_1_0/data.bin | Bin .../tpch-data-ch/supplier/all_1_1_0/data.mrk3 | Bin .../all_1_1_0/default_compression_codec.txt | 0 ...useRSSColumnarMemorySortShuffleSuite.scala | 4 +- ...ClickHouseRSSColumnarShuffleAQESuite.scala | 4 +- backends-velox/pom.xml | 29 ++ ...uten.celeborn.CelebornShuffleWriterFactory | 0 ...VeloxCelebornColumnarBatchSerializer.scala | 0 .../VeloxCelebornColumnarShuffleWriter.scala | 0 ...CelebornColumnarShuffleWriterFactory.scala | 0 gluten-celeborn/clickhouse/pom.xml | 260 ------------------ gluten-celeborn/common/pom.xml | 47 ---- gluten-celeborn/package/pom.xml | 38 --- gluten-celeborn/pom.xml | 61 +--- .../celeborn/CelebornShuffleManager.java | 0 .../CelebornShuffleWriterFactory.java | 0 .../gluten/celeborn/CelebornUtils.java | 0 .../CelebornColumnarShuffleWriter.scala | 6 +- .../shuffle/CelebornPartitionPusher.scala | 3 +- gluten-celeborn/velox/pom.xml | 68 ----- package/pom.xml | 2 +- pom.xml | 64 +++++ 170 files changed, 145 insertions(+), 470 deletions(-) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/main/scala/org/apache/spark/shuffle/CHCelebornColumnarBatchSerializer.scala (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriter.scala (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriterFactory.scala (100%) rename {gluten-celeborn/clickhouse/src => 
backends-clickhouse/src-celeborn}/test/resources/queries-output/q01.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q02.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q03.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q04.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q05.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q06.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q07.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q08.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q09.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q10.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q11.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q12.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q13.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q14.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q15.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q16.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q17.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q18.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q19.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q20.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q21.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries-output/q22.out (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q1.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q10.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q11.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q12.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q13.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q14.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q15.sql (100%) rename 
{gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q16.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q17.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q18.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q19.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q2.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q20.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q21.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q22.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q3.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q4.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q5.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q6.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q7.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q8.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/queries/tpch-queries-ch/q9.sql (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/customer/all_1_1_0/checksums.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/customer/all_1_1_0/columns.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/customer/all_1_1_0/count.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/customer/all_1_1_0/data.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/customer/all_1_1_0/data.mrk3 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/customer/all_1_1_0/default_compression_codec.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/checksums.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/columns.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/count.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/default_compression_codec.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.bin (100%) rename 
{gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.bin (100%) rename 
{gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/nation/all_1_1_0/checksums.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/nation/all_1_1_0/columns.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/nation/all_1_1_0/count.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/nation/all_1_1_0/data.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/nation/all_1_1_0/data.mrk3 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/nation/all_1_1_0/default_compression_codec.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/checksums.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/columns.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/count.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/default_compression_codec.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.bin (100%) rename {gluten-celeborn/clickhouse/src => 
backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/part/all_1_1_0/checksums.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/part/all_1_1_0/columns.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/part/all_1_1_0/count.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/part/all_1_1_0/data.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/part/all_1_1_0/data.mrk3 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/part/all_1_1_0/default_compression_codec.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/checksums.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/columns.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/count.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/default_compression_codec.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.bin (100%) rename {gluten-celeborn/clickhouse/src => 
backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.mrk2 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/region/all_1_1_0/checksums.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/region/all_1_1_0/columns.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/region/all_1_1_0/count.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/region/all_1_1_0/data.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/region/all_1_1_0/data.mrk3 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/region/all_1_1_0/default_compression_codec.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/supplier/all_1_1_0/checksums.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/supplier/all_1_1_0/columns.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/supplier/all_1_1_0/count.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/supplier/all_1_1_0/data.bin (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/supplier/all_1_1_0/data.mrk3 (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/resources/tpch-data-ch/supplier/all_1_1_0/default_compression_codec.txt (100%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala (95%) rename {gluten-celeborn/clickhouse/src => backends-clickhouse/src-celeborn}/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala (97%) rename {gluten-celeborn/velox/src => backends-velox/src-celeborn}/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory (100%) rename {gluten-celeborn/velox/src => backends-velox/src-celeborn}/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarBatchSerializer.scala (100%) rename {gluten-celeborn/velox/src => backends-velox/src-celeborn}/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriter.scala (100%) rename 
{gluten-celeborn/velox/src => backends-velox/src-celeborn}/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriterFactory.scala (100%) delete mode 100755 gluten-celeborn/clickhouse/pom.xml delete mode 100755 gluten-celeborn/common/pom.xml delete mode 100644 gluten-celeborn/package/pom.xml rename gluten-celeborn/{common/src => src-celeborn}/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java (100%) rename gluten-celeborn/{common/src => src-celeborn}/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleWriterFactory.java (100%) rename gluten-celeborn/{common/src => src-celeborn}/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java (100%) rename gluten-celeborn/{common/src => src-celeborn}/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala (99%) rename gluten-celeborn/{common/src => src-celeborn}/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala (99%) delete mode 100755 gluten-celeborn/velox/pom.xml diff --git a/backends-clickhouse/pom.xml b/backends-clickhouse/pom.xml index 9a606c48e694..e34f571961d6 100644 --- a/backends-clickhouse/pom.xml +++ b/backends-clickhouse/pom.xml @@ -14,6 +14,35 @@ Gluten Backends ClickHouse + + celeborn + + false + + + + org.apache.gluten + gluten-celeborn + ${project.version} + + + org.apache.celeborn + celeborn-client-spark-${spark.major.version}-shaded_${scala.binary.version} + ${celeborn.version} + provided + + + org.apache.celeborn + celeborn-client-spark-${spark.major.version}_${scala.binary.version} + + + org.apache.celeborn + celeborn-spark-${spark.major.version}-columnar-shuffle_${scala.binary.version} + + + + + iceberg diff --git a/gluten-celeborn/clickhouse/src/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory b/backends-clickhouse/src-celeborn/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory similarity index 100% rename from gluten-celeborn/clickhouse/src/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory rename to backends-clickhouse/src-celeborn/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory diff --git a/gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarBatchSerializer.scala b/backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarBatchSerializer.scala similarity index 100% rename from gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarBatchSerializer.scala rename to backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarBatchSerializer.scala diff --git a/gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriter.scala b/backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriter.scala similarity index 100% rename from gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriter.scala rename to backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriter.scala diff --git a/gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriterFactory.scala b/backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriterFactory.scala similarity index 100% rename from 
gluten-celeborn/clickhouse/src/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriterFactory.scala rename to backends-clickhouse/src-celeborn/main/scala/org/apache/spark/shuffle/CHCelebornColumnarShuffleWriterFactory.scala diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q01.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q01.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q01.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q01.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q02.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q02.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q02.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q02.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q03.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q03.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q03.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q03.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q04.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q04.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q04.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q04.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q05.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q05.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q05.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q05.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q06.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q06.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q06.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q06.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q07.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q07.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q07.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q07.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q08.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q08.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q08.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q08.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q09.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q09.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q09.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q09.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q10.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q10.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q10.out rename to 
backends-clickhouse/src-celeborn/test/resources/queries-output/q10.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q11.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q11.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q11.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q11.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q12.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q12.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q12.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q12.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q13.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q13.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q13.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q13.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q14.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q14.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q14.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q14.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q15.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q15.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q15.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q15.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q16.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q16.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q16.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q16.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q17.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q17.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q17.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q17.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q18.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q18.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q18.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q18.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q19.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q19.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q19.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q19.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q20.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q20.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q20.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q20.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q21.out 
b/backends-clickhouse/src-celeborn/test/resources/queries-output/q21.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q21.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q21.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries-output/q22.out b/backends-clickhouse/src-celeborn/test/resources/queries-output/q22.out similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries-output/q22.out rename to backends-clickhouse/src-celeborn/test/resources/queries-output/q22.out diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q1.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q1.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q1.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q1.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q10.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q10.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q10.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q10.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q11.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q11.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q11.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q11.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q12.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q12.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q12.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q12.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q13.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q13.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q13.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q13.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q14.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q14.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q14.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q14.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q15.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q15.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q15.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q15.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q16.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q16.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q16.sql rename to 
backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q16.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q17.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q17.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q17.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q17.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q18.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q18.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q18.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q18.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q19.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q19.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q19.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q19.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q2.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q2.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q2.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q2.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q20.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q20.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q20.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q20.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q21.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q21.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q21.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q21.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q22.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q22.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q22.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q22.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q3.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q3.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q3.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q3.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q4.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q4.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q4.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q4.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q5.sql 
b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q5.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q5.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q5.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q6.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q6.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q6.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q6.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q7.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q7.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q7.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q7.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q8.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q8.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q8.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q8.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q9.sql b/backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q9.sql similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/queries/tpch-queries-ch/q9.sql rename to backends-clickhouse/src-celeborn/test/resources/queries/tpch-queries-ch/q9.sql diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/checksums.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/checksums.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/checksums.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/columns.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/columns.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/columns.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/count.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/count.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/count.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/data.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/data.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/data.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/data.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/data.mrk3 
b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/data.mrk3 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/data.mrk3 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/data.mrk3 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/default_compression_codec.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/customer/all_1_1_0/default_compression_codec.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/customer/all_1_1_0/default_compression_codec.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/checksums.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/checksums.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/checksums.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/columns.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/columns.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/columns.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/count.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/count.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/count.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/default_compression_codec.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/default_compression_codec.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/default_compression_codec.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_comment.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.bin 
b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_commitdate.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_discount.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_extendedprice.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linenumber.mrk2 diff --git 
a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_linestatus.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_orderkey.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_partkey.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_quantity.mrk2 diff --git 
a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_receiptdate.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_returnflag.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipdate.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.mrk2 rename to 
backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipinstruct.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_shipmode.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_suppkey.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/lineitem/all_1_1_0/l_tax.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/checksums.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/checksums.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/checksums.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/columns.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/columns.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/columns.txt diff --git 
a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/count.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/count.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/count.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/data.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/data.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/data.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/data.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/data.mrk3 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/data.mrk3 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/data.mrk3 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/data.mrk3 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/default_compression_codec.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/nation/all_1_1_0/default_compression_codec.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/nation/all_1_1_0/default_compression_codec.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/checksums.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/checksums.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/checksums.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/columns.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/columns.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/columns.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/count.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/count.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/count.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/default_compression_codec.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/default_compression_codec.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/default_compression_codec.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.bin 
b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_clerk.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_comment.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_custkey.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderdate.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.bin similarity index 100% rename from 
gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderkey.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderpriority.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_orderstatus.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_shippriority.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.bin similarity index 100% rename from 
gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/orders/all_1_1_0/o_totalprice.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/checksums.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/checksums.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/checksums.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/columns.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/columns.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/columns.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/count.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/count.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/count.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/data.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/data.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/data.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/data.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/data.mrk3 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/data.mrk3 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/data.mrk3 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/data.mrk3 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/default_compression_codec.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/part/all_1_1_0/default_compression_codec.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/part/all_1_1_0/default_compression_codec.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/checksums.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/checksums.txt rename to 
backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/checksums.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/columns.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/columns.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/columns.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/count.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/count.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/count.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/default_compression_codec.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/default_compression_codec.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/default_compression_codec.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_availqty.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_comment.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.bin rename to 
backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_partkey.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_suppkey.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.mrk2 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.mrk2 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.mrk2 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/partsupp/all_1_1_0/ps_supplycost.mrk2 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/checksums.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/checksums.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/checksums.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/columns.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/columns.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/columns.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/count.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/count.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/count.txt 
diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/data.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/data.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/data.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/data.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/data.mrk3 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/data.mrk3 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/data.mrk3 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/data.mrk3 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/default_compression_codec.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/default_compression_codec.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/region/all_1_1_0/default_compression_codec.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/region/all_1_1_0/default_compression_codec.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/checksums.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/checksums.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/checksums.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/checksums.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/columns.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/columns.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/columns.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/columns.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/count.txt b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/count.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/count.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/count.txt diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/data.bin b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/data.bin similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/data.bin rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/data.bin diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/data.mrk3 b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/data.mrk3 similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/data.mrk3 rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/data.mrk3 diff --git a/gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/default_compression_codec.txt 
b/backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/default_compression_codec.txt similarity index 100% rename from gluten-celeborn/clickhouse/src/test/resources/tpch-data-ch/supplier/all_1_1_0/default_compression_codec.txt rename to backends-clickhouse/src-celeborn/test/resources/tpch-data-ch/supplier/all_1_1_0/default_compression_codec.txt diff --git a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala b/backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala similarity index 95% rename from gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala rename to backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala index ee7657c505ac..10350898cf88 100644 --- a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala +++ b/backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarMemorySortShuffleSuite.scala @@ -26,10 +26,10 @@ class GlutenClickHouseRSSColumnarMemorySortShuffleSuite override protected val tablesPath: String = basePath + "/tpch-data-ch" override protected val tpchQueries: String = rootPath + "queries/tpch-queries-ch" override protected val queriesResults: String = - rootPath + "../../../../../backends-clickhouse/src/test/resources/mergetree-queries-output" + rootPath + "../../../../backends-clickhouse/src/test/resources/mergetree-queries-output" override protected val parquetTableDataPath: String = - "../../../../../gluten-core/src/test/resources/tpch-data" + "../../../../gluten-core/src/test/resources/tpch-data" /** Run Gluten + ClickHouse Backend with ColumnarShuffleManager */ override protected def sparkConf: SparkConf = { diff --git a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala b/backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala similarity index 97% rename from gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala rename to backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala index e62dbdd2a5fe..4c62ee73f0f7 100644 --- a/gluten-celeborn/clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala +++ b/backends-clickhouse/src-celeborn/test/scala/org/apache/gluten/execution/GlutenClickHouseRSSColumnarShuffleAQESuite.scala @@ -30,10 +30,10 @@ class GlutenClickHouseRSSColumnarShuffleAQESuite override protected val tablesPath: String = basePath + "/tpch-data-ch" override protected val tpchQueries: String = rootPath + "queries/tpch-queries-ch" override protected val queriesResults: String = - rootPath + "../../../../../backends-clickhouse/src/test/resources/mergetree-queries-output" + rootPath + "../../../../backends-clickhouse/src/test/resources/mergetree-queries-output" override protected val parquetTableDataPath: String = - "../../../../../gluten-core/src/test/resources/tpch-data" + "../../../../gluten-core/src/test/resources/tpch-data" /** Run Gluten + ClickHouse Backend with ColumnarShuffleManager */ override protected def sparkConf: 
SparkConf = { diff --git a/backends-velox/pom.xml b/backends-velox/pom.xml index 48a044a17f2f..ed0bf20616f7 100755 --- a/backends-velox/pom.xml +++ b/backends-velox/pom.xml @@ -28,6 +28,35 @@ org.apache.gluten.tags.UDFTest + + celeborn + + false + + + + org.apache.gluten + gluten-celeborn + ${project.version} + + + org.apache.celeborn + celeborn-client-spark-${spark.major.version}-shaded_${scala.binary.version} + ${celeborn.version} + provided + + + org.apache.celeborn + celeborn-client-spark-${spark.major.version}_${scala.binary.version} + + + org.apache.celeborn + celeborn-spark-${spark.major.version}-columnar-shuffle_${scala.binary.version} + + + + + iceberg diff --git a/gluten-celeborn/velox/src/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory b/backends-velox/src-celeborn/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory similarity index 100% rename from gluten-celeborn/velox/src/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory rename to backends-velox/src-celeborn/main/resources/META-INF/services/org.apache.spark.shuffle.gluten.celeborn.CelebornShuffleWriterFactory diff --git a/gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarBatchSerializer.scala b/backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarBatchSerializer.scala similarity index 100% rename from gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarBatchSerializer.scala rename to backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarBatchSerializer.scala diff --git a/gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriter.scala b/backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriter.scala similarity index 100% rename from gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriter.scala rename to backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriter.scala diff --git a/gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriterFactory.scala b/backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriterFactory.scala similarity index 100% rename from gluten-celeborn/velox/src/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriterFactory.scala rename to backends-velox/src-celeborn/main/scala/org/apache/spark/shuffle/VeloxCelebornColumnarShuffleWriterFactory.scala diff --git a/gluten-celeborn/clickhouse/pom.xml b/gluten-celeborn/clickhouse/pom.xml deleted file mode 100755 index 21263443d735..000000000000 --- a/gluten-celeborn/clickhouse/pom.xml +++ /dev/null @@ -1,260 +0,0 @@ - - - - gluten-celeborn - org.apache.gluten - 1.3.0-SNAPSHOT - ../pom.xml - - 4.0.0 - - gluten-celeborn-clickhouse - jar - Gluten Celeborn Clickhouse - - - - org.apache.gluten - backends-clickhouse - ${project.version} - provided - - - org.apache.gluten - backends-clickhouse - ${project.version} - test-jar - test - - - org.apache.gluten - gluten-substrait - ${project.version} - test-jar - test - - - org.apache.gluten - gluten-celeborn-common - ${project.version} - compile - - - org.apache.spark - spark-core_${scala.binary.version} - test-jar - test - - - org.apache.spark - spark-sql_${scala.binary.version} - test-jar - 
test - - - org.apache.spark - spark-catalyst_${scala.binary.version} - test-jar - test - - - org.apache.spark - spark-yarn_${scala.binary.version} - ${spark.version} - test-jar - test - - - org.apache.spark - spark-hive_${scala.binary.version} - ${spark.version} - test-jar - test - - - org.apache.spark - spark-hive_${scala.binary.version} - test - - - org.apache.hive.hcatalog - hive-hcatalog-core - 2.3.9 - test - - - org.pentaho - pentaho-aggdesigner-algorithm - - - net.minidev - json-smart - - - org.apache.hive - hive-exec - - - guava - com.google.guava - - - hadoop-common - org.apache.hadoop - - - hadoop-hdfs - org.apache.hadoop - - - - - io.delta - ${delta.package.name}_${scala.binary.version} - test - - - junit - junit - - - org.mockito - mockito-core - 2.23.4 - test - - - org.scalatestplus - scalatestplus-mockito_${scala.binary.version} - 1.0.0-M2 - test - - - org.scalatest - scalatest_${scala.binary.version} - test - - - org.scalatestplus - scalatestplus-scalacheck_${scala.binary.version} - 3.1.0.0-RC2 - test - - - org.apache.hadoop - hadoop-client - ${hadoop.version} - test - - - org.apache.arrow - arrow-memory-core - ${arrow.version} - provided - - - io.netty - netty-common - - - io.netty - netty-buffer - - - - - org.apache.arrow - arrow-vector - ${arrow.version} - provided - - - io.netty - netty-common - - - io.netty - netty-buffer - - - - - - - target/scala-${scala.binary.version}/classes - target/scala-${scala.binary.version}/test-classes - - - org.apache.maven.plugins - maven-resources-plugin - - - net.alchim31.maven - scala-maven-plugin - - - org.apache.maven.plugins - maven-compiler-plugin - - - org.scalastyle - scalastyle-maven-plugin - - - org.apache.maven.plugins - maven-checkstyle-plugin - - - maven-assembly-plugin - 3.3.0 - - - jar-with-dependencies - - - - - make-assembly - package - - single - - - - - - org.scalatest - scalatest-maven-plugin - - - test - - test - - - - ${clickhouse.lib.path} - ${tpcds.data.path} - - - - - - - org.apache.maven.plugins - maven-jar-plugin - - - prepare-test-jar - test-compile - - test-jar - - - - - - - diff --git a/gluten-celeborn/common/pom.xml b/gluten-celeborn/common/pom.xml deleted file mode 100755 index da7e68987659..000000000000 --- a/gluten-celeborn/common/pom.xml +++ /dev/null @@ -1,47 +0,0 @@ - - - - gluten-celeborn - org.apache.gluten - 1.3.0-SNAPSHOT - ../pom.xml - - 4.0.0 - - gluten-celeborn-common - jar - Gluten Celeborn Common - - - target/scala-${scala.binary.version}/classes - target/scala-${scala.binary.version}/test-classes - - - net.alchim31.maven - scala-maven-plugin - - - org.apache.maven.plugins - maven-compiler-plugin - - - org.scalastyle - scalastyle-maven-plugin - - - org.apache.maven.plugins - maven-checkstyle-plugin - - - com.diffplug.spotless - spotless-maven-plugin - - - org.apache.maven.plugins - maven-jar-plugin - - - - diff --git a/gluten-celeborn/package/pom.xml b/gluten-celeborn/package/pom.xml deleted file mode 100644 index 7b18787b4e16..000000000000 --- a/gluten-celeborn/package/pom.xml +++ /dev/null @@ -1,38 +0,0 @@ - - 4.0.0 - - gluten-celeborn - org.apache.gluten - 1.3.0-SNAPSHOT - ../pom.xml - - - gluten-celeborn-package - jar - Gluten Celeborn Package - - - - backends-velox - - - org.apache.gluten - gluten-celeborn-velox - ${project.version} - - - - - backends-clickhouse - - - org.apache.gluten - gluten-celeborn-clickhouse - ${project.version} - - - - - diff --git a/gluten-celeborn/pom.xml b/gluten-celeborn/pom.xml index de19132b38f8..0eca5da979e1 100755 --- a/gluten-celeborn/pom.xml +++ 
b/gluten-celeborn/pom.xml @@ -11,7 +11,7 @@ 4.0.0 gluten-celeborn - pom + jar Gluten Celeborn @@ -56,50 +56,19 @@ - - - - net.alchim31.maven - scala-maven-plugin - - true - - -Xss128m - - - - - org.scalastyle - scalastyle-maven-plugin - - - com.diffplug.spotless - spotless-maven-plugin - - - + + + net.alchim31.maven + scala-maven-plugin + + + org.scalastyle + scalastyle-maven-plugin + + + com.diffplug.spotless + spotless-maven-plugin + + - - - - backends-velox - - - - velox - common - package - - - - backends-clickhouse - - - - clickhouse - common - package - - - diff --git a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java b/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java similarity index 100% rename from gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java rename to gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleManager.java diff --git a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleWriterFactory.java b/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleWriterFactory.java similarity index 100% rename from gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleWriterFactory.java rename to gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornShuffleWriterFactory.java diff --git a/gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java b/gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java similarity index 100% rename from gluten-celeborn/common/src/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java rename to gluten-celeborn/src-celeborn/main/java/org/apache/spark/shuffle/gluten/celeborn/CelebornUtils.java diff --git a/gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala b/gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala similarity index 99% rename from gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala rename to gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala index 10cf06a3ce59..42e939e4420d 100644 --- a/gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala +++ b/gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornColumnarShuffleWriter.scala @@ -16,8 +16,9 @@ */ package org.apache.spark.shuffle +import org.apache.celeborn.client.ShuffleClient +import org.apache.celeborn.common.CelebornConf import org.apache.gluten.GlutenConfig - import org.apache.spark._ import org.apache.spark.internal.Logging import org.apache.spark.internal.config.SHUFFLE_COMPRESS @@ -26,9 +27,6 @@ import org.apache.spark.shuffle.celeborn.CelebornShuffleHandle import org.apache.spark.sql.vectorized.ColumnarBatch import org.apache.spark.storage.BlockManager -import org.apache.celeborn.client.ShuffleClient -import org.apache.celeborn.common.CelebornConf - import java.io.IOException import java.util.Locale diff --git a/gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala 
b/gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala similarity index 99% rename from gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala rename to gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala index 2f59307230a0..545a4c113936 100644 --- a/gluten-celeborn/common/src/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala +++ b/gluten-celeborn/src-celeborn/main/scala/org/apache/spark/shuffle/CelebornPartitionPusher.scala @@ -16,11 +16,10 @@ */ package org.apache.spark.shuffle +import org.apache.celeborn.client.ShuffleClient import org.apache.spark._ import org.apache.spark.internal.Logging -import org.apache.celeborn.client.ShuffleClient - import java.io.IOException class CelebornPartitionPusher( diff --git a/gluten-celeborn/velox/pom.xml b/gluten-celeborn/velox/pom.xml deleted file mode 100755 index 55aa8f3c9b5f..000000000000 --- a/gluten-celeborn/velox/pom.xml +++ /dev/null @@ -1,68 +0,0 @@ - - - - gluten-celeborn - org.apache.gluten - 1.3.0-SNAPSHOT - ../pom.xml - - 4.0.0 - - gluten-celeborn-velox - jar - Gluten Celeborn Velox - - - - org.apache.gluten - backends-velox - ${project.version} - provided - - - org.apache.gluten - gluten-arrow - ${project.version} - provided - - - org.apache.gluten - gluten-celeborn-common - ${project.version} - compile - - - - - target/scala-${scala.binary.version}/classes - target/scala-${scala.binary.version}/test-classes - - - net.alchim31.maven - scala-maven-plugin - - - org.apache.maven.plugins - maven-compiler-plugin - - - org.scalastyle - scalastyle-maven-plugin - - - org.apache.maven.plugins - maven-checkstyle-plugin - - - com.diffplug.spotless - spotless-maven-plugin - - - org.apache.maven.plugins - maven-jar-plugin - - - - diff --git a/package/pom.xml b/package/pom.xml index e0620e5cf5e1..230f79d0942a 100644 --- a/package/pom.xml +++ b/package/pom.xml @@ -68,7 +68,7 @@ org.apache.gluten - gluten-celeborn-package + gluten-celeborn ${project.version} diff --git a/pom.xml b/pom.xml index 3c59b4f19e11..e6f3709c4cfe 100644 --- a/pom.xml +++ b/pom.xml @@ -422,6 +422,70 @@ gluten-celeborn + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-celeborn-sources + generate-sources + + add-source + + + + ${project.basedir}/src-celeborn/main/scala + ${project.basedir}/src-celeborn/main/java + + + + + add-celeborn-resources + generate-resources + + add-resource + + + + + ${project.basedir}/src-celeborn/main/resources + + + + + + add-celeborn-test-sources + generate-test-sources + + add-test-source + + + + ${project.basedir}/src-celeborn/test/scala + ${project.basedir}/src-celeborn/test/java + + + + + add-celeborn-test-resources + generate-test-resources + + add-test-resource + + + + + ${project.basedir}/src-celeborn/test/resources + + + + + + + + uniffle From 82b48b38d10217ea4921248d3e4ffc5fc48877b7 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Tue, 17 Dec 2024 09:37:51 +0800 Subject: [PATCH 03/14] [GLUTEN-7911][CORE] Flip dependency direction for gluten-hudi (#8240) Closes #7911 --- backends-velox/pom.xml | 23 +++++++++++++++++++ .../org/apache/execution/VeloxHudiSuite.scala | 21 +++++++++++++++++ .../execution/VeloxTPCHHudiSuite.scala | 8 +++---- .../memtarget/spark/TreeMemoryConsumer.java | 3 ++- .../org/apache/spark/task/TaskResources.scala | 11 ++++++--- gluten-hudi/pom.xml | 13 ----------- .../{VeloxHudiSuite.scala => HudiSuite.scala} | 2 +- .../gluten/utils/BackendTestSettings.scala | 3 ++- 
.../AbstractFileSourceScanExec.scala | 4 +++- .../AbstractFileSourceScanExec.scala | 4 +++- .../AbstractFileSourceScanExec.scala | 4 +++- .../AbstractFileSourceScanExec.scala | 4 +++- 12 files changed, 73 insertions(+), 27 deletions(-) create mode 100644 backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala rename {gluten-hudi/src-hudi/test/scala/org/apache/gluten => backends-velox/src-hudi/test/scala/org/apache}/execution/VeloxTPCHHudiSuite.scala (91%) rename gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/{VeloxHudiSuite.scala => HudiSuite.scala} (98%) diff --git a/backends-velox/pom.xml b/backends-velox/pom.xml index ed0bf20616f7..9349c3c0923c 100755 --- a/backends-velox/pom.xml +++ b/backends-velox/pom.xml @@ -111,6 +111,29 @@ + + hudi + + + org.apache.gluten + gluten-hudi + ${project.version} + + + org.apache.gluten + gluten-hudi + ${project.version} + test-jar + test + + + org.apache.hudi + hudi-spark${sparkbundle.version}-bundle_${scala.binary.version} + ${hudi.version} + provided + + + diff --git a/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala new file mode 100644 index 000000000000..00498f87411a --- /dev/null +++ b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxHudiSuite.scala @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.execution + +import org.apache.gluten.execution.HudiSuite + +class VeloxHudiSuite extends HudiSuite {} diff --git a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxTPCHHudiSuite.scala b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxTPCHHudiSuite.scala similarity index 91% rename from gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxTPCHHudiSuite.scala rename to backends-velox/src-hudi/test/scala/org/apache/execution/VeloxTPCHHudiSuite.scala index a4e10269c286..cdb3b2918080 100644 --- a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxTPCHHudiSuite.scala +++ b/backends-velox/src-hudi/test/scala/org/apache/execution/VeloxTPCHHudiSuite.scala @@ -14,16 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.gluten.execution +package org.apache.execution +import org.apache.gluten.execution.VeloxTPCHSuite import org.apache.spark.SparkConf import java.io.File class VeloxTPCHHudiSuite extends VeloxTPCHSuite { - - protected val tpchBasePath: String = new File( - "../backends-velox/src/test/resources").getAbsolutePath + protected val tpchBasePath: String = + getClass.getResource("/").getPath + "../../../src/test/resources" override protected val resourcePath: String = new File(tpchBasePath, "tpch-data-parquet").getCanonicalPath diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java index 44c725798c75..1289a01c349e 100644 --- a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java +++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/spark/TreeMemoryConsumer.java @@ -138,7 +138,8 @@ public Map children() { @Override public TreeMemoryTarget parent() { // we are root - throw new IllegalStateException("Unreachable code"); + throw new IllegalStateException( + "Unreachable code org.apache.gluten.memory.memtarget.spark.TreeMemoryConsumer.parent"); } @Override diff --git a/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala b/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala index b061aa332c74..df5917125b64 100644 --- a/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala +++ b/gluten-core/src/main/scala/org/apache/spark/task/TaskResources.scala @@ -298,9 +298,14 @@ class TaskResourceRegistry extends Logging { o1: util.Map.Entry[Int, util.LinkedHashSet[TaskResource]], o2: util.Map.Entry[Int, util.LinkedHashSet[TaskResource]]) => { val diff = o2.getKey - o1.getKey // descending by priority - if (diff > 0) 1 - else if (diff < 0) -1 - else throw new IllegalStateException("Unreachable code") + if (diff > 0) { + 1 + } else if (diff < 0) { + -1 + } else { + throw new IllegalStateException( + "Unreachable code from org.apache.spark.task.TaskResourceRegistry.releaseAll") + } } ) table.forEach { diff --git a/gluten-hudi/pom.xml b/gluten-hudi/pom.xml index 7900182f853a..5865f1f6ece8 100755 --- a/gluten-hudi/pom.xml +++ b/gluten-hudi/pom.xml @@ -46,19 +46,6 @@ test-jar test - - org.apache.gluten - backends-velox - ${project.version} - test - - - org.apache.gluten - backends-velox - ${project.version} - test-jar - test - org.apache.spark spark-core_${scala.binary.version} diff --git a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala b/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala similarity index 98% rename from gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala rename to gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala index b760ec556535..97633fa064cc 100644 --- a/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/VeloxHudiSuite.scala +++ b/gluten-hudi/src-hudi/test/scala/org/apache/gluten/execution/HudiSuite.scala @@ -19,7 +19,7 @@ package org.apache.gluten.execution import org.apache.spark.SparkConf import org.apache.spark.sql.Row -class VeloxHudiSuite extends WholeStageTransformerSuite { +abstract class HudiSuite extends WholeStageTransformerSuite { protected val rootPath: String = getClass.getResource("/").getPath override protected val resourcePath: String = "/tpch-data-parquet" diff --git 
a/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala b/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala index dce8ac83710c..51e8174da7fb 100644 --- a/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala +++ b/gluten-ut/common/src/test/scala/org/apache/gluten/utils/BackendTestSettings.scala @@ -80,7 +80,8 @@ abstract class BackendTestSettings { return !isExcluded } - throw new IllegalStateException("Unreachable code") + throw new IllegalStateException( + "Unreachable code from org.apache.gluten.utils.BackendTestSettings.shouldRun") } final protected class SuiteSettings { diff --git a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala index a3bd5079b016..fcdd3c3c8b4b 100644 --- a/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala +++ b/shims/spark32/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala @@ -73,7 +73,9 @@ abstract class AbstractFileSourceScanExec( override def supportsColumnar: Boolean = { // The value should be defined in GlutenPlan. - throw new UnsupportedOperationException("Unreachable code") + throw new UnsupportedOperationException( + "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" + + ".supportsColumnar") } private lazy val needsUnsafeRowConversion: Boolean = { diff --git a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala index c885f0cf44b3..01df5ba62167 100644 --- a/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala +++ b/shims/spark33/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala @@ -77,7 +77,9 @@ abstract class AbstractFileSourceScanExec( override def supportsColumnar: Boolean = { // The value should be defined in GlutenPlan. - throw new UnsupportedOperationException("Unreachable code") + throw new UnsupportedOperationException( + "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" + + ".supportsColumnar") } private lazy val needsUnsafeRowConversion: Boolean = { diff --git a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala index 53ea6f543a95..15e54ddb71f2 100644 --- a/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala +++ b/shims/spark34/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala @@ -69,7 +69,9 @@ abstract class AbstractFileSourceScanExec( override def supportsColumnar: Boolean = { // The value should be defined in GlutenPlan. 
- throw new UnsupportedOperationException("Unreachable code") + throw new UnsupportedOperationException( + "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" + + ".supportsColumnar") } private lazy val needsUnsafeRowConversion: Boolean = { diff --git a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala index c8dbcc2fed4f..a83c763c4566 100644 --- a/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala +++ b/shims/spark35/src/main/scala/org/apache/spark/sql/execution/AbstractFileSourceScanExec.scala @@ -69,7 +69,9 @@ abstract class AbstractFileSourceScanExec( override def supportsColumnar: Boolean = { // The value should be defined in GlutenPlan. - throw new UnsupportedOperationException("Unreachable code") + throw new UnsupportedOperationException( + "Unreachable code from org.apache.spark.sql.execution.AbstractFileSourceScanExec" + + ".supportsColumnar") } private lazy val needsUnsafeRowConversion: Boolean = { From f63c29a5b28f282226063f359525d01f1897008d Mon Sep 17 00:00:00 2001 From: zhaokuo Date: Tue, 17 Dec 2024 09:50:47 +0800 Subject: [PATCH 04/14] [CORE] Minor: OverTarget is required only with sufficient memory and doesn't spill due to zero used bytes post-borrow (#8247) * fix overacquire spill * fix format --- .../memory/listener/ReservationListeners.java | 13 +------------ .../gluten/memory/memtarget/OverAcquire.java | 16 +++++++++------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/gluten-arrow/src/main/java/org/apache/gluten/memory/listener/ReservationListeners.java b/gluten-arrow/src/main/java/org/apache/gluten/memory/listener/ReservationListeners.java index b221db13e375..9d63a8601b4d 100644 --- a/gluten-arrow/src/main/java/org/apache/gluten/memory/listener/ReservationListeners.java +++ b/gluten-arrow/src/main/java/org/apache/gluten/memory/listener/ReservationListeners.java @@ -52,18 +52,7 @@ private static ReservationListener create0( tmm, name, Spillers.withMinSpillSize(spiller, reservationBlockSize), mutableStats); final MemoryTarget overConsumer = MemoryTargets.newConsumer( - tmm, - consumer.name() + ".OverAcquire", - new Spiller() { - @Override - public long spill(MemoryTarget self, Phase phase, long size) { - if (!Spillers.PHASE_SET_ALL.contains(phase)) { - return 0L; - } - return self.repay(size); - } - }, - Collections.emptyMap()); + tmm, consumer.name() + ".OverAcquire", Spillers.NOOP, Collections.emptyMap()); final MemoryTarget target = MemoryTargets.throwOnOom( MemoryTargets.overAcquire( diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/OverAcquire.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/OverAcquire.java index e7321b4b7e0e..7724083d6852 100644 --- a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/OverAcquire.java +++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/OverAcquire.java @@ -57,13 +57,15 @@ public long borrow(long size) { } Preconditions.checkState(overTarget.usedBytes() == 0); long granted = target.borrow(size); - long majorSize = target.usedBytes(); - long overSize = (long) (ratio * majorSize); - long overAcquired = overTarget.borrow(overSize); - Preconditions.checkState(overAcquired == overTarget.usedBytes()); - long releasedOverSize = overTarget.repay(overAcquired); - Preconditions.checkState(releasedOverSize == overAcquired); - 
Preconditions.checkState(overTarget.usedBytes() == 0); + if (granted >= size) { + long majorSize = target.usedBytes(); + long overSize = (long) (ratio * majorSize); + long overAcquired = overTarget.borrow(overSize); + Preconditions.checkState(overAcquired == overTarget.usedBytes()); + long releasedOverSize = overTarget.repay(overAcquired); + Preconditions.checkState(releasedOverSize == overAcquired); + Preconditions.checkState(overTarget.usedBytes() == 0); + } return granted; } From d5bd9f9487774f653e3eea3399390d3ae0663cbf Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Tue, 17 Dec 2024 09:55:56 +0800 Subject: [PATCH 05/14] [VL] Support concat_ws function (#8101) --- .../ScalarFunctionsValidateSuite.scala | 20 +++++++++++++++++++ .../SubstraitToVeloxPlanValidator.cc | 1 - 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala index 46d6870b04c9..94ea8be5200d 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/ScalarFunctionsValidateSuite.scala @@ -1357,6 +1357,26 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite { } } + test("concat_ws") { + runQueryAndCompare("SELECT concat_ws('~~', c_comment, c_address) FROM customer LIMIT 50") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + + withTempPath { + path => + Seq[Seq[String]](Seq("ab", null, "cd", "", "ef"), Seq(null, "x", "", "y"), Seq.empty, null) + .toDF("col") + .write + .parquet(path.getCanonicalPath) + + spark.read.parquet(path.getCanonicalPath).createOrReplaceTempView("array_tbl") + + runQueryAndCompare("SELECT concat_ws('~~', col, 'end') AS res from array_tbl;") { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + } + } + test("Test input_file_name function") { runQueryAndCompare("""SELECT input_file_name(), l_orderkey | from lineitem limit 100""".stripMargin) { diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc index 682bf0fcd5d6..84dfe68e2d22 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc @@ -61,7 +61,6 @@ const std::unordered_set kRegexFunctions = { const std::unordered_set kBlackList = { "split_part", "factorial", - "concat_ws", "from_json", "json_array_length", "trunc", From dda12ec516a6af1293dbdd5626dc4ae4933d6754 Mon Sep 17 00:00:00 2001 From: Wenzheng Liu Date: Tue, 17 Dec 2024 11:04:15 +0800 Subject: [PATCH 06/14] [CH] Minor, add delta profile to package.sh --- ep/build-clickhouse/src/package.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ep/build-clickhouse/src/package.sh b/ep/build-clickhouse/src/package.sh index 2583727b212e..522e073fc3f5 100755 --- a/ep/build-clickhouse/src/package.sh +++ b/ep/build-clickhouse/src/package.sh @@ -90,7 +90,7 @@ function build_gluten_by_spark_version() { sv=$(echo "$spark_profile" | tr -d '.') echo "build gluten with spark ${spark_profile}, scala ${scala_version}" - mvn clean install -Pbackends-clickhouse -Pspark-"${spark_profile}" -Pscala-"${scala_version}" -Pceleborn -Piceberg -DskipTests -Dcheckstyle.skip + mvn clean install -Pbackends-clickhouse -Pspark-"${spark_profile}" -Pscala-"${scala_version}" -Pceleborn -Piceberg -Pdelta -DskipTests -Dcheckstyle.skip cp 
"${GLUTEN_SOURCE}"/backends-clickhouse/target/gluten-*-spark-"${spark_profile}"-jar-with-dependencies.jar "${PACKAGE_DIR_PATH}"/jars/spark"${sv}"/gluten.jar cp "${GLUTEN_SOURCE}"/gluten-celeborn/clickhouse/target/gluten-celeborn-clickhouse-"${PROJECT_VERSION}"-jar-with-dependencies.jar "${PACKAGE_DIR_PATH}"/jars/spark"${sv}" delta_version=$(mvn -q -Dexec.executable="echo" -Dexec.args='${delta.version}' -Pspark-"${spark_profile}" --non-recursive exec:exec) From c1d66c4199418e57bb170aea0234375786957b44 Mon Sep 17 00:00:00 2001 From: Chang chen Date: Tue, 17 Dec 2024 11:05:33 +0800 Subject: [PATCH 07/14] [GLUTEN-7028][CH][Part-12] Add Local SortExec for Partition Write in one pipeline mode (#8237) * [Refactor] Pass WriteFilesExecTransformer to genWriteParameters * [Feature] Add SortExec and Remove RemoveNativeWriteFilesSortAndProject * [Bug Fix] collect_partition_cols and Remove ApplySquashingTransform and PlanSquashingTransform * [Bug Fix] Fix "WARN org.apache.spark.sql.execution.datasources.BasicWriteTaskStatsTracker: Expected x files, but only saw 0." * [Refactor]set CHConf.ENABLE_ONEPIPELINE_MERGETREE_WRITE.key to true in spark 35 for GlutenClickHouseMergeTreeWriteSuite * Fix Rebase issue --- .../ClickhouseOptimisticTransaction.scala | 85 ++--- .../execution/FileDeltaColumnarWrite.scala | 3 +- .../execution/datasources/DeltaV1Writes.scala | 74 ++++ .../datasources/DeltaV1WritesSuite.scala | 100 ++++++ .../datasources/v1/write_optimization.proto | 3 + .../backendsapi/clickhouse/CHRuleApi.scala | 1 - .../clickhouse/CHTransformerApi.scala | 37 +- .../clickhouse/RuntimeConfig.scala | 17 + .../spark/sql/execution/CHColumnarWrite.scala | 13 +- ...utenClickHouseDeltaParquetWriteSuite.scala | 1 + .../GlutenClickHouseMergeTreeWriteSuite.scala | 332 +++++++++--------- .../velox/VeloxTransformerApi.scala | 10 +- .../Parser/RelParsers/WriteRelParser.cpp | 58 +-- .../Parser/RelParsers/WriteRelParser.h | 5 +- .../Storages/MergeTree/SparkMergeTreeSink.cpp | 43 ++- .../Storages/MergeTree/SparkMergeTreeSink.h | 17 +- .../MergeTree/SparkMergeTreeWriter.cpp | 13 - .../tests/gtest_write_pipeline.cpp | 10 +- .../tests/gtest_write_pipeline_mergetree.cpp | 39 +- .../tests/json/mergetree/4_one_pipeline.json | 296 +++++++++------- .../gluten/backendsapi/TransformerApi.scala | 5 +- .../execution/WriteFilesExecTransformer.scala | 5 +- 22 files changed, 725 insertions(+), 442 deletions(-) create mode 100644 backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/datasources/DeltaV1Writes.scala create mode 100644 backends-clickhouse/src-delta-32/test/scala/org/apache/spark/sql/execution/datasources/DeltaV1WritesSuite.scala diff --git a/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala index 05f7fdbfa423..cd3ce793747c 100644 --- a/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala +++ b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/delta/ClickhouseOptimisticTransaction.scala @@ -29,9 +29,8 @@ import org.apache.spark.sql.delta.files._ import org.apache.spark.sql.delta.hooks.AutoCompact import org.apache.spark.sql.delta.schema.{InnerInvariantViolationException, InvariantViolationException} import org.apache.spark.sql.delta.sources.DeltaSQLConf -import org.apache.spark.sql.execution.{QueryExecution, SparkPlan, SQLExecution} -import 
org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanExec -import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, FileFormatWriter, GlutenWriterColumnarRules, WriteFiles, WriteJobStatsTracker} +import org.apache.spark.sql.execution.SQLExecution +import org.apache.spark.sql.execution.datasources.{BasicWriteJobStatsTracker, DeltaV1Writes, FileFormatWriter, GlutenWriterColumnarRules, WriteJobStatsTracker} import org.apache.spark.sql.execution.datasources.v1.MergeTreeWriterInjects import org.apache.spark.sql.execution.datasources.v1.clickhouse.MergeTreeFileFormatWriter import org.apache.spark.sql.execution.datasources.v2.clickhouse.ClickHouseConfig @@ -229,31 +228,12 @@ class ClickhouseOptimisticTransaction( val (data, partitionSchema) = performCDCPartition(inputData) val outputPath = deltaLog.dataPath - val fileFormat = deltaLog.fileFormat(protocol, metadata) // TODO support changing formats. - - // Iceberg spec requires partition columns in data files - val writePartitionColumns = IcebergCompat.isAnyEnabled(metadata) - // Retain only a minimal selection of Spark writer options to avoid any potential - // compatibility issues - val options = (writeOptions match { - case None => Map.empty[String, String] - case Some(writeOptions) => - writeOptions.options.filterKeys { - key => - key.equalsIgnoreCase(DeltaOptions.MAX_RECORDS_PER_FILE) || - key.equalsIgnoreCase(DeltaOptions.COMPRESSION) - }.toMap - }) + (DeltaOptions.WRITE_PARTITION_COLUMNS -> writePartitionColumns.toString) - - val (normalQueryExecution, output, generatedColumnConstraints, _) = + val (queryExecution, output, generatedColumnConstraints, _) = normalizeData(deltaLog, writeOptions, data) val partitioningColumns = getPartitioningColumns(partitionSchema, output) - val logicalPlan = normalQueryExecution.optimizedPlan - val write = - WriteFiles(logicalPlan, fileFormat, partitioningColumns, None, options, Map.empty) + val fileFormat = deltaLog.fileFormat(protocol, metadata) // TODO support changing formats. - val queryExecution = new QueryExecution(spark, write) val (committer, collectStats) = fileFormat.toString match { case "MergeTree" => (getCommitter2(outputPath), false) case _ => (getCommitter(outputPath), true) @@ -274,20 +254,24 @@ class ClickhouseOptimisticTransaction( SQLExecution.withNewExecutionId(queryExecution, Option("deltaTransactionalWrite")) { val outputSpec = FileFormatWriter.OutputSpec(outputPath.toString, Map.empty, output) - val physicalPlan = materializeAdaptiveSparkPlan(queryExecution.executedPlan) - // convertEmptyToNullIfNeeded(queryExecution.executedPlan, partitioningColumns, constraints) - /* val checkInvariants = DeltaInvariantCheckerExec(empty2NullPlan, constraints) + val empty2NullPlan = + convertEmptyToNullIfNeeded(queryExecution.sparkPlan, partitioningColumns, constraints) + // TODO: val checkInvariants = DeltaInvariantCheckerExec(empty2NullPlan, constraints) + val checkInvariants = empty2NullPlan + // No need to plan optimized write if the write command is OPTIMIZE, which aims to produce // evenly-balanced data files already. 
- val physicalPlan = - if ( - !isOptimize && - shouldOptimizeWrite(writeOptions, spark.sessionState.conf) - ) { - DeltaOptimizedWriterExec(checkInvariants, metadata.partitionColumns, deltaLog) - } else { - checkInvariants - } */ + // TODO: val physicalPlan = + // if ( + // !isOptimize && + // shouldOptimizeWrite(writeOptions, spark.sessionState.conf) + // ) { + // DeltaOptimizedWriterExec(checkInvariants, metadata.partitionColumns, deltaLog) + // } else { + // checkInvariants + // } + val physicalPlan = checkInvariants + val statsTrackers: ListBuffer[WriteJobStatsTracker] = ListBuffer() if (spark.conf.get(DeltaSQLConf.DELTA_HISTORY_METRICS_ENABLED)) { @@ -298,10 +282,33 @@ class ClickhouseOptimisticTransaction( statsTrackers.append(basicWriteJobStatsTracker) } + // Iceberg spec requires partition columns in data files + val writePartitionColumns = IcebergCompat.isAnyEnabled(metadata) + // Retain only a minimal selection of Spark writer options to avoid any potential + // compatibility issues + val options = (writeOptions match { + case None => Map.empty[String, String] + case Some(writeOptions) => + writeOptions.options.filterKeys { + key => + key.equalsIgnoreCase(DeltaOptions.MAX_RECORDS_PER_FILE) || + key.equalsIgnoreCase(DeltaOptions.COMPRESSION) + }.toMap + }) + (DeltaOptions.WRITE_PARTITION_COLUMNS -> writePartitionColumns.toString) + + val executedPlan = DeltaV1Writes( + spark, + physicalPlan, + fileFormat, + partitioningColumns, + None, + options + ).executedPlan + try { DeltaFileFormatWriter.write( sparkSession = spark, - plan = physicalPlan, + plan = executedPlan, fileFormat = fileFormat, committer = committer, outputSpec = outputSpec, @@ -358,8 +365,4 @@ class ClickhouseOptimisticTransaction( resultFiles.toSeq ++ committer.changeFiles } - private def materializeAdaptiveSparkPlan(plan: SparkPlan): SparkPlan = plan match { - case a: AdaptiveSparkPlanExec => a.finalPhysicalPlan - case p: SparkPlan => p - } } diff --git a/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/FileDeltaColumnarWrite.scala b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/FileDeltaColumnarWrite.scala index bf6b0c0074dc..df7ef7e23409 100644 --- a/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/FileDeltaColumnarWrite.scala +++ b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/FileDeltaColumnarWrite.scala @@ -137,7 +137,8 @@ case class FileDeltaColumnarWrite( // stats.map(row => x.apply(row).getString(0)).foreach(println) // process stats val commitInfo = DeltaFileCommitInfo(committer) - val basicNativeStat = NativeBasicWriteTaskStatsTracker(description, basicWriteJobStatsTracker) + val basicNativeStat = + NativeBasicWriteTaskStatsTracker(description.path, basicWriteJobStatsTracker) val basicNativeStats = Seq(commitInfo, basicNativeStat) NativeStatCompute(stats)(basicNativeStats, nativeDeltaStats) diff --git a/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/datasources/DeltaV1Writes.scala b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/datasources/DeltaV1Writes.scala new file mode 100644 index 000000000000..8ae99cc0d59f --- /dev/null +++ b/backends-clickhouse/src-delta-32/main/scala/org/apache/spark/sql/execution/datasources/DeltaV1Writes.scala @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources +import org.apache.gluten.backendsapi.BackendsApiManager + +import org.apache.spark.sql.SparkSession +import org.apache.spark.sql.catalyst.catalog.BucketSpec +import org.apache.spark.sql.catalyst.catalog.CatalogTypes.TablePartitionSpec +import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} +import org.apache.spark.sql.catalyst.plans.logical.LocalRelation +import org.apache.spark.sql.execution.{QueryExecution, SortExec, SparkPlan} +import org.apache.spark.sql.execution.datasources.V1WritesUtils.isOrderingMatched + +case class DeltaV1Writes( + spark: SparkSession, + query: SparkPlan, + fileFormat: FileFormat, + partitionColumns: Seq[Attribute], + bucketSpec: Option[BucketSpec], + options: Map[String, String], + staticPartitions: TablePartitionSpec = Map.empty) { + + require(fileFormat != null, "FileFormat is required to write files.") + require(BackendsApiManager.getSettings.enableNativeWriteFiles()) + + private lazy val requiredOrdering: Seq[SortOrder] = + V1WritesUtils.getSortOrder( + query.output, + partitionColumns, + bucketSpec, + options, + staticPartitions.size) + + lazy val sortPlan: SparkPlan = { + val outputOrdering = query.outputOrdering + val orderingMatched = isOrderingMatched(requiredOrdering.map(_.child), outputOrdering) + if (orderingMatched) { + query + } else { + SortExec(requiredOrdering, global = false, query) + } + } + + lazy val writePlan: SparkPlan = + WriteFilesExec( + sortPlan, + fileFormat = fileFormat, + partitionColumns = partitionColumns, + bucketSpec = bucketSpec, + options = options, + staticPartitions = staticPartitions) + + lazy val executedPlan: SparkPlan = + CallTransformer(spark, writePlan).executedPlan +} + +case class CallTransformer(spark: SparkSession, physicalPlan: SparkPlan) + extends QueryExecution(spark, LocalRelation()) { + override lazy val sparkPlan: SparkPlan = physicalPlan +} diff --git a/backends-clickhouse/src-delta-32/test/scala/org/apache/spark/sql/execution/datasources/DeltaV1WritesSuite.scala b/backends-clickhouse/src-delta-32/test/scala/org/apache/spark/sql/execution/datasources/DeltaV1WritesSuite.scala new file mode 100644 index 000000000000..1a90148df29e --- /dev/null +++ b/backends-clickhouse/src-delta-32/test/scala/org/apache/spark/sql/execution/datasources/DeltaV1WritesSuite.scala @@ -0,0 +1,100 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution.datasources + +import org.apache.gluten.GlutenConfig +import org.apache.gluten.execution.{GlutenClickHouseWholeStageTransformerSuite, GlutenPlan, SortExecTransformer} +import org.apache.spark.SparkConf +import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat +import org.apache.spark.sql.execution.{SortExec, SparkPlan} + +class DeltaV1WritesSuite extends GlutenClickHouseWholeStageTransformerSuite { + + import testImplicits._ + + override protected def sparkConf: SparkConf = { + super.sparkConf + .set(GlutenConfig.NATIVE_WRITER_ENABLED.key, "true") + } + + override def beforeAll(): Unit = { + super.beforeAll() + (0 to 20) + .map(i => (i, i % 5, (i % 10).toString)) + .toDF("i", "j", "k") + .write + .saveAsTable("t0") + } + + override def afterAll(): Unit = { + sql("drop table if exists t0") + super.afterAll() + } + + val format = new ParquetFileFormat + def getSort(child: SparkPlan): Option[SortExecTransformer] = { + child.collectFirst { case w: SortExecTransformer => w } + } + test("don't add sort when the required ordering is empty") { + val df = sql("select * from t0") + val plan = df.queryExecution.sparkPlan + val writes = DeltaV1Writes(spark, plan, format, Nil, None, Map.empty) + assert(writes.sortPlan === plan) + assert(writes.writePlan != null) + assert(writes.executedPlan.isInstanceOf[GlutenPlan]) + val writeFilesOpt = V1WritesUtils.getWriteFilesOpt(writes.executedPlan) + assert(writeFilesOpt.isDefined) + val sortExec = getSort(writes.executedPlan) + assert(sortExec.isEmpty) + } + + test("don't add sort when the required ordering is already satisfied") { + val df = sql("select * from t0") + def check(plan: SparkPlan): Unit = { + val partitionColumns = plan.output.find(_.name == "k").toSeq + val writes = DeltaV1Writes(spark, plan, format, partitionColumns, None, Map.empty) + assert(writes.sortPlan === plan) + assert(writes.writePlan != null) + assert(writes.executedPlan.isInstanceOf[GlutenPlan]) + val writeFilesOpt = V1WritesUtils.getWriteFilesOpt(writes.executedPlan) + assert(writeFilesOpt.isDefined) + val sortExec = getSort(writes.executedPlan) + assert(sortExec.isDefined) + } + check(df.orderBy("k").queryExecution.sparkPlan) + check(df.orderBy("k", "j").queryExecution.sparkPlan) + } + + test("add sort when the required ordering is not satisfied") { + val df = sql("select * from t0") + def check(plan: SparkPlan): Unit = { + val partitionColumns = plan.output.find(_.name == "k").toSeq + val writes = DeltaV1Writes(spark, plan, format, partitionColumns, None, Map.empty) + val sort = writes.sortPlan.asInstanceOf[SortExec] + assert(sort.child === plan) + assert(writes.writePlan != null) + assert(writes.executedPlan.isInstanceOf[GlutenPlan]) + val writeFilesOpt = V1WritesUtils.getWriteFilesOpt(writes.executedPlan) + assert(writeFilesOpt.isDefined) + val sortExec = getSort(writes.executedPlan) + assert(sortExec.isDefined, s"writes.executedPlan: ${writes.executedPlan}") + } + check(df.queryExecution.sparkPlan) + check(df.orderBy("j", "k").queryExecution.sparkPlan) + } + +} diff --git 
a/backends-clickhouse/src/main/resources/org/apache/spark/sql/execution/datasources/v1/write_optimization.proto b/backends-clickhouse/src/main/resources/org/apache/spark/sql/execution/datasources/v1/write_optimization.proto index 89f606e4ffd3..fdf34f1a0a75 100644 --- a/backends-clickhouse/src/main/resources/org/apache/spark/sql/execution/datasources/v1/write_optimization.proto +++ b/backends-clickhouse/src/main/resources/org/apache/spark/sql/execution/datasources/v1/write_optimization.proto @@ -12,6 +12,9 @@ message Write { message Common { string format = 1; string job_task_attempt_id = 2; // currently used in mergetree format + + // Describes the partition index in the WriteRel.table_schema. + repeated int32 partition_col_index = 3; } message ParquetWrite{} message OrcWrite{} diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHRuleApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHRuleApi.scala index 40e53536184c..32961c21a266 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHRuleApi.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHRuleApi.scala @@ -93,7 +93,6 @@ object CHRuleApi { // Legacy: Post-transform rules. injector.injectPostTransform(_ => PruneNestedColumnsInHiveTableScan) - injector.injectPostTransform(_ => RemoveNativeWriteFilesSortAndProject()) injector.injectPostTransform(c => intercept(RewriteTransformer.apply(c.session))) injector.injectPostTransform(_ => PushDownFilterToScan) injector.injectPostTransform(_ => PushDownInputFileExpression.PostOffload) diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala index 0be8cf2c25bf..ef5a4eff6fca 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/CHTransformerApi.scala @@ -17,7 +17,7 @@ package org.apache.gluten.backendsapi.clickhouse import org.apache.gluten.backendsapi.TransformerApi -import org.apache.gluten.execution.CHHashAggregateExecTransformer +import org.apache.gluten.execution.{CHHashAggregateExecTransformer, WriteFilesExecTransformer} import org.apache.gluten.expression.ConverterUtils import org.apache.gluten.substrait.expression.{BooleanLiteralNode, ExpressionBuilder, ExpressionNode} import org.apache.gluten.utils.{CHInputPartitionsUtil, ExpressionDocUtil} @@ -31,7 +31,7 @@ import org.apache.spark.sql.delta.catalog.ClickHouseTableV2 import org.apache.spark.sql.delta.files.TahoeFileIndex import org.apache.spark.sql.execution.SparkPlan import org.apache.spark.sql.execution.aggregate.HashAggregateExec -import org.apache.spark.sql.execution.datasources.{FileFormat, HadoopFsRelation, PartitionDirectory} +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory} import org.apache.spark.sql.execution.datasources.orc.OrcFileFormat import org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat import org.apache.spark.sql.execution.datasources.v1.Write @@ -243,24 +243,31 @@ class CHTransformerApi extends TransformerApi with Logging { GlutenDriverEndpoint.invalidateResourceRelation(executionId) } - override def genWriteParameters( - fileFormat: FileFormat, - writeOptions: Map[String, String]): Any = { - val fileFormatStr = fileFormat 
match { + override def genWriteParameters(writeExec: WriteFilesExecTransformer): Any = { + val fileFormatStr = writeExec.fileFormat match { case register: DataSourceRegister => register.shortName case _ => "UnknownFileFormat" } - val write = Write + val childOutput = writeExec.child.output + + val partitionIndexes = + writeExec.partitionColumns.map(p => childOutput.indexWhere(_.exprId == p.exprId)) + require(partitionIndexes.forall(_ >= 0)) + + val common = Write.Common .newBuilder() - .setCommon( - Write.Common - .newBuilder() - .setFormat(fileFormatStr) - .setJobTaskAttemptId("") // we can get job and task id at the driver side - .build()) + .setFormat(s"$fileFormatStr") + .setJobTaskAttemptId("") // we cannot get job and task id at the driver side) + partitionIndexes.foreach { + idx => + require(idx >= 0) + common.addPartitionColIndex(idx) + } + + val write = Write.newBuilder().setCommon(common.build()) - fileFormat match { + writeExec.fileFormat match { case d: MergeTreeFileFormat => write.setMergetree(MergeTreeFileFormat.createWrite(d.metadata)) case _: ParquetFileFormat => @@ -273,5 +280,5 @@ class CHTransformerApi extends TransformerApi with Logging { /** use Hadoop Path class to encode the file path */ override def encodeFilePathIfNeed(filePath: String): String = - (new Path(filePath)).toUri.toASCIIString + new Path(filePath).toUri.toASCIIString } diff --git a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeConfig.scala b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeConfig.scala index 12bb8d05d953..055c3b9d87b8 100644 --- a/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeConfig.scala +++ b/backends-clickhouse/src/main/scala/org/apache/gluten/backendsapi/clickhouse/RuntimeConfig.scala @@ -22,6 +22,7 @@ object RuntimeConfig { import CHConf._ import SQLConf._ + /** Clickhouse Configuration */ val PATH = buildConf(runtimeConfig("path")) .doc( @@ -37,9 +38,25 @@ object RuntimeConfig { .createWithDefault("/tmp/libch") // scalastyle:on line.size.limit + // scalastyle:off line.size.limit + val LOGGER_LEVEL = + buildConf(runtimeConfig("logger.level")) + .doc( + "https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#logger") + .stringConf + .createWithDefault("warning") + // scalastyle:on line.size.limit + + /** Gluten Configuration */ val USE_CURRENT_DIRECTORY_AS_TMP = buildConf(runtimeConfig("use_current_directory_as_tmp")) .doc("Use the current directory as the temporary directory.") .booleanConf .createWithDefault(false) + + val DUMP_PIPELINE = + buildConf(runtimeConfig("dump_pipeline")) + .doc("Dump pipeline to file after execution") + .booleanConf + .createWithDefault(false) } diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/CHColumnarWrite.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/CHColumnarWrite.scala index 1342e250430e..427db0aad2b5 100644 --- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/CHColumnarWrite.scala +++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/CHColumnarWrite.scala @@ -198,12 +198,12 @@ case class NativeStatCompute(rows: Seq[InternalRow]) { } case class NativeBasicWriteTaskStatsTracker( - description: WriteJobDescription, + writeDir: String, basicWriteJobStatsTracker: WriteTaskStatsTracker) extends (NativeFileWriteResult => Unit) { private var numWrittenRows: Long = 0 override def apply(stat: NativeFileWriteResult): Unit = { - 
val absolutePath = s"${description.path}/${stat.relativePath}" + val absolutePath = s"$writeDir/${stat.relativePath}" if (stat.partition_id != "__NO_PARTITION_ID__") { basicWriteJobStatsTracker.newPartition(new GenericInternalRow(Array[Any](stat.partition_id))) } @@ -248,6 +248,8 @@ case class HadoopMapReduceCommitProtocolWrite( extends CHColumnarWrite[HadoopMapReduceCommitProtocol] with Logging { + private var stageDir: String = _ + private lazy val adapter: HadoopMapReduceAdapter = HadoopMapReduceAdapter(committer) /** @@ -257,11 +259,12 @@ case class HadoopMapReduceCommitProtocolWrite( override def doSetupNativeTask(): Unit = { val (writePath, writeFilePattern) = adapter.getTaskAttemptTempPathAndFilePattern(taskAttemptContext, description) - logDebug(s"Native staging write path: $writePath and file pattern: $writeFilePattern") + stageDir = writePath + logDebug(s"Native staging write path: $stageDir and file pattern: $writeFilePattern") val settings = Map( - RuntimeSettings.TASK_WRITE_TMP_DIR.key -> writePath, + RuntimeSettings.TASK_WRITE_TMP_DIR.key -> stageDir, RuntimeSettings.TASK_WRITE_FILENAME_PATTERN.key -> writeFilePattern) NativeExpressionEvaluator.updateQueryRuntimeSettings(settings) } @@ -272,7 +275,7 @@ case class HadoopMapReduceCommitProtocolWrite( None } else { val commitInfo = FileCommitInfo(description) - val basicNativeStat = NativeBasicWriteTaskStatsTracker(description, basicWriteJobStatsTracker) + val basicNativeStat = NativeBasicWriteTaskStatsTracker(stageDir, basicWriteJobStatsTracker) val basicNativeStats = Seq(commitInfo, basicNativeStat) NativeStatCompute(stats)(basicNativeStats) val (partitions, addedAbsPathFiles) = commitInfo.result diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala index 2f55510a7b1f..3736f0f14415 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickHouseDeltaParquetWriteSuite.scala @@ -1025,6 +1025,7 @@ class GlutenClickHouseDeltaParquetWriteSuite } } + // FIXME: optimize testSparkVersionLE33("test parquet optimize with the path based table") { val dataPath = s"$basePath/lineitem_delta_parquet_optimize_path_based" clearDataPath(dataPath) diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteSuite.scala index cc577609656b..60ca58d9fc29 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/mergetree/GlutenClickHouseMergeTreeWriteSuite.scala @@ -57,6 +57,7 @@ class GlutenClickHouseMergeTreeWriteSuite .set("spark.sql.adaptive.enabled", "true") .set("spark.sql.files.maxPartitionBytes", "20000000") .set(GlutenConfig.NATIVE_WRITER_ENABLED.key, "true") + .set(CHConf.ENABLE_ONEPIPELINE_MERGETREE_WRITE.key, spark35.toString) .setCHSettings("min_insert_block_size_rows", 100000) .setCHSettings("mergetree.merge_after_insert", false) .setCHSettings("input_format_parquet_max_block_size", 8192) @@ -67,178 +68,172 @@ class GlutenClickHouseMergeTreeWriteSuite } test("test mergetree table write") { - 
withSQLConf((CHConf.ENABLE_ONEPIPELINE_MERGETREE_WRITE.key, spark35.toString)) { - spark.sql(s""" - |DROP TABLE IF EXISTS lineitem_mergetree; - |""".stripMargin) + spark.sql(s""" + |DROP TABLE IF EXISTS lineitem_mergetree; + |""".stripMargin) - // write.format.default = mergetree - spark.sql(s""" - |CREATE TABLE IF NOT EXISTS lineitem_mergetree - |( - | l_orderkey bigint, - | l_partkey bigint, - | l_suppkey bigint, - | l_linenumber bigint, - | l_quantity double, - | l_extendedprice double, - | l_discount double, - | l_tax double, - | l_returnflag string, - | l_linestatus string, - | l_shipdate date, - | l_commitdate date, - | l_receiptdate date, - | l_shipinstruct string, - | l_shipmode string, - | l_comment string - |) - |USING clickhouse - |TBLPROPERTIES (write.format.default = 'mergetree') - |LOCATION '$basePath/lineitem_mergetree' - |""".stripMargin) + // write.format.default = mergetree + spark.sql(s""" + |CREATE TABLE IF NOT EXISTS lineitem_mergetree + |( + | l_orderkey bigint, + | l_partkey bigint, + | l_suppkey bigint, + | l_linenumber bigint, + | l_quantity double, + | l_extendedprice double, + | l_discount double, + | l_tax double, + | l_returnflag string, + | l_linestatus string, + | l_shipdate date, + | l_commitdate date, + | l_receiptdate date, + | l_shipinstruct string, + | l_shipmode string, + | l_comment string + |) + |USING clickhouse + |TBLPROPERTIES (write.format.default = 'mergetree') + |LOCATION '$basePath/lineitem_mergetree' + |""".stripMargin) - spark.sql(s""" - | insert into table lineitem_mergetree - | select * from lineitem - |""".stripMargin) + spark.sql(s""" + | insert into table lineitem_mergetree + | select * from lineitem + |""".stripMargin) - runTPCHQueryBySQL(1, q1("lineitem_mergetree")) { - df => - val plans = collect(df.queryExecution.executedPlan) { - case f: FileSourceScanExecTransformer => f - case w: WholeStageTransformer => w - } - assertResult(4)(plans.size) + runTPCHQueryBySQL(1, q1("lineitem_mergetree")) { + df => + val plans = collect(df.queryExecution.executedPlan) { + case f: FileSourceScanExecTransformer => f + case w: WholeStageTransformer => w + } + assertResult(4)(plans.size) - val mergetreeScan = plans(3).asInstanceOf[FileSourceScanExecTransformer] - assert(mergetreeScan.nodeName.startsWith("ScanTransformer mergetree")) + val mergetreeScan = plans(3).asInstanceOf[FileSourceScanExecTransformer] + assert(mergetreeScan.nodeName.startsWith("ScanTransformer mergetree")) - val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] - assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) - assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty) - assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).orderByKeyOption.isEmpty) - assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.isEmpty) - assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) - val addFiles = - fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) - assertResult(6)(addFiles.size) - assertResult(600572)(addFiles.map(_.rows).sum) + val fileIndex = mergetreeScan.relation.location.asInstanceOf[TahoeFileIndex] + assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).clickhouseTableConfigs.nonEmpty) + assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).bucketOption.isEmpty) + assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).orderByKeyOption.isEmpty) + assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).primaryKeyOption.isEmpty) + 
assert(ClickHouseTableV2.getTable(fileIndex.deltaLog).partitionColumns.isEmpty) + val addFiles = + fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) + assertResult(6)(addFiles.size) + assertResult(600572)(addFiles.map(_.rows).sum) - // GLUTEN-5060: check the unnecessary FilterExec - val wholeStageTransformer = plans(2).asInstanceOf[WholeStageTransformer] - val planNodeJson = wholeStageTransformer.substraitPlanJson - assert( - !planNodeJson - .replaceAll("\n", "") - .replaceAll(" ", "") - .contains("\"input\":{\"filter\":{")) - } + // GLUTEN-5060: check the unnecessary FilterExec + val wholeStageTransformer = plans(2).asInstanceOf[WholeStageTransformer] + val planNodeJson = wholeStageTransformer.substraitPlanJson + assert( + !planNodeJson + .replaceAll("\n", "") + .replaceAll(" ", "") + .contains("\"input\":{\"filter\":{")) } } test("test mergetree insert overwrite") { - withSQLConf((CHConf.ENABLE_ONEPIPELINE_MERGETREE_WRITE.key, spark35.toString)) { - spark.sql(s""" - |DROP TABLE IF EXISTS lineitem_mergetree_insertoverwrite; - |""".stripMargin) + spark.sql(s""" + |DROP TABLE IF EXISTS lineitem_mergetree_insertoverwrite; + |""".stripMargin) - spark.sql(s""" - |CREATE TABLE IF NOT EXISTS lineitem_mergetree_insertoverwrite - |( - | l_orderkey bigint, - | l_partkey bigint, - | l_suppkey bigint, - | l_linenumber bigint, - | l_quantity double, - | l_extendedprice double, - | l_discount double, - | l_tax double, - | l_returnflag string, - | l_linestatus string, - | l_shipdate date, - | l_commitdate date, - | l_receiptdate date, - | l_shipinstruct string, - | l_shipmode string, - | l_comment string - |) - |USING clickhouse - |LOCATION '$basePath/lineitem_mergetree_insertoverwrite' - |""".stripMargin) + spark.sql(s""" + |CREATE TABLE IF NOT EXISTS lineitem_mergetree_insertoverwrite + |( + | l_orderkey bigint, + | l_partkey bigint, + | l_suppkey bigint, + | l_linenumber bigint, + | l_quantity double, + | l_extendedprice double, + | l_discount double, + | l_tax double, + | l_returnflag string, + | l_linestatus string, + | l_shipdate date, + | l_commitdate date, + | l_receiptdate date, + | l_shipinstruct string, + | l_shipmode string, + | l_comment string + |) + |USING clickhouse + |LOCATION '$basePath/lineitem_mergetree_insertoverwrite' + |""".stripMargin) - spark.sql(s""" - | insert into table lineitem_mergetree_insertoverwrite - | select * from lineitem - |""".stripMargin) + spark.sql(s""" + | insert into table lineitem_mergetree_insertoverwrite + | select * from lineitem + |""".stripMargin) - spark.sql(s""" - | insert overwrite table lineitem_mergetree_insertoverwrite - | select * from lineitem where mod(l_orderkey,2) = 1 - |""".stripMargin) - val sql2 = - s""" - | select count(*) from lineitem_mergetree_insertoverwrite - | - |""".stripMargin - assertResult(300001)( - // total rows should remain unchanged - spark.sql(sql2).collect().apply(0).get(0) - ) - } + spark.sql(s""" + | insert overwrite table lineitem_mergetree_insertoverwrite + | select * from lineitem where mod(l_orderkey,2) = 1 + |""".stripMargin) + val sql2 = + s""" + | select count(*) from lineitem_mergetree_insertoverwrite + | + |""".stripMargin + assertResult(300001)( + // total rows should remain unchanged + spark.sql(sql2).collect().apply(0).get(0) + ) } test("test mergetree insert overwrite partitioned table with small table, static") { - withSQLConf((CHConf.ENABLE_ONEPIPELINE_MERGETREE_WRITE.key, spark35.toString)) { - spark.sql(s""" - |DROP TABLE IF EXISTS lineitem_mergetree_insertoverwrite2; - 
|""".stripMargin) + spark.sql(s""" + |DROP TABLE IF EXISTS lineitem_mergetree_insertoverwrite2; + |""".stripMargin) - spark.sql(s""" - |CREATE TABLE IF NOT EXISTS lineitem_mergetree_insertoverwrite2 - |( - | l_orderkey bigint, - | l_partkey bigint, - | l_suppkey bigint, - | l_linenumber bigint, - | l_quantity double, - | l_extendedprice double, - | l_discount double, - | l_tax double, - | l_returnflag string, - | l_linestatus string, - | l_shipdate date, - | l_commitdate date, - | l_receiptdate date, - | l_shipinstruct string, - | l_shipmode string, - | l_comment string - |) - |USING clickhouse - |PARTITIONED BY (l_shipdate) - |LOCATION '$basePath/lineitem_mergetree_insertoverwrite2' - |""".stripMargin) + spark.sql(s""" + |CREATE TABLE IF NOT EXISTS lineitem_mergetree_insertoverwrite2 + |( + | l_orderkey bigint, + | l_partkey bigint, + | l_suppkey bigint, + | l_linenumber bigint, + | l_quantity double, + | l_extendedprice double, + | l_discount double, + | l_tax double, + | l_returnflag string, + | l_linestatus string, + | l_shipdate date, + | l_commitdate date, + | l_receiptdate date, + | l_shipinstruct string, + | l_shipmode string, + | l_comment string + |) + |USING clickhouse + |PARTITIONED BY (l_shipdate) + |LOCATION '$basePath/lineitem_mergetree_insertoverwrite2' + |""".stripMargin) - spark.sql(s""" - | insert into table lineitem_mergetree_insertoverwrite2 - | select * from lineitem - |""".stripMargin) + spark.sql(s""" + | insert into table lineitem_mergetree_insertoverwrite2 + | select * from lineitem + |""".stripMargin) - spark.sql( - s""" - | insert overwrite table lineitem_mergetree_insertoverwrite2 - | select * from lineitem where l_shipdate BETWEEN date'1993-02-01' AND date'1993-02-10' - |""".stripMargin) - val sql2 = - s""" - | select count(*) from lineitem_mergetree_insertoverwrite2 - | - |""".stripMargin - assertResult(2418)( - // total rows should remain unchanged - spark.sql(sql2).collect().apply(0).get(0) - ) - } + spark.sql( + s""" + | insert overwrite table lineitem_mergetree_insertoverwrite2 + | select * from lineitem where l_shipdate BETWEEN date'1993-02-01' AND date'1993-02-10' + |""".stripMargin) + val sql2 = + s""" + | select count(*) from lineitem_mergetree_insertoverwrite2 + | + |""".stripMargin + assertResult(2418)( + // total rows should remain unchanged + spark.sql(sql2).collect().apply(0).get(0) + ) } test("test mergetree insert overwrite partitioned table with small table, dynamic") { @@ -650,8 +645,8 @@ class GlutenClickHouseMergeTreeWriteSuite // static partition spark.sql(s""" - | insert into lineitem_mergetree_partition PARTITION (l_shipdate=date'1995-01-21', - | l_returnflag = 'A') + | insert into lineitem_mergetree_partition + | PARTITION (l_shipdate=date'1995-01-21', l_returnflag = 'A') | (l_orderkey, | l_partkey, | l_suppkey, @@ -729,7 +724,8 @@ class GlutenClickHouseMergeTreeWriteSuite ClickHouseTableV2 .getTable(fileIndex.deltaLog) .partitionColumns(1)) - val addFiles = fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) + val addFiles = + fileIndex.matchingFiles(Nil, Nil).map(f => f.asInstanceOf[AddMergeTreeParts]) assertResult(3836)(addFiles.size) assertResult(605363)(addFiles.map(_.rows).sum) @@ -739,7 +735,7 @@ class GlutenClickHouseMergeTreeWriteSuite } } - test("test mergetree write with bucket table") { + testSparkVersionLE33("test mergetree write with bucket table") { spark.sql(s""" |DROP TABLE IF EXISTS lineitem_mergetree_bucket; |""".stripMargin) @@ -979,7 +975,7 @@ class GlutenClickHouseMergeTreeWriteSuite } } - 
test("test mergetree CTAS complex") { + test("test mergetree CTAS partition") { spark.sql(s""" |DROP TABLE IF EXISTS lineitem_mergetree_ctas2; |""".stripMargin) @@ -988,8 +984,6 @@ class GlutenClickHouseMergeTreeWriteSuite |CREATE TABLE IF NOT EXISTS lineitem_mergetree_ctas2 |USING clickhouse |PARTITIONED BY (l_shipdate) - |CLUSTERED BY (l_orderkey) - |${if (spark32) "" else "SORTED BY (l_partkey, l_returnflag)"} INTO 4 BUCKETS |LOCATION '$basePath/lineitem_mergetree_ctas2' | as select * from lineitem |""".stripMargin) @@ -1598,7 +1592,7 @@ class GlutenClickHouseMergeTreeWriteSuite case scanExec: BasicScanExecTransformer => scanExec } assertResult(1)(plans.size) - assertResult(conf._2)(plans.head.getSplitInfos.size) + assertResult(conf._2)(plans.head.getSplitInfos().size) } } }) @@ -1622,12 +1616,12 @@ class GlutenClickHouseMergeTreeWriteSuite case scanExec: BasicScanExecTransformer => scanExec } assertResult(1)(plans.size) - assertResult(1)(plans.head.getSplitInfos.size) + assertResult(1)(plans.head.getSplitInfos().size) } } } - test("test mergetree with primary keys filter pruning by driver with bucket") { + testSparkVersionLE33("test mergetree with primary keys filter pruning by driver with bucket") { spark.sql(s""" |DROP TABLE IF EXISTS lineitem_mergetree_pk_pruning_by_driver_bucket; |""".stripMargin) @@ -1730,7 +1724,7 @@ class GlutenClickHouseMergeTreeWriteSuite case f: BasicScanExecTransformer => f } assertResult(2)(scanExec.size) - assertResult(conf._2)(scanExec(1).getSplitInfos.size) + assertResult(conf._2)(scanExec(1).getSplitInfos().size) } } }) @@ -1776,7 +1770,7 @@ class GlutenClickHouseMergeTreeWriteSuite Seq("true", "false").foreach { skip => - withSQLConf("spark.databricks.delta.stats.skipping" -> skip.toString) { + withSQLConf("spark.databricks.delta.stats.skipping" -> skip) { val sqlStr = s""" |SELECT @@ -1799,7 +1793,7 @@ class GlutenClickHouseMergeTreeWriteSuite } } - test("test mergetree with column case sensitive") { + testSparkVersionLE33("test mergetree with column case sensitive") { spark.sql(s""" |DROP TABLE IF EXISTS LINEITEM_MERGETREE_CASE_SENSITIVE; |""".stripMargin) @@ -1838,7 +1832,7 @@ class GlutenClickHouseMergeTreeWriteSuite runTPCHQueryBySQL(6, q6("lineitem_mergetree_case_sensitive")) { _ => } } - test("test mergetree with partition with whitespace") { + testSparkVersionLE33("test mergetree with partition with whitespace") { spark.sql(s""" |DROP TABLE IF EXISTS lineitem_mergetree_partition_with_whitespace; |""".stripMargin) @@ -1900,7 +1894,7 @@ class GlutenClickHouseMergeTreeWriteSuite Seq(("-1", 3), ("3", 3), ("6", 1)).foreach( conf => { withSQLConf( - ("spark.gluten.sql.columnar.backend.ch.files.per.partition.threshold" -> conf._1)) { + "spark.gluten.sql.columnar.backend.ch.files.per.partition.threshold" -> conf._1) { val sql = s""" |select count(1), min(l_returnflag) from lineitem_split @@ -1913,7 +1907,7 @@ class GlutenClickHouseMergeTreeWriteSuite val scanExec = collect(df.queryExecution.executedPlan) { case f: FileSourceScanExecTransformer => f } - assert(scanExec(0).getPartitions.size == conf._2) + assert(scanExec.head.getPartitions.size == conf._2) } } }) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala index c6d2bc065879..d156fffa8b21 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala +++ 
b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxTransformerApi.scala @@ -27,7 +27,7 @@ import org.apache.gluten.vectorized.PlanEvaluatorJniWrapper import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.connector.read.InputPartition -import org.apache.spark.sql.execution.datasources.{FileFormat, HadoopFsRelation, PartitionDirectory} +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory} import org.apache.spark.sql.sources.DataSourceRegister import org.apache.spark.sql.types._ import org.apache.spark.task.TaskResources @@ -96,16 +96,14 @@ class VeloxTransformerApi extends TransformerApi with Logging { override def packPBMessage(message: Message): Any = Any.pack(message, "") - override def genWriteParameters( - fileFormat: FileFormat, - writeOptions: Map[String, String]): Any = { - val fileFormatStr = fileFormat match { + override def genWriteParameters(write: WriteFilesExecTransformer): Any = { + val fileFormatStr = write.fileFormat match { case register: DataSourceRegister => register.shortName case _ => "UnknownFileFormat" } val compressionCodec = - WriteFilesExecTransformer.getCompressionCodec(writeOptions).capitalize + WriteFilesExecTransformer.getCompressionCodec(write.caseInsensitiveOptions).capitalize val writeParametersStr = new StringBuffer("WriteParameters:") writeParametersStr.append("is").append(compressionCodec).append("=1") writeParametersStr.append(";format=").append(fileFormatStr).append("\n") diff --git a/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.cpp b/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.cpp index a76b4d398d97..0d57d53ff640 100644 --- a/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.cpp +++ b/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.cpp @@ -21,9 +21,8 @@ #include #include #include -#include #include -#include +#include #include #include #include @@ -103,7 +102,7 @@ void adjust_output(const DB::QueryPipelineBuilderPtr & builder, const DB::Block { throw DB::Exception( DB::ErrorCodes::LOGICAL_ERROR, - "Missmatch result columns size, input size is {}, but output size is {}", + "Mismatch result columns size, input size is {}, but output size is {}", input.columns(), output.columns()); } @@ -164,12 +163,6 @@ void addMergeTreeSinkTransform( : std::make_shared(header, partition_by, merge_tree_table, write_settings, context, stats); chain.addSource(sink); - const DB::Settings & settings = context->getSettingsRef(); - chain.addSource(std::make_shared( - header, settings[Setting::min_insert_block_size_rows], settings[Setting::min_insert_block_size_bytes])); - chain.addSource(std::make_shared( - header, settings[Setting::min_insert_block_size_rows], settings[Setting::min_insert_block_size_bytes])); - builder->addChain(std::move(chain)); } @@ -212,6 +205,7 @@ void addNormalFileWriterSinkTransform( namespace local_engine { + IMPLEMENT_GLUTEN_SETTINGS(GlutenWriteSettings, WRITE_RELATED_SETTINGS) void addSinkTransform(const DB::ContextPtr & context, const substrait::WriteRel & write_rel, const DB::QueryPipelineBuilderPtr & builder) @@ -224,12 +218,18 @@ void addSinkTransform(const DB::ContextPtr & context, const substrait::WriteRel throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Failed to unpack write optimization with local_engine::Write."); assert(write.has_common()); const substrait::NamedStruct & table_schema = write_rel.table_schema(); - auto output = 
TypeParser::buildBlockFromNamedStruct(table_schema); - adjust_output(builder, output); - const auto partitionCols = collect_partition_cols(output, table_schema); + auto partition_indexes = write.common().partition_col_index(); if (write.has_mergetree()) { - local_engine::MergeTreeTable merge_tree_table(write, table_schema); + MergeTreeTable merge_tree_table(write, table_schema); + auto output = TypeParser::buildBlockFromNamedStruct(table_schema, merge_tree_table.low_card_key); + adjust_output(builder, output); + + builder->addSimpleTransform( + [&](const Block & in_header) -> ProcessorPtr { return std::make_shared(in_header, false); }); + + const auto partition_by = collect_partition_cols(output, table_schema, partition_indexes); + GlutenWriteSettings write_settings = GlutenWriteSettings::get(context); if (write_settings.task_write_tmp_dir.empty()) throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "MergeTree Write Pipeline need inject relative path."); @@ -237,23 +237,35 @@ void addSinkTransform(const DB::ContextPtr & context, const substrait::WriteRel throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Non empty relative path for MergeTree table in pipeline mode."); merge_tree_table.relative_path = write_settings.task_write_tmp_dir; - addMergeTreeSinkTransform(context, builder, merge_tree_table, output, partitionCols); + addMergeTreeSinkTransform(context, builder, merge_tree_table, output, partition_by); } else - addNormalFileWriterSinkTransform(context, builder, write.common().format(), output, partitionCols); + { + auto output = TypeParser::buildBlockFromNamedStruct(table_schema); + adjust_output(builder, output); + const auto partition_by = collect_partition_cols(output, table_schema, partition_indexes); + addNormalFileWriterSinkTransform(context, builder, write.common().format(), output, partition_by); + } } - -DB::Names collect_partition_cols(const DB::Block & header, const substrait::NamedStruct & struct_) +DB::Names collect_partition_cols(const DB::Block & header, const substrait::NamedStruct & struct_, const PartitionIndexes & partition_by) { - DB::Names result; + if (partition_by.empty()) + { + assert(std::ranges::all_of( + struct_.column_types(), [](const int32_t type) { return type != ::substrait::NamedStruct::PARTITION_COL; })); + return {}; + } assert(struct_.column_types_size() == header.columns()); assert(struct_.column_types_size() == struct_.struct_().types_size()); - auto name_iter = header.begin(); - auto type_iter = struct_.column_types().begin(); - for (; name_iter != header.end(); ++name_iter, ++type_iter) - if (*type_iter == ::substrait::NamedStruct::PARTITION_COL) - result.push_back(name_iter->name); + DB::Names result; + result.reserve(partition_by.size()); + for (auto idx : partition_by) + { + assert(idx >= 0 && idx < header.columns()); + assert(struct_.column_types(idx) == ::substrait::NamedStruct::PARTITION_COL); + result.emplace_back(header.getByPosition(idx).name); + } return result; } diff --git a/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.h b/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.h index 01e0dabaaa7d..bb8c15c07d87 100644 --- a/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.h +++ b/cpp-ch/local-engine/Parser/RelParsers/WriteRelParser.h @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace substrait @@ -38,9 +39,11 @@ using QueryPipelineBuilderPtr = std::unique_ptr; namespace local_engine { +using PartitionIndexes = google::protobuf::RepeatedField<::int32_t>; + void addSinkTransform(const DB::ContextPtr & 
context, const substrait::WriteRel & write_rel, const DB::QueryPipelineBuilderPtr & builder); -DB::Names collect_partition_cols(const DB::Block & header, const substrait::NamedStruct & struct_); +DB::Names collect_partition_cols(const DB::Block & header, const substrait::NamedStruct & struct_, const PartitionIndexes & partition_by); #define WRITE_RELATED_SETTINGS(M, ALIAS) \ M(String, task_write_tmp_dir, , "The temporary directory for writing data") \ diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.cpp b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.cpp index 6c9dd890d851..d41e71fb848d 100644 --- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.cpp +++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.cpp @@ -31,27 +31,37 @@ extern const Metric GlobalThreadActive; extern const Metric GlobalThreadScheduled; } +namespace DB::Setting +{ +extern const SettingsUInt64 min_insert_block_size_rows; +extern const SettingsUInt64 min_insert_block_size_bytes; +} namespace local_engine { -void SparkMergeTreeSink::consume(Chunk & chunk) +void SparkMergeTreeSink::write(const Chunk & chunk) { - assert(!sink_helper->metadata_snapshot->hasPartitionKey()); + CurrentThread::flushUntrackedMemory(); + /// Reset earlier, so put it in the scope BlockWithPartition item{getHeader().cloneWithColumns(chunk.getColumns()), Row{}}; - size_t before_write_memory = 0; - if (auto * memory_tracker = CurrentThread::getMemoryTracker()) - { - CurrentThread::flushUntrackedMemory(); - before_write_memory = memory_tracker->get(); - } + sink_helper->writeTempPart(item, context, part_num); part_num++; - /// Reset earlier to free memory - item.block.clear(); - item.partition.clear(); +} - sink_helper->checkAndMerge(); +void SparkMergeTreeSink::consume(Chunk & chunk) +{ + Chunk tmp; + tmp.swap(chunk); + squashed_chunk = squashing.add(std::move(tmp)); + if (static_cast(squashed_chunk)) + { + write(Squashing::squash(std::move(squashed_chunk))); + sink_helper->checkAndMerge(); + } + assert(squashed_chunk.getNumRows() == 0); + assert(chunk.getNumRows() == 0); } void SparkMergeTreeSink::onStart() @@ -61,6 +71,11 @@ void SparkMergeTreeSink::onStart() void SparkMergeTreeSink::onFinish() { + assert(squashed_chunk.getNumRows() == 0); + squashed_chunk = squashing.flush(); + if (static_cast(squashed_chunk)) + write(Squashing::squash(std::move(squashed_chunk))); + assert(squashed_chunk.getNumRows() == 0); sink_helper->finish(context); if (stats_.has_value()) (*stats_)->collectStats(sink_helper->unsafeGet(), sink_helper->write_settings.partition_settings.partition_dir); @@ -91,7 +106,9 @@ SinkToStoragePtr SparkMergeTreeSink::create( } else sink_helper = std::make_shared(dest_storage, write_settings_, isRemoteStorage); - return std::make_shared(sink_helper, context, stats); + const DB::Settings & settings = context->getSettingsRef(); + return std::make_shared( + sink_helper, context, stats, settings[Setting::min_insert_block_size_rows], settings[Setting::min_insert_block_size_bytes]); } SinkHelper::SinkHelper(const SparkStorageMergeTreePtr & data_, const SparkMergeTreeWriteSettings & write_settings_, bool isRemoteStorage_) diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.h b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.h index b551d86d1d0c..828332d2d6c9 100644 --- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.h +++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeSink.h @@ -227,8 +227,17 @@ class SparkMergeTreeSink : public 
DB::SinkToStorage const DB::ContextMutablePtr & context, const SinkStatsOption & stats = {}); - explicit SparkMergeTreeSink(const SinkHelperPtr & sink_helper_, const ContextPtr & context_, const SinkStatsOption & stats) - : SinkToStorage(sink_helper_->metadata_snapshot->getSampleBlock()), context(context_), sink_helper(sink_helper_), stats_(stats) + explicit SparkMergeTreeSink( + const SinkHelperPtr & sink_helper_, + const ContextPtr & context_, + const SinkStatsOption & stats, + size_t min_block_size_rows, + size_t min_block_size_bytes) + : SinkToStorage(sink_helper_->metadata_snapshot->getSampleBlock()) + , context(context_) + , sink_helper(sink_helper_) + , stats_(stats) + , squashing(sink_helper_->metadata_snapshot->getSampleBlock(), min_block_size_rows, min_block_size_bytes) { } ~SparkMergeTreeSink() override = default; @@ -241,9 +250,13 @@ class SparkMergeTreeSink : public DB::SinkToStorage const SinkHelper & sinkHelper() const { return *sink_helper; } private: + void write(const Chunk & chunk); + ContextPtr context; SinkHelperPtr sink_helper; std::optional> stats_; + Squashing squashing; + Chunk squashed_chunk; int part_num = 1; }; diff --git a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp index a8fdfff6ff75..95145d43fab9 100644 --- a/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp +++ b/cpp-ch/local-engine/Storages/MergeTree/SparkMergeTreeWriter.cpp @@ -18,8 +18,6 @@ #include #include -#include -#include #include #include #include @@ -28,11 +26,6 @@ #include #include -namespace DB::Setting -{ -extern const SettingsUInt64 min_insert_block_size_rows; -extern const SettingsUInt64 min_insert_block_size_bytes; -} using namespace DB; namespace { @@ -125,12 +118,6 @@ std::unique_ptr SparkMergeTreeWriter::create( // // auto stats = std::make_shared(header, sink_helper); // chain.addSink(stats); - // - chain.addSource(std::make_shared( - header, settings[Setting::min_insert_block_size_rows], settings[Setting::min_insert_block_size_bytes])); - chain.addSource(std::make_shared( - header, settings[Setting::min_insert_block_size_rows], settings[Setting::min_insert_block_size_bytes])); - return std::make_unique(header, sink_helper, QueryPipeline{std::move(chain)}, spark_job_id); } diff --git a/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp b/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp index a01dd363c56c..a36601d6afa5 100644 --- a/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp +++ b/cpp-ch/local-engine/tests/gtest_write_pipeline.cpp @@ -146,7 +146,7 @@ TEST(WritePipeline, SubstraitFileSink) DB::Names expected{"s_suppkey", "s_name", "s_address", "s_nationkey", "s_phone", "s_acctbal", "s_comment111"}; EXPECT_EQ(expected, names); - auto partitionCols = collect_partition_cols(block, table_schema); + auto partitionCols = collect_partition_cols(block, table_schema, {}); DB::Names expected_partition_cols; EXPECT_EQ(expected_partition_cols, partitionCols); @@ -164,7 +164,7 @@ TEST(WritePipeline, SubstraitFileSink) INCBIN(native_write_one_partition, SOURCE_DIR "/utils/extern-local-engine/tests/json/native_write_one_partition.json"); -TEST(WritePipeline, SubstraitPartitionedFileSink) +/*TEST(WritePipeline, SubstraitPartitionedFileSink) { const auto context = DB::Context::createCopy(QueryContext::globalContext()); GlutenWriteSettings settings{ @@ -193,7 +193,7 @@ TEST(WritePipeline, SubstraitPartitionedFileSink) DB::Names expected{"s_suppkey", "s_name", "s_address", "s_phone", "s_acctbal", 
"s_comment", "s_nationkey"}; EXPECT_EQ(expected, names); - auto partitionCols = local_engine::collect_partition_cols(block, table_schema); + auto partitionCols = local_engine::collect_partition_cols(block, table_schema, {}); DB::Names expected_partition_cols{"s_nationkey"}; EXPECT_EQ(expected_partition_cols, partitionCols); @@ -201,12 +201,12 @@ TEST(WritePipeline, SubstraitPartitionedFileSink) const Block & x = *local_executor->nextColumnar(); debug::headBlock(x, 25); EXPECT_EQ(25, x.rows()); -} +}*/ TEST(WritePipeline, ComputePartitionedExpression) { const auto context = DB::Context::createCopy(QueryContext::globalContext()); - + Block sample_block{{STRING(), "name"}, {UINT(), "s_nationkey"}}; auto partition_by = SubstraitPartitionedFileSink::make_partition_expression({"s_nationkey", "name"}, sample_block); // auto partition_by = printColumn("s_nationkey"); diff --git a/cpp-ch/local-engine/tests/gtest_write_pipeline_mergetree.cpp b/cpp-ch/local-engine/tests/gtest_write_pipeline_mergetree.cpp index 1ad90060f475..a5cd3fd7f39c 100644 --- a/cpp-ch/local-engine/tests/gtest_write_pipeline_mergetree.cpp +++ b/cpp-ch/local-engine/tests/gtest_write_pipeline_mergetree.cpp @@ -258,11 +258,18 @@ TEST(MergeTree, SparkMergeTree) INCBIN(_3_mergetree_plan_input_, SOURCE_DIR "/utils/extern-local-engine/tests/json/mergetree/lineitem_parquet_input.json"); namespace { -void writeMerge(std::string_view json_plan, - const std::string & outputPath , - const std::function & callback, std::optional input = std::nullopt) +void writeMerge( + std::string_view json_plan, + const std::string & outputPath, + const std::function & callback, + std::optional input = std::nullopt) { const auto context = DB::Context::createCopy(QueryContext::globalContext()); + + auto queryid = QueryContext::instance().initializeQuery("gtest_mergetree"); + SCOPE_EXIT({ QueryContext::instance().finalizeQuery(queryid); }); + + GlutenWriteSettings settings{.task_write_tmp_dir = outputPath}; settings.set(context); SparkMergeTreeWritePartitionSettings partition_settings{.part_name_prefix = "pipline_prefix"}; @@ -279,18 +286,24 @@ INCBIN(_3_mergetree_plan_, SOURCE_DIR "/utils/extern-local-engine/tests/json/mer INCBIN(_4_mergetree_plan_, SOURCE_DIR "/utils/extern-local-engine/tests/json/mergetree/4_one_pipeline.json"); TEST(MergeTree, Pipeline) { - writeMerge(EMBEDDED_PLAN(_3_mergetree_plan_),"tmp/lineitem_mergetree",[&](const DB::Block & block) - { - EXPECT_EQ(1, block.rows()); - debug::headBlock(block); - }); + writeMerge( + EMBEDDED_PLAN(_3_mergetree_plan_), + "tmp/lineitem_mergetree", + [&](const DB::Block & block) + { + EXPECT_EQ(1, block.rows()); + debug::headBlock(block); + }); } TEST(MergeTree, PipelineWithPartition) { - writeMerge(EMBEDDED_PLAN(_4_mergetree_plan_),"tmp/lineitem_mergetree_p",[&](const DB::Block & block) - { - EXPECT_EQ(2525, block.rows()); - debug::headBlock(block); - }); + writeMerge( + EMBEDDED_PLAN(_4_mergetree_plan_), + "tmp/lineitem_mergetree_p", + [&](const DB::Block & block) + { + EXPECT_EQ(3815, block.rows()); + debug::headBlock(block); + }); } \ No newline at end of file diff --git a/cpp-ch/local-engine/tests/json/mergetree/4_one_pipeline.json b/cpp-ch/local-engine/tests/json/mergetree/4_one_pipeline.json index 14a9b3dda2ad..513f54a707d4 100644 --- a/cpp-ch/local-engine/tests/json/mergetree/4_one_pipeline.json +++ b/cpp-ch/local-engine/tests/json/mergetree/4_one_pipeline.json @@ -9,13 +9,18 @@ "optimization": { "@type": "type.googleapis.com/local_engine.Write", "common": { - "format": "mergetree" + "format": 
"mergetree", + "partitionColIndex": [ + 10, + 8 + ] }, "mergetree": { "database": "default", - "table": "lineitem_mergetree_insertoverwrite2", - "snapshotId": "1731309448915_0", - "orderByKey": "tuple()", + "table": "lineitem_mergetree_partition", + "snapshotId": "1734145864855_0", + "orderByKey": "l_orderkey", + "primaryKey": "l_orderkey", "storagePolicy": "default" } }, @@ -221,7 +226,7 @@ "NORMAL_COL", "NORMAL_COL", "NORMAL_COL", - "NORMAL_COL", + "PARTITION_COL", "NORMAL_COL", "PARTITION_COL", "NORMAL_COL", @@ -232,138 +237,171 @@ ] }, "input": { - "read": { + "sort": { "common": { "direct": {} }, - "baseSchema": { - "names": [ - "l_orderkey", - "l_partkey", - "l_suppkey", - "l_linenumber", - "l_quantity", - "l_extendedprice", - "l_discount", - "l_tax", - "l_returnflag", - "l_linestatus", - "l_shipdate", - "l_commitdate", - "l_receiptdate", - "l_shipinstruct", - "l_shipmode", - "l_comment" - ], - "struct": { - "types": [ - { - "i64": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "i64": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "i64": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "i64": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "fp64": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "fp64": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "fp64": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "fp64": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "string": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "string": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "date": { - "nullability": "NULLABILITY_NULLABLE" - } + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "l_orderkey", + "l_partkey", + "l_suppkey", + "l_linenumber", + "l_quantity", + "l_extendedprice", + "l_discount", + "l_tax", + "l_returnflag", + "l_linestatus", + "l_shipdate", + "l_commitdate", + "l_receiptdate", + "l_shipinstruct", + "l_shipmode", + "l_comment" + ], + "struct": { + "types": [ + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "i64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "fp64": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "date": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + }, + { + "string": { + "nullability": "NULLABILITY_NULLABLE" + } + } + ] }, - { - "date": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "date": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "string": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "string": { - "nullability": "NULLABILITY_NULLABLE" - } - }, - { - "string": { - "nullability": "NULLABILITY_NULLABLE" + "columnTypes": [ + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + 
"NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL", + "NORMAL_COL" + ] + }, + "advancedExtension": { + "optimization": { + "@type": "type.googleapis.com/google.protobuf.StringValue", + "value": "isMergeTree=0\n" + } + } + } + }, + "sorts": [ + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 10 + } } } - ] + }, + "direction": "SORT_DIRECTION_ASC_NULLS_FIRST" }, - "columnTypes": [ - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL", - "NORMAL_COL" - ] - }, - "advancedExtension": { - "optimization": { - "@type": "type.googleapis.com/google.protobuf.StringValue", - "value": "isMergeTree=0\n" + { + "expr": { + "selection": { + "directReference": { + "structField": { + "field": 8 + } + } + } + }, + "direction": "SORT_DIRECTION_ASC_NULLS_FIRST" } - } + ] } } } diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala index 69cea9c5470d..984450bf164e 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/backendsapi/TransformerApi.scala @@ -16,12 +16,13 @@ */ package org.apache.gluten.backendsapi +import org.apache.gluten.execution.WriteFilesExecTransformer import org.apache.gluten.substrait.expression.ExpressionNode import org.apache.spark.sql.catalyst.expressions.{Attribute, Expression} import org.apache.spark.sql.connector.read.InputPartition import org.apache.spark.sql.execution.SparkPlan -import org.apache.spark.sql.execution.datasources.{FileFormat, HadoopFsRelation, PartitionDirectory} +import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, PartitionDirectory} import org.apache.spark.sql.types.{DataType, DecimalType, StructType} import org.apache.spark.util.collection.BitSet @@ -75,7 +76,7 @@ trait TransformerApi { /** This method is only used for CH backend tests */ def invalidateSQLExecutionResource(executionId: String): Unit = {} - def genWriteParameters(fileFormat: FileFormat, writeOptions: Map[String, String]): Any + def genWriteParameters(write: WriteFilesExecTransformer): Any /** use Hadoop Path class to encode the file path */ def encodeFilePathIfNeed(filePath: String): String = filePath diff --git a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala index a9d3a6282ae1..726dbdc3ef30 100644 --- a/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala +++ b/gluten-substrait/src/main/scala/org/apache/gluten/execution/WriteFilesExecTransformer.scala @@ -67,7 +67,7 @@ case class WriteFilesExecTransformer( override def output: Seq[Attribute] = Seq.empty - private val caseInsensitiveOptions = CaseInsensitiveMap(options) + val caseInsensitiveOptions: CaseInsensitiveMap[String] = CaseInsensitiveMap(options) def getRelNode( context: SubstraitContext, @@ -99,8 +99,7 @@ case class WriteFilesExecTransformer( ConverterUtils.collectAttributeNames(inputAttributes.toSeq) val extensionNode = if (!validation) { ExtensionBuilder.makeAdvancedExtension( - BackendsApiManager.getTransformerApiInstance - 
.genWriteParameters(fileFormat, caseInsensitiveOptions), + BackendsApiManager.getTransformerApiInstance.genWriteParameters(this), SubstraitUtil.createEnhancement(originalInputAttributes) ) } else { From 789dd586cc6e22caf528982687df0f1cd120d1fd Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Tue, 17 Dec 2024 11:12:14 +0800 Subject: [PATCH 08/14] [GLUTEN-7913][CORE] Flip dependency direction for gluten-uniffle (#8242) Closes #7913 --- backends-velox/pom.xml | 19 ++++++ .../gluten/uniffle/UniffleShuffleManager.java | 0 .../VeloxUniffleColumnarShuffleWriter.java | 0 .../shuffle/writer/PartitionPusher.scala | 0 gluten-uniffle/.gitkeep | 1 + gluten-uniffle/package/pom.xml | 29 --------- gluten-uniffle/pom.xml | 13 +--- gluten-uniffle/velox/pom.xml | 62 ------------------ package/pom.xml | 2 +- pom.xml | 64 +++++++++++++++++++ 10 files changed, 86 insertions(+), 104 deletions(-) rename {gluten-uniffle/velox/src => backends-velox/src-uniffle}/main/java/org/apache/spark/shuffle/gluten/uniffle/UniffleShuffleManager.java (100%) rename {gluten-uniffle/velox/src => backends-velox/src-uniffle}/main/java/org/apache/spark/shuffle/writer/VeloxUniffleColumnarShuffleWriter.java (100%) rename {gluten-uniffle/velox/src => backends-velox/src-uniffle}/main/scala/org/apache/spark/shuffle/writer/PartitionPusher.scala (100%) create mode 100644 gluten-uniffle/.gitkeep delete mode 100644 gluten-uniffle/package/pom.xml delete mode 100755 gluten-uniffle/velox/pom.xml diff --git a/backends-velox/pom.xml b/backends-velox/pom.xml index 9349c3c0923c..240b2218641c 100755 --- a/backends-velox/pom.xml +++ b/backends-velox/pom.xml @@ -57,6 +57,25 @@ + + uniffle + + false + + + + org.apache.gluten + gluten-uniffle + ${project.version} + + + org.apache.uniffle + rss-client-spark${spark.major.version}-shaded + ${uniffle.version} + provided + + + iceberg diff --git a/gluten-uniffle/velox/src/main/java/org/apache/spark/shuffle/gluten/uniffle/UniffleShuffleManager.java b/backends-velox/src-uniffle/main/java/org/apache/spark/shuffle/gluten/uniffle/UniffleShuffleManager.java similarity index 100% rename from gluten-uniffle/velox/src/main/java/org/apache/spark/shuffle/gluten/uniffle/UniffleShuffleManager.java rename to backends-velox/src-uniffle/main/java/org/apache/spark/shuffle/gluten/uniffle/UniffleShuffleManager.java diff --git a/gluten-uniffle/velox/src/main/java/org/apache/spark/shuffle/writer/VeloxUniffleColumnarShuffleWriter.java b/backends-velox/src-uniffle/main/java/org/apache/spark/shuffle/writer/VeloxUniffleColumnarShuffleWriter.java similarity index 100% rename from gluten-uniffle/velox/src/main/java/org/apache/spark/shuffle/writer/VeloxUniffleColumnarShuffleWriter.java rename to backends-velox/src-uniffle/main/java/org/apache/spark/shuffle/writer/VeloxUniffleColumnarShuffleWriter.java diff --git a/gluten-uniffle/velox/src/main/scala/org/apache/spark/shuffle/writer/PartitionPusher.scala b/backends-velox/src-uniffle/main/scala/org/apache/spark/shuffle/writer/PartitionPusher.scala similarity index 100% rename from gluten-uniffle/velox/src/main/scala/org/apache/spark/shuffle/writer/PartitionPusher.scala rename to backends-velox/src-uniffle/main/scala/org/apache/spark/shuffle/writer/PartitionPusher.scala diff --git a/gluten-uniffle/.gitkeep b/gluten-uniffle/.gitkeep new file mode 100644 index 000000000000..f2d1254d2735 --- /dev/null +++ b/gluten-uniffle/.gitkeep @@ -0,0 +1 @@ +The module is kept for adding common code shared by backends for Uniffle support in Gluten. 
diff --git a/gluten-uniffle/package/pom.xml b/gluten-uniffle/package/pom.xml deleted file mode 100644 index e49748e7c8e9..000000000000 --- a/gluten-uniffle/package/pom.xml +++ /dev/null @@ -1,29 +0,0 @@ - - - - gluten-uniffle - org.apache.gluten - 1.3.0-SNAPSHOT - ../pom.xml - - 4.0.0 - - gluten-uniffle-package - jar - Gluten Uniffle Package - - - - backends-velox - - - org.apache.gluten - gluten-uniffle-velox - ${project.version} - - - - - diff --git a/gluten-uniffle/pom.xml b/gluten-uniffle/pom.xml index b7fe4c2e4268..efc8ce6555c5 100644 --- a/gluten-uniffle/pom.xml +++ b/gluten-uniffle/pom.xml @@ -11,7 +11,7 @@ 4.0.0 gluten-uniffle - pom + jar Gluten Uniffle @@ -75,15 +75,4 @@ - - - backends-velox - - - - velox - package - - - diff --git a/gluten-uniffle/velox/pom.xml b/gluten-uniffle/velox/pom.xml deleted file mode 100755 index ab730674fbb3..000000000000 --- a/gluten-uniffle/velox/pom.xml +++ /dev/null @@ -1,62 +0,0 @@ - - - - gluten-uniffle - org.apache.gluten - 1.3.0-SNAPSHOT - ../pom.xml - - 4.0.0 - - gluten-uniffle-velox - jar - Gluten Uniffle Velox - - - - org.apache.gluten - backends-velox - ${project.version} - provided - - - org.apache.gluten - gluten-arrow - ${project.version} - provided - - - - - target/scala-${scala.binary.version}/classes - target/scala-${scala.binary.version}/test-classes - - - net.alchim31.maven - scala-maven-plugin - - - org.apache.maven.plugins - maven-compiler-plugin - - - org.scalastyle - scalastyle-maven-plugin - - - org.apache.maven.plugins - maven-checkstyle-plugin - - - com.diffplug.spotless - spotless-maven-plugin - - - org.apache.maven.plugins - maven-jar-plugin - - - - diff --git a/package/pom.xml b/package/pom.xml index 230f79d0942a..b9c114181bcd 100644 --- a/package/pom.xml +++ b/package/pom.xml @@ -78,7 +78,7 @@ org.apache.gluten - gluten-uniffle-package + gluten-uniffle ${project.version} diff --git a/pom.xml b/pom.xml index e6f3709c4cfe..4d704dc9b448 100644 --- a/pom.xml +++ b/pom.xml @@ -495,6 +495,70 @@ gluten-uniffle + + + + org.codehaus.mojo + build-helper-maven-plugin + + + add-uniffle-sources + generate-sources + + add-source + + + + ${project.basedir}/src-uniffle/main/scala + ${project.basedir}/src-uniffle/main/java + + + + + add-uniffle-resources + generate-resources + + add-resource + + + + + ${project.basedir}/src-uniffle/main/resources + + + + + + add-uniffle-test-sources + generate-test-sources + + add-test-source + + + + ${project.basedir}/src-uniffle/test/scala + ${project.basedir}/src-uniffle/test/java + + + + + add-uniffle-test-resources + generate-test-resources + + add-test-resource + + + + + ${project.basedir}/src-uniffle/test/resources + + + + + + + + delta From b572715e28b6817f51f3f0813aafae42e40fb551 Mon Sep 17 00:00:00 2001 From: zhaokuo Date: Tue, 17 Dec 2024 12:19:13 +0800 Subject: [PATCH 09/14] [GLUTEN-8128][VL] Retry borrowing when granted size is less than requested in multi-slot and shared mode (#8132) --- .../memory/memtarget/MemoryTargetVisitor.java | 2 + .../memory/memtarget/MemoryTargets.java | 19 ++- .../memtarget/RetryOnOomMemoryTarget.java | 115 ++++++++++++++++++ .../apache/spark/memory/SparkMemoryUtil.scala | 4 + 4 files changed, 136 insertions(+), 4 deletions(-) create mode 100644 gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargetVisitor.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargetVisitor.java index e58dbb295b08..a42a51e0ce4e 100644 --- 
a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargetVisitor.java +++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargetVisitor.java @@ -35,4 +35,6 @@ public interface MemoryTargetVisitor { T visit(NoopMemoryTarget noopMemoryTarget); T visit(DynamicOffHeapSizingMemoryTarget dynamicOffHeapSizingMemoryTarget); + + T visit(RetryOnOomMemoryTarget retryOnOomMemoryTarget); } diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargets.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargets.java index 6f7cc9bd9c9c..c0f74c7990d1 100644 --- a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargets.java +++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/MemoryTargets.java @@ -20,8 +20,10 @@ import org.apache.gluten.memory.MemoryUsageStatsBuilder; import org.apache.gluten.memory.memtarget.spark.TreeMemoryConsumers; +import org.apache.spark.SparkEnv; import org.apache.spark.annotation.Experimental; import org.apache.spark.memory.TaskMemoryManager; +import org.apache.spark.util.SparkResourceUtil; import java.util.Map; @@ -43,6 +45,14 @@ public static MemoryTarget overAcquire( return new OverAcquire(target, overTarget, overAcquiredRatio); } + public static TreeMemoryTarget retrySpillOnOom(TreeMemoryTarget target) { + SparkEnv env = SparkEnv.get(); + if (env != null && env.conf() != null && SparkResourceUtil.getTaskSlots(env.conf()) > 1) { + return new RetryOnOomMemoryTarget(target); + } + return target; + } + @Experimental public static MemoryTarget dynamicOffHeapSizingIfEnabled(MemoryTarget memoryTarget) { if (GlutenConfig.getConf().dynamicOffHeapSizingEnabled()) { @@ -59,11 +69,12 @@ public static TreeMemoryTarget newConsumer( Map virtualChildren) { final TreeMemoryConsumers.Factory factory; if (GlutenConfig.getConf().memoryIsolation()) { - factory = TreeMemoryConsumers.isolated(); + return TreeMemoryConsumers.isolated().newConsumer(tmm, name, spiller, virtualChildren); } else { - factory = TreeMemoryConsumers.shared(); + // Retry of spilling is needed in shared mode because the maxMemoryPerTask of Vanilla Spark + // ExecutionMemoryPool is dynamic when with multi-slot config. + return MemoryTargets.retrySpillOnOom( + TreeMemoryConsumers.shared().newConsumer(tmm, name, spiller, virtualChildren)); } - - return factory.newConsumer(tmm, name, spiller, virtualChildren); } } diff --git a/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java new file mode 100644 index 000000000000..1a5388d0d187 --- /dev/null +++ b/gluten-core/src/main/java/org/apache/gluten/memory/memtarget/RetryOnOomMemoryTarget.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.gluten.memory.memtarget; + +import org.apache.gluten.memory.MemoryUsageStatsBuilder; +import org.apache.gluten.proto.MemoryUsageStats; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; + +public class RetryOnOomMemoryTarget implements TreeMemoryTarget { + private static final Logger LOGGER = LoggerFactory.getLogger(RetryOnOomMemoryTarget.class); + private final TreeMemoryTarget target; + + RetryOnOomMemoryTarget(TreeMemoryTarget target) { + this.target = target; + } + + @Override + public long borrow(long size) { + long granted = target.borrow(size); + if (granted < size) { + LOGGER.info("Retrying spill require:{} got:{}", size, granted); + final long spilled = retryingSpill(Long.MAX_VALUE); + final long remaining = size - granted; + if (spilled >= remaining) { + granted += target.borrow(remaining); + } + LOGGER.info("Retrying spill spilled:{} final granted:{}", spilled, granted); + } + return granted; + } + + private long retryingSpill(long size) { + TreeMemoryTarget rootTarget = target; + while (true) { + try { + rootTarget = rootTarget.parent(); + } catch (IllegalStateException e) { + // Reached the root node + break; + } + } + return TreeMemoryTargets.spillTree(rootTarget, size); + } + + @Override + public long repay(long size) { + return target.repay(size); + } + + @Override + public long usedBytes() { + return target.usedBytes(); + } + + @Override + public T accept(MemoryTargetVisitor visitor) { + return visitor.visit(this); + } + + @Override + public String name() { + return target.name(); + } + + @Override + public MemoryUsageStats stats() { + return target.stats(); + } + + @Override + public TreeMemoryTarget newChild( + String name, + long capacity, + Spiller spiller, + Map virtualChildren) { + return target.newChild(name, capacity, spiller, virtualChildren); + } + + @Override + public Map children() { + return target.children(); + } + + @Override + public TreeMemoryTarget parent() { + return target.parent(); + } + + @Override + public Spiller getNodeSpiller() { + return target.getNodeSpiller(); + } + + public TreeMemoryTarget target() { + return target; + } +} diff --git a/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala b/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala index d221fafce418..637ef8b22fd4 100644 --- a/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala +++ b/gluten-core/src/main/scala/org/apache/spark/memory/SparkMemoryUtil.scala @@ -131,6 +131,10 @@ object SparkMemoryUtil { dynamicOffHeapSizingMemoryTarget: DynamicOffHeapSizingMemoryTarget): String = { dynamicOffHeapSizingMemoryTarget.delegated().accept(this) } + + override def visit(retryOnOomMemoryTarget: RetryOnOomMemoryTarget): String = { + retryOnOomMemoryTarget.target().accept(this) + } }) } From 36f0a8fc75d08d409ffa538af8cc4781f97d15d0 Mon Sep 17 00:00:00 2001 From: Mingliang Zhu Date: Tue, 17 Dec 2024 14:32:24 +0800 Subject: [PATCH 10/14] [GLUTEN-8215][VL] Support cast timestamp to date (#8212) --- .../gluten/execution/MiscOperatorSuite.scala | 7 ++ .../SubstraitToVeloxPlanValidator.cc | 11 ++- .../utils/velox/VeloxTestSettings.scala | 3 + .../spark/sql/GlutenDateFunctionsSuite.scala | 89 +++++++++++++++++++ .../utils/velox/VeloxTestSettings.scala | 3 + .../spark/sql/GlutenDateFunctionsSuite.scala | 89 +++++++++++++++++++ .../utils/velox/VeloxTestSettings.scala | 3 + 
.../spark/sql/GlutenDateFunctionsSuite.scala | 89 +++++++++++++++++++ .../expressions/GlutenCastSuite.scala | 15 ++-- .../utils/velox/VeloxTestSettings.scala | 3 + .../spark/sql/GlutenDateFunctionsSuite.scala | 89 +++++++++++++++++++ 11 files changed, 388 insertions(+), 13 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala index 8063a5d12207..989def88e70c 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/MiscOperatorSuite.scala @@ -1791,6 +1791,13 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa assert(plan2.find(_.isInstanceOf[ProjectExecTransformer]).isDefined) } + test("cast timestamp to date") { + val query = "select cast(ts as date) from values (timestamp'2024-01-01 00:00:00') as tab(ts)" + runQueryAndCompare(query) { + checkGlutenOperatorMatch[ProjectExecTransformer] + } + } + test("timestamp broadcast join") { spark.range(0, 5).createOrReplaceTempView("right") spark.sql("SELECT id, timestamp_micros(id) as ts from right").createOrReplaceTempView("left") diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc index 84dfe68e2d22..996b3bdce033 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc @@ -299,10 +299,13 @@ bool SubstraitToVeloxPlanValidator::validateCast( case TypeKind::VARBINARY: LOG_VALIDATION_MSG("Invalid input type in casting: ARRAY/MAP/ROW/VARBINARY."); return false; - case TypeKind::TIMESTAMP: { - LOG_VALIDATION_MSG("Casting from TIMESTAMP is not supported or has incorrect result."); - return false; - } + case TypeKind::TIMESTAMP: + // Only support cast timestamp to date + if (!toType->isDate()) { + LOG_VALIDATION_MSG( + "Casting from TIMESTAMP to " + toType->toString() + " is not supported or has incorrect result."); + return false; + } default: { } } diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 15495270a189..2c6b882850c4 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -265,6 +265,9 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_timestamp") // Legacy mode is not supported, assuming this mode is not commonly used. .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp") + // Legacy mode is not supported and velox getTimestamp function does not throw + // exception when format is "yyyy-dd-aa". 
+ .exclude("function to_date") enableSuite[GlutenDataFrameFunctionsSuite] // blocked by Velox-5768 .exclude("aggregate function - array for primitive type containing null") diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index 8d1f7320dd42..5ddfe6fc1ff3 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -248,4 +248,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra } } } + + testGluten("function to_date") { + val d1 = Date.valueOf("2015-07-22") + val d2 = Date.valueOf("2015-07-01") + val d3 = Date.valueOf("2014-12-31") + val t1 = Timestamp.valueOf("2015-07-22 10:00:00") + val t2 = Timestamp.valueOf("2014-12-31 23:59:59") + val t3 = Timestamp.valueOf("2014-12-31 23:59:59") + val s1 = "2015-07-22 10:00:00" + val s2 = "2014-12-31" + val s3 = "2014-31-12" + val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s") + + checkAnswer( + df.select(to_date(col("t"))), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("d"))), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("s"))), + Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null))) + + checkAnswer( + df.selectExpr("to_date(t)"), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.selectExpr("to_date(d)"), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.selectExpr("to_date(s)"), + Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null))) + + // now with format + checkAnswer( + df.select(to_date(col("t"), "yyyy-MM-dd")), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("d"), "yyyy-MM-dd")), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key + withSQLConf(confKey -> "corrected") { + checkAnswer( + df.select(to_date(col("s"), "yyyy-MM-dd")), + Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null))) + } + // legacyParserPolicy is not respected by Gluten. + // withSQLConf(confKey -> "exception") { + // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd"))) + // } + + // now switch format + checkAnswer( + df.select(to_date(col("s"), "yyyy-dd-MM")), + Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31")))) + + // invalid format + checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null))) + // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa". 
+ // val e = + // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect()) + // assert(e.getCause.isInstanceOf[IllegalArgumentException]) + // assert( + // e.getMessage.contains("You may get a different result due to the upgrading to Spark")) + + // February + val x1 = "2016-02-29" + val x2 = "2017-02-29" + val df1 = Seq(x1, x2).toDF("x") + checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil) + } } diff --git a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index 407b9c8b95cc..f83b91ede1cc 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -1084,6 +1084,9 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_timestamp") // Legacy mode is not supported, assuming this mode is not commonly used. .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp") + // Legacy mode is not supported and velox getTimestamp function does not throw + // exception when format is "yyyy-dd-aa". + .exclude("function to_date") enableSuite[GlutenDeprecatedAPISuite] enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff] enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn] diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index a946e6de4345..ae86c9d06e81 100644 --- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra } } } + + testGluten("function to_date") { + val d1 = Date.valueOf("2015-07-22") + val d2 = Date.valueOf("2015-07-01") + val d3 = Date.valueOf("2014-12-31") + val t1 = Timestamp.valueOf("2015-07-22 10:00:00") + val t2 = Timestamp.valueOf("2014-12-31 23:59:59") + val t3 = Timestamp.valueOf("2014-12-31 23:59:59") + val s1 = "2015-07-22 10:00:00" + val s2 = "2014-12-31" + val s3 = "2014-31-12" + val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s") + + checkAnswer( + df.select(to_date(col("t"))), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("d"))), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("s"))), + Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null))) + + checkAnswer( + df.selectExpr("to_date(t)"), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.selectExpr("to_date(d)"), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.selectExpr("to_date(s)"), + Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null))) + + // now with format + checkAnswer( + df.select(to_date(col("t"), "yyyy-MM-dd")), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + 
df.select(to_date(col("d"), "yyyy-MM-dd")), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key + withSQLConf(confKey -> "corrected") { + checkAnswer( + df.select(to_date(col("s"), "yyyy-MM-dd")), + Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null))) + } + // legacyParserPolicy is not respected by Gluten. + // withSQLConf(confKey -> "exception") { + // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd"))) + // } + + // now switch format + checkAnswer( + df.select(to_date(col("s"), "yyyy-dd-MM")), + Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31")))) + + // invalid format + checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null))) + // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa". + // val e = + // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect()) + // assert(e.getCause.isInstanceOf[IllegalArgumentException]) + // assert( + // e.getMessage.contains("You may get a different result due to the upgrading to Spark")) + + // February + val x1 = "2016-02-29" + val x2 = "2017-02-29" + val df1 = Seq(x1, x2).toDF("x") + checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil) + } } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index dbb01fbe7067..b0446d3ca7b6 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -1101,6 +1101,9 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_timestamp") // Legacy mode is not supported, assuming this mode is not commonly used. .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp") + // Legacy mode is not supported and velox getTimestamp function does not throw + // exception when format is "yyyy-dd-aa". 
+ .exclude("function to_date") enableSuite[GlutenDeprecatedAPISuite] enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff] enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn] diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index a946e6de4345..ae86c9d06e81 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra } } } + + testGluten("function to_date") { + val d1 = Date.valueOf("2015-07-22") + val d2 = Date.valueOf("2015-07-01") + val d3 = Date.valueOf("2014-12-31") + val t1 = Timestamp.valueOf("2015-07-22 10:00:00") + val t2 = Timestamp.valueOf("2014-12-31 23:59:59") + val t3 = Timestamp.valueOf("2014-12-31 23:59:59") + val s1 = "2015-07-22 10:00:00" + val s2 = "2014-12-31" + val s3 = "2014-31-12" + val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s") + + checkAnswer( + df.select(to_date(col("t"))), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("d"))), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("s"))), + Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null))) + + checkAnswer( + df.selectExpr("to_date(t)"), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.selectExpr("to_date(d)"), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.selectExpr("to_date(s)"), + Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null))) + + // now with format + checkAnswer( + df.select(to_date(col("t"), "yyyy-MM-dd")), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("d"), "yyyy-MM-dd")), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key + withSQLConf(confKey -> "corrected") { + checkAnswer( + df.select(to_date(col("s"), "yyyy-MM-dd")), + Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null))) + } + // legacyParserPolicy is not respected by Gluten. + // withSQLConf(confKey -> "exception") { + // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd"))) + // } + + // now switch format + checkAnswer( + df.select(to_date(col("s"), "yyyy-dd-MM")), + Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31")))) + + // invalid format + checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null))) + // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa". 
+ // val e = + // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect()) + // assert(e.getCause.isInstanceOf[IllegalArgumentException]) + // assert( + // e.getMessage.contains("You may get a different result due to the upgrading to Spark")) + + // February + val x1 = "2016-02-29" + val x2 = "2017-02-29" + val df1 = Seq(x1, x2).toDF("x") + checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil) + } } diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala index b8ac906d8076..f2a83bf234a9 100644 --- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala +++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/catalyst/expressions/GlutenCastSuite.scala @@ -40,15 +40,12 @@ class GlutenCastSuite extends CastSuiteBase with GlutenTestsTrait { testGluten("missing cases - from boolean") { (DataTypeTestUtils.numericTypeWithoutDecimal + BooleanType).foreach { - t => - t match { - case BooleanType => - checkEvaluation(cast(cast(true, BooleanType), t), true) - checkEvaluation(cast(cast(false, BooleanType), t), false) - case _ => - checkEvaluation(cast(cast(true, BooleanType), t), 1) - checkEvaluation(cast(cast(false, BooleanType), t), 0) - } + case t @ BooleanType => + checkEvaluation(cast(cast(true, BooleanType), t), true) + checkEvaluation(cast(cast(false, BooleanType), t), false) + case t => + checkEvaluation(cast(cast(true, BooleanType), t), 1) + checkEvaluation(cast(cast(false, BooleanType), t), 0) } } diff --git a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala index f5a1a076956e..a01d0cb4b331 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala @@ -1123,6 +1123,9 @@ class VeloxTestSettings extends BackendTestSettings { .exclude("to_timestamp") // Legacy mode is not supported, assuming this mode is not commonly used. .exclude("SPARK-30668: use legacy timestamp parser in to_timestamp") + // Legacy mode is not supported and velox getTimestamp function does not throw + // exception when format is "yyyy-dd-aa". 
+ .exclude("function to_date") enableSuite[GlutenDeprecatedAPISuite] enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOff] enableSuite[GlutenDynamicPartitionPruningV1SuiteAEOn] diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala index a946e6de4345..ae86c9d06e81 100644 --- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala +++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDateFunctionsSuite.scala @@ -246,4 +246,93 @@ class GlutenDateFunctionsSuite extends DateFunctionsSuite with GlutenSQLTestsTra } } } + + testGluten("function to_date") { + val d1 = Date.valueOf("2015-07-22") + val d2 = Date.valueOf("2015-07-01") + val d3 = Date.valueOf("2014-12-31") + val t1 = Timestamp.valueOf("2015-07-22 10:00:00") + val t2 = Timestamp.valueOf("2014-12-31 23:59:59") + val t3 = Timestamp.valueOf("2014-12-31 23:59:59") + val s1 = "2015-07-22 10:00:00" + val s2 = "2014-12-31" + val s3 = "2014-31-12" + val df = Seq((d1, t1, s1), (d2, t2, s2), (d3, t3, s3)).toDF("d", "t", "s") + + checkAnswer( + df.select(to_date(col("t"))), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("d"))), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("s"))), + Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null))) + + checkAnswer( + df.selectExpr("to_date(t)"), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.selectExpr("to_date(d)"), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.selectExpr("to_date(s)"), + Seq(Row(Date.valueOf("2015-07-22")), Row(Date.valueOf("2014-12-31")), Row(null))) + + // now with format + checkAnswer( + df.select(to_date(col("t"), "yyyy-MM-dd")), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2014-12-31")), + Row(Date.valueOf("2014-12-31")))) + checkAnswer( + df.select(to_date(col("d"), "yyyy-MM-dd")), + Seq( + Row(Date.valueOf("2015-07-22")), + Row(Date.valueOf("2015-07-01")), + Row(Date.valueOf("2014-12-31")))) + val confKey = SQLConf.LEGACY_TIME_PARSER_POLICY.key + withSQLConf(confKey -> "corrected") { + checkAnswer( + df.select(to_date(col("s"), "yyyy-MM-dd")), + Seq(Row(null), Row(Date.valueOf("2014-12-31")), Row(null))) + } + // legacyParserPolicy is not respected by Gluten. + // withSQLConf(confKey -> "exception") { + // checkExceptionMessage(df.select(to_date(col("s"), "yyyy-MM-dd"))) + // } + + // now switch format + checkAnswer( + df.select(to_date(col("s"), "yyyy-dd-MM")), + Seq(Row(null), Row(null), Row(Date.valueOf("2014-12-31")))) + + // invalid format + checkAnswer(df.select(to_date(col("s"), "yyyy-hh-MM")), Seq(Row(null), Row(null), Row(null))) + // velox getTimestamp function does not throw exception when format is "yyyy-dd-aa". 
+ // val e = + // intercept[SparkUpgradeException](df.select(to_date(col("s"), "yyyy-dd-aa")).collect()) + // assert(e.getCause.isInstanceOf[IllegalArgumentException]) + // assert( + // e.getMessage.contains("You may get a different result due to the upgrading to Spark")) + + // February + val x1 = "2016-02-29" + val x2 = "2017-02-29" + val df1 = Seq(x1, x2).toDF("x") + checkAnswer(df1.select(to_date(col("x"))), Row(Date.valueOf("2016-02-29")) :: Row(null) :: Nil) + } } From e94e01d202ac1da0005e48ac6ca815f584d1449a Mon Sep 17 00:00:00 2001 From: Terry Wang Date: Tue, 17 Dec 2024 16:59:08 +0800 Subject: [PATCH 11/14] [GLUTEN-8018][CORE] Introduce ApplyResourceProfileExec to apply resource profile for query stage (#8195) --- .../execution/ApplyResourceProfileExec.scala | 79 +++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 gluten-substrait/src/main/scala/org/apache/spark/sql/execution/ApplyResourceProfileExec.scala diff --git a/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/ApplyResourceProfileExec.scala b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/ApplyResourceProfileExec.scala new file mode 100644 index 000000000000..17640f461213 --- /dev/null +++ b/gluten-substrait/src/main/scala/org/apache/spark/sql/execution/ApplyResourceProfileExec.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql.execution + +import org.apache.gluten.execution.GlutenPlan +import org.apache.gluten.extension.columnar.transition.Convention + +import org.apache.spark.annotation.Experimental +import org.apache.spark.rdd.RDD +import org.apache.spark.resource.ResourceProfile +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, SortOrder} +import org.apache.spark.sql.catalyst.plans.physical.{Distribution, Partitioning} +import org.apache.spark.sql.vectorized.ColumnarBatch + +/** + * Used to apply specified resource profile for the whole stage. + * @param child + * @param resourceProfile + * resource profile specified for child belong stage. 
+ */ +@Experimental +case class ApplyResourceProfileExec(child: SparkPlan, resourceProfile: ResourceProfile) + extends UnaryExecNode + with GlutenPlan { + + override def batchType(): Convention.BatchType = { + Convention.get(child).batchType + } + + override def rowType0(): Convention.RowType = { + Convention.get(child).rowType + } + + override def outputPartitioning: Partitioning = { + child.outputPartitioning + } + + override def requiredChildDistribution: scala.Seq[Distribution] = { + child.requiredChildDistribution + } + + override def outputOrdering: scala.Seq[SortOrder] = { + child.outputOrdering + } + + override def requiredChildOrdering: scala.Seq[scala.Seq[SortOrder]] = { + child.requiredChildOrdering + } + + override protected def doExecute(): RDD[InternalRow] = { + log.info(s"Apply $resourceProfile for plan ${child.nodeName}") + child.execute.withResources(resourceProfile) + } + + override protected def doExecuteColumnar(): RDD[ColumnarBatch] = { + log.info(s"Apply $resourceProfile for columnar plan ${child.nodeName}") + child.executeColumnar.withResources(resourceProfile) + } + + override def output: scala.Seq[Attribute] = child.output + + override protected def withNewChildInternal(newChild: SparkPlan): ApplyResourceProfileExec = + copy(child = newChild) +} From 41aa153ee46c7fced09182de3543ba73bbc5c531 Mon Sep 17 00:00:00 2001 From: Chang chen Date: Tue, 17 Dec 2024 20:38:25 +0800 Subject: [PATCH 12/14] [CH] Hotfix to #8212 (#8259) --- .../apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala index 36d5b5177c6b..16879489d29e 100644 --- a/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala +++ b/gluten-ut/spark32/src/test/scala/org/apache/gluten/utils/clickhouse/ClickHouseTestSettings.scala @@ -286,6 +286,7 @@ class ClickHouseTestSettings extends BackendTestSettings { .exclude("groupBy.as") enableSuite[GlutenDateFunctionsSuite] .exclude("function to_date") + .excludeGlutenTest("function to_date") .exclude("unix_timestamp") .exclude("to_unix_timestamp") .exclude("to_timestamp") From 2cb18cdb94ece3e47613bb47fbcd068ee037c7bf Mon Sep 17 00:00:00 2001 From: Terry Wang Date: Tue, 17 Dec 2024 21:20:19 +0800 Subject: [PATCH 13/14] [DOC] Update HowTo.md to fix outdated link and test script location (#8255) --- docs/developers/HowTo.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/developers/HowTo.md b/docs/developers/HowTo.md index 22ad3e30efc7..dce32d55c02d 100644 --- a/docs/developers/HowTo.md +++ b/docs/developers/HowTo.md @@ -134,16 +134,16 @@ to let it override the corresponding C standard functions entirely. It may help Now, both Parquet and DWRF format files are supported, related scripts and files are under the directory of `${GLUTEN_HOME}/backends-velox/workload/tpch`. The file `README.md` under `${GLUTEN_HOME}/backends-velox/workload/tpch` offers some useful help, but it's still not enough and exact. 
-One way of run TPC-H test is to run velox-be by workflow, you can refer to [velox_be.yml](https://github.com/apache/incubator-gluten/blob/main/.github/workflows/velox_be.yml#L90) +One way of run TPC-H test is to run velox-be by workflow, you can refer to [velox_backend.yml](https://github.com/apache/incubator-gluten/blob/main/.github/workflows/velox_backend.yml#L280) Here we will explain how to run TPC-H on Velox backend with the Parquet file format. 1. First, prepare the datasets, you have two choices. - - One way, generate Parquet datasets using the script under `${GLUTEN_HOME}/backends-velox/workload/tpch/gen_data/parquet_dataset`, you can get help from the above + - One way, generate Parquet datasets using the script under `${GLUTEN_HOME}/tools/workload/tpch/gen_data/parquet_dataset`, you can get help from the above -mentioned `README.md`. - The other way, using the small dataset under `${GLUTEN_HOME}/backends-velox/src/test/resources/tpch-data-parquet` directly, if you just want to make simple TPC-H testing, this dataset is a good choice. 2. Second, run TPC-H on Velox backend testing. - - Modify `${GLUTEN_HOME}/backends-velox/workload/tpch/run_tpch/tpch_parquet.scala`. + - Modify `${GLUTEN_HOME}/tools/workload/tpch/run_tpch/tpch_parquet.scala`. - Set `var parquet_file_path` to correct directory. If using the small dataset directly in the step one, then modify it as below: ```scala @@ -156,12 +156,12 @@ Here we will explain how to run TPC-H on Velox backend with the Parquet file for var gluten_root = "/home/gluten" ``` - - Modify `${GLUTEN_HOME}/backends-velox/workload/tpch/run_tpch/tpch_parquet.sh`. + - Modify `${GLUTEN_HOME}/tools/workload/tpch/run_tpch/tpch_parquet.sh`. - Set `GLUTEN_JAR` correctly. Please refer to the section of [Build Gluten with Velox Backend](../get-started/Velox.md/#2-build-gluten-with-velox-backend) - Set `SPARK_HOME` correctly. - Set the memory configurations appropriately. - Execute `tpch_parquet.sh` using the below command. - - `cd ${GLUTEN_HOME}/backends-velox/workload/tpch/run_tpch/` + - `cd ${GLUTEN_HOME}/tools/workload/tpch/run_tpch/` - `./tpch_parquet.sh` # How to run TPC-DS From ea0e175d69f9c41bf2b5e49e2d338375812f433b Mon Sep 17 00:00:00 2001 From: Gluten Performance Bot <137994563+GlutenPerfBot@users.noreply.github.com> Date: Tue, 17 Dec 2024 21:50:40 +0800 Subject: [PATCH 14/14] [GLUTEN-6887][VL] Daily Update Velox Version (2024_12_16) (#8238) Upstream Velox's New Commits: 2c9a42ef9 by Yenda Li, fix: Classification functions should return [] instead null (11864) 12942c1eb by Yenda Li, feat: add classification functions (11792) 22ba7ec86 by Ge Gao, refactor: Add arg cpuExecutor to ConnectorFactory::newConnector (11861) --- ep/build-velox/src/get_velox.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 8f3abf6472bd..60c3adc0284e 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_12_15 +VELOX_BRANCH=2024_12_16 VELOX_HOME="" OS=`uname -s`