diff --git a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala index 38c15fde7cdc..4496d893fcd7 100644 --- a/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala +++ b/backends-clickhouse/src/main/scala/org/apache/spark/sql/execution/utils/CHExecUtil.scala @@ -319,7 +319,7 @@ object CHExecUtil extends Logging { // Thus in Columnar Shuffle we never use the "key" part. val isOrderSensitive = isRoundRobin && !SQLConf.get.sortBeforeRepartition - val rddWithpartitionKey: RDD[Product2[Int, ColumnarBatch]] = + val rddWithPartitionKey: RDD[Product2[Int, ColumnarBatch]] = if ( GlutenConfig.getConf.isUseColumnarShuffleManager || GlutenConfig.getConf.isUseCelebornShuffleManager @@ -345,7 +345,7 @@ object CHExecUtil extends Logging { val dependency = new ColumnarShuffleDependency[Int, ColumnarBatch, ColumnarBatch]( - rddWithpartitionKey, + rddWithPartitionKey, new PartitionIdPassthrough(newPartitioning.numPartitions), serializer, shuffleWriterProcessor = ShuffleExchangeExec.createShuffleWriteProcessor(writeMetrics), diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala similarity index 94% rename from backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala rename to backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala index 4130ea348fb4..11d5290c0d0e 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseFunctionSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseFunctionSuite.scala @@ -14,9 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.gluten.execution +package org.apache.gluten.execution.compatibility import org.apache.gluten.GlutenConfig +import org.apache.gluten.execution.GlutenClickHouseTPCHAbstractSuite import org.apache.gluten.utils.UTSystemParameters import org.apache.spark.SparkConf @@ -78,6 +79,18 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite { } } + test("https://github.com/apache/incubator-gluten/issues/6938") { + val testSQL = + s""" + |select * from ( + | select 1 as x, r_name as y, 's' as z from region + | union all + | select 2 as x, n_name as y, null as z from nation + |) order by y,x,z + |""".stripMargin + runQueryAndCompare(testSQL)(_ => ()) + } + test("Support In list option contains non-foldable expression") { runQueryAndCompare( """ @@ -217,5 +230,4 @@ class GlutenClickhouseFunctionSuite extends GlutenClickHouseTPCHAbstractSuite { ) } } - } diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseStringFunctionsSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseStringFunctionsSuite.scala similarity index 97% rename from backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseStringFunctionsSuite.scala rename to backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseStringFunctionsSuite.scala index 163a8fedab7e..98c0c2b35f20 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/GlutenClickhouseStringFunctionsSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/compatibility/GlutenClickhouseStringFunctionsSuite.scala @@ -14,7 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.gluten.execution +package org.apache.gluten.execution.compatibility + +import org.apache.gluten.execution.GlutenClickHouseWholeStageTransformerSuite import org.apache.spark.SparkConf diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala index 0a8d1729c810..a2f897d37811 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/execution/parquet/GlutenParquetFilterSuite.scala @@ -17,13 +17,12 @@ package org.apache.gluten.execution.parquet import org.apache.gluten.execution.{FileSourceScanExecTransformer, GlutenClickHouseWholeStageTransformerSuite} -import org.apache.gluten.test.GlutenSQLTestUtils +import org.apache.gluten.test.{GlutenSQLTestUtils, GlutenTPCHBase} import org.apache.spark.SparkConf import org.apache.spark.internal.Logging import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ -import org.apache.spark.sql.gluten.test.GlutenTPCHBase import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.types.Decimal @@ -385,13 +384,13 @@ class GlutenParquetFilterSuite 'p_size.int >= 1, 'p_partkey.long.isNotNull, ('p_brand.string === "Brand#12" && - ('p_container.string.in("SM CASE", "SM BOX", "SM PACK", "SM PKG")) && + 'p_container.string.in("SM CASE", "SM BOX", "SM PACK", "SM PKG") && 'p_size.int <= 5) || ('p_brand.string === "Brand#23" && - ('p_container.string.in("MED BAG", "MED BOX", "MED PKG", "MED PACK")) && + 'p_container.string.in("MED BAG", "MED BOX", "MED PKG", "MED PACK") && 'p_size.int <= 10) || ('p_brand.string === "Brand#34" && - ('p_container.string.in("LG CASE", "LG BOX", "LG PACK", "LG PKG")) && + 'p_container.string.in("LG CASE", "LG BOX", "LG PACK", "LG PKG") && 'p_size.int <= 15) ) ), diff --git a/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala b/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala index 685f185ac81f..5a07ebab15b5 100644 --- a/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala +++ b/backends-clickhouse/src/test/scala/org/apache/gluten/test/GlutenTPCHBase.scala @@ -14,9 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.sql.gluten.test - -import org.apache.gluten.test.GlutenTPCBase +package org.apache.gluten.test import org.apache.spark.sql.catalyst.TableIdentifier @@ -51,7 +49,11 @@ trait GlutenTPCHBase extends GlutenTPCBase { override def dropTables(): Unit = { tpchCreateTable.keys.foreach { - tableName => spark.sessionState.catalog.dropTable(TableIdentifier(tableName), true, true) + tableName => + spark.sessionState.catalog.dropTable( + TableIdentifier(tableName), + ignoreIfNotExists = true, + purge = true) } } diff --git a/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp b/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp index b9bd02c3ef68..272a6f2f6bee 100644 --- a/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp +++ b/cpp-ch/local-engine/Shuffle/SelectorBuilder.cpp @@ -367,19 +367,31 @@ void RangeSelectorBuilder::computePartitionIdByBinarySearch(DB::Block & block, D selector.emplace_back(selected_partition); } } +namespace { +int doCompareAt(const ColumnPtr & lhs, size_t n, size_t m, const IColumn & rhs, int nan_direction_hint) +{ + if (const auto * l_const = typeid_cast(lhs.get())) + { + // we know rhs never be Const + chassert(l_const->getDataType() == rhs.getDataType()); + return l_const->getDataColumn().compareAt(0, m, rhs, nan_direction_hint); + } + return lhs->compareAt(n, m, rhs, nan_direction_hint); +} +} int RangeSelectorBuilder::compareRow( const DB::Columns & columns, const std::vector & required_columns, size_t row, const DB::Columns & bound_columns, - size_t bound_row) + size_t bound_row) const { for (size_t i = 0, n = required_columns.size(); i < n; ++i) { auto lpos = required_columns[i]; auto rpos = i; - auto res = columns[lpos]->compareAt(row, bound_row, *bound_columns[rpos], sort_descriptions[i].nulls_direction) + auto res = doCompareAt(columns[lpos], row, bound_row, *bound_columns[rpos], sort_descriptions[i].nulls_direction) * sort_descriptions[i].direction; if (res != 0) return res; diff --git a/cpp-ch/local-engine/Shuffle/SelectorBuilder.h b/cpp-ch/local-engine/Shuffle/SelectorBuilder.h index 97894daa3c14..7349849f538e 100644 --- a/cpp-ch/local-engine/Shuffle/SelectorBuilder.h +++ b/cpp-ch/local-engine/Shuffle/SelectorBuilder.h @@ -118,7 +118,7 @@ class RangeSelectorBuilder : public SelectorBuilder const std::vector & required_columns, size_t row, const DB::Columns & bound_columns, - size_t bound_row); + size_t bound_row) const; int binarySearchBound( const DB::Columns & bound_columns,