From 4ec068a5ebb1e17fccea18bac6c2e4a080c36597 Mon Sep 17 00:00:00 2001 From: ulysses-you Date: Thu, 4 Jul 2024 19:19:23 +0800 Subject: [PATCH] Deduplicate sorting keys --- .../apache/gluten/execution/TestOperator.scala | 6 ++++++ cpp/velox/substrait/SubstraitToVeloxPlan.cc | 16 +++++++--------- cpp/velox/substrait/SubstraitToVeloxPlan.h | 1 + 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala index c6e72197d6c7..d6b5efaf61de 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala @@ -1911,4 +1911,10 @@ class TestOperator extends VeloxWholeStageTransformerSuite with AdaptiveSparkPla val resultLength = df.collect().length assert(resultLength > 25000 && resultLength < 35000) } + + test("Deduplicate sorting keys") { + runQueryAndCompare("select * from lineitem order by l_orderkey, l_orderkey") { + checkGlutenOperatorMatch[SortExecTransformer] + } + } } diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index 73047b2f4907..34710c35a40d 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -1046,17 +1046,15 @@ SubstraitToVeloxPlanConverter::processSortField( const RowTypePtr& inputType) { std::vector sortingKeys; std::vector sortingOrders; - sortingKeys.reserve(sortFields.size()); - sortingOrders.reserve(sortFields.size()); - + std::unordered_set uniqueKeys; for (const auto& sort : sortFields) { - sortingOrders.emplace_back(toSortOrder(sort)); - - if (sort.has_expr()) { - auto expression = exprConverter_->toVeloxExpr(sort.expr(), inputType); - auto fieldExpr = std::dynamic_pointer_cast(expression); - VELOX_USER_CHECK_NOT_NULL(fieldExpr, "Sort Operator only supports field sorting key"); + GLUTEN_CHECK(sort.has_expr(), "Sort field must have expr"); + auto expression = exprConverter_->toVeloxExpr(sort.expr(), inputType); + auto fieldExpr = std::dynamic_pointer_cast(expression); + VELOX_USER_CHECK_NOT_NULL(fieldExpr, "Sort Operator only supports field sorting key"); + if (uniqueKeys.insert(fieldExpr->name()).second) { sortingKeys.emplace_back(fieldExpr); + sortingOrders.emplace_back(toSortOrder(sort)); } } return {sortingKeys, sortingOrders}; diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h b/cpp/velox/substrait/SubstraitToVeloxPlan.h index 1535b1f85f51..1f2f39f51ae8 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.h +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h @@ -198,6 +198,7 @@ class SubstraitToVeloxPlanConverter { /// Helper Function to convert Substrait sortField to Velox sortingKeys and /// sortingOrders. + /// Note that, this method would deduplicate the sorting keys which have the same field name. std::pair, std::vector> processSortField( const ::google::protobuf::RepeatedPtrField<::substrait::SortField>& sortField, const RowTypePtr& inputType);