diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala index c010b9128ce1c..230fc565d9eb3 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/TestOperator.scala @@ -1911,4 +1911,10 @@ class TestOperator extends VeloxWholeStageTransformerSuite with AdaptiveSparkPla val resultLength = df.collect().length assert(resultLength > 25000 && resultLength < 35000) } + + test("Deduplicate sorting keys") { + runQueryAndCompare("select * from lineitem order by l_orderkey, l_orderkey") { + checkGlutenOperatorMatch[SortExecTransformer] + } + } } diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index 73047b2f49073..34710c35a40db 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -1046,17 +1046,15 @@ SubstraitToVeloxPlanConverter::processSortField( const RowTypePtr& inputType) { std::vector sortingKeys; std::vector sortingOrders; - sortingKeys.reserve(sortFields.size()); - sortingOrders.reserve(sortFields.size()); - + std::unordered_set uniqueKeys; for (const auto& sort : sortFields) { - sortingOrders.emplace_back(toSortOrder(sort)); - - if (sort.has_expr()) { - auto expression = exprConverter_->toVeloxExpr(sort.expr(), inputType); - auto fieldExpr = std::dynamic_pointer_cast(expression); - VELOX_USER_CHECK_NOT_NULL(fieldExpr, "Sort Operator only supports field sorting key"); + GLUTEN_CHECK(sort.has_expr(), "Sort field must have expr"); + auto expression = exprConverter_->toVeloxExpr(sort.expr(), inputType); + auto fieldExpr = std::dynamic_pointer_cast(expression); + VELOX_USER_CHECK_NOT_NULL(fieldExpr, "Sort Operator only supports field sorting key"); + if (uniqueKeys.insert(fieldExpr->name()).second) { sortingKeys.emplace_back(fieldExpr); + sortingOrders.emplace_back(toSortOrder(sort)); } } return {sortingKeys, sortingOrders}; diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h b/cpp/velox/substrait/SubstraitToVeloxPlan.h index 1535b1f85f51b..1f2f39f51ae85 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.h +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h @@ -198,6 +198,7 @@ class SubstraitToVeloxPlanConverter { /// Helper Function to convert Substrait sortField to Velox sortingKeys and /// sortingOrders. + /// Note that, this method would deduplicate the sorting keys which have the same field name. std::pair, std::vector> processSortField( const ::google::protobuf::RepeatedPtrField<::substrait::SortField>& sortField, const RowTypePtr& inputType); diff --git a/ep/build-velox/src/get_velox.sh b/ep/build-velox/src/get_velox.sh index 6cd62332a3ccf..078ea0a44a65a 100755 --- a/ep/build-velox/src/get_velox.sh +++ b/ep/build-velox/src/get_velox.sh @@ -17,7 +17,7 @@ set -exu VELOX_REPO=https://github.com/oap-project/velox.git -VELOX_BRANCH=2024_07_03 +VELOX_BRANCH=2024_07_04 VELOX_HOME="" #Set on run gluten on HDFS