From 6a27cf3b1ba1b127b2c485e0ea748c4372c8fd75 Mon Sep 17 00:00:00 2001 From: Jia Ke Date: Wed, 8 Nov 2023 08:26:45 +0000 Subject: [PATCH] Add duplicated sorting keys validation in TopNNode --- cpp/velox/substrait/SubstraitToVeloxPlan.h | 12 ++++---- .../SubstraitToVeloxPlanValidator.cc | 28 +++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h b/cpp/velox/substrait/SubstraitToVeloxPlan.h index 1485900af42fd..f8ad7d0727252 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.h +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h @@ -152,6 +152,12 @@ class SubstraitToVeloxPlanConverter { /// Get aggregation step from AggregateRel. core::AggregationNode::Step toAggregationStep(const ::substrait::AggregateRel& sAgg); + /// Helper Function to convert Substrait sortField to Velox sortingKeys and + /// sortingOrders. + std::pair, std::vector> processSortField( + const ::google::protobuf::RepeatedPtrField<::substrait::SortField>& sortField, + const RowTypePtr& inputType); + private: /// Integrate Substrait emit feature. Here a given 'substrait::RelCommon' /// is passed and check if emit is defined for this relation. Basically a @@ -328,12 +334,6 @@ class SubstraitToVeloxPlanConverter { std::vector values_; }; - /// Helper Function to convert Substrait sortField to Velox sortingKeys and - /// sortingOrders. - std::pair, std::vector> processSortField( - const ::google::protobuf::RepeatedPtrField<::substrait::SortField>& sortField, - const RowTypePtr& inputType); - /// Returns unique ID to use for plan node. Produces sequential numbers /// starting from zero. std::string nextPlanNodeId(); diff --git a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc index 2d4247f650c3e..352d23f667593 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc @@ -358,6 +358,34 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::FetchRel& fetchR logValidateMsg("native validation failed due to: Offset and count should be valid in FetchRel."); return false; } + + core::PlanNodePtr childNode; + // Check the input of fetchRel, if it's sortRel, we need to check whether the sorting key is duplicated. + ::substrait::SortRel sortRel; + bool topNFlag; + if (fetchRel.has_input()) { + topNFlag = fetchRel.input().has_sort(); + if (topNFlag) { + sortRel = fetchRel.input().sort(); + childNode = planConverter_.toVeloxPlan(sortRel.input()); + } else { + childNode = planConverter_.toVeloxPlan(fetchRel.input()); + } + } + + if (topNFlag) { + auto [sortingKeys, sortingOrders] = planConverter_.processSortField(sortRel.sorts(), childNode->outputType()); + + folly::F14FastSet sortingKeyNames; + for (const auto& sortingKey : sortingKeys) { + auto result = sortingKeyNames.insert(sortingKey->name()); + if (!result.second) { + logValidateMsg( + "native validation failed due to: if the input of fetchRel is a SortRel, we will convert it to a TopNNode. In Velox, it is important to ensure unique sorting keys. However, duplicate keys were found in this case."); + return false; + } + } + } return true; }