From 824ee53ef77f64f628fc899eac2ec177fbdb838e Mon Sep 17 00:00:00 2001 From: Rui Mo Date: Tue, 28 Nov 2023 09:55:58 +0800 Subject: [PATCH] [VL] Support IsNull filter pushdown (#3791) --- cpp/velox/substrait/SubstraitToVeloxPlan.cc | 57 +++++++++++++++------ cpp/velox/substrait/SubstraitToVeloxPlan.h | 38 +++++++++++--- 2 files changed, 71 insertions(+), 24 deletions(-) diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.cc b/cpp/velox/substrait/SubstraitToVeloxPlan.cc index 9c94f7d42a2c..5d0a8462603a 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.cc +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.cc @@ -94,6 +94,7 @@ std::string getMax() { // Substrait function names. const std::string sIsNotNull = "is_not_null"; +const std::string sIsNull = "is_null"; const std::string sGte = "gte"; const std::string sGt = "gt"; const std::string sLte = "lte"; @@ -1275,15 +1276,15 @@ bool SubstraitToVeloxPlanConverter::childrenFunctionsOnSameField( return false; } -bool SubstraitToVeloxPlanConverter::canPushdownCommonFunction( +bool SubstraitToVeloxPlanConverter::canPushdownFunction( const ::substrait::Expression_ScalarFunction& scalarFunction, const std::string& filterName, uint32_t& fieldIdx) { // Condtions can be pushed down. - static const std::unordered_set supportedCommonFunctions = {sIsNotNull, sGte, sGt, sLte, sLt, sEqual}; + static const std::unordered_set supportedFunctions = {sIsNotNull, sIsNull, sGte, sGt, sLte, sLt, sEqual}; bool canPushdown = false; - if (supportedCommonFunctions.find(filterName) != supportedCommonFunctions.end() && + if (supportedFunctions.find(filterName) != supportedFunctions.end() && fieldOrWithLiteral(scalarFunction.arguments(), fieldIdx)) { // The arg should be field or field with literal. canPushdown = true; @@ -1417,7 +1418,7 @@ void SubstraitToVeloxPlanConverter::separateFilters( } else { // Check if the condition is supported to be pushed down. uint32_t fieldIdx; - if (canPushdownCommonFunction(scalarFunction, filterName, fieldIdx) && + if (canPushdownFunction(scalarFunction, filterName, fieldIdx) && rangeRecorders.at(fieldIdx).setCertainRangeForFunction(filterName)) { subfieldFunctions.emplace_back(scalarFunction); } else { @@ -1465,6 +1466,12 @@ bool SubstraitToVeloxPlanConverter::RangeRecorder::setCertainRangeForFunction( // Is not null can always coexist with the other range. return true; } + } else if (functionName == sIsNull) { + if (reverse) { + return setCertainRangeForFunction(sIsNotNull, false, forOrRelation); + } else { + return setIsNull(); + } } else { return false; } @@ -1477,9 +1484,16 @@ void SubstraitToVeloxPlanConverter::setColumnFilterInfo( bool reverse) { if (filterName == sIsNotNull) { if (reverse) { - VELOX_NYI("Reverse not supported for filter name '{}'", filterName); + columnFilterInfo.setNull(); + } else { + columnFilterInfo.forbidsNull(); + } + } else if (filterName == sIsNull) { + if (reverse) { + columnFilterInfo.forbidsNull(); + } else { + columnFilterInfo.setNull(); } - columnFilterInfo.forbidsNull(); } else if (filterName == sGte) { if (reverse) { columnFilterInfo.setUpper(literalVariant, true); @@ -1548,11 +1562,11 @@ void SubstraitToVeloxPlanConverter::setFilterInfo( static const std::unordered_map functionRevertMap = { {sLt, sGt}, {sGt, sLt}, {sGte, sLte}, {sLte, sGte}}; - // Handle "123 < q1" type expression case + // Handle the case where literal is before the variable in a binary function, e.g. "123 < q1". if (typeCases.size() > 1 && (typeCases[0] == "kLiteral" && typeCases[1] == "kSelection")) { auto x = functionRevertMap.find(functionName); if (x != functionRevertMap.end()) { - // change the function name: lt => gt, gt => lt, gte => lte, lte => gte + // Change the function name: lt => gt, gt => lt, gte => lte, lte => gte. functionName = x->second; } } @@ -1817,17 +1831,23 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters( } bool nullAllowed = filterInfo.nullAllowed_; + bool isNull = filterInfo.isNull_; uint32_t rangeSize = std::max(filterInfo.lowerBounds_.size(), filterInfo.upperBounds_.size()); if constexpr (KIND == facebook::velox::TypeKind::HUGEINT) { // TODO: open it when the Velox's modification is ready. VELOX_NYI("constructSubfieldFilters not support for HUGEINT type"); } else if constexpr (KIND == facebook::velox::TypeKind::ARRAY || KIND == facebook::velox::TypeKind::MAP) { - // Only IsNotNull filter is supported for the above two type kinds now. - if (rangeSize == 0 && !nullAllowed) { - filters[common::Subfield(inputName)] = std::move(std::make_unique()); - } else { - VELOX_NYI("constructSubfieldFilters only support IsNotNull for input type '{}'", inputType); + // Only IsNotNull and IsNull are supported for array and map types. + if (rangeSize == 0) { + if (!nullAllowed) { + filters[common::Subfield(inputName)] = std::move(std::make_unique()); + } else if (isNull) { + filters[common::Subfield(inputName)] = std::move(std::make_unique()); + } else { + VELOX_NYI( + "Only IsNotNull and IsNull are supported in constructSubfieldFilters for input type '{}'.", inputType); + } } } else { using NativeType = typename RangeTraits::NativeType; @@ -1866,9 +1886,14 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters( } // Handle null filtering. - if (rangeSize == 0 && !nullAllowed) { - std::unique_ptr filter = std::make_unique(); - filters[common::Subfield(inputName)] = std::move(filter); + if (rangeSize == 0) { + if (!nullAllowed) { + filters[common::Subfield(inputName)] = std::move(std::make_unique()); + } else if (isNull) { + filters[common::Subfield(inputName)] = std::move(std::make_unique()); + } else { + VELOX_NYI("Only IsNotNull and IsNull are supported in constructSubfieldFilters when no other filter ranges."); + } return; } diff --git a/cpp/velox/substrait/SubstraitToVeloxPlan.h b/cpp/velox/substrait/SubstraitToVeloxPlan.h index f8ad7d072725..3a13fa1109ae 100644 --- a/cpp/velox/substrait/SubstraitToVeloxPlan.h +++ b/cpp/velox/substrait/SubstraitToVeloxPlan.h @@ -190,7 +190,7 @@ class SubstraitToVeloxPlanConverter { if (forOrRelation) { return true; } - if (inRange_ || multiRange_ || leftBound_ || rightBound_) { + if (inRange_ || multiRange_ || leftBound_ || rightBound_ || isNull_) { return false; } inRange_ = true; @@ -205,7 +205,7 @@ class SubstraitToVeloxPlanConverter { leftBound_ = true; return !rightBound_; } - if (leftBound_ || inRange_ || multiRange_) { + if (leftBound_ || inRange_ || multiRange_ || isNull_) { return false; } leftBound_ = true; @@ -220,23 +220,33 @@ class SubstraitToVeloxPlanConverter { rightBound_ = true; return !leftBound_; } - if (rightBound_ || inRange_ || multiRange_) { + if (rightBound_ || inRange_ || multiRange_ || isNull_) { return false; } rightBound_ = true; return true; } - /// Set the multi-range and returns whether it can coexist with + /// Set the existence of multi-range and returns whether it can coexist with /// existing conditions for this field. bool setMultiRange() { - if (inRange_ || multiRange_ || leftBound_ || rightBound_) { + if (inRange_ || multiRange_ || leftBound_ || rightBound_ || isNull_) { return false; } multiRange_ = true; return true; } + /// Set the existence of IsNull and returns whether it can coexist with + /// existing conditions for this field. + bool setIsNull() { + if (inRange_ || multiRange_ || leftBound_ || rightBound_) { + return false; + } + isNull_ = true; + return true; + } + /// Set certain existence according to function name and returns whether it /// can coexist with existing conditions for this field. bool setCertainRangeForFunction(const std::string& functionName, bool reverse = false, bool forOrRelation = false); @@ -253,12 +263,15 @@ class SubstraitToVeloxPlanConverter { /// The existence of multi-range. bool multiRange_ = false; + + /// The existence of IsNull. + bool isNull_ = false; }; /// Filter info for a column used in filter push down. class FilterInfo { public: - // Disable null allow. + // Null is not allowed. void forbidsNull() { nullAllowed_ = false; if (!initialized_) { @@ -266,6 +279,15 @@ class SubstraitToVeloxPlanConverter { } } + // Only null is allowed. + void setNull() { + isNull_ = true; + nullAllowed_ = true; + if (!initialized_) { + initialized_ = true; + } + } + // Return the initialization status. bool isInitialized() const { return initialized_; @@ -312,8 +334,8 @@ class SubstraitToVeloxPlanConverter { // Whether this filter map is initialized. bool initialized_ = false; - // The null allow. bool nullAllowed_ = false; + bool isNull_ = false; // If true, left bound will be exclusive. std::vector lowerExclusives_; @@ -360,7 +382,7 @@ class SubstraitToVeloxPlanConverter { const dwio::common::FileFormat& format); /// Returns whether a function can be pushed down. - static bool canPushdownCommonFunction( + static bool canPushdownFunction( const ::substrait::Expression_ScalarFunction& scalarFunction, const std::string& filterName, uint32_t& fieldIdx);