Skip to content

Commit

Permalink
[VL] Support IsNull filter pushdown (apache#3791)
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo authored Nov 28, 2023
1 parent b707ba6 commit 824ee53
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 24 deletions.
57 changes: 41 additions & 16 deletions cpp/velox/substrait/SubstraitToVeloxPlan.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ std::string getMax<std::string>() {

// Substrait function names.
const std::string sIsNotNull = "is_not_null";
const std::string sIsNull = "is_null";
const std::string sGte = "gte";
const std::string sGt = "gt";
const std::string sLte = "lte";
Expand Down Expand Up @@ -1275,15 +1276,15 @@ bool SubstraitToVeloxPlanConverter::childrenFunctionsOnSameField(
return false;
}

bool SubstraitToVeloxPlanConverter::canPushdownCommonFunction(
bool SubstraitToVeloxPlanConverter::canPushdownFunction(
const ::substrait::Expression_ScalarFunction& scalarFunction,
const std::string& filterName,
uint32_t& fieldIdx) {
// Condtions can be pushed down.
static const std::unordered_set<std::string> supportedCommonFunctions = {sIsNotNull, sGte, sGt, sLte, sLt, sEqual};
static const std::unordered_set<std::string> supportedFunctions = {sIsNotNull, sIsNull, sGte, sGt, sLte, sLt, sEqual};

bool canPushdown = false;
if (supportedCommonFunctions.find(filterName) != supportedCommonFunctions.end() &&
if (supportedFunctions.find(filterName) != supportedFunctions.end() &&
fieldOrWithLiteral(scalarFunction.arguments(), fieldIdx)) {
// The arg should be field or field with literal.
canPushdown = true;
Expand Down Expand Up @@ -1417,7 +1418,7 @@ void SubstraitToVeloxPlanConverter::separateFilters(
} else {
// Check if the condition is supported to be pushed down.
uint32_t fieldIdx;
if (canPushdownCommonFunction(scalarFunction, filterName, fieldIdx) &&
if (canPushdownFunction(scalarFunction, filterName, fieldIdx) &&
rangeRecorders.at(fieldIdx).setCertainRangeForFunction(filterName)) {
subfieldFunctions.emplace_back(scalarFunction);
} else {
Expand Down Expand Up @@ -1465,6 +1466,12 @@ bool SubstraitToVeloxPlanConverter::RangeRecorder::setCertainRangeForFunction(
// Is not null can always coexist with the other range.
return true;
}
} else if (functionName == sIsNull) {
if (reverse) {
return setCertainRangeForFunction(sIsNotNull, false, forOrRelation);
} else {
return setIsNull();
}
} else {
return false;
}
Expand All @@ -1477,9 +1484,16 @@ void SubstraitToVeloxPlanConverter::setColumnFilterInfo(
bool reverse) {
if (filterName == sIsNotNull) {
if (reverse) {
VELOX_NYI("Reverse not supported for filter name '{}'", filterName);
columnFilterInfo.setNull();
} else {
columnFilterInfo.forbidsNull();
}
} else if (filterName == sIsNull) {
if (reverse) {
columnFilterInfo.forbidsNull();
} else {
columnFilterInfo.setNull();
}
columnFilterInfo.forbidsNull();
} else if (filterName == sGte) {
if (reverse) {
columnFilterInfo.setUpper(literalVariant, true);
Expand Down Expand Up @@ -1548,11 +1562,11 @@ void SubstraitToVeloxPlanConverter::setFilterInfo(
static const std::unordered_map<std::string, std::string> functionRevertMap = {
{sLt, sGt}, {sGt, sLt}, {sGte, sLte}, {sLte, sGte}};

// Handle "123 < q1" type expression case
// Handle the case where literal is before the variable in a binary function, e.g. "123 < q1".
if (typeCases.size() > 1 && (typeCases[0] == "kLiteral" && typeCases[1] == "kSelection")) {
auto x = functionRevertMap.find(functionName);
if (x != functionRevertMap.end()) {
// change the function name: lt => gt, gt => lt, gte => lte, lte => gte
// Change the function name: lt => gt, gt => lt, gte => lte, lte => gte.
functionName = x->second;
}
}
Expand Down Expand Up @@ -1817,17 +1831,23 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters(
}

bool nullAllowed = filterInfo.nullAllowed_;
bool isNull = filterInfo.isNull_;
uint32_t rangeSize = std::max(filterInfo.lowerBounds_.size(), filterInfo.upperBounds_.size());

if constexpr (KIND == facebook::velox::TypeKind::HUGEINT) {
// TODO: open it when the Velox's modification is ready.
VELOX_NYI("constructSubfieldFilters not support for HUGEINT type");
} else if constexpr (KIND == facebook::velox::TypeKind::ARRAY || KIND == facebook::velox::TypeKind::MAP) {
// Only IsNotNull filter is supported for the above two type kinds now.
if (rangeSize == 0 && !nullAllowed) {
filters[common::Subfield(inputName)] = std::move(std::make_unique<common::IsNotNull>());
} else {
VELOX_NYI("constructSubfieldFilters only support IsNotNull for input type '{}'", inputType);
// Only IsNotNull and IsNull are supported for array and map types.
if (rangeSize == 0) {
if (!nullAllowed) {
filters[common::Subfield(inputName)] = std::move(std::make_unique<common::IsNotNull>());
} else if (isNull) {
filters[common::Subfield(inputName)] = std::move(std::make_unique<common::IsNull>());
} else {
VELOX_NYI(
"Only IsNotNull and IsNull are supported in constructSubfieldFilters for input type '{}'.", inputType);
}
}
} else {
using NativeType = typename RangeTraits<KIND>::NativeType;
Expand Down Expand Up @@ -1866,9 +1886,14 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters(
}

// Handle null filtering.
if (rangeSize == 0 && !nullAllowed) {
std::unique_ptr<common::IsNotNull> filter = std::make_unique<common::IsNotNull>();
filters[common::Subfield(inputName)] = std::move(filter);
if (rangeSize == 0) {
if (!nullAllowed) {
filters[common::Subfield(inputName)] = std::move(std::make_unique<common::IsNotNull>());
} else if (isNull) {
filters[common::Subfield(inputName)] = std::move(std::make_unique<common::IsNull>());
} else {
VELOX_NYI("Only IsNotNull and IsNull are supported in constructSubfieldFilters when no other filter ranges.");
}
return;
}

Expand Down
38 changes: 30 additions & 8 deletions cpp/velox/substrait/SubstraitToVeloxPlan.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ class SubstraitToVeloxPlanConverter {
if (forOrRelation) {
return true;
}
if (inRange_ || multiRange_ || leftBound_ || rightBound_) {
if (inRange_ || multiRange_ || leftBound_ || rightBound_ || isNull_) {
return false;
}
inRange_ = true;
Expand All @@ -205,7 +205,7 @@ class SubstraitToVeloxPlanConverter {
leftBound_ = true;
return !rightBound_;
}
if (leftBound_ || inRange_ || multiRange_) {
if (leftBound_ || inRange_ || multiRange_ || isNull_) {
return false;
}
leftBound_ = true;
Expand All @@ -220,23 +220,33 @@ class SubstraitToVeloxPlanConverter {
rightBound_ = true;
return !leftBound_;
}
if (rightBound_ || inRange_ || multiRange_) {
if (rightBound_ || inRange_ || multiRange_ || isNull_) {
return false;
}
rightBound_ = true;
return true;
}

/// Set the multi-range and returns whether it can coexist with
/// Set the existence of multi-range and returns whether it can coexist with
/// existing conditions for this field.
bool setMultiRange() {
if (inRange_ || multiRange_ || leftBound_ || rightBound_) {
if (inRange_ || multiRange_ || leftBound_ || rightBound_ || isNull_) {
return false;
}
multiRange_ = true;
return true;
}

/// Set the existence of IsNull and returns whether it can coexist with
/// existing conditions for this field.
bool setIsNull() {
if (inRange_ || multiRange_ || leftBound_ || rightBound_) {
return false;
}
isNull_ = true;
return true;
}

/// Set certain existence according to function name and returns whether it
/// can coexist with existing conditions for this field.
bool setCertainRangeForFunction(const std::string& functionName, bool reverse = false, bool forOrRelation = false);
Expand All @@ -253,19 +263,31 @@ class SubstraitToVeloxPlanConverter {

/// The existence of multi-range.
bool multiRange_ = false;

/// The existence of IsNull.
bool isNull_ = false;
};

/// Filter info for a column used in filter push down.
class FilterInfo {
public:
// Disable null allow.
// Null is not allowed.
void forbidsNull() {
nullAllowed_ = false;
if (!initialized_) {
initialized_ = true;
}
}

// Only null is allowed.
void setNull() {
isNull_ = true;
nullAllowed_ = true;
if (!initialized_) {
initialized_ = true;
}
}

// Return the initialization status.
bool isInitialized() const {
return initialized_;
Expand Down Expand Up @@ -312,8 +334,8 @@ class SubstraitToVeloxPlanConverter {
// Whether this filter map is initialized.
bool initialized_ = false;

// The null allow.
bool nullAllowed_ = false;
bool isNull_ = false;

// If true, left bound will be exclusive.
std::vector<bool> lowerExclusives_;
Expand Down Expand Up @@ -360,7 +382,7 @@ class SubstraitToVeloxPlanConverter {
const dwio::common::FileFormat& format);

/// Returns whether a function can be pushed down.
static bool canPushdownCommonFunction(
static bool canPushdownFunction(
const ::substrait::Expression_ScalarFunction& scalarFunction,
const std::string& filterName,
uint32_t& fieldIdx);
Expand Down

0 comments on commit 824ee53

Please sign in to comment.