Skip to content

Commit

Permalink
Remove self-registered tokenizer
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Aug 5, 2024
1 parent 53abee6 commit c61b8b6
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 31 deletions.
1 change: 0 additions & 1 deletion cpp/velox/compute/VeloxBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,6 @@ void VeloxBackend::init(const std::unordered_map<std::string, std::string>& conf
velox::exec::Operator::registerOperator(std::make_unique<RowVectorStreamOperatorTranslator>());

initUdf();
registerSparkTokenizer();

// initialize the global memory manager for current process
facebook::velox::memory::MemoryManager::initialize({});
Expand Down
63 changes: 35 additions & 28 deletions cpp/velox/substrait/SubstraitToVeloxPlan.cc
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,13 @@ RowTypePtr getJoinOutputType(
}
VELOX_FAIL("Output should include left or right columns.");
}

std::shared_ptr<common::Separators> getSeparator() {
auto separators = std::make_shared<common::Separators>();
separators->dot = '\0';
return separators;
}

} // namespace

core::PlanNodePtr SubstraitToVeloxPlanConverter::processEmit(
Expand Down Expand Up @@ -2018,9 +2025,9 @@ void SubstraitToVeloxPlanConverter::setInFilter<TypeKind::BIGINT>(
values.emplace_back(value);
}
if (negated) {
filters[common::Subfield(inputName)] = common::createNegatedBigintValues(values, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = common::createNegatedBigintValues(values, nullAllowed);
} else {
filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = common::createBigintValues(values, nullAllowed);
}
}

Expand All @@ -2040,9 +2047,9 @@ void SubstraitToVeloxPlanConverter::setInFilter<TypeKind::INTEGER>(
values.emplace_back(value);
}
if (negated) {
filters[common::Subfield(inputName)] = common::createNegatedBigintValues(values, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = common::createNegatedBigintValues(values, nullAllowed);
} else {
filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = common::createBigintValues(values, nullAllowed);
}
}

Expand All @@ -2062,9 +2069,9 @@ void SubstraitToVeloxPlanConverter::setInFilter<TypeKind::SMALLINT>(
values.emplace_back(value);
}
if (negated) {
filters[common::Subfield(inputName)] = common::createNegatedBigintValues(values, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = common::createNegatedBigintValues(values, nullAllowed);
} else {
filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = common::createBigintValues(values, nullAllowed);
}
}

Expand All @@ -2084,9 +2091,9 @@ void SubstraitToVeloxPlanConverter::setInFilter<TypeKind::TINYINT>(
values.emplace_back(value);
}
if (negated) {
filters[common::Subfield(inputName)] = common::createNegatedBigintValues(values, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = common::createNegatedBigintValues(values, nullAllowed);
} else {
filters[common::Subfield(inputName)] = common::createBigintValues(values, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = common::createBigintValues(values, nullAllowed);
}
}

Expand All @@ -2104,9 +2111,9 @@ void SubstraitToVeloxPlanConverter::setInFilter<TypeKind::VARCHAR>(
values.emplace_back(value);
}
if (negated) {
filters[common::Subfield(inputName)] = std::make_unique<common::NegatedBytesValues>(values, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::NegatedBytesValues>(values, nullAllowed);
} else {
filters[common::Subfield(inputName)] = std::make_unique<common::BytesValues>(values, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::BytesValues>(values, nullAllowed);
}
}

Expand All @@ -2119,7 +2126,7 @@ void SubstraitToVeloxPlanConverter::setSubfieldFilter(
using MultiRangeType = typename RangeTraits<KIND>::MultiRangeType;

if (colFilters.size() == 1) {
filters[common::Subfield(inputName)] = std::move(colFilters[0]);
filters[common::Subfield(inputName, getSeparator())] = std::move(colFilters[0]);
} else if (colFilters.size() > 1) {
// BigintMultiRange should have been sorted
if (colFilters[0]->kind() == common::FilterKind::kBigintRange) {
Expand All @@ -2129,10 +2136,10 @@ void SubstraitToVeloxPlanConverter::setSubfieldFilter(
});
}
if constexpr (std::is_same_v<MultiRangeType, common::MultiRange>) {
filters[common::Subfield(inputName)] =
filters[common::Subfield(inputName, getSeparator())] =
std::make_unique<common::MultiRange>(std::move(colFilters), nullAllowed, true /*nanAllowed*/);
} else {
filters[common::Subfield(inputName)] = std::make_unique<MultiRangeType>(std::move(colFilters), nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<MultiRangeType>(std::move(colFilters), nullAllowed);
}
}
}
Expand Down Expand Up @@ -2160,25 +2167,25 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters(
// Handle bool type filters.
// Not equal.
if (filterInfo.notValue_) {
filters[common::Subfield(inputName)] =
filters[common::Subfield(inputName, getSeparator())] =
std::make_unique<common::BoolValue>(!filterInfo.notValue_.value().value<bool>(), nullAllowed);
} else if (filterInfo.notValues_.size() > 0) {
std::set<bool> notValues;
for (auto v : filterInfo.notValues_) {
notValues.emplace(v.value<bool>());
}
if (notValues.size() == 1) {
filters[common::Subfield(inputName)] = std::make_unique<common::BoolValue>(!(*notValues.begin()), nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::BoolValue>(!(*notValues.begin()), nullAllowed);
} else {
// If there is more than one distinct value in the NOT IN list, the filter should be AlwaysFalse.
filters[common::Subfield(inputName)] = std::make_unique<common::AlwaysFalse>();
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::AlwaysFalse>();
}
} else if (rangeSize == 0) {
// IsNull/IsNotNull.
if (!nullAllowed) {
filters[common::Subfield(inputName)] = std::make_unique<common::IsNotNull>();
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::IsNotNull>();
} else if (isNull) {
filters[common::Subfield(inputName)] = std::make_unique<common::IsNull>();
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::IsNull>();
} else {
VELOX_NYI("Only IsNotNull and IsNull are supported in constructSubfieldFilters when no other filter ranges.");
}
Expand All @@ -2187,17 +2194,17 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters(
// Equal.
auto value = filterInfo.lowerBounds_[0].value().value<bool>();
VELOX_CHECK(value == filterInfo.upperBounds_[0].value().value<bool>(), "invalid state of bool equal");
filters[common::Subfield(inputName)] = std::make_unique<common::BoolValue>(value, nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::BoolValue>(value, nullAllowed);
}
} else if constexpr (
KIND == facebook::velox::TypeKind::ARRAY || KIND == facebook::velox::TypeKind::MAP ||
KIND == facebook::velox::TypeKind::ROW) {
// Only IsNotNull and IsNull are supported for complex types.
VELOX_CHECK_EQ(rangeSize, 0, "Only IsNotNull and IsNull are supported for complex type.");
if (!nullAllowed) {
filters[common::Subfield(inputName)] = std::make_unique<common::IsNotNull>();
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::IsNotNull>();
} else if (isNull) {
filters[common::Subfield(inputName)] = std::make_unique<common::IsNull>();
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::IsNull>();
} else {
VELOX_NYI("Only IsNotNull and IsNull are supported for input type '{}'.", inputType->toString());
}
Expand Down Expand Up @@ -2241,16 +2248,16 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters(
VELOX_CHECK(rangeSize == 0, "LowerBounds or upperBounds conditons cannot be supported after not-equal filter.");
if constexpr (std::is_same_v<MultiRangeType, common::MultiRange>) {
if (colFilters.size() == 1) {
filters[common::Subfield(inputName)] = std::move(colFilters.front());
filters[common::Subfield(inputName, getSeparator())] = std::move(colFilters.front());
} else {
filters[common::Subfield(inputName)] =
filters[common::Subfield(inputName, getSeparator())] =
std::make_unique<common::MultiRange>(std::move(colFilters), nullAllowed, true /*nanAllowed*/);
}
} else {
if (colFilters.size() == 1) {
filters[common::Subfield(inputName)] = std::move(colFilters.front());
filters[common::Subfield(inputName, getSeparator())] = std::move(colFilters.front());
} else {
filters[common::Subfield(inputName)] = std::make_unique<MultiRangeType>(std::move(colFilters), nullAllowed);
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<MultiRangeType>(std::move(colFilters), nullAllowed);
}
}
return;
Expand All @@ -2260,11 +2267,11 @@ void SubstraitToVeloxPlanConverter::constructSubfieldFilters(
if (rangeSize == 0) {
// Handle the case where IsNotNull and IsNull both exist at the same time.
if (existIsNullAndIsNotNull) {
filters[common::Subfield(inputName)] = std::move(std::make_unique<common::AlwaysFalse>());
filters[common::Subfield(inputName, getSeparator())] = std::move(std::make_unique<common::AlwaysFalse>());
} else if (!nullAllowed) {
filters[common::Subfield(inputName)] = std::make_unique<common::IsNotNull>();
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::IsNotNull>();
} else if (isNull) {
filters[common::Subfield(inputName)] = std::make_unique<common::IsNull>();
filters[common::Subfield(inputName, getSeparator())] = std::make_unique<common::IsNull>();
} else {
VELOX_NYI("Only IsNotNull and IsNull are supported in constructSubfieldFilters when no other filter ranges.");
}
Expand Down
4 changes: 2 additions & 2 deletions ep/build-velox/src/get_velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

set -exu

VELOX_REPO=https://github.com/oap-project/velox.git
VELOX_BRANCH=2024_08_02
VELOX_REPO=https://github.com/rui-mo/velox.git
VELOX_BRANCH=token
VELOX_HOME=""

OS=`uname -s`
Expand Down

0 comments on commit c61b8b6

Please sign in to comment.